2010年6月3日木曜日

htmlタグを除いてマルチバイト150文字にtruncateする関数

HTMLタグは文字数に数えずにそのまま残し、本文だけをマルチバイト文字で指定した文字数(例: 150文字)までに切り詰める。途中で開いたままになったタグは末尾で閉じる。


/********************************************************************************************
myTruncate() :
- Purpose : Truncate an HTML string to at most $maxLength visible characters.
            Tags are copied through without being counted, each character entity
            (e.g. &amp;) counts as one character, and any element still open at the
            cut point is closed so the result stays well-formed.
- Params  : $html      HTML string to truncate (handled as UTF-8).
            $maxLength maximum number of visible (non-tag) characters to keep.
            $url       target of the "続きを読む" (read more) link that is appended
                       when the output is shorter than the input.
- Returns : the truncated HTML wrapped in <p>...</p>, followed by the read-more
            paragraph when something was cut off.
- Throws  : \InvalidArgumentException when a closing tag does not match the most
            recently opened element (improperly nested input).
- Note    : sets mb_internal_encoding() to UTF-8 as a side effect.
********************************************************************************************/
function myTruncate($html, $maxLength, $url) {
    // Elements that never have a closing tag; they must not be pushed on the
    // open-tag stack even when written without a trailing slash (<br>, <img ...>).
    $voidElements = array(
        'area', 'base', 'br', 'col', 'embed', 'hr', 'img', 'input',
        'link', 'meta', 'param', 'source', 'track', 'wbr',
    );

    $printedLength = 0;   // visible characters emitted so far
    $position = 0;        // current offset into $html
    $tags = array();      // stack of currently open element names
    $printstr = '';       // output buffer

    mb_internal_encoding("UTF-8");

    // The plain preg_match() of the original algorithm was swapped for a multibyte
    // helper. NOTE(review): the offsets it returns are fed straight into mb_substr(),
    // so they are assumed to be character offsets -- confirm against mb_preg_match().
    while ($printedLength < $maxLength && $this->mb_preg_match('{</?([^>]+)>|&#?[a-zA-Z0-9]+;}', $html, $match, PREG_OFFSET_CAPTURE, $position))
        {
            list($tag, $tagPosition) = $match[0];

            // Emit the text leading up to the tag/entity, cutting it if it overflows.
            $str = mb_substr($html, $position, $tagPosition - $position);
            if ($printedLength + mb_strlen($str) > $maxLength)
                {
                    $printstr .= mb_substr($str, 0, $maxLength - $printedLength);
                    $printedLength = $maxLength;
                    break;
                }

            $printstr .= $str;
            $printedLength += mb_strlen($str);

            if ($tag[0] === '&')
                {
                    // An entity is one visible character; if the preceding text already
                    // used up the budget, emitting it would exceed $maxLength by one.
                    if ($printedLength >= $maxLength)
                        {
                            break;
                        }

                    $printstr .= $tag;
                    $printedLength++;
                }
            else
                {
                    // Element name = captured tag content up to the first whitespace
                    // (space, tab or newline), i.e. without its attributes.
                    $rawName = $match[1][0];
                    $tagName = preg_replace('/\s.*\z/s', '', $rawName);
                    if ($tagName === null)
                        {
                            $tagName = $rawName;
                        }

                    // Comments, doctypes and processing instructions never nest.
                    $isDeclaration = (strncmp($tagName, '!', 1) === 0 || strncmp($tagName, '?', 1) === 0);

                    if ($tag[1] === '/')
                        {
                            // Closing tag: it must match the most recently opened element.
                            $openingTag = array_pop($tags);
                            if ($openingTag !== $tagName)
                                {
                                    throw new \InvalidArgumentException(sprintf(
                                        'myTruncate(): improperly nested HTML, found </%s> while %s',
                                        $tagName,
                                        $openingTag === null ? 'no element is open' : '<' . $openingTag . '> is open'
                                    ));
                                }

                            $printstr .= $tag;
                        }
                    else if (mb_substr($tag, -2, 1) === '/' || $isDeclaration || in_array(strtolower($tagName), $voidElements, true))
                        {
                            // Self-closing tag, void element or declaration: copy it through,
                            // nothing to close later. mb_substr() is used because a byte index
                            // derived from mb_strlen() is wrong for multibyte attribute values.
                            $printstr .= $tag;
                        }
                    else
                        {
                            // Opening tag: remember it so it can be closed after truncation.
                            $printstr .= $tag;
                            $tags[] = $tagName;
                        }
                }

            // Continue after the tag.
            $position = $tagPosition + mb_strlen($tag);
        }

    // Emit any remaining text that still fits.
    if ($printedLength < $maxLength && $position < mb_strlen($html))
        {
            $printstr .= mb_substr($html, $position, $maxLength - $printedLength);
        }

    // Close any elements left open, innermost first.
    while (!empty($tags))
        {
            $printstr .= sprintf('</%s>', array_pop($tags));
        }

    // Offer a "read more" link only when the output is shorter than the input.
    if (mb_strlen($html) > mb_strlen($printstr)) {
        // Escape the URL for the attribute context; double_encode=false keeps
        // already-escaped URLs (e.g. containing &amp;) unchanged.
        $safeUrl = htmlspecialchars($url, ENT_QUOTES, 'UTF-8', false);
        $readmore = '<p class="nav"><a href="' . $safeUrl . '" title="続きを読む">続きを読む</a></p>';
    } else {
        $readmore = '';
    }

    return '<p>' . $printstr . '</p>' . $readmore;
}

0 件のコメント:

コメントを投稿