HTMLタグは文字数に含めず(タグ自体は出力に残したまま)、マルチバイト文字を先頭から150文字取り出す。
/**
 * Truncate an HTML fragment to at most $maxLength visible characters.
 *
 * Tags are copied to the output without being counted, each HTML entity
 * (for example &amp; or &#12354;) counts as one character, and every element
 * that is still open at the cut-off point is closed again so the result stays
 * balanced. The truncated fragment is wrapped in <p>...</p>; when the output
 * is shorter (in characters) than the input, a "read more" paragraph linking
 * to $url is appended.
 *
 * Mis-nested markup is repaired instead of aborting the script: a closing tag
 * that skips over inner open elements closes those elements first, and a
 * closing tag with no matching open element is dropped.
 *
 * Side effect: sets the mbstring internal encoding to UTF-8.
 *
 * NOTE(review): relies on $this->mb_preg_match(), which is not visible here.
 * It is presumably a preg_match() work-alike that reports match offsets in
 * characters rather than bytes, since those offsets are fed to mb_substr() -
 * confirm against its definition.
 *
 * @param string $html      HTML fragment to truncate (assumed to be UTF-8).
 * @param int    $maxLength Maximum number of visible characters to keep.
 * @param string $url       Target of the "read more" link. It is inserted
 *                          into the href attribute verbatim, so the caller
 *                          must pass an already HTML-escaped value.
 * @return string The truncated HTML wrapped in a paragraph, optionally
 *                followed by the "read more" paragraph.
 */
function myTruncate($html, $maxLength, $url) {
    // Elements that never have a closing tag, even when written without a
    // trailing slash. They must not be tracked as "open".
    $voidElements = array(
        'area', 'base', 'br', 'col', 'embed', 'hr', 'img',
        'input', 'link', 'meta', 'param', 'source', 'track', 'wbr',
    );

    $printedLength = 0;   // visible characters emitted so far
    $position = 0;        // current offset into $html
    $tags = array();      // stack of currently open element names
    $printstr = '';       // output buffer
    mb_internal_encoding("UTF-8");

    // Walk the input tag by tag / entity by entity. The multibyte-aware
    // matcher is used instead of plain preg_match() so that the captured
    // offsets can be combined with mb_substr() below.
    while ($printedLength < $maxLength && $this->mb_preg_match('{</?([^>]+)>|&#?[a-zA-Z0-9]+;}', $html, $match, PREG_OFFSET_CAPTURE, $position)) {
        list($tag, $tagPosition) = $match[0];

        // Emit the text leading up to the tag, cutting it if it would
        // exceed the limit.
        $str = mb_substr($html, $position, $tagPosition - $position);
        if ($printedLength + mb_strlen($str) > $maxLength) {
            $printstr .= mb_substr($str, 0, $maxLength - $printedLength);
            $printedLength = $maxLength;
            break;
        }
        $printstr .= $str;
        $printedLength += mb_strlen($str);

        if ($tag[0] === '&') {
            // An entity renders as a single character.
            $printstr .= $tag;
            $printedLength++;
        } else {
            // Reduce the captured tag content to the bare element name:
            // everything up to the first whitespace or slash. Tag-name
            // delimiters are ASCII, so a byte-wise match is safe for UTF-8.
            $tagName = $match[1][0];
            if (preg_match('{^[^ \t\r\n\f/]+}', $tagName, $nameMatch)) {
                $tagName = $nameMatch[0];
            }

            if ($tag[1] === '/') {
                // Closing tag: find the nearest matching open element.
                $openIndex = -1;
                for ($i = count($tags) - 1; $i >= 0; $i--) {
                    if (strcasecmp($tags[$i], $tagName) === 0) {
                        $openIndex = $i;
                        break;
                    }
                }
                if ($openIndex >= 0) {
                    // Close any elements left open inside it first so the
                    // output stays properly nested.
                    while (count($tags) - 1 > $openIndex) {
                        $printstr .= sprintf('</%s>', array_pop($tags));
                    }
                    array_pop($tags);
                    $printstr .= $tag;
                }
                // A closing tag without a matching open element is dropped.
            } else if (
                substr($tag, -2, 1) === '/'
                || $tagName[0] === '!'
                || $tagName[0] === '?'
                || in_array(strtolower($tagName), $voidElements, true)
            ) {
                // Self-closing tag, void element, comment, doctype or
                // processing instruction: copy it through, nothing to close.
                // substr() is used because string offsets are byte-based and
                // the tag may contain multibyte attribute values.
                $printstr .= $tag;
            } else {
                // Opening tag: remember it so it can be closed later.
                $printstr .= $tag;
                $tags[] = $tagName;
            }
        }

        // Continue after the tag.
        $position = $tagPosition + mb_strlen($tag);
    }

    // Emit whatever plain text remains, up to the limit.
    if ($printedLength < $maxLength && $position < mb_strlen($html)) {
        $printstr .= mb_substr($html, $position, $maxLength - $printedLength);
    }

    // Close any elements that are still open, innermost first.
    while (!empty($tags)) {
        $printstr .= sprintf('</%s>', array_pop($tags));
    }

    // Offer a "read more" link only when the output is shorter than the input.
    if (mb_strlen($html) > mb_strlen($printstr)) {
        $readmore = '<p class="nav"><a href="' . $url . '" title="続きを読む">続きを読む</a></p>';
    } else {
        $readmore = '';
    }

    return '<p>' . $printstr . '</p>' . $readmore;
}