Cut HTML string


Published in: PHP
Save to your folder(s)

In PHP, it is easy to extract an excerpt of a text string with a given length limit. But if you want to extract an excerpt from HTML, the tags that may exist in the text string make it more complicated.

This class provides a solution to extract excerpts from HTML documents with a given text length limit without counting the length of any HTML tags.


Copy this code and paste it in your HTML
  1. <?php
  2.  
  3. /*
  4. In PHP, it is easy to extract an excerpt of a text string with a given length limit. But if you want to extract an excerpt from HTML, the tags that may exist in the text string make it more complicated.
  5.  
  6. This class provides a solution to extract excerpts from HTML documents with a given text length limit without counting the length of any HTML tags.
  7.  
  8. */
  9. // Author prajwala
  10. // email  [email protected]
  11. // Date   12/04/2009
  12. // version 1.0
  13.  
  /**
   * Extracts an excerpt from a markup string, limited by the number of text
   * characters; tags and attributes are copied through and are not counted.
   *
   * The input is parsed with DOMDocument::loadXML() inside a wrapping <div>,
   * so it has to be well-formed XML (closed tags, quoted attributes, no
   * undeclared named entities). If parsing fails, cut() yields an empty document.
   *
   * Usage:
   *   $cutter = new HtmlCutString('<p>Hello <b>world</b></p>', 8);
   *   echo $cutter->cut(); // <p>Hello <b>wo</b></p>
   */
  class HtmlCutString{
    /** @var DOMDocument Parsed input, wrapped in a single <div> root element. */
    public $tempDiv;

    /** @var DOMDocument|null Output document; (re)created on every call to cut(). */
    public $newDiv;

    /** @var int Number of text characters copied into the output so far. */
    public $charCount = 0;

    /** @var string Encoding passed to the mb_* functions when measuring and cutting text. */
    public $encoding = 'UTF-8';

    /** @var int Maximum number of text characters to keep. */
    public $limit;

    /**
     * @param string $string Markup to cut; must be well-formed once wrapped in a <div>.
     * @param int    $limit  Maximum number of text characters in the excerpt.
     */
    public function __construct($string, $limit){
      // create dom element using the html string
      $this->tempDiv = new DOMDocument;
      $this->tempDiv->loadXML('<div>'.$string.'</div>');
      // keep the characters count till now
      $this->charCount = 0;
      $this->encoding = 'UTF-8';
      // character limit need to check
      $this->limit = $limit;
    }

    /**
     * Builds the excerpt and returns it serialized with DOMDocument::saveHTML().
     *
     * @return string The truncated markup.
     */
    public function cut(){
      // create empty document to store new html
      $this->newDiv = new DOMDocument;
      // start counting from zero so that repeated calls give the same result
      $this->charCount = 0;

      // documentElement is null when loadXML() could not parse the input
      $root = $this->tempDiv->documentElement;
      if ($root !== null) {
        // cut the string by parsing through each element
        $this->searchEnd($root, $this->newDiv);
      }
      return $this->newDiv->saveHTML();
    }

    /**
     * Recursively removes every descendant of $node, leaving the node itself
     * (and its attributes) in place.
     *
     * @param DOMNode $node Node to empty.
     */
    public function deleteChildren($node) {
      while (isset($node->firstChild)) {
        $this->deleteChildren($node->firstChild);
        $node->removeChild($node->firstChild);
      }
    }

    /**
     * Copies the children of $parseDiv under $newParent in document order
     * until the character limit is reached.
     *
     * @param DOMNode $parseDiv  Source node whose children are walked.
     * @param DOMNode $newParent Node of the output document receiving the copies.
     * @return bool True once the limit has been reached (the walk stops), false otherwise.
     */
    public function searchEnd($parseDiv, $newParent){
      foreach ($parseDiv->childNodes as $ele) {
        // not text node
        if ($ele->nodeType != XML_TEXT_NODE) {
          // deep import so that the element's attributes come along
          $newEle = $this->newDiv->importNode($ele, true);
          if (!$ele->hasChildNodes()) {
            // nothing to count inside (e.g. <br/>, comments): copy as is
            $newParent->appendChild($newEle);
            continue;
          }
          // keep only the element shell; children are re-added one by one
          // below so that the limit can be enforced inside of it
          $this->deleteChildren($newEle);
          $newParent->appendChild($newEle);
          if ($this->searchEnd($ele, $newEle)) {
            return true;
          }
          continue;
        }

        $text = $ele->nodeValue;
        $length = mb_strlen($text, $this->encoding);

        // the limit of the char count reached
        if ($length + $this->charCount >= $this->limit) {
          // cut by characters, not bytes, so multi-byte sequences stay intact
          $remaining = max(0, $this->limit - $this->charCount);
          $truncated = mb_substr($text, 0, $remaining, $this->encoding);
          $newParent->appendChild($this->newDiv->createTextNode($truncated));
          return true;
        }

        $newParent->appendChild($this->newDiv->importNode($ele));
        // accumulate, so the limit applies to the whole document
        $this->charCount += $length;
      }
      return false;
    }
  }
  73.  
  /**
   * Procedural shortcut: builds an HtmlCutString for the given markup and
   * limit and returns the result of its cut() method.
   *
   * @param string $string Markup to cut.
   * @param int    $limit  Character limit handed to HtmlCutString.
   * @return string Whatever HtmlCutString::cut() returns for these arguments.
   */
  function cut_html_string($string, $limit){
    $cutter = new HtmlCutString($string, $limit);
    $excerpt = $cutter->cut();
    return $excerpt;
  }
  78.  
  79. ?>

URL: http://www.phpclasses.org/browse/file/26823.html

Report this snippet


Comments

RSS Icon Subscribe to comments

You need to login to post a comment.