Posted By

nigelnquande on 12/16/14


Tagged

php table html DOM text


Versions (?)

HTML Table to plain text


 / Published in: PHP
 

Use PHP's DOM parser to convert a table into plain text (including links with images)

  /**
   * Render the HTML tables of a document as Markdown-like plain text.
   *
   * The output starts with "# <title>" when the document has a <title> element.
   * Every table is framed by "+==========" lines and its rows are separated by
   * "+----------" lines. Header cells are written as "__text__", data cells as
   * their text content. A data cell containing links is written as the links
   * only, in "[text](href)" form, or "[![alt](src)](href)" when the link wraps
   * images. Runs of two or more spaces are collapsed to a single space.
   *
   * @param string      $html HTML markup to parse.
   * @param string|null $id   Optional id of the single element to render. When
   *                          it is a non-empty string only that element is
   *                          rendered; if no element has that id, no table is
   *                          rendered at all.
   *
   * @return string The plain-text rendering.
   *
   * @throws DOMException When the markup cannot be loaded into a DOMDocument.
   */
  function html_table_to_plain($html, $id = NULL) {
      $DOM = new DOMDocument();
      if ($DOM->loadHTML($html) === FALSE) {
          throw new DOMException('Could not Load HTML into DOMDocument');
      }

      $plain_message = '';
      $titles_list = $DOM->getElementsByTagName('title');
      if ($titles_list->length) {
          $plain_message = '# ' . $titles_list->item(0)->nodeValue . "\n\n";
      }

      if (is_string($id) && !empty($id)) {
          // getElementById() yields one element (or null), not a node list,
          // so normalise it to a list before iterating.
          $wanted = $DOM->getElementById($id);
          $tables = ($wanted === NULL) ? array() : array($wanted);
      } else {
          $tables = iterator_to_array($DOM->getElementsByTagName('table'));
      }

      foreach ($tables as $table) {
          $plain_message .= "+==========\n";

          $rows = iterator_to_array($table->getElementsByTagName('tr'));
          $last_row = count($rows) - 1;

          foreach ($rows as $r => $row) {
              $headers = iterator_to_array($row->getElementsByTagName('th'));
              if (count($headers) > 0) {
                  $plain_message .= '| ';
                  foreach ($headers as $th) {
                      $plain_message .= '__' . $th->nodeValue . '__ | ';
                  }
              }

              foreach ($row->getElementsByTagName('td') as $cell) {
                  $links = iterator_to_array($cell->getElementsByTagName('a'));
                  if (count($links) > 0) {
                      // A cell with links is rendered as its links only.
                      foreach ($links as $l) {
                          $plain_message .= '[';
                          $imgs = iterator_to_array($l->getElementsByTagName('img'));
                          if (count($imgs) > 0) {
                              foreach ($imgs as $img) {
                                  $plain_message .= '![' . $img->getAttribute('alt')
                                      . '](' . $img->getAttribute('src') . ')';
                              }
                          } else {
                              $plain_message .= $l->nodeValue;
                          }
                          $plain_message .= '](' . $l->getAttribute('href') . ')';
                      }
                  } else {
                      $plain_message .= $cell->nodeValue;
                  }
                  $plain_message .= ' | ';
              }

              // Separator between rows, but not after the last one.
              if ($r < $last_row) {
                  $plain_message .= "\n+----------\n";
              }
          }

          $plain_message .= "\n+==========\n";
      }

      // Collapse runs of spaces left over from the HTML source formatting.
      $plain_message = preg_replace('|(?mi-Us)[ ]{2,}|', ' ', $plain_message);

      return $plain_message;
  }

Report this snippet  

You need to log in to post a comment.