Revision: 68228
Updated Code
at December 16, 2014 01:13 by nigelnquande
Updated Code
/**
 * Convert the table(s) of an HTML document into plain text.
 *
 * The document <title>, when present, becomes a "# title" heading. Each table is
 * framed by "+==========" lines and its rows are separated by "+----------" lines.
 * Header cells are rendered as __text__, links as Markdown [text](href); a link
 * that wraps images renders each image as ![alt](src) in place of the link text.
 * A cell that contains links is rendered from its links only; any other text in
 * that cell is not emitted.
 *
 * @param string      $html Markup to convert.
 * @param string|null $id   Optional id of the single element to convert. When it is
 *                          not a non-empty string, every <table> is converted.
 *
 * @return string The plain-text rendering. When nothing matches, only the title
 *                heading (or an empty string) is returned.
 *
 * @throws DOMException When the markup cannot be loaded into a DOMDocument.
 */
function html_table_to_plain($html, $id = NULL) {
    $dom = new DOMDocument();
    // loadHTML() reports an unparseable document by returning FALSE.
    if ($dom->loadHTML($html) === FALSE) {
        throw new DOMException('Could not Load HTML into DOMDocument');
    }

    $plain_message = '';
    $titles = $dom->getElementsByTagName('title');
    if ($titles->length) {
        $plain_message = '# ' . $titles->item(0)->nodeValue . "\n\n";
    }

    if (is_string($id) && $id !== '') {
        // getElementById() yields a single element or NULL, never a list, so it
        // has to be wrapped before it can be iterated like the table list.
        $target = $dom->getElementById($id);
        $tables = ($target === NULL) ? array() : array($target);
    }
    else {
        $tables = $dom->getElementsByTagName('table');
    }

    foreach ($tables as $table) {
        $plain_message .= "+==========\n";
        $first_row = TRUE;
        foreach ($table->getElementsByTagName('tr') as $row) {
            // The separator goes between rows, never after the last one.
            if (!$first_row) {
                $plain_message .= "\n+----------\n";
            }
            $first_row = FALSE;

            $headers = $row->getElementsByTagName('th');
            if ($headers->length > 0) {
                $plain_message .= '| ';
                foreach ($headers as $th) {
                    $plain_message .= '__' . $th->nodeValue . '__ | ';
                }
            }

            foreach ($row->getElementsByTagName('td') as $cell) {
                $links = $cell->getElementsByTagName('a');
                if ($links->length > 0) {
                    foreach ($links as $link) {
                        $plain_message .= '[';
                        $images = $link->getElementsByTagName('img');
                        if ($images->length > 0) {
                            // An image link becomes a Markdown image nested in the link.
                            foreach ($images as $img) {
                                $plain_message .= '![' . $img->getAttribute('alt') . '](' . $img->getAttribute('src') . ')';
                            }
                        }
                        else {
                            $plain_message .= $link->nodeValue;
                        }
                        $plain_message .= '](' . $link->getAttribute('href') . ')';
                    }
                }
                else {
                    $plain_message .= $cell->nodeValue;
                }
                $plain_message .= ' | ';
            }
        }
        $plain_message .= "\n+==========\n";
    }

    // Collapse every run of two or more spaces into a single space.
    return preg_replace('|(?mi-Us)[ ]{2,}|', ' ', $plain_message);
}
Revision: 68227
Initial Code
Initial URL
Initial Description
Initial Title
Initial Tags
Initial Language
at December 16, 2014 01:11 by nigelnquande
Initial Code
/**
 * Convert the table(s) of an HTML document into plain text.
 *
 * The document <title>, when present, becomes a "# title" heading. Each table is
 * framed by "+==========" lines and its rows are separated by "+----------" lines.
 * Header cells are rendered as __text__, links as Markdown [text](href); a link
 * that wraps images renders each image as ![alt](src) in place of the link text.
 * A cell that contains links is rendered from its links only; any other text in
 * that cell is not emitted.
 *
 * @param string      $html Markup to convert.
 * @param string|null $id   Optional id of the single element to convert. When it is
 *                          not a non-empty string, every <table> is converted.
 *
 * @return string The plain-text rendering. When nothing matches, only the title
 *                heading (or an empty string) is returned.
 *
 * @throws DOMException When the markup cannot be loaded into a DOMDocument.
 */
function html_table_to_plain($html, $id = NULL) {
    $dom = new DOMDocument();
    // loadHTML() reports an unparseable document by returning FALSE.
    if ($dom->loadHTML($html) === FALSE) {
        throw new DOMException('Could not Load HTML into DOMDocument');
    }

    $plain_message = '';
    $titles = $dom->getElementsByTagName('title');
    if ($titles->length) {
        $plain_message = '# ' . $titles->item(0)->nodeValue . "\n\n";
    }

    if (is_string($id) && $id !== '') {
        // getElementById() yields a single element or NULL, never a list, so it
        // has to be wrapped before it can be iterated like the table list.
        $target = $dom->getElementById($id);
        $tables = ($target === NULL) ? array() : array($target);
    }
    else {
        $tables = $dom->getElementsByTagName('table');
    }

    foreach ($tables as $table) {
        $plain_message .= "+==========\n";
        $first_row = TRUE;
        foreach ($table->getElementsByTagName('tr') as $row) {
            // The separator goes between rows, never after the last one.
            if (!$first_row) {
                $plain_message .= "\n+----------\n";
            }
            $first_row = FALSE;

            $headers = $row->getElementsByTagName('th');
            if ($headers->length > 0) {
                $plain_message .= '| ';
                foreach ($headers as $th) {
                    $plain_message .= '__' . $th->nodeValue . '__ | ';
                }
            }

            foreach ($row->getElementsByTagName('td') as $cell) {
                $links = $cell->getElementsByTagName('a');
                if ($links->length > 0) {
                    foreach ($links as $link) {
                        $plain_message .= '[';
                        $images = $link->getElementsByTagName('img');
                        if ($images->length > 0) {
                            // An image link becomes a Markdown image nested in the link.
                            foreach ($images as $img) {
                                $plain_message .= '![' . $img->getAttribute('alt') . '](' . $img->getAttribute('src') . ')';
                            }
                        }
                        else {
                            $plain_message .= $link->nodeValue;
                        }
                        $plain_message .= '](' . $link->getAttribute('href') . ')';
                    }
                }
                else {
                    $plain_message .= $cell->nodeValue;
                }
                $plain_message .= ' | ';
            }
        }
        $plain_message .= "\n+==========\n";
    }

    // Collapse every run of two or more spaces into a single space.
    return preg_replace('|(?mi-Us)[ ]{2,}|', ' ', $plain_message);
}
Initial URL
Initial Description
Use PHP's DOM parser to convert a table into plain text (including links with images)
Initial Title
HTML Table to plain text
Initial Tags
php, table, html, DOM, text
Initial Language
PHP