Return to Snippet

Revision: 68226
at December 15, 2014 23:07 by nigelnquande


Updated Code
function html_to_plain($html) {
    $plain_message = str_replace(array('<br />', '<br>', '<p>', '</p>', '</title>'), "\n", $html);
		  $plain_message = str_replace(array("<table>", "</tr></table>"), "\n============", $plain_message);
		  $plain_message = str_replace("<tr>", "| ", $plain_message);
		  $plain_message = str_replace("</tr>", "\n-------------", $plain_message);
		  $plain_message = str_replace(array("<title>", '<h1>'),  "# ", $plain_message);
		  $plain_message = str_replace(array('<th>'), "__", $plain_message);
		  $plain_message = str_replace(array('</th>'), "__ | ", $plain_message);
		  $plain_message = str_replace(array('</td>'), " | ", $plain_message);
		  $plain_message = str_replace(array('<strong>', '</strong>'), '__', $plain_message);
		  $plain_message = str_replace(array('<em>', '</em>'), '_', $plain_message);
		  $plain_message = str_replace(array('<a'), '[', $plain_message);
		  $plain_message = str_replace(array('href="'), '](', $plain_message);
		  $plain_message = str_replace(array('<img src="'), '[', $plain_message);
		  $plain_message = str_replace(array('alt='), '', $plain_message);
		  $plain_message = str_replace(array('/>'), ']', $plain_message);
		  $plain_message = strip_tags($plain_message);
		  //$plain_message = str_replace("  ", ' ', $plain_message);
		  $plain_message = preg_replace('|(?mi-Us)[  ]{2,}|', ' ', $plain_message);
        
        return $plain_message ;
}

Revision: 68225
at December 15, 2014 20:06 by nigelnquande


Updated Code
function html_to_plain($html) {
    $plain_message = str_replace(array('<br />', '<br>', '<p>', '</p>', '</title>'), "\n", $html);
        $plain_message = str_replace(array("<table>", "</tr></table>", "</tr></tbody></table>"), "\n============", $plain_message);
	$plain_message = str_replace("<tr>", "| ", $plain_message);
	$plain_message = str_replace("</tr>", "\n-------------", $plain_message);
	$plain_message = str_replace(array("<title>", '<h1>'),  "# ", $plain_message);
	$plain_message = str_replace(array('</td>'), " | ", $plain_message);
	$plain_message = str_replace(array('<strong>', '</strong>'), '__', $plain_message);
	$plain_message = str_replace(array('<em>', '</em>'), '_', $plain_message);
	$plain_message = str_replace(array('<a href="', '<img src="'), '[', $plain_message);
	$plain_message = str_replace(array('alt='), '', $plain_message);
	$plain_message = str_replace(array('/>'), ']', $plain_message);
	$plain_message = strip_tags($plain_message);
	$plain_message = preg_replace('|(?mi-Us)[  ]{2,}|', ' ', $plain_message);
        
        return $plain_message ;
}

Revision: 68224
at December 15, 2014 19:59 by nigelnquande


Updated Code
function html_to_plain($html) {
    $plain_message = str_replace(array('<br />', '<br>', '<p>', '</p>', '</title>'), "\n", $html);
        $plain_message = str_replace(array("<table>", "</tr></table>", "</tr></tbody></table>"), "\n============", $plain_message);
	$plain_message = str_replace("<tr>", "| ", $plain_message);
	$plain_message = str_replace("</tr>", "\n-------------", $plain_message);
	$plain_message = str_replace(array("<title>", '<h1>'),  "# ", $plain_message);
	$plain_message = str_replace(array('</td>'), " | ", $plain_message);
	$plain_message = str_replace(array('<strong>', '</strong>'), '__', $plain_message);
	$plain_message = str_replace(array('<em>', '</em>'), '_', $plain_message);
	$plain_message = str_replace(array('<a href="', '<img src="'), '[', $plain_message);
	$plain_message = str_replace(array('alt='), '', $plain_message);
	$plain_message = str_replace(array('/>'), ']', $plain_message);
	$plain_message = strip_tags($plain_message);
	$plain_message = str_replace("  ", ' ', $plain_message);
        
        return $plain_message ;
}

Revision: 68223
at December 15, 2014 19:58 by nigelnquande


Updated Code
function html_to_plain($html) {
    $plain_message = str_replace(array('<br />', '<br>', '<p>', '</p>', '</title>'), "\n", $html);
        $plain_message = str_replace(array("<table>", "</tr></table>"), "\n============", $plain_message);
	$plain_message = str_replace("<tr>", "| ", $plain_message);
	$plain_message = str_replace("</tr>", "\n-------------", $plain_message);
	$plain_message = str_replace(array("<title>", '<h1>'),  "# ", $plain_message);
	$plain_message = str_replace(array('</td>'), " | ", $plain_message);
	$plain_message = str_replace(array('<strong>', '</strong>'), '__', $plain_message);
	$plain_message = str_replace(array('<em>', '</em>'), '_', $plain_message);
	$plain_message = str_replace(array('<a href="', '<img src="'), '[', $plain_message);
	$plain_message = str_replace(array('alt='), '', $plain_message);
	$plain_message = str_replace(array('/>'), ']', $plain_message);
	$plain_message = strip_tags($plain_message);
	$plain_message = str_replace("  ", ' ', $plain_message);
        
        return $plain_message ;
}

Revision: 68222
at December 15, 2014 19:55 by nigelnquande


Initial Code
function html_to_plain($html) {
		$plain_message = str_replace(array('<br />', '<br>', '<p>', '</p>', '</title>'), "\n", $html);
		  $plain_message = str_replace(array("<table>", "</tr></table>"), "\n============", $plain_message);
		  $plain_message = str_replace("<tr>", "| ", $plain_message);
		  $plain_message = str_replace("</tr>", "\n-------------", $plain_message);
		  $plain_message = str_replace(array("<title>", '<h1>'),  "# ", $plain_message);
		  $plain_message = str_replace(array('</td>'), " | ", $plain_message);
		  $plain_message = str_replace(array('<strong>', '</strong>'), '__', $plain_message);
		  $plain_message = str_replace(array('<em>', '</em>'), '_', $plain_message);
		  $plain_message = str_replace(array('<a href="', '<img src="'), '[', $plain_message);
		  $plain_message = str_replace(array('alt='), '', $plain_message);
		  $plain_message = str_replace(array('/>'), ']', $plain_message);
		  $plain_message = strip_tags($plain_message);
		  $plain_message = str_replace("  ", ' ', $plain_message);
        
        return $plain_message ;
    }

Initial URL

                                

Initial Description
This function takes HTML input and converts it to plain text. It needs improvement so that it converts multiple blank lines to a single blank line and converts an &lt;a ... &gt; link to the markup equivalent (same for images). It should be rewritten to using a DOM/XML parser.

Initial Title
HTML to Plain Text

Initial Tags
html, text, convert

Initial Language
PHP