Return to Snippet

Revision: 13603
at April 29, 2009 04:26 by iroybot


Initial Code
/**
 * Append an HTML line break to every newline that is not inside a tag.
 *
 * Newlines found between a '<' and the following '>' are left untouched so
 * that attributes spread over several lines are not corrupted. Note that a
 * stray '<' that is never closed by '>' suppresses conversion of every
 * newline that follows it.
 *
 * @param string $s Text to convert. Taken by reference for historical
 *                  reasons only; it is never modified.
 * @return string The text with "\n" replaced by "<br />\n" outside of tags.
 */
function linebreaker(&$s) {
    // Fast path: when no '<' is followed by a newline before its closing
    // '>', no tag spans a line break and a plain replace is safe.
    // '[^>]*' (not '+') so a newline directly after '<' is detected too.
    if (!preg_match('@<[^>]*\n@', $s)) {
        return str_replace("\n", "<br />\n", $s);
    }

    $buffer = '';
    $inside = false;            // true while between '<' and '>'
    $length = strlen($s);       // loop-invariant, computed once

    for ($i = 0; $i < $length; $i++) {
        $c = $s[$i];
        if ($c === '<') {
            $inside = true;
        } elseif ($c === '>') {
            $inside = false;
        } elseif ($c === "\n" && !$inside) {
            // Only newlines outside of tags get a visible line break.
            $buffer .= '<br />';
        }
        $buffer .= $c;
    }

    return $buffer;
}

    /**
     * Wrap loose text in <p> elements and convert single newlines to <br />.
     *
     * The text is split on "paragraph marks" (two or more newlines, possibly
     * with whitespace in between) and on the opening/closing tags listed in
     * $block_tags. Text at the top level, or directly inside one of the
     * $p_parents elements, is trimmed, passed through linebreaker() and
     * wrapped in <p>...</p>. Text inside any other block element is copied
     * verbatim, and content of <pre> and <script> is never reformatted.
     *
     * Side effect: unless the input is detected as already formatted, $s is
     * modified in place (bare ampersands become &amp; and "\r" is removed).
     *
     * @param string $s Text to format; modified in place as described above.
     * @return string The formatted HTML (returned by reference).
     */
    function &paragrapher(&$s) {

        if (substr($s, 0, 4) === "\t<p>") {
            return $s;    // already formatted
        }

        // Escape bare ampersands. The check is case-insensitive so that
        // entities such as &Auml; or &#xA0; are recognised and left alone.
        $s = preg_replace('@&(?![a-z0-9#]+;)@i', '&amp;', $s);

        // Normalise line endings once, before offsets are computed.
        $s = str_replace("\r", '', $s);

        // code should be inside pre to be preformatted, let's leave it here for now anyway
        $block_tags = 'object|pre|p|dl|div|noscript|script|blockquote|form|table|td|th|ins|fieldset|address|h1|h2|h3|h4|h5|h6|ul|ol|li|code';

        $r = '@(?:
            # match paragraph mark
            ((?:\n\s*){2,})
        | # match block open tag
        (<                          # save tag
        (' . $block_tags . ')   # save tag name
        (?=[\s/>])              # name must end here: <param> is not <p>
        [^>]*>)
        | # match block close tag
        (</\s*(' . $block_tags . ')\s*>)
        )@smix';

        $m = array();
        preg_match_all($r, $s, $m, PREG_SET_ORDER | PREG_OFFSET_CAPTURE);

        if (count($m) == 0) {
            // No paragraph marks and no block tags: a single paragraph.
            $buffer = "<p>" . linebreaker($s) . "</p>";
            return $buffer;
        }

        // Block elements whose direct text content is wrapped in <p>.
        $p_parents = array('div'=>1, 'blockquote'=>1, 'td'=>1, 'th'=>1, 'ins'=>1, 'form'=>1, 'li'=>1);
        $buffer = '';
        $pre = false;           // true while inside <pre> or <script>
        $last_offset = 0;       // end of the previous match within $s
        $tags = array();        // stack of currently open block tags
        $tag = null;            // innermost open block tag, null at top level

        foreach ($m as $match) {

            // grab the content from the latest match offset up to the current one
            $offset = $match[0][1];
            $slice = (string) substr($s, $last_offset, $offset - $last_offset);
            $convert = is_null($tag) || (!$pre && isset($p_parents[$tag]));
            if ($convert) {
                $slice = trim($slice);
                if ($slice !== '') {
                    $slice = linebreaker($slice);
                }
            }
            $last_offset = $offset + strlen($match[0][0]);

            // now fill the buffer; compare against '' so that a slice
            // consisting of "0" is not discarded
            if ($slice !== '') {
                $buffer .= $convert ? "<p>$slice</p>\n" : $slice;
            }

            // Work out which alternative matched. With PREG_SET_ORDER,
            // trailing unmatched groups are omitted, so the highest group
            // present identifies the alternative.
            if (isset($match[5]) && $match[5][0] !== '') {
                // block close tag
                array_pop($tags);
                $closed = strtolower($match[5][0]);
                if ($closed == 'pre' || $closed == 'script') {
                    $pre = false;
                }
                $buffer .= $match[0][0] . ($pre ? '' : "\n");
                $depth = count($tags);
                if ($depth > 0) {
                    $tag = $tags[$depth - 1];
                } else {
                    $tag = null;
                    $pre = false;
                }
            } elseif (isset($match[3]) && $match[3][0] !== '') {
                // block open tag
                $tag = strtolower($match[3][0]);
                $tags[] = $tag;
                if ($tag == 'pre' || $tag == 'script') {
                    $pre = true;
                }
                $buffer .= $match[0][0] . ($pre ? '' : "\n");
            } elseif ($pre) {
                // paragraph mark inside preformatted content: keep the
                // original whitespace exactly as written
                $buffer .= $match[0][0];
            }
            // paragraph mark outside of pre: nothing to emit, the
            // surrounding slices have already been wrapped or copied

        }

        $tail = (string) substr($s, $last_offset);
        if ($pre) {
            // unlikely, but does not hurt to check
            $buffer .= $tail;
        } else {
            $tail = trim($tail);
            if ($tail !== '') {
                $buffer .= '<p>' . linebreaker($tail) . '</p>';
            }
        }

        return $buffer;
    }

Initial URL
http://lightpress-de.googlecode.com/svn/trunk/lightpress/classes/Frontend.php

Initial Description
Transform text (e.g. from a WYSIWYG editor) into nicely formatted HTML paragraphs. (Credits: Ludo Magnocavallo's LightPress).

[this is a solid starting point - but a lot of HTML elements are missing]

Initial Title
PHP Paragrapher

Initial Tags
html

Initial Language
PHP