Changeset 1179
- Timestamp:
- 2008년 03월 14일 11시 42분 13초 (2 years ago)
- Files:
-
- 1 modified
-
trunk/plugins/Markdown.php (modified) (65 diffs)
Legend:
- Unmodified
- Added
- Removed
-
trunk/plugins/Markdown.php
r965 r1179 1 1 <?php 2 2 # 3 # Markdown - A text-to-HTML conversion tool for web writers3 # Markdown Extra - A text-to-HTML conversion tool for web writers 4 4 # 5 # PHP Markdown 6 # Copyright (c) 2004-200 6Michel Fortin5 # PHP Markdown & Extra 6 # Copyright (c) 2004-2007 Michel Fortin 7 7 # <http://www.michelf.com/projects/php-markdown/> 8 8 # … … 13 13 14 14 15 define( 'MARKDOWN_VERSION', "1.0.1e" ); # Thu 28 Dec 2006 15 define( 'MARKDOWN_VERSION', "1.0.1k" ); # Wed 26 Sep 2007 16 define( 'MARKDOWNEXTRA_VERSION', "1.1.7" ); # Wed 26 Sep 2007 16 17 17 18 … … 21 22 22 23 # Change to ">" for HTML output 23 define( 'MARKDOWN_EMPTY_ELEMENT_SUFFIX', " />");24 @define( 'MARKDOWN_EMPTY_ELEMENT_SUFFIX', " />"); 24 25 25 26 # Define the width of a tab for code blocks. 26 define( 'MARKDOWN_TAB_WIDTH', 4 ); 27 @define( 'MARKDOWN_TAB_WIDTH', 4 ); 28 29 # Optional title attribute for footnote links and backlinks. 30 @define( 'MARKDOWN_FN_LINK_TITLE', "" ); 31 @define( 'MARKDOWN_FN_BACKLINK_TITLE', "" ); 32 33 # Optional class attribute for footnote links and backlinks. 34 @define( 'MARKDOWN_FN_LINK_CLASS', "" ); 35 @define( 'MARKDOWN_FN_BACKLINK_CLASS', "" ); 36 37 38 # 39 # WordPress settings: 40 # 41 42 # Change to false to remove Markdown from posts and/or comments. 43 @define( 'MARKDOWN_WP_POSTS', true ); 44 @define( 'MARKDOWN_WP_COMMENTS', true ); 45 46 27 47 28 48 ### Standard Function Interface ### 29 49 30 define( 'MARKDOWN_PARSER_CLASS', 'Markdown_Parser' );50 @define( 'MARKDOWN_PARSER_CLASS', 'MarkdownExtra_Parser' ); 31 51 32 52 function Markdown($text) { … … 46 66 47 67 68 ### WordPress Plugin Interface ### 69 70 /* 71 Plugin Name: Markdown Extra 72 Plugin URI: http://www.michelf.com/projects/php-markdown/ 73 Description: <a href="http://daringfireball.net/projects/markdown/syntax">Markdown syntax</a> allows you to write using an easy-to-read, easy-to-write plain text format. Based on the original Perl version by <a href="http://daringfireball.net/">John Gruber</a>. <a href="http://www.michelf.com/projects/php-markdown/">More...</a> 74 Version: 1.1.7 75 Author: Michel Fortin 76 Author URI: http://www.michelf.com/ 77 */ 78 79 if (isset($wp_version)) { 80 # More details about how it works here: 81 # <http://www.michelf.com/weblog/2005/wordpress-text-flow-vs-markdown/> 82 83 # Post content and excerpts 84 # - Remove WordPress paragraph generator. 85 # - Run Markdown on excerpt, then remove all tags. 86 # - Add paragraph tag around the excerpt, but remove it for the excerpt rss. 87 if (MARKDOWN_WP_POSTS) { 88 remove_filter('the_content', 'wpautop'); 89 remove_filter('the_content_rss', 'wpautop'); 90 remove_filter('the_excerpt', 'wpautop'); 91 add_filter('the_content', 'Markdown', 6); 92 add_filter('the_content_rss', 'Markdown', 6); 93 add_filter('get_the_excerpt', 'Markdown', 6); 94 add_filter('get_the_excerpt', 'trim', 7); 95 add_filter('the_excerpt', 'mdwp_add_p'); 96 add_filter('the_excerpt_rss', 'mdwp_strip_p'); 97 98 remove_filter('content_save_pre', 'balanceTags', 50); 99 remove_filter('excerpt_save_pre', 'balanceTags', 50); 100 add_filter('the_content', 'balanceTags', 50); 101 add_filter('get_the_excerpt', 'balanceTags', 9); 102 } 103 104 # Comments 105 # - Remove WordPress paragraph generator. 106 # - Remove WordPress auto-link generator. 107 # - Scramble important tags before passing them to the kses filter. 108 # - Run Markdown on excerpt then remove paragraph tags. 109 if (MARKDOWN_WP_COMMENTS) { 110 remove_filter('comment_text', 'wpautop', 30); 111 remove_filter('comment_text', 'make_clickable'); 112 add_filter('pre_comment_content', 'Markdown', 6); 113 add_filter('pre_comment_content', 'mdwp_hide_tags', 8); 114 add_filter('pre_comment_content', 'mdwp_show_tags', 12); 115 add_filter('get_comment_text', 'Markdown', 6); 116 add_filter('get_comment_excerpt', 'Markdown', 6); 117 add_filter('get_comment_excerpt', 'mdwp_strip_p', 7); 118 119 global $mdwp_hidden_tags, $mdwp_placeholders; 120 $mdwp_hidden_tags = explode(' ', 121 '<p> </p> <pre> </pre> <ol> </ol> <ul> </ul> <li> </li>'); 122 $mdwp_placeholders = explode(' ', str_rot13( 123 'pEj07ZbbBZ U1kqgh4w4p pre2zmeN6K QTi31t9pre ol0MP1jzJR '. 124 'ML5IjmbRol ulANi1NsGY J7zRLJqPul liA8ctl16T K9nhooUHli')); 125 } 126 127 function mdwp_add_p($text) { 128 if (!preg_match('{^$|^<(p|ul|ol|dl|pre|blockquote)>}i', $text)) { 129 $text = '<p>'.$text.'</p>'; 130 $text = preg_replace('{\n{2,}}', "</p>\n\n<p>", $text); 131 } 132 return $text; 133 } 134 135 function mdwp_strip_p($t) { return preg_replace('{</?p>}i', '', $t); } 136 137 function mdwp_hide_tags($text) { 138 global $mdwp_hidden_tags, $mdwp_placeholders; 139 return str_replace($mdwp_hidden_tags, $mdwp_placeholders, $text); 140 } 141 function mdwp_show_tags($text) { 142 global $mdwp_hidden_tags, $mdwp_placeholders; 143 return str_replace($mdwp_placeholders, $mdwp_hidden_tags, $text); 144 } 145 } 146 147 148 ### bBlog Plugin Info ### 149 150 function identify_modifier_markdown() { 151 return array( 152 'name' => 'markdown', 153 'type' => 'modifier', 154 'nicename' => 'PHP Markdown Extra', 155 'description' => 'A text-to-HTML conversion tool for web writers', 156 'authors' => 'Michel Fortin and John Gruber', 157 'licence' => 'GPL', 158 'version' => MARKDOWNEXTRA_VERSION, 159 'help' => '<a href="http://daringfireball.net/projects/markdown/syntax">Markdown syntax</a> allows you to write using an easy-to-read, easy-to-write plain text format. Based on the original Perl version by <a href="http://daringfireball.net/">John Gruber</a>. <a href="http://www.michelf.com/projects/php-markdown/">More...</a>', 160 ); 161 } 162 163 164 ### Smarty Modifier Interface ### 165 166 function smarty_modifier_markdown($text) { 167 return Markdown($text); 168 } 169 170 171 ### Textile Compatibility Mode ### 172 173 # Rename this file to "classTextile.php" and it can replace Textile everywhere. 174 175 if (strcasecmp(substr(__FILE__, -16), "classTextile.php") == 0) { 176 # Try to include PHP SmartyPants. Should be in the same directory. 177 @include_once 'smartypants.php'; 178 # Fake Textile class. It calls Markdown instead. 179 class Textile { 180 function TextileThis($text, $lite='', $encode='') { 181 if ($lite == '' && $encode == '') $text = Markdown($text); 182 if (function_exists('SmartyPants')) $text = SmartyPants($text); 183 return $text; 184 } 185 # Fake restricted version: restrictions are not supported for now. 186 function TextileRestricted($text, $lite='', $noimage='') { 187 return $this->TextileThis($text, $lite); 188 } 189 # Workaround to ensure compatibility with TextPattern 4.0.3. 190 function blockLite($text) { return $text; } 191 } 192 } 193 194 195 48 196 # 49 197 # Markdown Parser Class … … 56 204 var $nested_brackets_depth = 6; 57 205 var $nested_brackets; 206 207 var $nested_url_parenthesis_depth = 4; 208 var $nested_url_parenthesis; 58 209 59 210 # Table of hash values for escaped characters: 60 211 var $escape_chars = '\`*_{}[]()>#+-.!'; 61 var $escape_table = array();62 var $backslash_escape_table = array();63 212 64 213 # Change to ">" for HTML output. 65 214 var $empty_element_suffix = MARKDOWN_EMPTY_ELEMENT_SUFFIX; 66 215 var $tab_width = MARKDOWN_TAB_WIDTH; 216 217 # Change to `true` to disallow markup or entities. 218 var $no_markup = false; 219 var $no_entities = false; 67 220 68 221 … … 76 229 str_repeat('(?>[^\[\]]+|\[', $this->nested_brackets_depth). 77 230 str_repeat('\])*', $this->nested_brackets_depth); 78 79 # Create an identical table but for escaped characters. 80 foreach (preg_split('/(?!^|$)/', $this->escape_chars) as $char) { 81 $hash = md5($char); 82 $this->escape_table[$char] = $hash; 83 $this->backslash_escape_table["\\$char"] = $hash; 84 } 231 232 $this->nested_url_parenthesis = 233 str_repeat('(?>[^()\s]+|\(', $this->nested_url_parenthesis_depth). 234 str_repeat('(?>\)))*', $this->nested_url_parenthesis_depth); 85 235 86 236 # Sort document, block, and span gamut in ascendent priority order. … … 94 244 var $urls = array(); 95 245 var $titles = array(); 96 var $html_blocks = array(); 97 var $html_hashes = array(); # Contains both blocks and span hashes. 246 var $html_hashes = array(); 247 248 # Status flag to avoid invalid nesting. 249 var $in_anchor = false; 98 250 99 251 … … 111 263 $this->urls = array(); 112 264 $this->titles = array(); 113 $this->html_blocks = array();114 265 $this->html_hashes = array(); 115 266 116 267 # Standardize line endings: 117 268 # DOS to Unix and Mac to Unix 118 $text = str_replace(array("\r\n", "\r"), "\n", $text);269 $text = preg_replace('{\r\n?}', "\n", $text); 119 270 120 271 # Make sure $text ends with a couple of newlines: … … 130 281 # This makes subsequent regexen easier to write, because we can 131 282 # match consecutive blank lines with /\n+/ instead of something 132 # contorted like /[ \t]*\n+/ .133 $text = preg_replace('/^[ \t]+$/m', '', $text);283 # contorted like /[ ]*\n+/ . 284 $text = preg_replace('/^[ ]+$/m', '', $text); 134 285 135 286 # Run document gamut methods. … … 146 297 147 298 "runBasicBlockGamut" => 30, 148 "unescapeSpecialChars" => 90,149 299 ); 150 300 … … 160 310 $text = preg_replace_callback('{ 161 311 ^[ ]{0,'.$less_than_tab.'}\[(.+)\][ ]?: # id = $1 162 [ \t]*312 [ ]* 163 313 \n? # maybe *one* newline 164 [ \t]*314 [ ]* 165 315 <?(\S+?)>? # url = $2 166 [ \t]*316 [ ]* 167 317 \n? # maybe one newline 168 [ \t]*318 [ ]* 169 319 (?: 170 320 (?<=\s) # lookbehind for whitespace … … 172 322 (.*?) # title = $3 173 323 [")] 174 [ \t]*324 [ ]* 175 325 )? # title is optional 176 326 (?:\n+|\Z) … … 190 340 191 341 function hashHTMLBlocks($text) { 342 if ($this->no_markup) return $text; 343 192 344 $less_than_tab = $this->tab_width - 1; 193 345 … … 198 350 # phrase emphasis, and spans. The list of tags we're looking for is 199 351 # hard-coded: 200 $block_tags_a = 'p|div|h[1-6]|blockquote|pre|table|dl|ol|ul|address|'. 201 'script|noscript|form|fieldset|iframe|math|ins|del'; 352 # 353 # * List "a" is made of tags which can be both inline or block-level. 354 # These will be treated block-level when the start tag is alone on 355 # its line, otherwise they're not matched here and will be taken as 356 # inline later. 357 # * List "b" is made of tags which are always block-level; 358 # 359 $block_tags_a = 'ins|del'; 202 360 $block_tags_b = 'p|div|h[1-6]|blockquote|pre|table|dl|ol|ul|address|'. 203 361 'script|noscript|form|fieldset|iframe|math'; … … 226 384 <\2 # nested opening tag 227 385 '.$attr.' # attributes 228 (? :386 (?> 229 387 /> 230 388 | … … 239 397 )*', 240 398 $nested_tags_level); 399 $content2 = str_replace('\2', '\3', $content); 241 400 242 401 # First, look for nested blocks, e.g.: … … 251 410 # We need to do this before the next, more liberal match, because the next 252 411 # match will start at the first `<div>` and stop at the first `</div>`. 253 $text = preg_replace_callback('{ 254 ( # save in $1 255 ^ # start of line (with /m) 256 <('.$block_tags_a.')# start tag = $2 257 '.$attr.'>\n # attributes followed by > and \n 412 $text = preg_replace_callback('{(?> 413 (?> 414 (?<=\n\n) # Starting after a blank line 415 | # or 416 \A\n? # the beginning of the doc 417 ) 418 ( # save in $1 419 420 # Match from `\n<tag>` to `</tag>\n`, handling nested tags 421 # in between. 422 423 [ ]{0,'.$less_than_tab.'} 424 <('.$block_tags_b.')# start tag = $2 425 '.$attr.'> # attributes followed by > and \n 258 426 '.$content.' # content, support nesting 259 427 </\2> # the matching end tag 260 [ \t]* # trailing spaces/tabs428 [ ]* # trailing spaces/tabs 261 429 (?=\n+|\Z) # followed by a newline or end of document 262 ) 263 }xm', 264 array(&$this, '_hashHTMLBlocks_callback'), 265 $text); 266 267 # 268 # Match from `\n<tag>` to `</tag>\n`, handling nested tags in between. 269 # 270 $text = preg_replace_callback('{ 271 ( # save in $1 272 ^ # start of line (with /m) 273 <('.$block_tags_b.')# start tag = $2 274 '.$attr.'> # attributes followed by > 275 '.$content.' # content, support nesting 276 </\2> # the matching end tag 277 [ \t]* # trailing spaces/tabs 430 431 | # Special version for tags of group a. 432 433 [ ]{0,'.$less_than_tab.'} 434 <('.$block_tags_a.')# start tag = $3 435 '.$attr.'>[ ]*\n # attributes followed by > 436 '.$content2.' # content, support nesting 437 </\3> # the matching end tag 438 [ ]* # trailing spaces/tabs 278 439 (?=\n+|\Z) # followed by a newline or end of document 279 ) 280 }xm', 281 array(&$this, '_hashHTMLBlocks_callback'), 282 $text); 283 284 # Special case just for <hr />. It was easier to make a special case than 285 # to make the other regex more complicated. 286 $text = preg_replace_callback('{ 287 (?: 288 (?<=\n\n) # Starting after a blank line 289 | # or 290 \A\n? # the beginning of the doc 291 ) 292 ( # save in $1 440 441 | # Special case just for <hr />. It was easier to make a special 442 # case than to make the other regex more complicated. 443 293 444 [ ]{0,'.$less_than_tab.'} 294 445 <(hr) # start tag = $2 … … 296 447 ([^<>])*? # 297 448 /?> # the matching end tag 298 [ \t]*449 [ ]* 299 450 (?=\n{2,}|\Z) # followed by a blank line or end of document 300 ) 301 }x', 302 array(&$this, '_hashHTMLBlocks_callback'), 303 $text); 304 305 # Special case for standalone HTML comments: 306 $text = preg_replace_callback('{ 307 (?: 308 (?<=\n\n) # Starting after a blank line 309 | # or 310 \A\n? # the beginning of the doc 311 ) 312 ( # save in $1 451 452 | # Special case for standalone HTML comments: 453 313 454 [ ]{0,'.$less_than_tab.'} 314 455 (?s: 315 456 <!-- .*? --> 316 457 ) 317 [ \t]*458 [ ]* 318 459 (?=\n{2,}|\Z) # followed by a blank line or end of document 319 ) 320 }x', 321 array(&$this, '_hashHTMLBlocks_callback'), 322 $text); 323 324 # PHP and ASP-style processor instructions (<? and <%...%>) 325 $text = preg_replace_callback('{ 326 (?: 327 (?<=\n\n) # Starting after a blank line 328 | # or 329 \A\n? # the beginning of the doc 330 ) 331 ( # save in $1 460 461 | # PHP and ASP-style processor instructions (<? and <%) 462 332 463 [ ]{0,'.$less_than_tab.'} 333 464 (?s: … … 336 467 \2> 337 468 ) 338 [ \t]*469 [ ]* 339 470 (?=\n{2,}|\Z) # followed by a blank line or end of document 340 ) 341 }x', 471 472 ) 473 )}Sxmi', 342 474 array(&$this, '_hashHTMLBlocks_callback'), 343 475 $text); … … 350 482 return "\n\n$key\n\n"; 351 483 } 352 353 354 function hashBlock($text) { 355 # 356 # Called whenever a tag must be hashed when a function insert a block-level 357 # tag in $text, it pass through this function and is automaticaly escaped, 358 # which remove the need to call _HashHTMLBlocks at every step. 484 485 486 function hashPart($text, $boundary = 'X') { 487 # 488 # Called whenever a tag must be hashed when a function insert an atomic 489 # element in the text stream. Passing $text to through this function gives 490 # a unique text-token which will be reverted back when calling unhash. 491 # 492 # The $boundary argument specify what character should be used to surround 493 # the token. By convension, "B" is used for block elements that needs not 494 # to be wrapped into paragraph tags at the end, ":" is used for elements 495 # that are word separators and "S" is used for general span-level elements. 359 496 # 360 497 # Swap back any tag hash found in $text so we do not have to `unhash` … … 363 500 364 501 # Then hash the block. 365 $key = md5($text); 502 static $i = 0; 503 $key = "$boundary\x1A" . ++$i . $boundary; 366 504 $this->html_hashes[$key] = $text; 367 $this->html_blocks[$key] = $text;368 505 return $key; # String that will replace the tag. 369 506 } 370 507 371 508 372 function hashSpan($text) { 373 # 374 # Called whenever a tag must be hashed when a function insert a span-level 375 # element in $text, it pass through this function and is automaticaly 376 # escaped, blocking invalid nested overlap. 377 # 378 # Swap back any tag hash found in $text so we do not have to `unhash` 379 # multiple times at the end. 380 $text = $this->unhash($text); 381 382 # Then hash the span. 383 $key = md5($text); 384 $this->html_hashes[$key] = $text; 385 return $key; # String that will replace the span tag. 509 function hashBlock($text) { 510 # 511 # Shortcut function for hashPart with block-level boundaries. 512 # 513 return $this->hashPart($text, 'B'); 386 514 } 387 515 … … 434 562 # Do Horizontal Rules: 435 563 return preg_replace( 436 array('{^[ ]{0,2}([ ]?\*[ ]?){3,}[ \t]*$}mx', 437 '{^[ ]{0,2}([ ]? -[ ]?){3,}[ \t]*$}mx', 438 '{^[ ]{0,2}([ ]? _[ ]?){3,}[ \t]*$}mx'), 564 '{ 565 ^[ ]{0,3} # Leading space 566 ([-*_]) # $1: First marker 567 (?> # Repeated marker group 568 [ ]{0,2} # Zero, one, or two spaces. 569 \1 # Marker character 570 ){2,} # Group repeated at least twice 571 [ ]* # Tailing spaces 572 $ # End of line. 573 }mx', 439 574 "\n".$this->hashBlock("<hr$this->empty_element_suffix")."\n", 440 575 $text); … … 447 582 # tags like paragraphs, headers, and list items. 448 583 # 449 "escapeSpecialCharsWithinTagAttributes" => -20,450 "doCodeSpans" => -10,451 " encodeBackslashEscapes" => -5,584 # Process character escapes, code spans, and inline HTML 585 # in one shot. 586 "parseSpan" => -30, 452 587 453 588 # Process anchor and image tags. Images must come first, … … 480 615 function doHardBreaks($text) { 481 616 # Do hard breaks: 482 $br_tag = $this->hashSpan("<br$this->empty_element_suffix\n"); 483 return preg_replace('/ {2,}\n/', $br_tag, $text); 484 } 485 486 487 function escapeSpecialCharsWithinTagAttributes($text) { 488 # 489 # Within tags -- meaning between < and > -- encode [\ ` * _] so they 490 # don't conflict with their use in Markdown for code, italics and strong. 491 # We're replacing each such character with its corresponding MD5 checksum 492 # value; this is likely overkill, but it should prevent us from colliding 493 # with the escape values by accident. 494 # 495 $tokens = $this->tokenizeHTML($text); 496 $text = ''; # rebuild $text from the tokens 497 498 foreach ($tokens as $cur_token) { 499 if ($cur_token[0] == 'tag') { 500 $cur_token[1] = str_replace('\\', $this->escape_table['\\'], $cur_token[1]); 501 $cur_token[1] = str_replace(array('`'), $this->escape_table['`'], $cur_token[1]); 502 $cur_token[1] = str_replace('*', $this->escape_table['*'], $cur_token[1]); 503 $cur_token[1] = str_replace('_', $this->escape_table['_'], $cur_token[1]); 504 } 505 $text .= $cur_token[1]; 506 } 507 return $text; 617 return preg_replace_callback('/ {2,}\n/', 618 array(&$this, '_doHardBreaks_callback'), $text); 619 } 620 function _doHardBreaks_callback($matches) { 621 return $this->hashPart("<br$this->empty_element_suffix\n"); 508 622 } 509 623 … … 513 627 # Turn Markdown link shortcuts into XHTML <a> tags. 514 628 # 629 if ($this->in_anchor) return $text; 630 $this->in_anchor = true; 631 515 632 # 516 633 # First, handle reference-style links: [link text] [id] … … 541 658 \] 542 659 \( # literal paren 543 [ \t]* 544 <?(.*?)>? # href = $3 545 [ \t]* 546 ( # $4 547 ([\'"]) # quote char = $5 548 (.*?) # Title = $6 549 \5 # matching quote 550 [ \t]* # ignore any spaces/tabs between closing quote and ) 660 [ ]* 661 (?: 662 <(\S*)> # href = $3 663 | 664 ('.$this->nested_url_parenthesis.') # href = $4 665 ) 666 [ ]* 667 ( # $5 668 ([\'"]) # quote char = $6 669 (.*?) # Title = $7 670 \6 # matching quote 671 [ ]* # ignore any spaces/tabs between closing quote and ) 551 672 )? # title is optional 552 673 \) … … 569 690 // array(&$this, '_doAnchors_reference_callback'), $text); 570 691 692 $this->in_anchor = false; 571 693 return $text; 572 694 } … … 598 720 $link_text = $this->runSpanGamut($link_text); 599 721 $result .= ">$link_text</a>"; 600 $result = $this->hash Span($result);722 $result = $this->hashPart($result); 601 723 } 602 724 else { … … 608 730 $whole_match = $matches[1]; 609 731 $link_text = $this->runSpanGamut($matches[2]); 610 $url = $matches[3] ;611 $title =& $matches[ 6];612 732 $url = $matches[3] == '' ? $matches[4] : $matches[3]; 733 $title =& $matches[7]; 734 613 735 $url = $this->encodeAmpsAndAngles($url); 614 736 … … 623 745 $result .= ">$link_text</a>"; 624 746 625 return $this->hash Span($result);747 return $this->hashPart($result); 626 748 } 627 749 … … 662 784 \s? # One optional whitespace character 663 785 \( # literal paren 664 [ \t]* 665 <?(\S+?)>? # src url = $3 666 [ \t]* 667 ( # $4 668 ([\'"]) # quote char = $5 669 (.*?) # title = $6 670 \5 # matching quote 671 [ \t]* 786 [ ]* 787 (?: 788 <(\S*)> # src url = $3 789 | 790 ('.$this->nested_url_parenthesis.') # src url = $4 791 ) 792 [ ]* 793 ( # $5 794 ([\'"]) # quote char = $6 795 (.*?) # title = $7 796 \6 # matching quote 797 [ ]* 672 798 )? # title is optional 673 799 \) … … 696 822 } 697 823 $result .= $this->empty_element_suffix; 698 $result = $this->hash Span($result);824 $result = $this->hashPart($result); 699 825 } 700 826 else { … … 708 834 $whole_match = $matches[1]; 709 835 $alt_text = $matches[2]; 710 $url = $matches[3] ;711 $title =& $matches[ 6];836 $url = $matches[3] == '' ? $matches[4] : $matches[3]; 837 $title =& $matches[7]; 712 838 713 839 $alt_text = str_replace('"', '"', $alt_text); … … 719 845 $result .= $this->empty_element_suffix; 720 846 721 return $this->hash Span($result);847 return $this->hashPart($result); 722 848 } 723 849 … … 731 857 # -------- 732 858 # 733 $text = preg_replace_callback('{ ^(.+)[ \t]*\n=+[ \t]*\n+ }mx', 734 array(&$this, '_doHeaders_callback_setext_h1'), $text); 735 $text = preg_replace_callback('{ ^(.+)[ \t]*\n-+[ \t]*\n+ }mx', 736 array(&$this, '_doHeaders_callback_setext_h2'), $text); 859 $text = preg_replace_callback('{ ^(.+?)[ ]*\n(=+|-+)[ ]*\n+ }mx', 860 array(&$this, '_doHeaders_callback_setext'), $text); 737 861 738 862 # atx-style headers: … … 745 869 $text = preg_replace_callback('{ 746 870 ^(\#{1,6}) # $1 = string of #\'s 747 [ \t]*871 [ ]* 748 872 (.+?) # $2 = Header text 749 [ \t]*873 [ ]* 750 874 \#* # optional closing #\'s (not counted) 751 875 \n+ … … 755 879 return $text; 756 880 } 757 function _doHeaders_callback_setext_h1($matches) { 758 return $this->hashBlock("<h1>".$this->runSpanGamut($matches[1])."</h1>")."\n\n"; 759 } 760 function _doHeaders_callback_setext_h2($matches) { 761 return $this->hashBlock("<h2>".$this->runSpanGamut($matches[1])."</h2>")."\n\n"; 881 function _doHeaders_callback_setext($matches) { 882 $level = $matches[2]{0} == '=' ? 1 : 2; 883 $block = "<h$level>".$this->runSpanGamut($matches[1])."</h$level>"; 884 return "\n" . $this->hashBlock($block) . "\n\n"; 762 885 } 763 886 function _doHeaders_callback_atx($matches) { 764 887 $level = strlen($matches[1]); 765 return $this->hashBlock("<h$level>".$this->runSpanGamut($matches[2])."</h$level>")."\n\n"; 888 $block = "<h$level>".$this->runSpanGamut($matches[2])."</h$level>"; 889 return "\n" . $this->hashBlock($block) . "\n\n"; 766 890 } 767 891 … … 787 911 [ ]{0,'.$less_than_tab.'} 788 912 ('.$marker.') # $3 = first list item marker 789 [ \t]+913 [ ]+ 790 914 ) 791 915 (?s:.+?) … … 796 920 (?=\S) 797 921 (?! # Negative lookahead for another list item marker 798 [ \t]*799 '.$marker.'[ \t]+922 [ ]* 923 '.$marker.'[ ]+ 800 924 ) 801 925 ) … … 835 959 $marker_any = ( $list_type == "ul" ? $marker_ul : $marker_ol ); 836 960 837 # Turn double returns into triple returns, so that we can make a 838 # paragraph for the last item in a list, if necessary: 839 $list = preg_replace("/\n{2,}/", "\n\n\n", $list); 961 $list .= "\n"; 840 962 $result = $this->processListItems($list, $marker_any); 841 963 … … 879 1001 $list_str = preg_replace_callback('{ 880 1002 (\n)? # leading line = $1 881 (^[ \t]*) # leading whitespace = $2882 ('.$marker_any.') [ \t]+ # list marker = $3883 ((?s:.+?) # list item text = $4884 ( \n{1,2}))885 (?= \n* (\z | \2 ('.$marker_any.') [ \t]+))1003 (^[ ]*) # leading whitespace = $2 1004 ('.$marker_any.') [ ]+ # list marker = $3 1005 ((?s:.+?)) # list item text = $4 1006 (?:(\n+(?=\n))|\n) # tailing blank line = $5 1007 (?= \n* (\z | \2 ('.$marker_any.') [ ]+)) 886 1008 }xm', 887 1009 array(&$this, '_processListItems_callback'), $list_str); … … 894 1016 $leading_line =& $matches[1]; 895 1017 $leading_space =& $matches[2]; 896 897 if ($leading_line || preg_match('/\n{2,}/', $item)) { 898 $item = $this->runBlockGamut($this->outdent($item)); 1018 $tailing_blank_line =& $matches[5]; 1019 1020 if ($leading_line || $tailing_blank_line || 1021 preg_match('/\n{2,}/', $item)) 1022 { 1023 $item = $this->runBlockGamut($this->outdent($item)."\n"); 899 1024 } 900 1025 else { … … 916 1041 (?:\n\n|\A) 917 1042 ( # $1 = the code block -- one or more lines, starting with a space/tab 918 (? :919 (?:[ ]{'.$this->tab_width.'} | \t)# Lines must start with a tab or a tab-width of spaces1043 (?> 1044 [ ]{'.$this->tab_width.'} # Lines must start with a tab or a tab-width of spaces 920 1045 .*\n+ 921 1046 )+ … … 930 1055 $codeblock = $matches[1]; 931 1056 932 $codeblock = $this->encodeCode($this->outdent($codeblock)); 933 // $codeblock = $this->detab($codeblock); 934 # trim leading newlines and trailing whitespace 935 $codeblock = preg_replace(array('/\A\n+/', '/\n+\z/'), '', $codeblock); 936 937 $result = "\n\n".$this->hashBlock("<pre><code>" . $codeblock . "\n</code></pre>")."\n\n"; 938 939 return $result; 940 } 941 942 943 function doCodeSpans($text) { 944 # 945 # * Backtick quotes are used for <code></code> spans. 946 # 947 # * You can use multiple backticks as the delimiters if you want to 948 # include literal backticks in the code span. So, this input: 949 # 950 # Just type ``foo `bar` baz`` at the prompt. 951 # 952 # Will translate to: 953 # 954 # <p>Just type <code>foo `bar` baz</code> at the prompt.</p> 955 # 956 # There's no arbitrary limit to the number of backticks you 957 # can use as delimters. If you need three consecutive backticks 958 # in your code, use four for delimiters, etc. 959 # 960 # * You can use spaces to get literal backticks at the edges: 961 # 962 # ... type `` `bar` `` ... 963 # 964 # Turns to: 965 # 966 # ... type <code>`bar`</code> ... 967 # 968 $text = preg_replace_callback('@ 969 (?<!\\\) # Character before opening ` can\'t be a backslash 970 (`+) # $1 = Opening run of ` 971 (.+?) # $2 = The code block 972 (?<!`) 973 \1 # Matching closer 974 (?!`) 975 @xs', 976 array(&$this, '_doCodeSpans_callback'), $text); 977 978 return $text; 979 } 980 function _doCodeSpans_callback($matches) { 981 $c = $matches[2]; 982 $c = preg_replace('/^[ \t]*/', '', $c); # leading whitespace 983 $c = preg_replace('/[ \t]*$/', '', $c); # trailing whitespace 984 $c = $this->encodeCode($c); 985 return $this->hashSpan("<code>$c</code>"); 986 } 987 988 989 function encodeCode($_) { 990 # 991 # Encode/escape certain characters inside Markdown code runs. 992 # The point is that in code, these characters are literals, 993 # and lose their special Markdown meanings. 994 # 995 # Encode all ampersands; HTML entities are not 996 # entities within a Markdown code span. 997 $_ = str_replace('&', '&', $_); 998 999 # Do the angle bracket song and dance: 1000 $_ = str_replace(array('<', '>'), 1001 array('<', '>'), $_); 1002 1003 # Now, escape characters that are magic in Markdown: 1004 // $_ = str_replace(array_keys($this->escape_table), 1005 // array_values($this->escape_table), $_); 1006 1007 return $_; 1057 $codeblock = $this->outdent($codeblock); 1058 $codeblock = htmlspecialchars($codeblock, ENT_NOQUOTES); 1059 1060 # trim leading newlines and trailing newlines 1061 $codeblock = preg_replace('/\A\n+|\n+\z/', '', $codeblock); 1062 1063 $codeblock = "<pre><code>$codeblock\n</code></pre>"; 1064 return "\n\n".$this->hashBlock($codeblock)."\n\n"; 1065 } 1066 1067 1068 function makeCodeSpan($code) { 1069 # 1070 # Create a code span markup for $code. Called from handleSpanToken. 1071 # 1072 $code = htmlspecialchars(trim($code), ENT_NOQUOTES); 1073 return $this->hashPart("<code>$code</code>"); 1008 1074 } 1009 1075 … … 1020 1086 (?!\1\1) # or two others marker chars. 1021 1087 ( # $2: Content 1022 (? :1088 (?> 1023 1089 [^*_]+? # Anthing not em markers. 1024 1090 | … … 1026 1092 \1 (?=\S) .+? (?<=\S) \1 1027 1093 | 1028 (?! \1 ) .# Allow unbalenced * and _.1094 . # Allow unbalenced * and _. 1029 1095 )+? 1030 1096 ) … … 1034 1100 # Then <em>: 1035 1101 $text = preg_replace_callback( 1036 '{ ( (?<!\*)\* | (?<!_)_ ) (?=\S) (?! \1) (.+?) (?<=\S) \1 }sx',1102 '{ ( (?<!\*)\* | (?<!_)_ ) (?=\S) (?! \1) (.+?) (?<=\S)(?<!\s(?=\1).) \1 }sx', 1037 1103 array(&$this, '_doItalicAndBold_em_callback'), $text); 1038 1104 … … 1042 1108 $text = $matches[2]; 1043 1109 $text = $this->runSpanGamut($text); 1044 return $this->hash Span("<em>$text</em>");1110 return $this->hashPart("<em>$text</em>"); 1045 1111 } 1046 1112 function _doItalicAndBold_strong_callback($matches) { 1047 1113 $text = $matches[2]; 1048 1114 $text = $this->runSpanGamut($text); 1049 return $this->hash Span("<strong>$text</strong>");1115 return $this->hashPart("<strong>$text</strong>"); 1050 1116 } 1051 1117 … … 1054 1120 $text = preg_replace_callback('/ 1055 1121 ( # Wrap whole match in $1 1056 ( 1057 ^[ \t]*>[ \t]? # ">" at the start of a line1122 (?> 1123 ^[ ]*>[ ]? # ">" at the start of a line 1058 1124 .+\n # rest of the first line 1059 1125 (.+\n)* # subsequent consecutive lines … … 1069 1135 $bq = $matches[1]; 1070 1136 # trim one level of quoting - trim whitespace-only lines 1071 $bq = preg_replace( array('/^[ \t]*>[ \t]?/m', '/^[ \t]+$/m'), '', $bq);1137 $bq = preg_replace('/^[ ]*>[ ]?|^[ ]+$/m', '', $bq); 1072 1138 $bq = $this->runBlockGamut($bq); # recurse 1073 1139 … … 1078 1144 array(&$this, '_DoBlockQuotes_callback2'), $bq); 1079 1145 1080 return $this->hashBlock("<blockquote>\n$bq\n</blockquote>")."\n\n";1146 return "\n". $this->hashBlock("<blockquote>\n$bq\n</blockquote>")."\n\n"; 1081 1147 } 1082 1148 function _doBlockQuotes_callback2($matches) { … … 1093 1159 # 1094 1160 # Strip leading and trailing lines: 1095 $text = preg_replace( array('/\A\n+/', '/\n+\z/'), '', $text);1161 $text = preg_replace('/\A\n+|\n+\z/', '', $text); 1096 1162 1097 1163 $grafs = preg_split('/\n{2,}/', $text, -1, PREG_SPLIT_NO_EMPTY); 1098 1164 1099 1165 # 1100 # Wrap <p> tags .1166 # Wrap <p> tags and unhashify HTML blocks 1101 1167 # 1102 1168 foreach ($grafs as $key => $value) { 1103 if (!isset( $this->html_blocks[$value] )) { 1169 if (!preg_match('/^B\x1A[0-9]+B$/', $value)) { 1170 # Is a paragraph. 1104 1171 $value = $this->runSpanGamut($value); 1105 $value = preg_replace('/^([ \t]*)/', "<p>", $value);1172 $value = preg_replace('/^([ ]*)/', "<p>", $value); 1106 1173 $value .= "</p>"; 1107 1174 $grafs[$key] = $this->unhash($value); 1108 1175 } 1109 } 1110 1111 # 1112 # Unhashify HTML blocks 1113 # 1114 foreach ($grafs as $key => $graf) { 1115 # Modify elements of @grafs in-place... 1116 if (isset($this->html_blocks[$graf])) { 1117 $block = $this->html_blocks[$graf]; 1176 else { 1177 # Is a block. 1178 # Modify elements of @grafs in-place... 1179 $graf = $value; 1180 $block = $this->html_hashes[$graf]; 1118 1181 $graf = $block; 1119 1182 // if (preg_match('{ … … 1162 1225 function encodeAmpsAndAngles($text) { 1163 1226 # Smart processing for ampersands and angle brackets that need to be encoded. 1227 if ($this->no_entities) { 1228 $text = str_replace('&', '&', $text); 1229 $text = str_replace('<', '<', $text); 1230 return $text; 1231 } 1164 1232 1165 1233 # Ampersand-encoding based entirely on Nat Irons's Amputator MT plugin: … … 1169 1237 1170 1238 # Encode naked <'s 1171 $text = preg_replace('{<(?![a-z/?\$! ])}i', '<', $text);1239 $text = preg_replace('{<(?![a-z/?\$!%])}i', '<', $text); 1172 1240 1173 1241 return $text; … … 1175 1243 1176 1244 1177 function encodeBackslashEscapes($text) {1178 #1179 # Parameter: String.1180 # Returns: The string, with after processing the following backslash1181 # escape sequences.1182 #1183 # Must process escaped backslashes first.1184 return str_replace(array_keys($this->backslash_escape_table),1185 array_values($this->backslash_escape_table), $text);1186 }1187 1188 1189 1245 function doAutoLinks($text) { 1190 $text = preg_replace ('{<((https?|ftp|dict):[^\'">\s]+)>}',1191 '<a href="\1">\1</a>', $text);1246 $text = preg_replace_callback('{<((https?|ftp|dict):[^\'">\s]+)>}', 1247 array(&$this, '_doAutoLinks_url_callback'), $text); 1192 1248 1193 1249 # Email addresses: <address@domain.foo> … … 1202 1258 > 1203 1259 }xi', 1204 array(&$this, '_doAutoLinks_ callback'), $text);1260 array(&$this, '_doAutoLinks_email_callback'), $text); 1205 1261 1206 1262 return $text; 1207 1263 } 1208 function _doAutoLinks_callback($matches) { 1264 function _doAutoLinks_url_callback($matches) { 1265 $url = $this->encodeAmpsAndAngles($matches[1]); 1266 $link = "<a href=\"$url\">$url</a>"; 1267 return $this->hashPart($link); 1268 } 1269 function _doAutoLinks_email_callback($matches) { 1209 1270 $address = $matches[1]; 1210 $address = $this->unescapeSpecialChars($address); 1211 $address = $this->encodeEmailAddress($address); 1212 return $this->hashSpan($address); 1271 $link = $this->encodeEmailAddress($address); 1272 return $this->hashPart($link); 1213 1273 } 1214 1274 … … 1255 1315 1256 1316 1257 function unescapeSpecialChars($text) { 1258 # 1259 # Swap back in all the special characters we've hidden. 1260 # 1261 return str_replace(array_values($this->escape_table), 1262 array_keys($this->escape_table), $text); 1263 } 1264 1265 1266 function tokenizeHTML($str) { 1267 # 1268 # Parameter: String containing HTML + Markdown markup. 1269 # Returns: An array of the tokens comprising the input 1270 # string. Each token is either a tag or a run of text 1271 # between tags. Each element of the array is a 1272 # two-element array; the first is either 'tag' or 'text'; 1273 # the second is the actual value. 1274 # Note: Markdown code spans are taken into account: no tag token is 1275 # generated within a code span. 1276 # 1277 $tokens = array(); 1278 1279 while ($str != "") { 1280 # 1281 # Each loop iteration seach for either the next tag or the next 1282 # openning code span marker. If a code span marker is found, the 1283 # code span is extracted in entierty and will result in an extra 1284 # text token. 1285 # 1286 $parts = preg_split('{ 1317 function parseSpan($str) { 1318 # 1319 # Take the string $str and parse it into tokens, hashing embeded HTML, 1320 # escaped characters and handling code spans. 1321 # 1322 $output = ''; 1323 1324 $regex = '{ 1287 1325 ( 1326 \\\\['.preg_quote($this->escape_chars).'] 1327 | 1288 1328 (?<![`\\\\]) 1289 1329 `+ # code span marker 1330 '.( $this->no_markup ? '' : ' 1290 1331 | 1291 1332 <!-- .*? --> # comment … … 1294 1335 | 1295 1336 <[/!$]?[-a-zA-Z0-9:]+ # regular tags 1296 (? :1337 (?> 1297 1338 \s 1298 1339 (?>[^"\'>]+|"[^"]*"|\'[^\']*\')* 1299 1340 )? 1300 1341 > 1342 ').' 1301 1343 ) 1302 }xs', $str, 2, PREG_SPLIT_DELIM_CAPTURE); 1344 }xs'; 1345 1346 while (1) { 1347 # 1348 # Each loop iteration seach for either the next tag, the next 1349 # openning code span marker, or the next escaped character. 1350 # Each token is then passed to handleSpanToken. 1351 # 1352 $parts = preg_split($regex, $str, 2, PREG_SPLIT_DELIM_CAPTURE); 1303 1353 1304 1354 # Create token from text preceding tag. 1305 1355 if ($parts[0] != "") { 1306 $ tokens[] = array('text', $parts[0]);1356 $output .= $parts[0]; 1307 1357 } 1308 1358 1309 1359 # Check if we reach the end. 1310 if (count($parts) < 3) { 1360 if (isset($parts[1])) { 1361 $output .= $this->handleSpanToken($parts[1], $parts[2]); 1362 $str = $parts[2]; 1363 } 1364 else { 1311 1365 break; 1312 1366 } 1313 1314 # Create token from tag or code span. 1315 if ($parts[1]{0} == "`") { 1316 $tokens[] = array('text', $parts[1]); 1317 $str = $parts[2]; 1318 1319 # Skip the whole code span, pass as text token. 1320 if (preg_match('/^(.*(?<!`\\\\)'.$parts[1].'(?!`))(.*)$/', 1367 } 1368 1369 return $output; 1370 } 1371 1372 1373 function handleSpanToken($token, &$str) { 1374 # 1375 # Handle $token provided by parseSpan by determining its nature and 1376 # returning the corresponding value that should replace it. 1377 # 1378 switch ($token{0}) { 1379 case "\\": 1380 return $this->hashPart("&#". ord($token{1}). ";"); 1381 case "`": 1382 # Search for end marker in remaining text. 1383 if (preg_match('/^(.*?[^`])'.$token.'(?!`)(.*)$/sm', 1321 1384 $str, $matches)) 1322 1385 { 1323 $tokens[] = array('text', $matches[1]);1324 1386 $str = $matches[2]; 1387 $codespan = $this->makeCodeSpan($matches[1]); 1388 return $this->hashPart($codespan); 1325 1389 } 1326 } else { 1327 $tokens[] = array('tag', $parts[1]); 1328 $str = $parts[2]; 1329 } 1330 } 1331 1332 return $tokens; 1390 return $token; // return as text since no ending marker found. 1391 default: 1392 return $this->hashPart($token); 1393 } 1333 1394 } 1334 1395 … … 1338 1399 # Remove one level of line-leading tabs or spaces 1339 1400 # 1340 return preg_replace( "/^(\\t|[ ]{1,$this->tab_width})/m", "", $text);1401 return preg_replace('/^(\t|[ ]{1,'.$this->tab_width.'})/m', '', $text); 1341 1402 } 1342 1403 … … 1354 1415 # appropriate number of space between each blocks. 1355 1416 1356 $strlen = $this->utf8_strlen; # best strlen function for UTF-8. 1357 $lines = explode("\n", $text); 1358 $text = ""; 1359 1360 foreach ($lines as $line) { 1361 # Split in blocks. 1362 $blocks = explode("\t", $line); 1363 # Add each blocks to the line. 1364 $line = $blocks[0]; 1365 unset($blocks[0]); # Do not add first block twice. 1366 foreach ($blocks as $block) { 1367 # Calculate amount of space, insert spaces, insert block. 1368 $amount = $this->tab_width - 1369 $strlen($line, 'UTF-8') % $this->tab_width; 1370 $line .= str_repeat(" ", $amount) . $block; 1371 } 1372 $text .= "$line\n"; 1373 } 1417 $text = preg_replace_callback('/^.*\t.*$/m', 1418 array(&$this, '_detab_callback'), $text); 1419 1374 1420 return $text; 1375 1421 } 1422 function _detab_callback($matches) { 1423 $line = $matches[0]; 1424 $strlen = $this->utf8_strlen; # strlen function for UTF-8. 1425 1426 # Split in blocks. 1427 $blocks = explode("\t", $line); 1428 # Add each blocks to the line. 1429 $line = $blocks[0]; 1430 unset($blocks[0]); # Do not add first block twice. 1431 foreach ($blocks as $block) { 1432 # Calculate amount of space, insert spaces, insert block. 1433 $amount = $this->tab_width - 1434 $strlen($line, 'UTF-8') % $this->tab_width; 1435 $line .= str_repeat(" ", $amount) . $block; 1436 } 1437 return $line; 1438 } 1376 1439 function _initDetab() { 1377 1440 # 1378 1441 # Check for the availability of the function in the `utf8_strlen` property 1379 # ( probably `mb_strlen`). If the function is not available, create a1442 # (initially `mb_strlen`). If the function is not available, create a 1380 1443 # function that will loosely count the number of UTF-8 characters with a 1381 1444 # regular expression. 1382 1445 # 1383 1446 if (function_exists($this->utf8_strlen)) return; 1384 $this->utf8_strlen = 'Markdown_UTF8_strlen'; 1385 1386 if (function_exists($this->utf8_strlen)) return; 1387 function Markdown_UTF8_strlen($text) { 1388 return preg_match_all('/[\x00-\xBF]|[\xC0-\xFF][\x80-\xBF]*/', 1389 $text, $m); 1390 } 1447 $this->utf8_strlen = create_function('$text', 'return preg_match_all( 1448 "/[\\\\x00-\\\\xBF]|[\\\\xC0-\\\\xFF][\\\\x80-\\\\xBF]*/", 1449 $text, $m);'); 1391 1450 } 1392 1451 … … 1396 1455 # Swap back in all the tags hashed by _HashHTMLBlocks. 1397 1456 # 1398 return str_replace(array_keys($this->html_hashes), 1399 array_values($this->html_hashes), $text); 1457 return preg_replace_callback('/(.)\x1A[0-9]+\1/', 1458 array(&$this, '_unhash_callback'), $text); 1459 } 1460 function _unhash_callback($matches) { 1461 return $this->html_hashes[$matches[0]]; 1400 1462 } 1401 1463 … … 1403 1465 1404 1466 1467 # 1468 # Markdown Extra Parser Class 1469 # 1470 1471 class MarkdownExtra_Parser extends Markdown_Parser { 1472 1473 # Prefix for footnote ids. 1474 var $fn_id_prefix = ""; 1475 1476 # Optional title attribute for footnote links and backlinks. 1477 var $fn_link_title = MARKDOWN_FN_LINK_TITLE; 1478 var $fn_backlink_title = MARKDOWN_FN_BACKLINK_TITLE; 1479 1480 # Optional class attribute for footnote links and backlinks. 1481 var $fn_link_class = MARKDOWN_FN_LINK_CLASS; 1482 var $fn_backlink_class = MARKDOWN_FN_BACKLINK_CLASS; 1483 1484 1485 function MarkdownExtra_Parser() { 1486 # 1487 # Constructor function. Initialize the parser object. 1488 # 1489 # Add extra escapable characters before parent constructor 1490 # initialize the table. 1491 $this->escape_chars .= ':|'; 1492 1493 # Insert extra document, block, and span transformations. 1494 # Parent constructor will do the sorting. 1495 $this->document_gamut += array( 1496 "stripFootnotes" => 15, 1497 "stripAbbreviations" => 25, 1498 "appendFootnotes" => 50, 1499 ); 1500 $this->block_gamut += array( 1501 "doTables" => 15, 1502 "doDefLists" => 45, 1503 ); 1504 $this->span_gamut += array( 1505 "doFootnotes" => 5, 1506 "doAbbreviations" => 70, 1507 ); 1508 1509 parent::Markdown_Parser(); 1510 } 1511 1512 1513 # Extra hashes used during extra transformations. 1514 var $footnotes = array(); 1515 var $footnotes_ordered = array(); 1516 var $abbr_desciptions = array(); 1517 var $abbr_matches = array(); 1518 1519 # Status flag to avoid invalid nesting. 1520 var $in_footnote = false; 1521 1522 1523 function transform($text) { 1524 # 1525 # Added clear to the new $html_hashes, reordered `hashHTMLBlocks` before 1526 # blank line stripping and added extra parameter to `runBlockGamut`. 1527 # 1528 # Clear the global hashes. If we don't clear these, you get conflicts 1529 # from other articles when generating a page which contains more than 1530 # one article (e.g. an index page that shows the N most recent 1531 # articles): 1532 $this->footnotes = array(); 1533 $this->footnotes_ordered = array(); 1534 $this->abbr_desciptions = array(); 1535 $this->abbr_matches = array(); 1536 1537 return parent::transform($text); 1538 } 1539 1540 1541 ### HTML Block Parser ### 1542 1543 # Tags that are always treated as block tags: 1544 var $block_tags = 'p|div|h[1-6]|blockquote|pre|table|dl|ol|ul|address|form|fieldset|iframe|hr|legend'; 1545 1546 # Tags treated as block tags only if the opening tag is alone on it's line: 1547 var $context_block_tags = 'script|noscript|math|ins|del'; 1548 1549 # Tags where markdown="1" default to span mode: 1550 var $contain_span_tags = 'p|h[1-6]|li|dd|dt|td|th|legend|address'; 1551 1552 # Tags which must not have their contents modified, no matter where 1553 # they appear: 1554 var $clean_tags = 'script|math'; 1555 1556 # Tags that do not need to be closed. 1557 var $auto_close_tags = 'hr|img'; 1558 1559 1560 function hashHTMLBlocks($text) { 1561 # 1562 # Hashify HTML Blocks and "clean tags". 1563 # 1564 # We only want to do this for block-level HTML tags, such as headers, 1565 # lists, and tables. That's because we still want to wrap <p>s around 1566 # "paragraphs" that are wrapped in non-block-level tags, such as anchors, 1567 # phrase emphasis, and spans. The list of tags we're looking for is 1568 # hard-coded. 1569 # 1570 # This works by calling _HashHTMLBlocks_InMarkdown, which then calls 1571 # _HashHTMLBlocks_InHTML when it encounter block tags. When the markdown="1" 1572 # attribute is found whitin a tag, _HashHTMLBlocks_InHTML calls back 1573 # _HashHTMLBlocks_InMarkdown to handle the Markdown syntax within the tag. 1574 # These two functions are calling each other. It's recursive! 1575 # 1576 # 1577 # Call the HTML-in-Markdown hasher. 1578 # 1579 list($text, ) = $this->_hashHTMLBlocks_inMarkdown($text); 1580 1581 return $text; 1582 } 1583 function _hashHTMLBlocks_inMarkdown($text, $indent = 0, 1584 $enclosing_tag = '', $span = false) 1585 { 1586 # 1587 # Parse markdown text, calling _HashHTMLBlocks_InHTML for block tags. 1588 # 1589 # * $indent is the number of space to be ignored when checking for code 1590 # blocks. This is important because if we don't take the indent into 1591 # account, something like this (which looks right) won't work as expected: 1592 # 1593 # <div> 1594 # <div markdown="1"> 1595 # Hello World. <-- Is this a Markdown code block or text? 1596 # </div> <-- Is this a Markdown code block or a real tag? 1597 # <div> 1598 # 1599 # If you don't like this, just don't indent the tag on which 1600 # you apply the markdown="1" attribute. 1601 # 1602 # * If $enclosing_tag is not empty, stops at the first unmatched closing 1603 # tag with that name. Nested tags supported. 1604 # 1605 # * If $span is true, text inside must treated as span. So any double 1606 # newline will be replaced by a single newline so that it does not create 1607 # paragraphs. 1608 # 1609 # Returns an array of that form: ( processed text , remaining text ) 1610 # 1611 if ($text === '') return array('', ''); 1612 1613 # Regex to check for the presense of newlines around a block tag. 1614 $newline_match_before = '/(?:^\n?|\n\n)*$/'; 1615 $newline_match_after = 1616 '{ 1617 ^ # Start of text following the tag. 1618 (?:[ ]*<!--.*?-->)? # Optional comment. 1619 [ ]*\n # Must be followed by newline. 1620 }xs'; 1621 1622 # Regex to match any tag. 1623 $block_tag_match = 1624 '{ 1625 ( # $2: Capture hole tag. 1626 </? # Any opening or closing tag. 1627 (?: # Tag name. 1628 '.$this->block_tags.' | 1629 '.$this->context_block_tags.' | 1630 '.$this->clean_tags.' | 1631 (?!\s)'.$enclosing_tag.' 1632 ) 1633 \s* # Whitespace. 1634 (?> 1635 ".*?" | # Double quotes (can contain `>`) 1636 \'.*?\' | # Single quotes (can contain `>`) 1637 .+? # Anything but quotes and `>`. 1638 )*? 1639 > # End of tag. 1640 | 1641 <!-- .*? --> # HTML Comment 1642 | 1643 <\?.*?\?> | <%.*?%> # Processing instruction 1644 | 1645 <!\[CDATA\[.*?\]\]> # CData Block 1646 ) 1647 }xs'; 1648 1649 1650 $depth = 0; # Current depth inside the tag tree. 1651 $parsed = ""; # Parsed text that will be returned. 1652 1653 # 1654 # Loop through every tag until we find the closing tag of the parent 1655 # or loop until reaching the end of text if no parent tag specified. 1656 # 1657 do { 1658 # 1659 # Split the text using the first $tag_match pattern found. 1660 # Text before pattern will be first in the array, text after 1661 # pattern will be at the end, and between will be any catches made 1662 # by the pattern. 1663 # 1664 $parts = preg_split($block_tag_match, $text, 2, 1665 PREG_SPLIT_DELIM_CAPTURE); 1666 1667 # If in Markdown span mode, add a empty-string span-level hash 1668 # after each newline to prevent triggering any block element. 1669 if ($span) { 1670 $void = $this->hashPart("", ':'); 1671 $newline = "$void\n"; 1672 $parts[0] = $void . str_replace("\n", $newline, $parts[0]) . $void; 1673 } 1674 1675 $parsed .= $parts[0]; # Text before current tag. 1676 1677 # If end of $text has been reached. Stop loop. 1678 if (count($parts) < 3) { 1679 $text = ""; 1680 break; 1681 } 1682 1683 $tag = $parts[1]; # Tag to handle. 1684 $text = $parts[2]; # Remaining text after current tag. 1685 1686 # 1687 # Check for: Tag inside code block or span 1688 # 1689 if (# Find current paragraph 1690 preg_match('/(?>^\n?|\n\n)((?>.+\n?)*?)$/', $parsed, $matches) && 1691 ( 1692 # Then match in it either a code block... 1693 preg_match('/^ {'.($indent+4).'}.*(?>\n {'.($indent+4).'}.*)*'. 1694 '(?!\n)$/', $matches[1], $x) || 1695 # ...or unbalenced code span markers. (the regex matches balenced) 1696 !preg_match('/^(?>[^`]+|(`+)(?>[^`]+|(?!\1[^`])`)*?\1(?!`))*$/s', 1697 $matches[1]) 1698 )) 1699 { 1700 # Tag is in code block or span and may not be a tag at all. So we 1701 # simply skip the first char (should be a `<`). 1702 $parsed .= $tag{0}; 1703 $text = substr($tag, 1) . $text; # Put back $tag minus first char. 1704 } 1705 # 1706 # Check for: Opening Block level tag or 1707 # Opening Content Block tag (like ins and del) 1708 # used as a block tag (tag is alone on it's line). 1709 # 1710 else if (preg_match("{^<(?:$this->block_tags)\b}", $tag) || 1711 ( preg_match("{^<(?:$this->context_block_tags)\b}", $tag) && 1712 preg_match($newline_match_before, $parsed) && 1713 preg_match($newline_match_after, $text) ) 1714 ) 1715 { 1716 # Need to parse tag and following text using the HTML parser. 1717 list($block_text, $text) = 1718 $this->_hashHTMLBlocks_inHTML($tag . $text, "hashBlock", true); 1719 1720 # Make sure it stays outside of any paragraph by adding newlines. 1721 $parsed .= "\n\n$block_text\n\n"; 1722 } 1723 # 1724 # Check for: Clean tag (like script, math) 1725 # HTML Comments, processing instructions. 1726 # 1727 else if (preg_match("{^<(?:$this->clean_tags)\b}", $tag) || 1728 $tag{1} == '!' || $tag{1} == '?') 1729 { 1730 # Need to parse tag and following text using the HTML parser. 1731 # (don't check for markdown attribute) 1732 list($block_text, $text) = 1733 $this->_hashHTMLBlocks_inHTML($tag . $text, "hashClean", false); 1734 1735 $parsed .= $block_text; 1736 } 1737 # 1738 # Check for: Tag with same name as enclosing tag. 1739 # 1740 else if ($enclosing_tag !== '' && 1741 # Same name as enclosing tag. 1742 preg_match("{^</?(?:$enclosing_tag)\b}", $tag)) 1743 { 1744 # 1745 # Increase/decrease nested tag count. 1746 # 1747 if ($tag{1} == '/') $depth--; 1748 else if ($tag{strlen($tag)-2} != '/') $depth++; 1749 1750 if ($depth < 0) { 1751 # 1752 # Going out of parent element. Clean up and break so we 1753 # return to the calling function. 1754 # 1755 $text = $tag . $text; 1756 break; 1757 } 1758 1759 $parsed .= $tag; 1760 } 1761 else { 1762 $parsed .= $tag; 1763 } 1764 } while ($depth >= 0); 1765 1766 return array($parsed, $text); 1767 } 1768 function _hashHTMLBlocks_inHTML($text, $hash_method, $md_attr) { 1769 # 1770 # Parse HTML, calling _HashHTMLBlocks_InMarkdown for block tags. 1771 # 1772 # * Calls $hash_method to convert any blocks. 1773 # * Stops when the first opening tag closes. 1774 # * $md_attr indicate if the use of the `markdown="1"` attribute is allowed. 1775 # (it is not inside clean tags) 1776 # 1777 # Returns an array of that form: ( processed text , remaining text ) 1778 # 1779 if ($text === '') return array('', ''); 1780 1781 # Regex to match `markdown` attribute inside of a tag. 1782 $markdown_attr_match = ' 1783 { 1784 \s* # Eat whitespace before the `markdown` attribute 1785 markdown 1786 \s*=\s* 1787 (?: 1788 (["\']) # $1: quote delimiter 1789 (.*?) # $2: attribute value 1790 \1 # matching delimiter 1791 | 1792 ([^\s>]*) # $3: unquoted attribute value 1793 ) 1794 () # $4: make $3 always defined (avoid warnings) 1795 }xs'; 1796 1797 # Regex to match any tag. 1798 $tag_match = '{ 1799 ( # $2: Capture hole tag. 1800 </? # Any opening or closing tag. 1801 [\w:$]+ # Tag name. 1802 \s* # Whitespace. 1803 (?> 1804 ".*?" | # Double quotes (can contain `>`) 1805 \'.*?\' | # Single quotes (can contain `>`) 1806 .+? # Anything but quotes and `>`. 1807 )*? 1808 > # End of tag. 1809 | 1810 <!-- .*? --> # HTML Comment 1811 | 1812 <\?.*?\?> | <%.*?%> # Processing instruction 1813 | 1814 <!\[CDATA\[.*?\]\]> # CData Block 1815 ) 1816 }xs'; 1817 1818 $original_text = $text; # Save original text in case of faliure. 1819 1820 $depth = 0; # Current depth inside the tag tree. 1821 $block_text = ""; # Temporary text holder for current text. 1822 $parsed = ""; # Parsed text that will be returned. 1823 1824 # 1825 # Get the name of the starting tag. 1826 # 1827 if (preg_match("/^<([\w:$]*)\b/", $text, $matches)) 1828 $base_tag_name = $matches[1]; 1829 1830 # 1831 # Loop through every tag until we find the corresponding closing tag. 1832 # 1833 do { 1834 # 1835 # Split the text using the first $tag_match pattern found. 1836 # Text before pattern will be first in the array, text after 1837 # pattern will be at the end, and between will be any catches made 1838 # by the pattern. 1839 # 1840 $parts = preg_split($tag_match, $text, 2, PREG_SPLIT_DELIM_CAPTURE); 1841 1842 if (count($parts) < 3) { 1843 # 1844 # End of $text reached with unbalenced tag(s). 1845 # In that case, we return original text unchanged and pass the 1846 # first character as filtered to prevent an infinite loop in the 1847 # parent function. 1848 # 1849 return array($original_text{0}, substr($original_text, 1)); 1850 } 1851 1852 $block_text .= $parts[0]; # Text before current tag. 1853 $tag = $parts[1]; # Tag to handle. 1854 $text = $parts[2]; # Remaining text after current tag. 1855 1856 # 1857 # Check for: Auto-close tag (like <hr/>) 1858 # Comments and Processing Instructions. 1859 # 1860 if (preg_match("{^</?(?:$this->auto_close_tags)\b}", $tag) || 1861 $tag{1} == '!' || $tag{1} == '?') 1862 { 1863 # Just add the tag to the block as if it was text. 1864 $block_text .= $tag; 1865 } 1866 else { 1867 # 1868 # Increase/decrease nested tag count. Only do so if 1869 # the tag's name match base tag's. 1870 # 1871 if (preg_match("{^</?$base_tag_name\b}", $tag)) { 1872 if ($tag{1} == '/') $depth--; 1873 else if ($tag{strlen($tag)-2} != '/') $depth++; 1874 } 1875 1876 # 1877 # Check for `markdown="1"` attribute and handle it. 1878 # 1879 if ($md_attr && 1880 preg_match($markdown_attr_match, $tag, $attr_m) && 1881 preg_match('/^1|block|span$/', $attr_m[2] . $attr_m[3])) 1882 { 1883 # Remove `markdown` attribute from opening tag. 1884 $tag = preg_replace($markdown_attr_match, '', $tag); 1885 1886 # Check if text inside this tag must be parsed in span mode. 1887 $this->mode = $attr_m[2] . $attr_m[3]; 1888 $span_mode = $this->mode == 'span' || $this->mode != 'block' && 1889 preg_match("{^<(?:$this->contain_span_tags)\b}", $tag); 1890 1891 # Calculate indent before tag. 1892 preg_match('/(?:^|\n)( *?)(?! ).*?$/', $block_text, $matches); 1893 $indent = strlen($matches[1]); 1894 1895 # End preceding block with this tag. 1896 $block_text .= $tag; 1897 $parsed .= $this->$hash_method($block_text); 1898 1899 # Get enclosing tag name for the ParseMarkdown function. 1900 preg_match('/^<([\w:$]*)\b/', $tag, $matches); 1901 $tag_name = $matches[1]; 1902 1903 # Parse the content using the HTML-in-Markdown parser. 1904 list ($block_text, $text) 1905 = $this->_hashHTMLBlocks_inMarkdown($text, $indent, 1906 $tag_name, $span_mode); 1907 1908 # Outdent markdown text. 1909 if ($indent > 0) { 1910 $block_text = preg_replace("/^[ ]{1,$indent}/m", "", 1911 $block_text); 1912 } 1913 1914 # Append tag content to parsed text. 1915 if (!$span_mode) $parsed .= "\n\n$block_text\n\n"; 1916 else $parsed .= "$block_text"; 1917 1918 # Start over a new block. 1919 $block_text = ""; 1920 } 1921 else $block_text .= $tag; 1922 } 1923 1924 } while ($depth > 0); 1925 1926 # 1927 # Hash last block text that wasn't processed inside the loop. 1928 # 1929 $parsed .= $this->$hash_method($block_text); 1930 1931 return array($parsed, $text); 1932 } 1933 1934 1935 function hashClean($text) { 1936 # 1937 # Called whenever a tag must be hashed when a function insert a "clean" tag 1938 # in $text, it pass through this function and is automaticaly escaped, 1939 # blocking invalid nested overlap. 1940 # 1941 return $this->hashPart($text, 'C'); 1942 } 1943 1944 1945 function doHeaders($text) { 1946 # 1947 # Redefined to add id attribute support. 1948 # 1949 # Setext-style headers: 1950 # Header 1 {#header1} 1951 # ======== 1952 # 1953 # Header 2 {#header2} 1954 # -------- 1955 # 1956 $text = preg_replace_callback( 1957 '{ 1958 (^.+?) # $1: Header text 1959 (?:[ ]+\{\#([-_:a-zA-Z0-9]+)\})? # $2: Id attribute 1960 [ ]*\n(=+|-+)[ ]*\n+ # $3: Header footer 1961 }mx', 1962 array(&$this, '_doHeaders_callback_setext'), $text); 1963 1964 # atx-style headers: 1965 # # Header 1 {#header1} 1966 # ## Header 2 {#header2} 1967 # ## Header 2 with closing hashes ## {#header3} 1968 # ... 1969 # ###### Header 6 {#header2} 1970 # 1971 $text = preg_replace_callback('{ 1972 ^(\#{1,6}) # $1 = string of #\'s 1973 [ ]* 1974 (.+?) # $2 = Header text 1975 [ ]* 1976 \#* # optional closing #\'s (not counted) 1977 (?:[ ]+\{\#([-_:a-zA-Z0-9]+)\})? # id attribute 1978 [ ]* 1979 \n+ 1980 }xm', 1981 array(&$this, '_doHeaders_callback_atx'), $text); 1982 1983 return $text; 1984 } 1985 function _doHeaders_attr($attr) { 1986 if (empty($attr)) return ""; 1987 return " id=\"$attr\""; 1988 } 1989 function _doHeaders_callback_setext($matches) { 1990 $level = $matches[3]{0} == '=' ? 1 : 2; 1991 $attr = $this->_doHeaders_attr($id =& $matches[2]); 1992 $block = "<h$level$attr>".$this->runSpanGamut($matches[1])."</h$level>"; 1993 return "\n" . $this->hashBlock($block) . "\n\n"; 1994 } 1995 function _doHeaders_callback_atx($matches) { 1996 $level = strlen($matches[1]); 1997 $attr = $this->_doHeaders_attr($id =& $matches[3]); 1998 $block = "<h$level$attr>".$this->runSpanGamut($matches[2])."</h$level>"; 1999 return "\n" . $this->hashBlock($block) . "\n\n"; 2000 } 2001 2002 2003 function doTables($text) { 2004 # 2005 # Form HTML tables. 2006 # 2007 $less_than_tab = $this->tab_width - 1; 2008 # 2009 # Find tables with leading pipe. 2010 # 2011 # | Header 1 | Header 2 2012 # | -------- | -------- 2013 # | Cell 1 | Cell 2 2014 # | Cell 3 | Cell 4 2015 # 2016 $text = preg_replace_callback(' 2017 { 2018 ^ # Start of a line 2019 [ ]{0,'.$less_than_tab.'} # Allowed whitespace. 2020 [|] # Optional leading pipe (present) 2021 (.+) \n # $1: Header row (at least one pipe) 2022 2023 [ ]{0,'.$less_than_tab.'} # Allowed whitespace. 2024 [|] ([ ]*[-:]+[-| :]*) \n # $2: Header underline 2025 2026 ( # $3: Cells 2027 (?> 2028 [ ]* # Allowed whitespace. 2029 [|] .* \n # Row content. 2030 )* 2031 ) 2032 (?=\n|\Z) # Stop at final double newline. 2033 }xm', 2034 array(&$this, '_doTable_leadingPipe_callback'), $text); 2035 2036 # 2037 # Find tables without leading pipe. 2038 # 2039 # Header 1 | Header 2 2040 # -------- | -------- 2041 # Cell 1 | Cell 2 2042 # Cell 3 | Cell 4 2043 # 2044 $text = preg_replace_callback(' 2045 { 2046 ^ # Start of a line 2047 [ ]{0,'.$less_than_tab.'} # Allowed whitespace. 2048 (\S.*[|].*) \n # $1: Header row (at least one pipe) 2049 2050 [ ]{0,'.$less_than_tab.'} # Allowed whitespace. 2051 ([-:]+[ ]*[|][-| :]*) \n # $2: Header underline 2052 2053 ( # $3: Cells 2054 (?> 2055 .* [|] .* \n # Row content 2056 )* 2057 ) 2058 (?=\n|\Z) # Stop at final double newline. 2059 }xm', 2060 array(&$this, '_DoTable_callback'), $text); 2061 2062 return $text; 2063 } 2064 function _doTable_leadingPipe_callback($matches) { 2065 $head = $matches[1]; 2066 $underline = $matches[2]; 2067 $content = $matches[3]; 2068 2069 # Remove leading pipe for each row. 2070 $content = preg_replace('/^ *[|]/m', '', $content); 2071 2072 return $this->_doTable_callback(array($matches[0], $head, $underline, $content)); 2073 } 2074 function _doTable_callback($matches) { 2075 $head = $matches[1]; 2076 $underline = $matches[2]; 2077 $content = $matches[3]; 2078 2079 # Remove any tailing pipes for each line. 2080 $head = preg_replace('/[|] *$/m', '', $head); 2081 $underline = preg_replace('/[|] *$/m', '', $underline); 2082 $content = preg_replace('/[|] *$/m', '', $content); 2083 2084 # Reading alignement from header underline. 2085 $separators = preg_split('/ *[|] */', $underline); 2086 foreach ($separators as $n => $s) { 2087 if (preg_match('/^ *-+: *$/', $s)) $attr[$n] = ' align="right"'; 2088 else if (preg_match('/^ *:-+: *$/', $s))$attr[$n] = ' align="center"'; 2089 else if (preg_match('/^ *:-+ *$/', $s)) $attr[$n] = ' align="left"'; 2090 else $attr[$n] = ''; 2091 } 2092 2093 # Parsing span elements, including code spans, character escapes, 2094 # and inline HTML tags, so that pipes inside those gets ignored. 2095 $head = $this->parseSpan($head); 2096 $headers = preg_split('/ *[|] */', $head); 2097 $col_count = count($headers); 2098 2099 # Write column headers. 2100 $text = "<table>\n"; 2101 $text .= "<thead>\n"; 2102 $text .= "<tr>\n"; 2103 foreach ($headers as $n => $header) 2104 $text .= " <th$attr[$n]>".$this->runSpanGamut(trim($header))."</th>\n"; 2105 $text .= "</tr>\n"; 2106 $text .= "</thead>\n"; 2107 2108 # Split content by row. 2109 $rows = explode("\n", trim($content, "\n")); 2110 2111 $text .= "<tbody>\n"; 2112 foreach ($rows as $row) { 2113 # Parsing span elements, including code spans, character escapes, 2114 # and inline HTML tags, so that pipes inside those gets ignored. 2115 $row = $this->parseSpan($row); 2116 2117 # Split row by cell. 2118 $row_cells = preg_split('/ *[|] */', $row, $col_count); 2119 $row_cells = array_pad($row_cells, $col_count, ''); 2120 2121 $text .= "<tr>\n"; 2122 foreach ($row_cells as $n => $cell) 2123 $text .= " <td$attr[$n]>".$this->runSpanGamut(trim($cell))."</td>\n"; 2124 $text .= "</tr>\n"; 2125 } 2126 $text .= "</tbody>\n"; 2127 $text .= "</table>"; 2128 2129 return $this->hashBlock($text) . "\n"; 2130 } 2131 2132 2133 function doDefLists($text) { 2134 # 2135 # Form HTML definition lists. 2136 # 2137 $less_than_tab = $this->tab_width - 1; 2138 2139 # Re-usable pattern to match any entire dl list: 2140 $whole_list = '(?> 2141 ( # $1 = whole list 2142 ( # $2 2143 [ ]{0,'.$less_than_tab.'} 2144 ((?>.*\S.*\n)+) # $3 = defined term 2145 \n? 2146 [ ]{0,'.$less_than_tab.'}:[ ]+ # colon starting definition 2147 ) 2148 (?s:.+?) 2149 ( # $4 2150 \z 2151 | 2152 \n{2,} 2153 (?=\S) 2154 (?! # Negative lookahead for another term 2155 [ ]{0,'.$less_than_tab.'} 2156 (?: \S.*\n )+? # defined term 2157 \n? 2158 [ ]{0,'.$less_than_tab.'}:[ ]+ # colon starting definition 2159 ) 2160 (?! # Negative lookahead for another definition 2161 [ ]{0,'.$less_than_tab.'}:[ ]+ # colon starting definition 2162 ) 2163 ) 2164 ) 2165 )'; // mx 2166 2167 $text = preg_replace_callback('{ 2168 (?:(?<=\n\n)|\A\n?) 2169 '.$whole_list.' 2170 }mx', 2171 array(&$this, '_doDefLists_callback'), $text); 2172 2173 return $text; 2174 } 2175 function _doDefLists_callback($matches) { 2176 # Re-usable patterns to match list item bullets and number markers: 2177 $list = $matches[1]; 2178 2179 # Turn double returns into triple returns, so that we can make a 2180 # paragraph for the last item in a list, if necessary: 2181 $result = trim($this->processDefListItems($list)); 2182 $result = "<dl>\n" . $result . "\n</dl>"; 2183 return $this->hashBlock($result) . "\n\n"; 2184 } 2185 2186 2187 function processDefListItems($list_str) { 2188 # 2189 # Process the contents of a single definition list, splitting it 2190 # into individual term and definition list items. 2191 # 2192 $less_than_tab = $this->tab_width - 1; 2193 2194 # trim trailing blank lines: 2195 $list_str = preg_replace("/\n{2,}\\z/", "\n", $list_str); 2196 2197 # Process definition terms. 2198 $list_str = preg_replace_callback('{ 2199 (?:\n\n+|\A\n?) # leading line 2200 ( # definition terms = $1 2201 [ ]{0,'.$less_than_tab.'} # leading whitespace 2202 (?![:][ ]|[ ]) # negative lookahead for a definition 2203 # mark (colon) or more whitespace. 2204 (?: \S.* \n)+? # actual term (not whitespace). 2205 ) 2206 (?=\n?[ ]{0,3}:[ ]) # lookahead for following line feed 2207 # with a definition mark. 2208 }xm', 2209 array(&$this, '_processDefListItems_callback_dt'), $list_str); 2210 2211 # Process actual definitions. 2212 $list_str = preg_replace_callback('{ 2213 \n(\n+)? # leading line = $1 2214 [ ]{0,'.$less_than_tab.'} # whitespace before colon 2215 [:][ ]+ # definition mark (colon) 2216 ((?s:.+?)) # definition text = $2 2217 (?= \n+ # stop at next definition mark, 2218 (?: # next term or end of text 2219 [ ]{0,'.$less_than_tab.'} [:][ ] | 2220 <dt> | \z 2221 ) 2222 ) 2223 }xm', 2224 array(&$this, '_processDefListItems_callback_dd'), $list_str); 2225 2226 return $list_str; 2227 } 2228 function _processDefListItems_callback_dt($matches) { 2229 $terms = explode("\n", trim($matches[1])); 2230 $text = ''; 2231 foreach ($terms as $term) { 2232 $term = $this->runSpanGamut(trim($term)); 2233 $text .= "\n<dt>" . $term . "</dt>"; 2234 } 2235 return $text . "\n"; 2236 } 2237 function _processDefListItems_callback_dd($matches) { 2238 $leading_line = $matches[1]; 2239 $def = $matches[2]; 2240 2241 if ($leading_line || preg_match('/\n{2,}/', $def)) { 2242 $def = $this->runBlockGamut($this->outdent($def . "\n\n")); 2243 $def = "\n". $def ."\n"; 2244 } 2245 else { 2246 $def = rtrim($def); 2247 $def = $this->runSpanGamut($this->outdent($def)); 2248 } 2249 2250 return "\n<dd>" . $def . "</dd>\n"; 2251 } 2252 2253 2254 function doItalicsAndBold($text) { 2255 # 2256 # Redefined to change emphasis by underscore behaviour so that it does not 2257 # work in the middle of a word. 2258 # 2259 # <strong> must go first: 2260 $text = preg_replace_callback(array( 2261 '{ 2262 ( # $1: Marker 2263 (?<![a-zA-Z0-9]) # Not preceded by alphanum 2264 (?<!__) # or by two marker chars. 2265 __ 2266 ) 2267 (?=\S) # Not followed by whitespace 2268 (?!__) # or two others marker chars. 2269 ( # $2: Content 2270 (?> 2271 [^_]+? # Anthing not em markers. 2272 | 2273 # Balence any regular _ emphasis inside. 2274 (?<![a-zA-Z0-9]) _ (?=\S) (.+?) 2275 (?<=\S) _ (?![a-zA-Z0-9]) 2276 | 2277 _+ # Allow unbalenced as last resort. 2278 )+? 2279 ) 2280 (?<=\S) __ # End mark not preceded by whitespace. 2281 (?![a-zA-Z0-9]) # Not followed by alphanum 2282 (?!__) # or two others marker chars. 2283 }sx', 2284 '{ 2285 ( (?<!\*\*) \*\* ) # $1: Marker (not preceded by two *) 2286 (?=\S) # Not followed by whitespace 2287 (?!\1) # or two others marker chars. 2288 ( # $2: Content 2289 (?> 2290 [^*]+? # Anthing not em markers. 2291 | 2292 # Balence any regular * emphasis inside. 2293 \* (?=\S) (.+?) (?<=\S) \* 2294 | 2295 \* # Allow unbalenced as last resort. 2296 )+? 2297 ) 2298 (?<=\S) \*\* # End mark not preceded by whitespace. 2299 }sx', 2300 ), 2301 array(&$this, '_doItalicAndBold_strong_callback'), $text); 2302 # Then <em>: 2303 $text = preg_replace_callback(array( 2304 '{ ( (?<![a-zA-Z0-9])(?<!_)_ ) (?=\S) (?! \1) (.+?) (?<=\S) \1(?![a-zA-Z0-9]) }sx', 2305 '{ ( (?<!\*)\* ) (?=\S) (?! \1) (.+?) (?<=\S)(?<!\s\*) \1 }sx', 2306 ), 2307 array(&$this, '_doItalicAndBold_em_callback'), $text); 2308 2309 return $text; 2310 } 2311 2312 2313 function formParagraphs($text) { 2314 # 2315 # Params: 2316 # $text - string to process with html <p> tags 2317 # 2318 # Strip leading and trailing lines: 2319 $text = preg_replace('/\A\n+|\n+\z/', '', $text); 2320 2321 $grafs = preg_split('/\n{2,}/', $text, -1, PREG_SPLIT_NO_EMPTY); 2322 2323 # 2324 # Wrap <p> tags and unhashify HTML blocks 2325 # 2326 foreach ($grafs as $key => $value) { 2327 $value = trim($this->runSpanGamut($value)); 2328 2329 # Check if this should be enclosed in a paragraph. 2330 # Clean tag hashes & block tag hashes are left alone. 2331 $is_p = !preg_match('/^B\x1A[0-9]+B|^C\x1A[0-9]+C$/', $value); 2332 2333 if ($is_p) { 2334 $value = "<p>$value</p>"; 2335 } 2336 $grafs[$key] = $value; 2337 } 2338 2339 # Join grafs in one text, then unhash HTML tags. 2340 $text = implode("\n\n", $grafs); 2341 2342 # Finish by removing any tag hashes still present in $text. 2343 $text = $this->unhash($text); 2344 2345 return $text; 2346 } 2347 2348 2349 ### Footnotes 2350 2351 function stripFootnotes($text) { 2352 # 2353 # Strips link definitions from text, stores the URLs and titles in 2354 # hash references. 2355 # 2356 $less_than_tab = $this->tab_width - 1; 2357 2358 # Link defs are in the form: [^id]: url "optional title" 2359 $text = preg_replace_callback('{ 2360 ^[ ]{0,'.$less_than_tab.'}\[\^(.+?)\][ ]?: # note_id = $1 2361 [ ]* 2362 \n? # maybe *one* newline 2363 ( # text = $2 (no blank lines allowed) 2364 (?: 2365 .+ # actual text 2366 | 2367 \n # newlines but 2368 (?!\[\^.+?\]:\s)# negative lookahead for footnote marker. 2369 (?!\n+[ ]{0,3}\S)# ensure line is not blank and followed 2370 # by non-indented content 2371 )* 2372 ) 2373 }xm', 2374 array(&$this, '_stripFootnotes_callback'), 2375 $text); 2376 return $text; 2377 } 2378 function _stripFootnotes_callback($matches) { 2379 $note_id = $this->fn_id_prefix . $matches[1]; 2380 $this->footnotes[$note_id] = $this->outdent($matches[2]); 2381 return ''; # String that will replace the block 2382 } 2383 2384 2385 function doFootnotes($text) { 2386 # 2387 # Replace footnote references in $text [^id] with a special text-token 2388 # which will be can be 2389 # 2390 if (!$this->in_footnote && !$this->in_anchor) { 2391 $text = preg_replace('{\[\^(.+?)\]}', "F\x1Afn:\\1\x1A:", $text); 2392 } 2393 return $text; 2394 } 2395 2396 2397 function appendFootnotes($text) { 2398 # 2399 # Append footnote list to text. 2400 # 2401 2402 $text = preg_replace_callback('{F\x1Afn:(.*?)\x1A:}', 2403
