Changeset 1179
- Timestamp:
- 2008년 03월 14일 11시 42분 13초 (4 years ago)
- File:
-
- 1 edited
-
trunk/plugins/Markdown.php (modified) (65 diffs)
Legend:
- Unmodified
- Added
- Removed
-
trunk/plugins/Markdown.php
r965 r1179 1 1 <?php 2 2 # 3 # Markdown - A text-to-HTML conversion tool for web writers3 # Markdown Extra - A text-to-HTML conversion tool for web writers 4 4 # 5 # PHP Markdown 6 # Copyright (c) 2004-200 6Michel Fortin5 # PHP Markdown & Extra 6 # Copyright (c) 2004-2007 Michel Fortin 7 7 # <http://www.michelf.com/projects/php-markdown/> 8 8 # … … 13 13 14 14 15 define( 'MARKDOWN_VERSION', "1.0.1e" ); # Thu 28 Dec 2006 15 define( 'MARKDOWN_VERSION', "1.0.1k" ); # Wed 26 Sep 2007 16 define( 'MARKDOWNEXTRA_VERSION', "1.1.7" ); # Wed 26 Sep 2007 16 17 17 18 … … 21 22 22 23 # Change to ">" for HTML output 23 define( 'MARKDOWN_EMPTY_ELEMENT_SUFFIX', " />");24 @define( 'MARKDOWN_EMPTY_ELEMENT_SUFFIX', " />"); 24 25 25 26 # Define the width of a tab for code blocks. 26 define( 'MARKDOWN_TAB_WIDTH', 4 ); 27 @define( 'MARKDOWN_TAB_WIDTH', 4 ); 28 29 # Optional title attribute for footnote links and backlinks. 30 @define( 'MARKDOWN_FN_LINK_TITLE', "" ); 31 @define( 'MARKDOWN_FN_BACKLINK_TITLE', "" ); 32 33 # Optional class attribute for footnote links and backlinks. 34 @define( 'MARKDOWN_FN_LINK_CLASS', "" ); 35 @define( 'MARKDOWN_FN_BACKLINK_CLASS', "" ); 36 37 38 # 39 # WordPress settings: 40 # 41 42 # Change to false to remove Markdown from posts and/or comments. 43 @define( 'MARKDOWN_WP_POSTS', true ); 44 @define( 'MARKDOWN_WP_COMMENTS', true ); 45 46 27 47 28 48 ### Standard Function Interface ### 29 49 30 define( 'MARKDOWN_PARSER_CLASS', 'Markdown_Parser' );50 @define( 'MARKDOWN_PARSER_CLASS', 'MarkdownExtra_Parser' ); 31 51 32 52 function Markdown($text) { … … 46 66 47 67 68 ### WordPress Plugin Interface ### 69 70 /* 71 Plugin Name: Markdown Extra 72 Plugin URI: http://www.michelf.com/projects/php-markdown/ 73 Description: <a href="http://daringfireball.net/projects/markdown/syntax">Markdown syntax</a> allows you to write using an easy-to-read, easy-to-write plain text format. Based on the original Perl version by <a href="http://daringfireball.net/">John Gruber</a>. <a href="http://www.michelf.com/projects/php-markdown/">More...</a> 74 Version: 1.1.7 75 Author: Michel Fortin 76 Author URI: http://www.michelf.com/ 77 */ 78 79 if (isset($wp_version)) { 80 # More details about how it works here: 81 # <http://www.michelf.com/weblog/2005/wordpress-text-flow-vs-markdown/> 82 83 # Post content and excerpts 84 # - Remove WordPress paragraph generator. 85 # - Run Markdown on excerpt, then remove all tags. 86 # - Add paragraph tag around the excerpt, but remove it for the excerpt rss. 87 if (MARKDOWN_WP_POSTS) { 88 remove_filter('the_content', 'wpautop'); 89 remove_filter('the_content_rss', 'wpautop'); 90 remove_filter('the_excerpt', 'wpautop'); 91 add_filter('the_content', 'Markdown', 6); 92 add_filter('the_content_rss', 'Markdown', 6); 93 add_filter('get_the_excerpt', 'Markdown', 6); 94 add_filter('get_the_excerpt', 'trim', 7); 95 add_filter('the_excerpt', 'mdwp_add_p'); 96 add_filter('the_excerpt_rss', 'mdwp_strip_p'); 97 98 remove_filter('content_save_pre', 'balanceTags', 50); 99 remove_filter('excerpt_save_pre', 'balanceTags', 50); 100 add_filter('the_content', 'balanceTags', 50); 101 add_filter('get_the_excerpt', 'balanceTags', 9); 102 } 103 104 # Comments 105 # - Remove WordPress paragraph generator. 106 # - Remove WordPress auto-link generator. 107 # - Scramble important tags before passing them to the kses filter. 108 # - Run Markdown on excerpt then remove paragraph tags. 109 if (MARKDOWN_WP_COMMENTS) { 110 remove_filter('comment_text', 'wpautop', 30); 111 remove_filter('comment_text', 'make_clickable'); 112 add_filter('pre_comment_content', 'Markdown', 6); 113 add_filter('pre_comment_content', 'mdwp_hide_tags', 8); 114 add_filter('pre_comment_content', 'mdwp_show_tags', 12); 115 add_filter('get_comment_text', 'Markdown', 6); 116 add_filter('get_comment_excerpt', 'Markdown', 6); 117 add_filter('get_comment_excerpt', 'mdwp_strip_p', 7); 118 119 global $mdwp_hidden_tags, $mdwp_placeholders; 120 $mdwp_hidden_tags = explode(' ', 121 '<p> </p> <pre> </pre> <ol> </ol> <ul> </ul> <li> </li>'); 122 $mdwp_placeholders = explode(' ', str_rot13( 123 'pEj07ZbbBZ U1kqgh4w4p pre2zmeN6K QTi31t9pre ol0MP1jzJR '. 124 'ML5IjmbRol ulANi1NsGY J7zRLJqPul liA8ctl16T K9nhooUHli')); 125 } 126 127 function mdwp_add_p($text) { 128 if (!preg_match('{^$|^<(p|ul|ol|dl|pre|blockquote)>}i', $text)) { 129 $text = '<p>'.$text.'</p>'; 130 $text = preg_replace('{\n{2,}}', "</p>\n\n<p>", $text); 131 } 132 return $text; 133 } 134 135 function mdwp_strip_p($t) { return preg_replace('{</?p>}i', '', $t); } 136 137 function mdwp_hide_tags($text) { 138 global $mdwp_hidden_tags, $mdwp_placeholders; 139 return str_replace($mdwp_hidden_tags, $mdwp_placeholders, $text); 140 } 141 function mdwp_show_tags($text) { 142 global $mdwp_hidden_tags, $mdwp_placeholders; 143 return str_replace($mdwp_placeholders, $mdwp_hidden_tags, $text); 144 } 145 } 146 147 148 ### bBlog Plugin Info ### 149 150 function identify_modifier_markdown() { 151 return array( 152 'name' => 'markdown', 153 'type' => 'modifier', 154 'nicename' => 'PHP Markdown Extra', 155 'description' => 'A text-to-HTML conversion tool for web writers', 156 'authors' => 'Michel Fortin and John Gruber', 157 'licence' => 'GPL', 158 'version' => MARKDOWNEXTRA_VERSION, 159 'help' => '<a href="http://daringfireball.net/projects/markdown/syntax">Markdown syntax</a> allows you to write using an easy-to-read, easy-to-write plain text format. Based on the original Perl version by <a href="http://daringfireball.net/">John Gruber</a>. <a href="http://www.michelf.com/projects/php-markdown/">More...</a>', 160 ); 161 } 162 163 164 ### Smarty Modifier Interface ### 165 166 function smarty_modifier_markdown($text) { 167 return Markdown($text); 168 } 169 170 171 ### Textile Compatibility Mode ### 172 173 # Rename this file to "classTextile.php" and it can replace Textile everywhere. 174 175 if (strcasecmp(substr(__FILE__, -16), "classTextile.php") == 0) { 176 # Try to include PHP SmartyPants. Should be in the same directory. 177 @include_once 'smartypants.php'; 178 # Fake Textile class. It calls Markdown instead. 179 class Textile { 180 function TextileThis($text, $lite='', $encode='') { 181 if ($lite == '' && $encode == '') $text = Markdown($text); 182 if (function_exists('SmartyPants')) $text = SmartyPants($text); 183 return $text; 184 } 185 # Fake restricted version: restrictions are not supported for now. 186 function TextileRestricted($text, $lite='', $noimage='') { 187 return $this->TextileThis($text, $lite); 188 } 189 # Workaround to ensure compatibility with TextPattern 4.0.3. 190 function blockLite($text) { return $text; } 191 } 192 } 193 194 195 48 196 # 49 197 # Markdown Parser Class … … 56 204 var $nested_brackets_depth = 6; 57 205 var $nested_brackets; 206 207 var $nested_url_parenthesis_depth = 4; 208 var $nested_url_parenthesis; 58 209 59 210 # Table of hash values for escaped characters: 60 211 var $escape_chars = '\`*_{}[]()>#+-.!'; 61 var $escape_table = array();62 var $backslash_escape_table = array();63 212 64 213 # Change to ">" for HTML output. 65 214 var $empty_element_suffix = MARKDOWN_EMPTY_ELEMENT_SUFFIX; 66 215 var $tab_width = MARKDOWN_TAB_WIDTH; 216 217 # Change to `true` to disallow markup or entities. 218 var $no_markup = false; 219 var $no_entities = false; 67 220 68 221 … … 76 229 str_repeat('(?>[^\[\]]+|\[', $this->nested_brackets_depth). 77 230 str_repeat('\])*', $this->nested_brackets_depth); 78 79 # Create an identical table but for escaped characters. 80 foreach (preg_split('/(?!^|$)/', $this->escape_chars) as $char) { 81 $hash = md5($char); 82 $this->escape_table[$char] = $hash; 83 $this->backslash_escape_table["\\$char"] = $hash; 84 } 231 232 $this->nested_url_parenthesis = 233 str_repeat('(?>[^()\s]+|\(', $this->nested_url_parenthesis_depth). 234 str_repeat('(?>\)))*', $this->nested_url_parenthesis_depth); 85 235 86 236 # Sort document, block, and span gamut in ascendent priority order. … … 94 244 var $urls = array(); 95 245 var $titles = array(); 96 var $html_blocks = array(); 97 var $html_hashes = array(); # Contains both blocks and span hashes. 246 var $html_hashes = array(); 247 248 # Status flag to avoid invalid nesting. 249 var $in_anchor = false; 98 250 99 251 … … 111 263 $this->urls = array(); 112 264 $this->titles = array(); 113 $this->html_blocks = array();114 265 $this->html_hashes = array(); 115 266 116 267 # Standardize line endings: 117 268 # DOS to Unix and Mac to Unix 118 $text = str_replace(array("\r\n", "\r"), "\n", $text);269 $text = preg_replace('{\r\n?}', "\n", $text); 119 270 120 271 # Make sure $text ends with a couple of newlines: … … 130 281 # This makes subsequent regexen easier to write, because we can 131 282 # match consecutive blank lines with /\n+/ instead of something 132 # contorted like /[ \t]*\n+/ .133 $text = preg_replace('/^[ \t]+$/m', '', $text);283 # contorted like /[ ]*\n+/ . 284 $text = preg_replace('/^[ ]+$/m', '', $text); 134 285 135 286 # Run document gamut methods. … … 146 297 147 298 "runBasicBlockGamut" => 30, 148 "unescapeSpecialChars" => 90,149 299 ); 150 300 … … 160 310 $text = preg_replace_callback('{ 161 311 ^[ ]{0,'.$less_than_tab.'}\[(.+)\][ ]?: # id = $1 162 [ \t]*312 [ ]* 163 313 \n? # maybe *one* newline 164 [ \t]*314 [ ]* 165 315 <?(\S+?)>? # url = $2 166 [ \t]*316 [ ]* 167 317 \n? # maybe one newline 168 [ \t]*318 [ ]* 169 319 (?: 170 320 (?<=\s) # lookbehind for whitespace … … 172 322 (.*?) # title = $3 173 323 [")] 174 [ \t]*324 [ ]* 175 325 )? # title is optional 176 326 (?:\n+|\Z) … … 190 340 191 341 function hashHTMLBlocks($text) { 342 if ($this->no_markup) return $text; 343 192 344 $less_than_tab = $this->tab_width - 1; 193 345 … … 198 350 # phrase emphasis, and spans. The list of tags we're looking for is 199 351 # hard-coded: 200 $block_tags_a = 'p|div|h[1-6]|blockquote|pre|table|dl|ol|ul|address|'. 201 'script|noscript|form|fieldset|iframe|math|ins|del'; 352 # 353 # * List "a" is made of tags which can be both inline or block-level. 354 # These will be treated block-level when the start tag is alone on 355 # its line, otherwise they're not matched here and will be taken as 356 # inline later. 357 # * List "b" is made of tags which are always block-level; 358 # 359 $block_tags_a = 'ins|del'; 202 360 $block_tags_b = 'p|div|h[1-6]|blockquote|pre|table|dl|ol|ul|address|'. 203 361 'script|noscript|form|fieldset|iframe|math'; … … 226 384 <\2 # nested opening tag 227 385 '.$attr.' # attributes 228 (? :386 (?> 229 387 /> 230 388 | … … 239 397 )*', 240 398 $nested_tags_level); 399 $content2 = str_replace('\2', '\3', $content); 241 400 242 401 # First, look for nested blocks, e.g.: … … 251 410 # We need to do this before the next, more liberal match, because the next 252 411 # match will start at the first `<div>` and stop at the first `</div>`. 253 $text = preg_replace_callback('{ 254 ( # save in $1 255 ^ # start of line (with /m) 256 <('.$block_tags_a.')# start tag = $2 257 '.$attr.'>\n # attributes followed by > and \n 412 $text = preg_replace_callback('{(?> 413 (?> 414 (?<=\n\n) # Starting after a blank line 415 | # or 416 \A\n? # the beginning of the doc 417 ) 418 ( # save in $1 419 420 # Match from `\n<tag>` to `</tag>\n`, handling nested tags 421 # in between. 422 423 [ ]{0,'.$less_than_tab.'} 424 <('.$block_tags_b.')# start tag = $2 425 '.$attr.'> # attributes followed by > and \n 258 426 '.$content.' # content, support nesting 259 427 </\2> # the matching end tag 260 [ \t]* # trailing spaces/tabs428 [ ]* # trailing spaces/tabs 261 429 (?=\n+|\Z) # followed by a newline or end of document 262 ) 263 }xm', 264 array(&$this, '_hashHTMLBlocks_callback'), 265 $text); 266 267 # 268 # Match from `\n<tag>` to `</tag>\n`, handling nested tags in between. 269 # 270 $text = preg_replace_callback('{ 271 ( # save in $1 272 ^ # start of line (with /m) 273 <('.$block_tags_b.')# start tag = $2 274 '.$attr.'> # attributes followed by > 275 '.$content.' # content, support nesting 276 </\2> # the matching end tag 277 [ \t]* # trailing spaces/tabs 430 431 | # Special version for tags of group a. 432 433 [ ]{0,'.$less_than_tab.'} 434 <('.$block_tags_a.')# start tag = $3 435 '.$attr.'>[ ]*\n # attributes followed by > 436 '.$content2.' # content, support nesting 437 </\3> # the matching end tag 438 [ ]* # trailing spaces/tabs 278 439 (?=\n+|\Z) # followed by a newline or end of document 279 ) 280 }xm', 281 array(&$this, '_hashHTMLBlocks_callback'), 282 $text); 283 284 # Special case just for <hr />. It was easier to make a special case than 285 # to make the other regex more complicated. 286 $text = preg_replace_callback('{ 287 (?: 288 (?<=\n\n) # Starting after a blank line 289 | # or 290 \A\n? # the beginning of the doc 291 ) 292 ( # save in $1 440 441 | # Special case just for <hr />. It was easier to make a special 442 # case than to make the other regex more complicated. 443 293 444 [ ]{0,'.$less_than_tab.'} 294 445 <(hr) # start tag = $2 … … 296 447 ([^<>])*? # 297 448 /?> # the matching end tag 298 [ \t]*449 [ ]* 299 450 (?=\n{2,}|\Z) # followed by a blank line or end of document 300 ) 301 }x', 302 array(&$this, '_hashHTMLBlocks_callback'), 303 $text); 304 305 # Special case for standalone HTML comments: 306 $text = preg_replace_callback('{ 307 (?: 308 (?<=\n\n) # Starting after a blank line 309 | # or 310 \A\n? # the beginning of the doc 311 ) 312 ( # save in $1 451 452 | # Special case for standalone HTML comments: 453 313 454 [ ]{0,'.$less_than_tab.'} 314 455 (?s: 315 456 <!-- .*? --> 316 457 ) 317 [ \t]*458 [ ]* 318 459 (?=\n{2,}|\Z) # followed by a blank line or end of document 319 ) 320 }x', 321 array(&$this, '_hashHTMLBlocks_callback'), 322 $text); 323 324 # PHP and ASP-style processor instructions (<? and <%...%>) 325 $text = preg_replace_callback('{ 326 (?: 327 (?<=\n\n) # Starting after a blank line 328 | # or 329 \A\n? # the beginning of the doc 330 ) 331 ( # save in $1 460 461 | # PHP and ASP-style processor instructions (<? and <%) 462 332 463 [ ]{0,'.$less_than_tab.'} 333 464 (?s: … … 336 467 \2> 337 468 ) 338 [ \t]*469 [ ]* 339 470 (?=\n{2,}|\Z) # followed by a blank line or end of document 340 ) 341 }x', 471 472 ) 473 )}Sxmi', 342 474 array(&$this, '_hashHTMLBlocks_callback'), 343 475 $text); … … 350 482 return "\n\n$key\n\n"; 351 483 } 352 353 354 function hashBlock($text) { 355 # 356 # Called whenever a tag must be hashed when a function insert a block-level 357 # tag in $text, it pass through this function and is automaticaly escaped, 358 # which remove the need to call _HashHTMLBlocks at every step. 484 485 486 function hashPart($text, $boundary = 'X') { 487 # 488 # Called whenever a tag must be hashed when a function insert an atomic 489 # element in the text stream. Passing $text to through this function gives 490 # a unique text-token which will be reverted back when calling unhash. 491 # 492 # The $boundary argument specify what character should be used to surround 493 # the token. By convension, "B" is used for block elements that needs not 494 # to be wrapped into paragraph tags at the end, ":" is used for elements 495 # that are word separators and "S" is used for general span-level elements. 359 496 # 360 497 # Swap back any tag hash found in $text so we do not have to `unhash` … … 363 500 364 501 # Then hash the block. 365 $key = md5($text); 502 static $i = 0; 503 $key = "$boundary\x1A" . ++$i . $boundary; 366 504 $this->html_hashes[$key] = $text; 367 $this->html_blocks[$key] = $text;368 505 return $key; # String that will replace the tag. 369 506 } 370 507 371 508 372 function hashSpan($text) { 373 # 374 # Called whenever a tag must be hashed when a function insert a span-level 375 # element in $text, it pass through this function and is automaticaly 376 # escaped, blocking invalid nested overlap. 377 # 378 # Swap back any tag hash found in $text so we do not have to `unhash` 379 # multiple times at the end. 380 $text = $this->unhash($text); 381 382 # Then hash the span. 383 $key = md5($text); 384 $this->html_hashes[$key] = $text; 385 return $key; # String that will replace the span tag. 509 function hashBlock($text) { 510 # 511 # Shortcut function for hashPart with block-level boundaries. 512 # 513 return $this->hashPart($text, 'B'); 386 514 } 387 515 … … 434 562 # Do Horizontal Rules: 435 563 return preg_replace( 436 array('{^[ ]{0,2}([ ]?\*[ ]?){3,}[ \t]*$}mx', 437 '{^[ ]{0,2}([ ]? -[ ]?){3,}[ \t]*$}mx', 438 '{^[ ]{0,2}([ ]? _[ ]?){3,}[ \t]*$}mx'), 564 '{ 565 ^[ ]{0,3} # Leading space 566 ([-*_]) # $1: First marker 567 (?> # Repeated marker group 568 [ ]{0,2} # Zero, one, or two spaces. 569 \1 # Marker character 570 ){2,} # Group repeated at least twice 571 [ ]* # Tailing spaces 572 $ # End of line. 573 }mx', 439 574 "\n".$this->hashBlock("<hr$this->empty_element_suffix")."\n", 440 575 $text); … … 447 582 # tags like paragraphs, headers, and list items. 448 583 # 449 "escapeSpecialCharsWithinTagAttributes" => -20,450 "doCodeSpans" => -10,451 " encodeBackslashEscapes" => -5,584 # Process character escapes, code spans, and inline HTML 585 # in one shot. 586 "parseSpan" => -30, 452 587 453 588 # Process anchor and image tags. Images must come first, … … 480 615 function doHardBreaks($text) { 481 616 # Do hard breaks: 482 $br_tag = $this->hashSpan("<br$this->empty_element_suffix\n"); 483 return preg_replace('/ {2,}\n/', $br_tag, $text); 484 } 485 486 487 function escapeSpecialCharsWithinTagAttributes($text) { 488 # 489 # Within tags -- meaning between < and > -- encode [\ ` * _] so they 490 # don't conflict with their use in Markdown for code, italics and strong. 491 # We're replacing each such character with its corresponding MD5 checksum 492 # value; this is likely overkill, but it should prevent us from colliding 493 # with the escape values by accident. 494 # 495 $tokens = $this->tokenizeHTML($text); 496 $text = ''; # rebuild $text from the tokens 497 498 foreach ($tokens as $cur_token) { 499 if ($cur_token[0] == 'tag') { 500 $cur_token[1] = str_replace('\\', $this->escape_table['\\'], $cur_token[1]); 501 $cur_token[1] = str_replace(array('`'), $this->escape_table['`'], $cur_token[1]); 502 $cur_token[1] = str_replace('*', $this->escape_table['*'], $cur_token[1]); 503 $cur_token[1] = str_replace('_', $this->escape_table['_'], $cur_token[1]); 504 } 505 $text .= $cur_token[1]; 506 } 507 return $text; 617 return preg_replace_callback('/ {2,}\n/', 618 array(&$this, '_doHardBreaks_callback'), $text); 619 } 620 function _doHardBreaks_callback($matches) { 621 return $this->hashPart("<br$this->empty_element_suffix\n"); 508 622 } 509 623 … … 513 627 # Turn Markdown link shortcuts into XHTML <a> tags. 514 628 # 629 if ($this->in_anchor) return $text; 630 $this->in_anchor = true; 631 515 632 # 516 633 # First, handle reference-style links: [link text] [id] … … 541 658 \] 542 659 \( # literal paren 543 [ \t]* 544 <?(.*?)>? # href = $3 545 [ \t]* 546 ( # $4 547 ([\'"]) # quote char = $5 548 (.*?) # Title = $6 549 \5 # matching quote 550 [ \t]* # ignore any spaces/tabs between closing quote and ) 660 [ ]* 661 (?: 662 <(\S*)> # href = $3 663 | 664 ('.$this->nested_url_parenthesis.') # href = $4 665 ) 666 [ ]* 667 ( # $5 668 ([\'"]) # quote char = $6 669 (.*?) # Title = $7 670 \6 # matching quote 671 [ ]* # ignore any spaces/tabs between closing quote and ) 551 672 )? # title is optional 552 673 \) … … 569 690 // array(&$this, '_doAnchors_reference_callback'), $text); 570 691 692 $this->in_anchor = false; 571 693 return $text; 572 694 } … … 598 720 $link_text = $this->runSpanGamut($link_text); 599 721 $result .= ">$link_text</a>"; 600 $result = $this->hash Span($result);722 $result = $this->hashPart($result); 601 723 } 602 724 else { … … 608 730 $whole_match = $matches[1]; 609 731 $link_text = $this->runSpanGamut($matches[2]); 610 $url = $matches[3] ;611 $title =& $matches[ 6];612 732 $url = $matches[3] == '' ? $matches[4] : $matches[3]; 733 $title =& $matches[7]; 734 613 735 $url = $this->encodeAmpsAndAngles($url); 614 736 … … 623 745 $result .= ">$link_text</a>"; 624 746 625 return $this->hash Span($result);747 return $this->hashPart($result); 626 748 } 627 749 … … 662 784 \s? # One optional whitespace character 663 785 \( # literal paren 664 [ \t]* 665 <?(\S+?)>? # src url = $3 666 [ \t]* 667 ( # $4 668 ([\'"]) # quote char = $5 669 (.*?) # title = $6 670 \5 # matching quote 671 [ \t]* 786 [ ]* 787 (?: 788 <(\S*)> # src url = $3 789 | 790 ('.$this->nested_url_parenthesis.') # src url = $4 791 ) 792 [ ]* 793 ( # $5 794 ([\'"]) # quote char = $6 795 (.*?) # title = $7 796 \6 # matching quote 797 [ ]* 672 798 )? # title is optional 673 799 \) … … 696 822 } 697 823 $result .= $this->empty_element_suffix; 698 $result = $this->hash Span($result);824 $result = $this->hashPart($result); 699 825 } 700 826 else { … … 708 834 $whole_match = $matches[1]; 709 835 $alt_text = $matches[2]; 710 $url = $matches[3] ;711 $title =& $matches[ 6];836 $url = $matches[3] == '' ? $matches[4] : $matches[3]; 837 $title =& $matches[7]; 712 838 713 839 $alt_text = str_replace('"', '"', $alt_text); … … 719 845 $result .= $this->empty_element_suffix; 720 846 721 return $this->hash Span($result);847 return $this->hashPart($result); 722 848 } 723 849 … … 731 857 # -------- 732 858 # 733 $text = preg_replace_callback('{ ^(.+)[ \t]*\n=+[ \t]*\n+ }mx', 734 array(&$this, '_doHeaders_callback_setext_h1'), $text); 735 $text = preg_replace_callback('{ ^(.+)[ \t]*\n-+[ \t]*\n+ }mx', 736 array(&$this, '_doHeaders_callback_setext_h2'), $text); 859 $text = preg_replace_callback('{ ^(.+?)[ ]*\n(=+|-+)[ ]*\n+ }mx', 860 array(&$this, '_doHeaders_callback_setext'), $text); 737 861 738 862 # atx-style headers: … … 745 869 $text = preg_replace_callback('{ 746 870 ^(\#{1,6}) # $1 = string of #\'s 747 [ \t]*871 [ ]* 748 872 (.+?) # $2 = Header text 749 [ \t]*873 [ ]* 750 874 \#* # optional closing #\'s (not counted) 751 875 \n+ … … 755 879 return $text; 756 880 } 757 function _doHeaders_callback_setext_h1($matches) { 758 return $this->hashBlock("<h1>".$this->runSpanGamut($matches[1])."</h1>")."\n\n"; 759 } 760 function _doHeaders_callback_setext_h2($matches) { 761 return $this->hashBlock("<h2>".$this->runSpanGamut($matches[1])."</h2>")."\n\n"; 881 function _doHeaders_callback_setext($matches) { 882 $level = $matches[2]{0} == '=' ? 1 : 2; 883 $block = "<h$level>".$this->runSpanGamut($matches[1])."</h$level>"; 884 return "\n" . $this->hashBlock($block) . "\n\n"; 762 885 } 763 886 function _doHeaders_callback_atx($matches) { 764 887 $level = strlen($matches[1]); 765 return $this->hashBlock("<h$level>".$this->runSpanGamut($matches[2])."</h$level>")."\n\n"; 888 $block = "<h$level>".$this->runSpanGamut($matches[2])."</h$level>"; 889 return "\n" . $this->hashBlock($block) . "\n\n"; 766 890 } 767 891 … … 787 911 [ ]{0,'.$less_than_tab.'} 788 912 ('.$marker.') # $3 = first list item marker 789 [ \t]+913 [ ]+ 790 914 ) 791 915 (?s:.+?) … … 796 920 (?=\S) 797 921 (?! # Negative lookahead for another list item marker 798 [ \t]*799 '.$marker.'[ \t]+922 [ ]* 923 '.$marker.'[ ]+ 800 924 ) 801 925 ) … … 835 959 $marker_any = ( $list_type == "ul" ? $marker_ul : $marker_ol ); 836 960 837 # Turn double returns into triple returns, so that we can make a 838 # paragraph for the last item in a list, if necessary: 839 $list = preg_replace("/\n{2,}/", "\n\n\n", $list); 961 $list .= "\n"; 840 962 $result = $this->processListItems($list, $marker_any); 841 963 … … 879 1001 $list_str = preg_replace_callback('{ 880 1002 (\n)? # leading line = $1 881 (^[ \t]*) # leading whitespace = $2882 ('.$marker_any.') [ \t]+ # list marker = $3883 ((?s:.+?) # list item text = $4884 ( \n{1,2}))885 (?= \n* (\z | \2 ('.$marker_any.') [ \t]+))1003 (^[ ]*) # leading whitespace = $2 1004 ('.$marker_any.') [ ]+ # list marker = $3 1005 ((?s:.+?)) # list item text = $4 1006 (?:(\n+(?=\n))|\n) # tailing blank line = $5 1007 (?= \n* (\z | \2 ('.$marker_any.') [ ]+)) 886 1008 }xm', 887 1009 array(&$this, '_processListItems_callback'), $list_str); … … 894 1016 $leading_line =& $matches[1]; 895 1017 $leading_space =& $matches[2]; 896 897 if ($leading_line || preg_match('/\n{2,}/', $item)) { 898 $item = $this->runBlockGamut($this->outdent($item)); 1018 $tailing_blank_line =& $matches[5]; 1019 1020 if ($leading_line || $tailing_blank_line || 1021 preg_match('/\n{2,}/', $item)) 1022 { 1023 $item = $this->runBlockGamut($this->outdent($item)."\n"); 899 1024 } 900 1025 else { … … 916 1041 (?:\n\n|\A) 917 1042 ( # $1 = the code block -- one or more lines, starting with a space/tab 918 (? :919 (?:[ ]{'.$this->tab_width.'} | \t)# Lines must start with a tab or a tab-width of spaces1043 (?> 1044 [ ]{'.$this->tab_width.'} # Lines must start with a tab or a tab-width of spaces 920 1045 .*\n+ 921 1046 )+ … … 930 1055 $codeblock = $matches[1]; 931 1056 932 $codeblock = $this->encodeCode($this->outdent($codeblock)); 933 // $codeblock = $this->detab($codeblock); 934 # trim leading newlines and trailing whitespace 935 $codeblock = preg_replace(array('/\A\n+/', '/\n+\z/'), '', $codeblock); 936 937 $result = "\n\n".$this->hashBlock("<pre><code>" . $codeblock . "\n</code></pre>")."\n\n"; 938 939 return $result; 940 } 941 942 943 function doCodeSpans($text) { 944 # 945 # * Backtick quotes are used for <code></code> spans. 946 # 947 # * You can use multiple backticks as the delimiters if you want to 948 # include literal backticks in the code span. So, this input: 949 # 950 # Just type ``foo `bar` baz`` at the prompt. 951 # 952 # Will translate to: 953 # 954 # <p>Just type <code>foo `bar` baz</code> at the prompt.</p> 955 # 956 # There's no arbitrary limit to the number of backticks you 957 # can use as delimters. If you need three consecutive backticks 958 # in your code, use four for delimiters, etc. 959 # 960 # * You can use spaces to get literal backticks at the edges: 961 # 962 # ... type `` `bar` `` ... 963 # 964 # Turns to: 965 # 966 # ... type <code>`bar`</code> ... 967 # 968 $text = preg_replace_callback('@ 969 (?<!\\\) # Character before opening ` can\'t be a backslash 970 (`+) # $1 = Opening run of ` 971 (.+?) # $2 = The code block 972 (?<!`) 973 \1 # Matching closer 974 (?!`) 975 @xs', 976 array(&$this, '_doCodeSpans_callback'), $text); 977 978 return $text; 979 } 980 function _doCodeSpans_callback($matches) { 981 $c = $matches[2]; 982 $c = preg_replace('/^[ \t]*/', '', $c); # leading whitespace 983 $c = preg_replace('/[ \t]*$/', '', $c); # trailing whitespace 984 $c = $this->encodeCode($c); 985 return $this->hashSpan("<code>$c</code>"); 986 } 987 988 989 function encodeCode($_) { 990 # 991 # Encode/escape certain characters inside Markdown code runs. 992 # The point is that in code, these characters are literals, 993 # and lose their special Markdown meanings. 994 # 995 # Encode all ampersands; HTML entities are not 996 # entities within a Markdown code span. 997 $_ = str_replace('&', '&', $_); 998 999 # Do the angle bracket song and dance: 1000 $_ = str_replace(array('<', '>'), 1001 array('<', '>'), $_); 1002 1003 # Now, escape characters that are magic in Markdown: 1004 // $_ = str_replace(array_keys($this->escape_table), 1005 // array_values($this->escape_table), $_); 1006 1007 return $_; 1057 $codeblock = $this->outdent($codeblock); 1058 $codeblock = htmlspecialchars($codeblock, ENT_NOQUOTES); 1059 1060 # trim leading newlines and trailing newlines 1061 $codeblock = preg_replace('/\A\n+|\n+\z/', '', $codeblock); 1062 1063 $codeblock = "<pre><code>$codeblock\n</code></pre>"; 1064 return "\n\n".$this->hashBlock($codeblock)."\n\n"; 1065 } 1066 1067 1068 function makeCodeSpan($code) { 1069 # 1070 # Create a code span markup for $code. Called from handleSpanToken. 1071 # 1072 $code = htmlspecialchars(trim($code), ENT_NOQUOTES); 1073 return $this->hashPart("<code>$code</code>"); 1008 1074 } 1009 1075 … … 1020 1086 (?!\1\1) # or two others marker chars. 1021 1087 ( # $2: Content 1022 (? :1088 (?> 1023 1089 [^*_]+? # Anthing not em markers. 1024 1090 | … … 1026 1092 \1 (?=\S) .+? (?<=\S) \1 1027 1093 | 1028 (?! \1 ) .# Allow unbalenced * and _.1094 . # Allow unbalenced * and _. 1029 1095 )+? 1030 1096 ) … … 1034 1100 # Then <em>: 1035 1101 $text = preg_replace_callback( 1036 '{ ( (?<!\*)\* | (?<!_)_ ) (?=\S) (?! \1) (.+?) (?<=\S) \1 }sx',1102 '{ ( (?<!\*)\* | (?<!_)_ ) (?=\S) (?! \1) (.+?) (?<=\S)(?<!\s(?=\1).) \1 }sx', 1037 1103 array(&$this, '_doItalicAndBold_em_callback'), $text); 1038 1104 … … 1042 1108 $text = $matches[2]; 1043 1109 $text = $this->runSpanGamut($text); 1044 return $this->hash Span("<em>$text</em>");1110 return $this->hashPart("<em>$text</em>"); 1045 1111 } 1046 1112 function _doItalicAndBold_strong_callback($matches) { 1047 1113 $text = $matches[2]; 1048 1114 $text = $this->runSpanGamut($text); 1049 return $this->hash Span("<strong>$text</strong>");1115 return $this->hashPart("<strong>$text</strong>"); 1050 1116 } 1051 1117 … … 1054 1120 $text = preg_replace_callback('/ 1055 1121 ( # Wrap whole match in $1 1056 ( 1057 ^[ \t]*>[ \t]? # ">" at the start of a line1122 (?> 1123 ^[ ]*>[ ]? # ">" at the start of a line 1058 1124 .+\n # rest of the first line 1059 1125 (.+\n)* # subsequent consecutive lines … … 1069 1135 $bq = $matches[1]; 1070 1136 # trim one level of quoting - trim whitespace-only lines 1071 $bq = preg_replace( array('/^[ \t]*>[ \t]?/m', '/^[ \t]+$/m'), '', $bq);1137 $bq = preg_replace('/^[ ]*>[ ]?|^[ ]+$/m', '', $bq); 1072 1138 $bq = $this->runBlockGamut($bq); # recurse 1073 1139 … … 1078 1144 array(&$this, '_DoBlockQuotes_callback2'), $bq); 1079 1145 1080 return $this->hashBlock("<blockquote>\n$bq\n</blockquote>")."\n\n";1146 return "\n". $this->hashBlock("<blockquote>\n$bq\n</blockquote>")."\n\n"; 1081 1147 } 1082 1148 function _doBlockQuotes_callback2($matches) { … … 1093 1159 # 1094 1160 # Strip leading and trailing lines: 1095 $text = preg_replace( array('/\A\n+/', '/\n+\z/'), '', $text);1161 $text = preg_replace('/\A\n+|\n+\z/', '', $text); 1096 1162 1097 1163 $grafs = preg_split('/\n{2,}/', $text, -1, PREG_SPLIT_NO_EMPTY); 1098 1164 1099 1165 # 1100 # Wrap <p> tags .1166 # Wrap <p> tags and unhashify HTML blocks 1101 1167 # 1102 1168 foreach ($grafs as $key => $value) { 1103 if (!isset( $this->html_blocks[$value] )) { 1169 if (!preg_match('/^B\x1A[0-9]+B$/', $value)) { 1170 # Is a paragraph. 1104 1171 $value = $this->runSpanGamut($value); 1105 $value = preg_replace('/^([ \t]*)/', "<p>", $value);1172 $value = preg_replace('/^([ ]*)/', "<p>", $value); 1106 1173 $value .= "</p>"; 1107 1174 $grafs[$key] = $this->unhash($value); 1108 1175 } 1109 } 1110 1111 # 1112 # Unhashify HTML blocks 1113 # 1114 foreach ($grafs as $key => $graf) { 1115 # Modify elements of @grafs in-place... 1116 if (isset($this->html_blocks[$graf])) { 1117 $block = $this->html_blocks[$graf]; 1176 else { 1177 # Is a block. 1178 # Modify elements of @grafs in-place... 1179 $graf = $value; 1180 $block = $this->html_hashes[$graf]; 1118 1181 $graf = $block; 1119 1182 // if (preg_match('{ … … 1162 1225 function encodeAmpsAndAngles($text) { 1163 1226 # Smart processing for ampersands and angle brackets that need to be encoded. 1227 if ($this->no_entities) { 1228 $text = str_replace('&', '&', $text); 1229 $text = str_replace('<', '<', $text); 1230 return $text; 1231 } 1164 1232 1165 1233 # Ampersand-encoding based entirely on Nat Irons's Amputator MT plugin: … … 1169 1237 1170 1238 # Encode naked <'s 1171 $text = preg_replace('{<(?![a-z/?\$! ])}i', '<', $text);1239 $text = preg_replace('{<(?![a-z/?\$!%])}i', '<', $text); 1172 1240 1173 1241 return $text; … … 1175 1243 1176 1244 1177 function encodeBackslashEscapes($text) {1178 #1179 # Parameter: String.1180 # Returns: The string, with after processing the following backslash1181 # escape sequences.1182 #1183 # Must process escaped backslashes first.1184 return str_replace(array_keys($this->backslash_escape_table),1185 array_values($this->backslash_escape_table), $text);1186 }1187 1188 1189 1245 function doAutoLinks($text) { 1190 $text = preg_replace ('{<((https?|ftp|dict):[^\'">\s]+)>}',1191 '<a href="\1">\1</a>', $text);1246 $text = preg_replace_callback('{<((https?|ftp|dict):[^\'">\s]+)>}', 1247 array(&$this, '_doAutoLinks_url_callback'), $text); 1192 1248 1193 1249 # Email addresses: <address@domain.foo> … … 1202 1258 > 1203 1259 }xi', 1204 array(&$this, '_doAutoLinks_ callback'), $text);1260 array(&$this, '_doAutoLinks_email_callback'), $text); 1205 1261 1206 1262 return $text; 1207 1263 } 1208 function _doAutoLinks_callback($matches) { 1264 function _doAutoLinks_url_callback($matches) { 1265 $url = $this->encodeAmpsAndAngles($matches[1]); 1266 $link = "<a href=\"$url\">$url</a>"; 1267 return $this->hashPart($link); 1268 } 1269 function _doAutoLinks_email_callback($matches) { 1209 1270 $address = $matches[1]; 1210 $address = $this->unescapeSpecialChars($address); 1211 $address = $this->encodeEmailAddress($address); 1212 return $this->hashSpan($address); 1271 $link = $this->encodeEmailAddress($address); 1272 return $this->hashPart($link); 1213 1273 } 1214 1274 … … 1255 1315 1256 1316 1257 function unescapeSpecialChars($text) { 1258 # 1259 # Swap back in all the special characters we've hidden. 1260 # 1261 return str_replace(array_values($this->escape_table), 1262 array_keys($this->escape_table), $text); 1263 } 1264 1265 1266 function tokenizeHTML($str) { 1267 # 1268 # Parameter: String containing HTML + Markdown markup. 1269 # Returns: An array of the tokens comprising the input 1270 # string. Each token is either a tag or a run of text 1271 # between tags. Each element of the array is a 1272 # two-element array; the first is either 'tag' or 'text'; 1273 # the second is the actual value. 1274 # Note: Markdown code spans are taken into account: no tag token is 1275 # generated within a code span. 1276 # 1277 $tokens = array(); 1278 1279 while ($str != "") { 1280 # 1281 # Each loop iteration seach for either the next tag or the next 1282 # openning code span marker. If a code span marker is found, the 1283 # code span is extracted in entierty and will result in an extra 1284 # text token. 1285 # 1286 $parts = preg_split('{ 1317 function parseSpan($str) { 1318 # 1319 # Take the string $str and parse it into tokens, hashing embeded HTML, 1320 # escaped characters and handling code spans. 1321 # 1322 $output = ''; 1323 1324 $regex = '{ 1287 1325 ( 1326 \\\\['.preg_quote($this->escape_chars).'] 1327 | 1288 1328 (?<![`\\\\]) 1289 1329 `+ # code span marker 1330 '.( $this->no_markup ? '' : ' 1290 1331 | 1291 1332 <!-- .*? --> # comment … … 1294 1335 | 1295 1336 <[/!$]?[-a-zA-Z0-9:]+ # regular tags 1296 (? :1337 (?> 1297 1338 \s 1298 1339 (?>[^"\'>]+|"[^"]*"|\'[^\']*\')* 1299 1340 )? 1300 1341 > 1342 ').' 1301 1343 ) 1302 }xs', $str, 2, PREG_SPLIT_DELIM_CAPTURE); 1344 }xs'; 1345 1346 while (1) { 1347 # 1348 # Each loop iteration seach for either the next tag, the next 1349 # openning code span marker, or the next escaped character. 1350 # Each token is then passed to handleSpanToken. 1351 # 1352 $parts = preg_split($regex, $str, 2, PREG_SPLIT_DELIM_CAPTURE); 1303 1353 1304 1354 # Create token from text preceding tag. 1305 1355 if ($parts[0] != "") { 1306 $ tokens[] = array('text', $parts[0]);1356 $output .= $parts[0]; 1307 1357 } 1308 1358 1309 1359 # Check if we reach the end. 1310 if (count($parts) < 3) { 1360 if (isset($parts[1])) { 1361 $output .= $this->handleSpanToken($parts[1], $parts[2]); 1362 $str = $parts[2]; 1363 } 1364 else { 1311 1365 break; 1312 1366 } 1313 1314 # Create token from tag or code span. 1315 if ($parts[1]{0} == "`") { 1316 $tokens[] = array('text', $parts[1]); 1317 $str = $parts[2]; 1318 1319 # Skip the whole code span, pass as text token. 1320 if (preg_match('/^(.*(?<!`\\\\)'.$parts[1].'(?!`))(.*)$/', 1367 } 1368 1369 return $output; 1370 } 1371 1372 1373 function handleSpanToken($token, &$str) { 1374 # 1375 # Handle $token provided by parseSpan by determining its nature and 1376 # returning the corresponding value that should replace it. 1377 # 1378 switch ($token{0}) { 1379 case "\\": 1380 return $this->hashPart("&#". ord($token{1}). ";"); 1381 case "`": 1382 # Search for end marker in remaining text. 1383 if (preg_match('/^(.*?[^`])'.$token.'(?!`)(.*)$/sm', 1321 1384 $str, $matches)) 1322 1385 { 1323 $tokens[] = array('text', $matches[1]);1324 1386 $str = $matches[2]; 1387 $codespan = $this->makeCodeSpan($matches[1]); 1388 return $this->hashPart($codespan); 1325 1389 } 1326 } else { 1327 $tokens[] = array('tag', $parts[1]); 1328 $str = $parts[2]; 1329 } 1330 } 1331 1332 return $tokens; 1390 return $token; // return as text since no ending marker found. 1391 default: 1392 return $this->hashPart($token); 1393 } 1333 1394 } 1334 1395 … … 1338 1399 # Remove one level of line-leading tabs or spaces 1339 1400 # 1340 return preg_replace( "/^(\\t|[ ]{1,$this->tab_width})/m", "", $text);1401 return preg_replace('/^(\t|[ ]{1,'.$this->tab_width.'})/m', '', $text); 1341 1402 } 1342 1403 … … 1354 1415 # appropriate number of space between each blocks. 1355 1416 1356 $strlen = $this->utf8_strlen; # best strlen function for UTF-8. 1357 $lines = explode("\n", $text); 1358 $text = ""; 1359 1360 foreach ($lines as $line) { 1361 # Split in blocks. 1362 $blocks = explode("\t", $line); 1363 # Add each blocks to the line. 1364 $line = $blocks[0]; 1365 unset($blocks[0]); # Do not add first block twice. 1366 foreach ($blocks as $block) { 1367 # Calculate amount of space, insert spaces, insert block. 1368 $amount = $this->tab_width - 1369 $strlen($line, 'UTF-8') % $this->tab_width; 1370 $line .= str_repeat(" ", $amount) . $block; 1371 } 1372 $text .= "$line\n"; 1373 } 1417 $text = preg_replace_callback('/^.*\t.*$/m', 1418 array(&$this, '_detab_callback'), $text); 1419 1374 1420 return $text; 1375 1421 } 1422 function _detab_callback($matches) { 1423 $line = $matches[0]; 1424 $strlen = $this->utf8_strlen; # strlen function for UTF-8. 1425 1426 # Split in blocks. 1427 $blocks = explode("\t", $line); 1428 # Add each blocks to the line. 1429 $line = $blocks[0]; 1430 unset($blocks[0]); # Do not add first block twice. 1431 foreach ($blocks as $block) { 1432 # Calculate amount of space, insert spaces, insert block. 1433 $amount = $this->tab_width - 1434 $strlen($line, 'UTF-8') % $this->tab_width; 1435 $line .= str_repeat(" ", $amount) . $block; 1436 } 1437 return $line; 1438 } 1376 1439 function _initDetab() { 1377 1440 # 1378 1441 # Check for the availability of the function in the `utf8_strlen` property 1379 # ( probably `mb_strlen`). If the function is not available, create a1442 # (initially `mb_strlen`). If the function is not available, create a 1380 1443 # function that will loosely count the number of UTF-8 characters with a 1381 1444 # regular expression. 1382 1445 # 1383 1446 if (function_exists($this->utf8_strlen)) return; 1384 $this->utf8_strlen = 'Markdown_UTF8_strlen'; 1385 1386 if (function_exists($this->utf8_strlen)) return; 1387 function Markdown_UTF8_strlen($text) { 1388 return preg_match_all('/[\x00-\xBF]|[\xC0-\xFF][\x80-\xBF]*/', 1389 $text, $m); 1390 } 1447 $this->utf8_strlen = create_function('$text', 'return preg_match_all( 1448 "/[\\\\x00-\\\\xBF]|[\\\\xC0-\\\\xFF][\\\\x80-\\\\xBF]*/", 1449 $text, $m);'); 1391 1450 } 1392 1451 … … 1396 1455 # Swap back in all the tags hashed by _HashHTMLBlocks. 1397 1456 # 1398 return str_replace(array_keys($this->html_hashes), 1399 array_values($this->html_hashes), $text); 1457 return preg_replace_callback('/(.)\x1A[0-9]+\1/', 1458 array(&$this, '_unhash_callback'), $text); 1459 } 1460 function _unhash_callback($matches) { 1461 return $this->html_hashes[$matches[0]]; 1400 1462 } 1401 1463 … … 1403 1465 1404 1466 1467 # 1468 # Markdown Extra Parser Class 1469 # 1470 1471 class MarkdownExtra_Parser extends Markdown_Parser { 1472 1473 # Prefix for footnote ids. 1474 var $fn_id_prefix = ""; 1475 1476 # Optional title attribute for footnote links and backlinks. 1477 var $fn_link_title = MARKDOWN_FN_LINK_TITLE; 1478 var $fn_backlink_title = MARKDOWN_FN_BACKLINK_TITLE; 1479 1480 # Optional class attribute for footnote links and backlinks. 1481 var $fn_link_class = MARKDOWN_FN_LINK_CLASS; 1482 var $fn_backlink_class = MARKDOWN_FN_BACKLINK_CLASS; 1483 1484 1485 function MarkdownExtra_Parser() { 1486 # 1487 # Constructor function. Initialize the parser object. 1488 # 1489 # Add extra escapable characters before parent constructor 1490 # initialize the table. 1491 $this->escape_chars .= ':|'; 1492 1493 # Insert extra document, block, and span transformations. 1494 # Parent constructor will do the sorting. 1495 $this->document_gamut += array( 1496 "stripFootnotes" => 15, 1497 "stripAbbreviations" => 25, 1498 "appendFootnotes" => 50, 1499 ); 1500 $this->block_gamut += array( 1501 "doTables" => 15, 1502 "doDefLists" => 45, 1503 ); 1504 $this->span_gamut += array( 1505 "doFootnotes" => 5, 1506 "doAbbreviations" => 70, 1507 ); 1508 1509 parent::Markdown_Parser(); 1510 } 1511 1512 1513 # Extra hashes used during extra transformations. 1514 var $footnotes = array(); 1515 var $footnotes_ordered = array(); 1516 var $abbr_desciptions = array(); 1517 var $abbr_matches = array(); 1518 1519 # Status flag to avoid invalid nesting. 1520 var $in_footnote = false; 1521 1522 1523 function transform($text) { 1524 # 1525 # Added clear to the new $html_hashes, reordered `hashHTMLBlocks` before 1526 # blank line stripping and added extra parameter to `runBlockGamut`. 1527 # 1528 # Clear the global hashes. If we don't clear these, you get conflicts 1529 # from other articles when generating a page which contains more than 1530 # one article (e.g. an index page that shows the N most recent 1531 # articles): 1532 $this->footnotes = array(); 1533 $this->footnotes_ordered = array(); 1534 $this->abbr_desciptions = array(); 1535 $this->abbr_matches = array(); 1536 1537 return parent::transform($text); 1538 } 1539 1540 1541 ### HTML Block Parser ### 1542 1543 # Tags that are always treated as block tags: 1544 var $block_tags = 'p|div|h[1-6]|blockquote|pre|table|dl|ol|ul|address|form|fieldset|iframe|hr|legend'; 1545 1546 # Tags treated as block tags only if the opening tag is alone on it's line: 1547 var $context_block_tags = 'script|noscript|math|ins|del'; 1548 1549 # Tags where markdown="1" default to span mode: 1550 var $contain_span_tags = 'p|h[1-6]|li|dd|dt|td|th|legend|address'; 1551 1552 # Tags which must not have their contents modified, no matter where 1553 # they appear: 1554 var $clean_tags = 'script|math'; 1555 1556 # Tags that do not need to be closed. 1557 var $auto_close_tags = 'hr|img'; 1558 1559 1560 function hashHTMLBlocks($text) { 1561 # 1562 # Hashify HTML Blocks and "clean tags". 1563 # 1564 # We only want to do this for block-level HTML tags, such as headers, 1565 # lists, and tables. That's because we still want to wrap <p>s around 1566 # "paragraphs" that are wrapped in non-block-level tags, such as anchors, 1567 # phrase emphasis, and spans. The list of tags we're looking for is 1568 # hard-coded. 1569 # 1570 # This works by calling _HashHTMLBlocks_InMarkdown, which then calls 1571 # _HashHTMLBlocks_InHTML when it encounter block tags. When the markdown="1" 1572 # attribute is found whitin a tag, _HashHTMLBlocks_InHTML calls back 1573 # _HashHTMLBlocks_InMarkdown to handle the Markdown syntax within the tag. 1574 # These two functions are calling each other. It's recursive! 1575 # 1576 # 1577 # Call the HTML-in-Markdown hasher. 1578 # 1579 list($text, ) = $this->_hashHTMLBlocks_inMarkdown($text); 1580 1581 return $text; 1582 } 1583 function _hashHTMLBlocks_inMarkdown($text, $indent = 0, 1584 $enclosing_tag = '', $span = false) 1585 { 1586 # 1587 # Parse markdown text, calling _HashHTMLBlocks_InHTML for block tags. 1588 # 1589 # * $indent is the number of space to be ignored when checking for code 1590 # blocks. This is important because if we don't take the indent into 1591 # account, something like this (which looks right) won't work as expected: 1592 # 1593 # <div> 1594 # <div markdown="1"> 1595 # Hello World. <-- Is this a Markdown code block or text? 1596 # </div> <-- Is this a Markdown code block or a real tag? 1597 # <div> 1598 # 1599 # If you don't like this, just don't indent the tag on which 1600 # you apply the markdown="1" attribute. 1601 # 1602 # * If $enclosing_tag is not empty, stops at the first unmatched closing 1603 # tag with that name. Nested tags supported. 1604 # 1605 # * If $span is true, text inside must treated as span. So any double 1606 # newline will be replaced by a single newline so that it does not create 1607 # paragraphs. 1608 # 1609 # Returns an array of that form: ( processed text , remaining text ) 1610 # 1611 if ($text === '') return array('', ''); 1612 1613 # Regex to check for the presense of newlines around a block tag. 1614 $newline_match_before = '/(?:^\n?|\n\n)*$/'; 1615 $newline_match_after = 1616 '{ 1617 ^ # Start of text following the tag. 1618 (?:[ ]*<!--.*?-->)? # Optional comment. 1619 [ ]*\n # Must be followed by newline. 1620 }xs'; 1621 1622 # Regex to match any tag. 1623 $block_tag_match = 1624 '{ 1625 ( # $2: Capture hole tag. 1626 </? # Any opening or closing tag. 1627 (?: # Tag name. 1628 '.$this->block_tags.' | 1629 '.$this->context_block_tags.' | 1630 '.$this->clean_tags.' | 1631 (?!\s)'.$enclosing_tag.' 1632 ) 1633 \s* # Whitespace. 1634 (?> 1635 ".*?" | # Double quotes (can contain `>`) 1636 \'.*?\' | # Single quotes (can contain `>`) 1637 .+? # Anything but quotes and `>`. 1638 )*? 1639 > # End of tag. 1640 | 1641 <!-- .*? --> # HTML Comment 1642 | 1643 <\?.*?\?> | <%.*?%> # Processing instruction 1644 | 1645 <!\[CDATA\[.*?\]\]> # CData Block 1646 ) 1647 }xs'; 1648 1649 1650 $depth = 0; # Current depth inside the tag tree. 1651 $parsed = ""; # Parsed text that will be returned. 1652 1653 # 1654 # Loop through every tag until we find the closing tag of the parent 1655 # or loop until reaching the end of text if no parent tag specified. 1656 # 1657 do { 1658 # 1659 # Split the text using the first $tag_match pattern found. 1660 # Text before pattern will be first in the array, text after 1661 # pattern will be at the end, and between will be any catches made 1662 # by the pattern. 1663 # 1664 $parts = preg_split($block_tag_match, $text, 2, 1665 PREG_SPLIT_DELIM_CAPTURE); 1666 1667 # If in Markdown span mode, add a empty-string span-level hash 1668 # after each newline to prevent triggering any block element. 1669 if ($span) { 1670 $void = $this->hashPart("", ':'); 1671 $newline = "$void\n"; 1672 $parts[0] = $void . str_replace("\n", $newline, $parts[0]) . $void; 1673 } 1674 1675 $parsed .= $parts[0]; # Text before current tag. 1676 1677 # If end of $text has been reached. Stop loop. 1678 if (count($parts) < 3) { 1679 $text = ""; 1680 break; 1681 } 1682 1683 $tag = $parts[1]; # Tag to handle. 1684 $text = $parts[2]; # Remaining text after current tag. 1685 1686 # 1687 # Check for: Tag inside code block or span 1688 # 1689 if (# Find current paragraph 1690 preg_match('/(?>^\n?|\n\n)((?>.+\n?)*?)$/', $parsed, $matches) && 1691 ( 1692 # Then match in it either a code block... 1693 preg_match('/^ {'.($indent+4).'}.*(?>\n {'.($indent+4).'}.*)*'. 1694 '(?!\n)$/', $matches[1], $x) || 1695 # ...or unbalenced code span markers. (the regex matches balenced) 1696 !preg_match('/^(?>[^`]+|(`+)(?>[^`]+|(?!\1[^`])`)*?\1(?!`))*$/s', 1697 $matches[1]) 1698 )) 1699 { 1700 # Tag is in code block or span and may not be a tag at all. So we 1701 # simply skip the first char (should be a `<`). 1702 $parsed .= $tag{0}; 1703 $text = substr($tag, 1) . $text; # Put back $tag minus first char. 1704 } 1705 # 1706 # Check for: Opening Block level tag or 1707 # Opening Content Block tag (like ins and del) 1708 # used as a block tag (tag is alone on it's line). 1709 # 1710 else if (preg_match("{^<(?:$this->block_tags)\b}", $tag) || 1711 ( preg_match("{^<(?:$this->context_block_tags)\b}", $tag) && 1712 preg_match($newline_match_before, $parsed) && 1713 preg_match($newline_match_after, $text) ) 1714 ) 1715 { 1716 # Need to parse tag and following text using the HTML parser. 1717 list($block_text, $text) = 1718 $this->_hashHTMLBlocks_inHTML($tag . $text, "hashBlock", true); 1719 1720 # Make sure it stays outside of any paragraph by adding newlines. 1721 $parsed .= "\n\n$block_text\n\n"; 1722 } 1723 # 1724 # Check for: Clean tag (like script, math) 1725 # HTML Comments, processing instructions. 1726 # 1727 else if (preg_match("{^<(?:$this->clean_tags)\b}", $tag) || 1728 $tag{1} == '!' || $tag{1} == '?') 1729 { 1730 # Need to parse tag and following text using the HTML parser. 1731 # (don't check for markdown attribute) 1732 list($block_text, $text) = 1733 $this->_hashHTMLBlocks_inHTML($tag . $text, "hashClean", false); 1734 1735 $parsed .= $block_text; 1736 } 1737 # 1738 # Check for: Tag with same name as enclosing tag. 1739 # 1740 else if ($enclosing_tag !== '' && 1741 # Same name as enclosing tag. 1742 preg_match("{^</?(?:$enclosing_tag)\b}", $tag)) 1743 { 1744 # 1745 # Increase/decrease nested tag count. 1746 # 1747 if ($tag{1} == '/') $depth--; 1748 else if ($tag{strlen($tag)-2} != '/') $depth++; 1749 1750 if ($depth < 0) { 1751 # 1752 # Going out of parent element. Clean up and break so we 1753 # return to the calling function. 1754 # 1755 $text = $tag . $text; 1756 break; 1757 } 1758 1759 $parsed .= $tag; 1760 } 1761 else { 1762 $parsed .= $tag; 1763 } 1764 } while ($depth >= 0); 1765 1766 return array($parsed, $text); 1767 } 1768 function _hashHTMLBlocks_inHTML($text, $hash_method, $md_attr) { 1769 # 1770 # Parse HTML, calling _HashHTMLBlocks_InMarkdown for block tags. 1771 # 1772 # * Calls $hash_method to convert any blocks. 1773 # * Stops when the first opening tag closes. 1774 # * $md_attr indicate if the use of the `markdown="1"` attribute is allowed. 1775 # (it is not inside clean tags) 1776 # 1777 # Returns an array of that form: ( processed text , remaining text ) 1778 # 1779 if ($text === '') return array('', ''); 1780 1781 # Regex to match `markdown` attribute inside of a tag. 1782 $markdown_attr_match = ' 1783 { 1784 \s* # Eat whitespace before the `markdown` attribute 1785 markdown 1786 \s*=\s* 1787 (?: 1788 (["\']) # $1: quote delimiter 1789 (.*?) # $2: attribute value 1790 \1 # matching delimiter 1791 | 1792 ([^\s>]*) # $3: unquoted attribute value 1793 ) 1794 () # $4: make $3 always defined (avoid warnings) 1795 }xs'; 1796 1797 # Regex to match any tag. 1798 $tag_match = '{ 1799 ( # $2: Capture hole tag. 1800 </? # Any opening or closing tag. 1801 [\w:$]+ # Tag name. 1802 \s* # Whitespace. 1803 (?> 1804 ".*?" | # Double quotes (can contain `>`) 1805 \'.*?\' | # Single quotes (can contain `>`) 1806 .+? # Anything but quotes and `>`. 1807 )*? 1808 > # End of tag. 1809 | 1810 <!-- .*? --> # HTML Comment 1811 | 1812 <\?.*?\?> | <%.*?%> # Processing instruction 1813 | 1814 <!\[CDATA\[.*?\]\]> # CData Block 1815 ) 1816 }xs'; 1817 1818 $original_text = $text; # Save original text in case of faliure. 1819 1820 $depth = 0; # Current depth inside the tag tree. 1821 $block_text = ""; # Temporary text holder for current text. 1822 $parsed = ""; # Parsed text that will be returned. 1823 1824 # 1825 # Get the name of the starting tag. 1826 # 1827 if (preg_match("/^<([\w:$]*)\b/", $text, $matches)) 1828 $base_tag_name = $matches[1]; 1829 1830 # 1831 # Loop through every tag until we find the corresponding closing tag. 1832 # 1833 do { 1834 # 1835 # Split the text using the first $tag_match pattern found. 1836 # Text before pattern will be first in the array, text after 1837 # pattern will be at the end, and between will be any catches made 1838 # by the pattern. 1839 # 1840 $parts = preg_split($tag_match, $text, 2, PREG_SPLIT_DELIM_CAPTURE); 1841 1842 if (count($parts) < 3) { 1843 # 1844 # End of $text reached with unbalenced tag(s). 1845 # In that case, we return original text unchanged and pass the 1846 # first character as filtered to prevent an infinite loop in the 1847 # parent function. 1848 # 1849 return array($original_text{0}, substr($original_text, 1)); 1850 } 1851 1852 $block_text .= $parts[0]; # Text before current tag. 1853 $tag = $parts[1]; # Tag to handle. 1854 $text = $parts[2]; # Remaining text after current tag. 1855 1856 # 1857 # Check for: Auto-close tag (like <hr/>) 1858 # Comments and Processing Instructions. 1859 # 1860 if (preg_match("{^</?(?:$this->auto_close_tags)\b}", $tag) || 1861 $tag{1} == '!' || $tag{1} == '?') 1862 { 1863 # Just add the tag to the block as if it was text. 1864 $block_text .= $tag; 1865 } 1866 else { 1867 # 1868 # Increase/decrease nested tag count. Only do so if 1869 # the tag's name match base tag's. 1870 # 1871 if (preg_match("{^</?$base_tag_name\b}", $tag)) { 1872 if ($tag{1} == '/') $depth--; 1873 else if ($tag{strlen($tag)-2} != '/') $depth++; 1874 } 1875 1876 # 1877 # Check for `markdown="1"` attribute and handle it. 1878 # 1879 if ($md_attr && 1880 preg_match($markdown_attr_match, $tag, $attr_m) && 1881 preg_match('/^1|block|span$/', $attr_m[2] . $attr_m[3])) 1882 { 1883 # Remove `markdown` attribute from opening tag. 1884 $tag = preg_replace($markdown_attr_match, '', $tag); 1885 1886 # Check if text inside this tag must be parsed in span mode. 1887 $this->mode = $attr_m[2] . $attr_m[3]; 1888 $span_mode = $this->mode == 'span' || $this->mode != 'block' && 1889 preg_match("{^<(?:$this->contain_span_tags)\b}", $tag); 1890 1891 # Calculate indent before tag. 1892 preg_match('/(?:^|\n)( *?)(?! ).*?$/', $block_text, $matches); 1893 $indent = strlen($matches[1]); 1894 1895 # End preceding block with this tag. 1896 $block_text .= $tag; 1897 $parsed .= $this->$hash_method($block_text); 1898 1899 # Get enclosing tag name for the ParseMarkdown function. 1900 preg_match('/^<([\w:$]*)\b/', $tag, $matches); 1901 $tag_name = $matches[1]; 1902 1903 # Parse the content using the HTML-in-Markdown parser. 1904 list ($block_text, $text) 1905 = $this->_hashHTMLBlocks_inMarkdown($text, $indent, 1906 $tag_name, $span_mode); 1907 1908 # Outdent markdown text. 1909 if ($indent > 0) { 1910 $block_text = preg_replace("/^[ ]{1,$indent}/m", "", 1911 $block_text); 1912 } 1913 1914 # Append tag content to parsed text. 1915 if (!$span_mode) $parsed .= "\n\n$block_text\n\n"; 1916 else $parsed .= "$block_text"; 1917 1918 # Start over a new block. 1919 $block_text = ""; 1920 } 1921 else $block_text .= $tag; 1922 } 1923 1924 } while ($depth > 0); 1925 1926 # 1927 # Hash last block text that wasn't processed inside the loop. 1928 # 1929 $parsed .= $this->$hash_method($block_text); 1930 1931 return array($parsed, $text); 1932 } 1933 1934 1935 function hashClean($text) { 1936 # 1937 # Called whenever a tag must be hashed when a function insert a "clean" tag 1938 # in $text, it pass through this function and is automaticaly escaped, 1939 # blocking invalid nested overlap. 1940 # 1941 return $this->hashPart($text, 'C'); 1942 } 1943 1944 1945 function doHeaders($text) { 1946 # 1947 # Redefined to add id attribute support. 1948 # 1949 # Setext-style headers: 1950 # Header 1 {#header1} 1951 # ======== 1952 # 1953 # Header 2 {#header2} 1954 # -------- 1955 # 1956 $text = preg_replace_callback( 1957 '{ 1958 (^.+?) # $1: Header text 1959 (?:[ ]+\{\#([-_:a-zA-Z0-9]+)\})? # $2: Id attribute 1960 [ ]*\n(=+|-+)[ ]*\n+ # $3: Header footer 1961 }mx', 1962 array(&$this, '_doHeaders_callback_setext'), $text); 1963 1964 # atx-style headers: 1965 # # Header 1 {#header1} 1966 # ## Header 2 {#header2} 1967 # ## Header 2 with closing hashes ## {#header3} 1968 # ... 1969 # ###### Header 6 {#header2} 1970 # 1971 $text = preg_replace_callback('{ 1972 ^(\#{1,6}) # $1 = string of #\'s 1973 [ ]* 1974 (.+?) # $2 = Header text 1975 [ ]* 1976 \#* # optional closing #\'s (not counted) 1977 (?:[ ]+\{\#([-_:a-zA-Z0-9]+)\})? # id attribute 1978 [ ]* 1979 \n+ 1980 }xm', 1981 array(&$this, '_doHeaders_callback_atx'), $text); 1982 1983 return $text; 1984 } 1985 function _doHeaders_attr($attr) { 1986 if (empty($attr)) return ""; 1987 return " id=\"$attr\""; 1988 } 1989 function _doHeaders_callback_setext($matches) { 1990 $level = $matches[3]{0} == '=' ? 1 : 2; 1991 $attr = $this->_doHeaders_attr($id =& $matches[2]); 1992 $block = "<h$level$attr>".$this->runSpanGamut($matches[1])."</h$level>"; 1993 return "\n" . $this->hashBlock($block) . "\n\n"; 1994 } 1995 function _doHeaders_callback_atx($matches) { 1996 $level = strlen($matches[1]); 1997 $attr = $this->_doHeaders_attr($id =& $matches[3]); 1998 $block = "<h$level$attr>".$this->runSpanGamut($matches[2])."</h$level>"; 1999 return "\n" . $this->hashBlock($block) . "\n\n"; 2000 } 2001 2002 2003 function doTables($text) { 2004 # 2005 # Form HTML tables. 2006 # 2007 $less_than_tab = $this->tab_width - 1; 2008 # 2009 # Find tables with leading pipe. 2010 # 2011 # | Header 1 | Header 2 2012 # | -------- | -------- 2013 # | Cell 1 | Cell 2 2014 # | Cell 3 | Cell 4 2015 # 2016 $text = preg_replace_callback(' 2017 { 2018 ^ # Start of a line 2019 [ ]{0,'.$less_than_tab.'} # Allowed whitespace. 2020 [|] # Optional leading pipe (present) 2021 (.+) \n # $1: Header row (at least one pipe) 2022 2023 [ ]{0,'.$less_than_tab.'} # Allowed whitespace. 2024 [|] ([ ]*[-:]+[-| :]*) \n # $2: Header underline 2025 2026 ( # $3: Cells 2027 (?> 2028 [ ]* # Allowed whitespace. 2029 [|] .* \n # Row content. 2030 )* 2031 ) 2032 (?=\n|\Z) # Stop at final double newline. 2033 }xm', 2034 array(&$this, '_doTable_leadingPipe_callback'), $text); 2035 2036 # 2037 # Find tables without leading pipe. 2038 # 2039 # Header 1 | Header 2 2040 # -------- | -------- 2041 # Cell 1 | Cell 2 2042 # Cell 3 | Cell 4 2043 # 2044 $text = preg_replace_callback(' 2045 { 2046 ^ # Start of a line 2047 [ ]{0,'.$less_than_tab.'} # Allowed whitespace. 2048 (\S.*[|].*) \n # $1: Header row (at least one pipe) 2049 2050 [ ]{0,'.$less_than_tab.'} # Allowed whitespace. 2051 ([-:]+[ ]*[|][-| :]*) \n # $2: Header underline 2052 2053 ( # $3: Cells 2054 (?> 2055 .* [|] .* \n # Row content 2056 )* 2057 ) 2058 (?=\n|\Z) # Stop at final double newline. 2059 }xm', 2060 array(&$this, '_DoTable_callback'), $text); 2061 2062 return $text; 2063 } 2064 function _doTable_leadingPipe_callback($matches) { 2065 $head = $matches[1]; 2066 $underline = $matches[2]; 2067 $content = $matches[3]; 2068 2069 # Remove leading pipe for each row. 2070 $content = preg_replace('/^ *[|]/m', '', $content); 2071 2072 return $this->_doTable_callback(array($matches[0], $head, $underline, $content)); 2073 } 2074 function _doTable_callback($matches) { 2075 $head = $matches[1]; 2076 $underline = $matches[2]; 2077 $content = $matches[3]; 2078 2079 # Remove any tailing pipes for each line. 2080 $head = preg_replace('/[|] *$/m', '', $head); 2081 $underline = preg_replace('/[|] *$/m', '', $underline); 2082 $content = preg_replace('/[|] *$/m', '', $content); 2083 2084 # Reading alignement from header underline. 2085 $separators = preg_split('/ *[|] */', $underline); 2086 foreach ($separators as $n => $s) { 2087 if (preg_match('/^ *-+: *$/', $s)) $attr[$n] = ' align="right"'; 2088 else if (preg_match('/^ *:-+: *$/', $s))$attr[$n] = ' align="center"'; 2089 else if (preg_match('/^ *:-+ *$/', $s)) $attr[$n] = ' align="left"'; 2090 else $attr[$n] = ''; 2091 } 2092 2093 # Parsing span elements, including code spans, character escapes, 2094 # and inline HTML tags, so that pipes inside those gets ignored. 2095 $head = $this->parseSpan($head); 2096 $headers = preg_split('/ *[|] */', $head); 2097 $col_count = count($headers); 2098 2099 # Write column headers. 2100 $text = "<table>\n"; 2101 $text .= "<thead>\n"; 2102 $text .= "<tr>\n"; 2103 foreach ($headers as $n => $header) 2104 $text .= " <th$attr[$n]>".$this->runSpanGamut(trim($header))."</th>\n"; 2105 $text .= "</tr>\n"; 2106 $text .= "</thead>\n"; 2107 2108 # Split content by row. 2109 $rows = explode("\n", trim($content, "\n")); 2110 2111 $text .= "<tbody>\n"; 2112 foreach ($rows as $row) { 2113 # Parsing span elements, including code spans, character escapes, 2114 # and inline HTML tags, so that pipes inside those gets ignored. 2115 $row = $this->parseSpan($row); 2116 2117 # Split row by cell. 2118 $row_cells = preg_split('/ *[|] */', $row, $col_count); 2119 $row_cells = array_pad($row_cells, $col_count, ''); 2120 2121 $text .= "<tr>\n"; 2122 foreach ($row_cells as $n => $cell) 2123 $text .= " <td$attr[$n]>".$this->runSpanGamut(trim($cell))."</td>\n"; 2124 $text .= "</tr>\n"; 2125 } 2126 $text .= "</tbody>\n"; 2127 $text .= "</table>"; 2128 2129 return $this->hashBlock($text) . "\n"; 2130 } 2131 2132 2133 function doDefLists($text) { 2134 # 2135 # Form HTML definition lists. 2136 # 2137 $less_than_tab = $this->tab_width - 1; 2138 2139 # Re-usable pattern to match any entire dl list: 2140 $whole_list = '(?> 2141 ( # $1 = whole list 2142 ( # $2 2143 [ ]{0,'.$less_than_tab.'} 2144 ((?>.*\S.*\n)+) # $3 = defined term 2145 \n? 2146 [ ]{0,'.$less_than_tab.'}:[ ]+ # colon starting definition 2147 ) 2148 (?s:.+?) 2149 ( # $4 2150 \z 2151 | 2152 \n{2,} 2153 (?=\S) 2154 (?! # Negative lookahead for another term 2155 [ ]{0,'.$less_than_tab.'} 2156 (?: \S.*\n )+? # defined term 2157 \n? 2158 [ ]{0,'.$less_than_tab.'}:[ ]+ # colon starting definition 2159 ) 2160 (?! # Negative lookahead for another definition 2161 [ ]{0,'.$less_than_tab.'}:[ ]+ # colon starting definition 2162 ) 2163 ) 2164 ) 2165 )'; // mx 2166 2167 $text = preg_replace_callback('{ 2168 (?:(?<=\n\n)|\A\n?) 2169 '.$whole_list.' 2170 }mx', 2171 array(&$this, '_doDefLists_callback'), $text); 2172 2173 return $text; 2174 } 2175 function _doDefLists_callback($matches) { 2176 # Re-usable patterns to match list item bullets and number markers: 2177 $list = $matches[1]; 2178 2179 # Turn double returns into triple returns, so that we can make a 2180 # paragraph for the last item in a list, if necessary: 2181 $result = trim($this->processDefListItems($list)); 2182 $result = "<dl>\n" . $result . "\n</dl>"; 2183 return $this->hashBlock($result) . "\n\n"; 2184 } 2185 2186 2187 function processDefListItems($list_str) { 2188 # 2189 # Process the contents of a single definition list, splitting it 2190 # into individual term and definition list items. 2191 # 2192 $less_than_tab = $this->tab_width - 1; 2193 2194 # trim trailing blank lines: 2195 $list_str = preg_replace("/\n{2,}\\z/", "\n", $list_str); 2196 2197 # Process definition terms. 2198 $list_str = preg_replace_callback('{ 2199 (?:\n\n+|\A\n?) # leading line 2200 ( # definition terms = $1 2201 [ ]{0,'.$less_than_tab.'} # leading whitespace 2202 (?![:][ ]|[ ]) # negative lookahead for a definition 2203 # mark (colon) or more whitespace. 2204 (?: \S.* \n)+? # actual term (not whitespace). 2205 ) 2206 (?=\n?[ ]{0,3}:[ ]) # lookahead for following line feed 2207 # with a definition mark. 2208 }xm', 2209 array(&$this, '_processDefListItems_callback_dt'), $list_str); 2210 2211 # Process actual definitions. 2212 $list_str = preg_replace_callback('{ 2213 \n(\n+)? # leading line = $1 2214 [ ]{0,'.$less_than_tab.'} # whitespace before colon 2215 [:][ ]+ # definition mark (colon) 2216 ((?s:.+?)) # definition text = $2 2217 (?= \n+ # stop at next definition mark, 2218 (?: # next term or end of text 2219 [ ]{0,'.$less_than_tab.'} [:][ ] | 2220 <dt> | \z 2221 ) 2222 ) 2223 }xm', 2224 array(&$this, '_processDefListItems_callback_dd'), $list_str); 2225 2226 return $list_str; 2227 } 2228 function _processDefListItems_callback_dt($matches) { 2229 $terms = explode("\n", trim($matches[1])); 2230 $text = ''; 2231 foreach ($terms as $term) { 2232 $term = $this->runSpanGamut(trim($term)); 2233 $text .= "\n<dt>" . $term . "</dt>"; 2234 } 2235 return $text . "\n"; 2236 } 2237 function _processDefListItems_callback_dd($matches) { 2238 $leading_line = $matches[1]; 2239 $def = $matches[2]; 2240 2241 if ($leading_line || preg_match('/\n{2,}/', $def)) { 2242 $def = $this->runBlockGamut($this->outdent($def . "\n\n")); 2243 $def = "\n". $def ."\n"; 2244 } 2245 else { 2246 $def = rtrim($def); 2247 $def = $this->runSpanGamut($this->outdent($def)); 2248 } 2249 2250 return "\n<dd>" . $def . "</dd>\n"; 2251 } 2252 2253 2254 function doItalicsAndBold($text) { 2255 # 2256 # Redefined to change emphasis by underscore behaviour so that it does not 2257 # work in the middle of a word. 2258 # 2259 # <strong> must go first: 2260 $text = preg_replace_callback(array( 2261 '{ 2262 ( # $1: Marker 2263 (?<![a-zA-Z0-9]) # Not preceded by alphanum 2264 (?<!__) # or by two marker chars. 2265 __ 2266 ) 2267 (?=\S) # Not followed by whitespace 2268 (?!__) # or two others marker chars. 2269 ( # $2: Content 2270 (?> 2271 [^_]+? # Anthing not em markers. 2272 | 2273 # Balence any regular _ emphasis inside. 2274 (?<![a-zA-Z0-9]) _ (?=\S) (.+?) 2275 (?<=\S) _ (?![a-zA-Z0-9]) 2276 | 2277 _+ # Allow unbalenced as last resort. 2278 )+? 2279 ) 2280 (?<=\S) __ # End mark not preceded by whitespace. 2281 (?![a-zA-Z0-9]) # Not followed by alphanum 2282 (?!__) # or two others marker chars. 2283 }sx', 2284 '{ 2285 ( (?<!\*\*) \*\* ) # $1: Marker (not preceded by two *) 2286 (?=\S) # Not followed by whitespace 2287 (?!\1) # or two others marker chars. 2288 ( # $2: Content 2289 (?> 2290 [^*]+? # Anthing not em markers. 2291 | 2292 # Balence any regular * emphasis inside. 2293 \* (?=\S) (.+?) (?<=\S) \* 2294 | 2295 \* # Allow unbalenced as last resort. 2296 )+? 2297 ) 2298 (?<=\S) \*\* # End mark not preceded by whitespace. 2299 }sx', 2300 ), 2301 array(&$this, '_doItalicAndBold_strong_callback'), $text); 2302 # Then <em>: 2303 $text = preg_replace_callback(array( 2304 '{ ( (?<![a-zA-Z0-9])(?<!_)_ ) (?=\S) (?! \1) (.+?) (?<=\S) \1(?![a-zA-Z0-9]) }sx', 2305 '{ ( (?<!\*)\* ) (?=\S) (?! \1) (.+?) (?<=\S)(?<!\s\*) \1 }sx', 2306 ), 2307 array(&$this, '_doItalicAndBold_em_callback'), $text); 2308 2309 return $text; 2310 } 2311 2312 2313 function formParagraphs($text) { 2314 # 2315 # Params: 2316 # $text - string to process with html <p> tags 2317 # 2318 # Strip leading and trailing lines: 2319 $text = preg_replace('/\A\n+|\n+\z/', '', $text); 2320 2321 $grafs = preg_split('/\n{2,}/', $text, -1, PREG_SPLIT_NO_EMPTY); 2322 2323 # 2324 # Wrap <p> tags and unhashify HTML blocks 2325 # 2326 foreach ($grafs as $key => $value) { 2327 $value = trim($this->runSpanGamut($value)); 2328 2329 # Check if this should be enclosed in a paragraph. 2330 # Clean tag hashes & block tag hashes are left alone. 2331 $is_p = !preg_match('/^B\x1A[0-9]+B|^C\x1A[0-9]+C$/', $value); 2332 2333 if ($is_p) { 2334 $value = "<p>$value</p>"; 2335 } 2336 $grafs[$key] = $value; 2337 } 2338 2339 # Join grafs in one text, then unhash HTML tags. 2340 $text = implode("\n\n", $grafs); 2341 2342 # Finish by removing any tag hashes still present in $text. 2343 $text = $this->unhash($text); 2344 2345 return $text; 2346 } 2347 2348 2349 ### Footnotes 2350 2351 function stripFootnotes($text) { 2352 # 2353 # Strips link definitions from text, stores the URLs and titles in 2354 # hash references. 2355 # 2356 $less_than_tab = $this->tab_width - 1; 2357 2358 # Link defs are in the form: [^id]: url "optional title" 2359 $text = preg_replace_callback('{ 2360 ^[ ]{0,'.$less_than_tab.'}\[\^(.+?)\][ ]?: # note_id = $1 2361 [ ]* 2362 \n? # maybe *one* newline 2363 ( # text = $2 (no blank lines allowed) 2364 (?: 2365 .+ # actual text 2366 | 2367 \n # newlines but 2368 (?!\[\^.+?\]:\s)# negative lookahead for footnote marker. 2369 (?!\n+[ ]{0,3}\S)# ensure line is not blank and followed 2370 # by non-indented content 2371 )* 2372 ) 2373 }xm', 2374 array(&$this, '_stripFootnotes_callback'), 2375 $text); 2376 return $text; 2377 } 2378 function _stripFootnotes_callback($matches) { 2379 $note_id = $this->fn_id_prefix . $matches[1]; 2380 $this->footnotes[$note_id] = $this->outdent($matches[2]); 2381 return ''; # String that will replace the block 2382 } 2383 2384 2385 function doFootnotes($text) { 2386 # 2387 # Replace footnote references in $text [^id] with a special text-token 2388 # which will be can be 2389 # 2390 if (!$this->in_footnote && !$this->in_anchor) { 2391 $text = preg_replace('{\[\^(.+?)\]}', "F\x1Afn:\\1\x1A:", $text); 2392 } 2393 return $text; 2394 } 2395 2396 2397 function appendFootnotes($text) { 2398 # 2399 # Append footnote list to text. 2400 # 2401 2402 $text = preg_replace_callback('{F\x1Afn:(.*?)\x1A:}', 2403 array(&$this, '_appendFootnotes_callback'), $text); 2404 2405 if (!empty($this->footnotes_ordered)) { 2406 $text .= "\n\n"; 2407 $text .= "<div class=\"footnotes\">\n"; 2408 $text .= "<hr". MARKDOWN_EMPTY_ELEMENT_SUFFIX ."\n"; 2409 $text .= "<ol>\n\n"; 2410 2411 $attr = " rev=\"footnote\""; 2412 if ($this->fn_backlink_class != "") { 2413 $class = $this->fn_backlink_class; 2414 $class = $this->encodeAmpsAndAngles($class); 2415 $class = str_replace('"', '"', $class); 2416 $attr .= " class=\"$class\""; 2417 } 2418 if ($this->fn_backlink_title != "") { 2419 $title = $this->fn_backlink_title; 2420 $title = $this->encodeAmpsAndAngles($title); 2421 $title = str_replace('"', '"', $title); 2422 $attr .= " title=\"$title\""; 2423 } 2424 $num = 0; 2425 2426 $this->in_footnote = true; 2427 2428 foreach ($this->footnotes_ordered as $note_id => $footnote) { 2429 $footnote .= "\n"; # Need to append newline before parsing. 2430 $footnote = $this->runBlockGamut("$footnote\n"); 2431 2432 $attr2 = str_replace("%%", ++$num, $attr); 2433 2434 # Add backlink to last paragraph; create new paragraph if needed. 2435 $backlink = "<a href=\"#fnref:$note_id\"$attr2>↩</a>"; 2436 if (preg_match('{</p>$}', $footnote)) { 2437 $footnote = substr($footnote, 0, -4) . " $backlink</p>"; 2438 } else { 2439 $footnote .= "\n\n<p>$backlink</p>"; 2440 } 2441 2442 $text .= "<li id=\"fn:$note_id\">\n"; 2443 $text .= $footnote . "\n"; 2444 $text .= "</li>\n\n"; 2445 } 2446 2447 $this->in_footnote = false; 2448 2449 $text .= "</ol>\n"; 2450 $text .= "</div>"; 2451 } 2452 return $text; 2453 } 2454 function _appendFootnotes_callback($matches) { 2455 $node_id = $this->fn_id_prefix . $matches[1]; 2456 2457 # Create footnote marker only if it has a corresponding footnote *and* 2458 # the footnote hasn't been used by another marker. 2459 if (isset($this->footnotes[$node_id])) { 2460 # Transfert footnote content to the ordered list. 2461 $this->footnotes_ordered[$node_id] = $this->footnotes[$node_id]; 2462 unset($this->footnotes[$node_id]); 2463 2464 $num = count($this->footnotes_ordered); 2465 $attr = " rel=\"footnote\""; 2466 if ($this->fn_link_class != "") { 2467 $class = $this->fn_link_class; 2468 $class = $this->encodeAmpsAndAngles($class); 2469 $class = str_replace('"', '"', $class); 2470 $attr .= " class=\"$class\""; 2471 } 2472 if ($this->fn_link_title != "") { 2473 $title = $this->fn_link_title; 2474 $title = $this->encodeAmpsAndAngles($title); 2475 $title = str_replace('"', '"', $title); 2476 $attr .= " title=\"$title\""; 2477 } 2478 $attr = str_replace("%%", $num, $attr); 2479 2480 return 2481 "<sup id=\"fnref:$node_id\">". 2482 "<a href=\"#fn:$node_id\"$attr>$num</a>". 2483 "</sup>"; 2484 } 2485 2486 return "[^".$matches[1]."]"; 2487 } 2488 2489 2490 ### Abbreviations ### 2491 2492 function stripAbbreviations($text) { 2493 # 2494 # Strips abbreviations from text, stores titles in hash references. 2495 # 2496 $less_than_tab = $this->tab_width - 1; 2497 2498 # Link defs are in the form: [id]*: url "optional title" 2499 $text = preg_replace_callback('{ 2500 ^[ ]{0,'.$less_than_tab.'}\*\[(.+?)\][ ]?: # abbr_id = $1 2501 (.*) # text = $2 (no blank lines allowed) 2502 }xm', 2503 array(&$this, '_stripAbbreviations_callback'), 2504 $text); 2505 return $text; 2506 } 2507 function _stripAbbreviations_callback($matches) { 2508 $abbr_word = $matches[1]; 2509 $abbr_desc = $matches[2]; 2510 $this->abbr_matches[] = preg_quote($abbr_word); 2511 $this->abbr_desciptions[$abbr_word] = trim($abbr_desc); 2512 return ''; # String that will replace the block 2513 } 2514 2515 2516 function doAbbreviations($text) { 2517 # 2518 # Find defined abbreviations in text and wrap them in <abbr> elements. 2519 # 2520 if ($this->abbr_matches) { 2521 // cannot use the /x modifier because abbr_matches may 2522 // contain spaces: 2523 $text = preg_replace_callback('{'. 2524 '(?<![\w\x1A])'. 2525 '(?:'. implode('|', $this->abbr_matches) .')'. 2526 '(?![\w\x1A])'. 2527 '}', 2528 array(&$this, '_doAbbreviations_callback'), $text); 2529 } 2530 return $text; 2531 } 2532 function _doAbbreviations_callback($matches) { 2533 $abbr = $matches[0]; 2534 if (isset($this->abbr_desciptions[$abbr])) { 2535 $desc = $this->abbr_desciptions[$abbr]; 2536 if (empty($desc)) { 2537 return $this->hashPart("<abbr>$abbr</abbr>"); 2538 } else { 2539 $desc = htmlspecialchars($desc, ENT_NOQUOTES); 2540 return $this->hashPart("<abbr title=\"$desc\">$abbr</abbr>"); 2541 } 2542 } else { 2543 return $matches[0]; 2544 } 2545 } 2546 2547 } 2548 2549 1405 2550 /* 1406 2551 1407 PHP Markdown 1408 ============ 2552 PHP Markdown Extra 2553 ================== 1409 2554 1410 2555 Description 1411 2556 ----------- 1412 2557 1413 This is a PHP translation of the original Markdown formatter written in 1414 Perl by John Gruber. 2558 This is a PHP port of the original Markdown formatter written in Perl 2559 by John Gruber. This special "Extra" version of PHP Markdown features 2560 further enhancements to the syntax for making additional constructs 2561 such as tables and definition list. 1415 2562 1416 2563 Markdown is a text-to-HTML filter; it translates an easy-to-read / … … 1445 2592 See the readme file for detailed release notes for this version. 1446 2593 1447 1.0.1e (28 Dec 2006)1448 1449 1.0.1d (1 Dec 2006)1450 1451 1.0.1c (9 Dec 2005)1452 1453 1.0.1b (6 Jun 2005)1454 1455 1.0.1a (15 Apr 2005)1456 1457 1.0.1 (16 Dec 2004)1458 1459 1.0 (21 Aug 2004)1460 1461 1462 Author & Contributors1463 ---------------------1464 1465 Original Markdown by John Gruber1466 <http://daringfireball.net/>1467 1468 PHP port and extras by Michel Fortin1469 <http://www.michelf.com/>1470 1471 2594 1472 2595 Copyright and License 1473 2596 --------------------- 1474 2597 1475 Copyright (c) 2004-2006 Michel Fortin 2598 PHP Markdown & Extra 2599 Copyright (c) 2004-2007 Michel Fortin 1476 2600 <http://www.michelf.com/> 1477 2601 All rights reserved. 1478 2602 2603 Based on Markdown 1479 2604 Copyright (c) 2003-2006 John Gruber 1480 2605 <http://daringfireball.net/> … … 1512 2637 class Markdown extends Plugin { 1513 2638 var $description = 'Markdown 형식을 처리합니다.'; 2639 var $markdown_class; 2640 var $markdown_config_path = 'data/markdown_class_name.txt'; 1514 2641 1515 2642 function on_init() { 2643 if ($class_name = @file($this->markdown_config_path)) 2644 $class_name = trim($class_name[0]); 2645 else 2646 $class_name = 'Markdown_Parser'; 2647 2648 $this->markdown_class = $class_name; 2649 $this->parser = new $class_name; 2650 1516 2651 add_filter('PostList', array(&$this, 'format'), 500); 1517 2652 add_filter('PostView', array(&$this, 'format'), 500); … … 1522 2657 function format(&$model) { 1523 2658 if (!$model->body) return; 1524 $model->body = Markdown($model->body); 2659 $model->body = $this->parser->transform($model->body); 2660 } 2661 2662 function on_settings() { 2663 if (is_post()) { 2664 if ($fp = fopen($this->markdown_config_path, 'w')) { 2665 fwrite($fp, $_POST['class_name']); 2666 fclose($fp); 2667 2668 $this->markdown_class = $_POST['class_name']; 2669 2670 echo '<div class="flash pass">설정을 저장했습니다.</div>'; 2671 } 2672 else { 2673 ?><div class="flash pass"> 2674 설정 파일을 저장하는데 실패했습니다. 2675 ({$this->markdown_config_path}에 쓰기 권한이 없습니다.) 2676 </div><?php 2677 } 2678 } 2679 2680 $types = array( 2681 'Markdown_Parser' => 'Markdown', 2682 'MarkdownExtra_Parser' => 'Markdown Extra' 2683 ); 2684 2685 ?><form method="post" action="?"> 2686 <div> 2687 <label for="markdown_type">종류</label> 2688 <select id="markdown_type" name="class_name"><?php 2689 foreach($types as $val => $text) 2690 echo option_tag($val, $text, $val == $this->markdown_class) 2691 ?></select> 2692 </div> 2693 2694 <p><a href="http://michelf.com/projects/php-markdown/extra/"> 2695 Markdown Extra</a>는 오리지널 Markdown의 확장판으로, 표, 정의 목록, 2696 약자 표기 등의 기능이 추가되어 있습니다.</p> 2697 2698 <div><?php echo submit_tag('Save') ?></div> 2699 </form><?php 1525 2700 } 1526 2701 }
Note: See TracChangeset
for help on using the changeset viewer.
