PageRenderTime 95ms CodeModel.GetById 57ms app.highlight 22ms RepoModel.GetById 1ms app.codeStats 1ms

/modules/userguide/vendor/markdown/markdown.php

https://bitbucket.org/sklyarov_ivan/trap
PHP | 2909 lines | 1846 code | 324 blank | 739 comment | 179 complexity | 8320ec55eab3a91d90b014b16f99a9b2 MD5 | raw file

Large files are truncated, but you can click here to view the full file

   1<?php
   2#
   3# Markdown Extra  -  A text-to-HTML conversion tool for web writers
   4#
   5# PHP Markdown & Extra
   6# Copyright (c) 2004-2008 Michel Fortin  
   7# <http://www.michelf.com/projects/php-markdown/>
   8#
   9# Original Markdown
  10# Copyright (c) 2004-2006 John Gruber  
  11# <http://daringfireball.net/projects/markdown/>
  12#
  13
  14
  15define( 'MARKDOWN_VERSION',  "1.0.1m" ); # Sat 21 Jun 2008
  16define( 'MARKDOWNEXTRA_VERSION',  "1.2.3" ); # Wed 31 Dec 2008
  17
  18
  19#
  20# Global default settings:
  21#
  22
  23# Change to ">" for HTML output
  24@define( 'MARKDOWN_EMPTY_ELEMENT_SUFFIX',  " />");
  25
  26# Define the width of a tab for code blocks.
  27@define( 'MARKDOWN_TAB_WIDTH',     4 );
  28
  29# Optional title attribute for footnote links and backlinks.
  30@define( 'MARKDOWN_FN_LINK_TITLE',         "" );
  31@define( 'MARKDOWN_FN_BACKLINK_TITLE',     "" );
  32
  33# Optional class attribute for footnote links and backlinks.
  34@define( 'MARKDOWN_FN_LINK_CLASS',         "" );
  35@define( 'MARKDOWN_FN_BACKLINK_CLASS',     "" );
  36
  37
  38#
  39# WordPress settings:
  40#
  41
  42# Change to false to remove Markdown from posts and/or comments.
  43@define( 'MARKDOWN_WP_POSTS',      true );
  44@define( 'MARKDOWN_WP_COMMENTS',   true );
  45
  46
  47
  48### Standard Function Interface ###
  49
  50@define( 'MARKDOWN_PARSER_CLASS',  'MarkdownExtra_Parser' );
  51
  52function Markdown($text) {
  53#
  54# Initialize the parser and return the result of its transform method.
  55#
  56	# Setup static parser variable.
  57	static $parser;
  58	if (!isset($parser)) {
  59		$parser_class = MARKDOWN_PARSER_CLASS;
  60		$parser = new $parser_class;
  61	}
  62
  63	# Transform text using parser.
  64	return $parser->transform($text);
  65}
  66
  67
  68### WordPress Plugin Interface ###
  69
  70/*
  71Plugin Name: Markdown Extra
  72Plugin URI: http://www.michelf.com/projects/php-markdown/
  73Description: <a href="http://daringfireball.net/projects/markdown/syntax">Markdown syntax</a> allows you to write using an easy-to-read, easy-to-write plain text format. Based on the original Perl version by <a href="http://daringfireball.net/">John Gruber</a>. <a href="http://www.michelf.com/projects/php-markdown/">More...</a>
  74Version: 1.2.2
  75Author: Michel Fortin
  76Author URI: http://www.michelf.com/
  77*/
  78
  79if (isset($wp_version)) {
  80	# More details about how it works here:
  81	# <http://www.michelf.com/weblog/2005/wordpress-text-flow-vs-markdown/>
  82	
  83	# Post content and excerpts
  84	# - Remove WordPress paragraph generator.
  85	# - Run Markdown on excerpt, then remove all tags.
  86	# - Add paragraph tag around the excerpt, but remove it for the excerpt rss.
  87	if (MARKDOWN_WP_POSTS) {
  88		remove_filter('the_content',     'wpautop');
  89        remove_filter('the_content_rss', 'wpautop');
  90		remove_filter('the_excerpt',     'wpautop');
  91		add_filter('the_content',     'mdwp_MarkdownPost', 6);
  92        add_filter('the_content_rss', 'mdwp_MarkdownPost', 6);
  93		add_filter('get_the_excerpt', 'mdwp_MarkdownPost', 6);
  94		add_filter('get_the_excerpt', 'trim', 7);
  95		add_filter('the_excerpt',     'mdwp_add_p');
  96		add_filter('the_excerpt_rss', 'mdwp_strip_p');
  97		
  98		remove_filter('content_save_pre',  'balanceTags', 50);
  99		remove_filter('excerpt_save_pre',  'balanceTags', 50);
 100		add_filter('the_content',  	  'balanceTags', 50);
 101		add_filter('get_the_excerpt', 'balanceTags', 9);
 102	}
 103	
 104	# Add a footnote id prefix to posts when inside a loop.
 105	function mdwp_MarkdownPost($text) {
 106		static $parser;
 107		if (!$parser) {
 108			$parser_class = MARKDOWN_PARSER_CLASS;
 109			$parser = new $parser_class;
 110		}
 111		if (is_single() || is_page() || is_feed()) {
 112			$parser->fn_id_prefix = "";
 113		} else {
 114			$parser->fn_id_prefix = get_the_ID() . ".";
 115		}
 116		return $parser->transform($text);
 117	}
 118	
 119	# Comments
 120	# - Remove WordPress paragraph generator.
 121	# - Remove WordPress auto-link generator.
 122	# - Scramble important tags before passing them to the kses filter.
 123	# - Run Markdown on excerpt then remove paragraph tags.
 124	if (MARKDOWN_WP_COMMENTS) {
 125		remove_filter('comment_text', 'wpautop', 30);
 126		remove_filter('comment_text', 'make_clickable');
 127		add_filter('pre_comment_content', 'Markdown', 6);
 128		add_filter('pre_comment_content', 'mdwp_hide_tags', 8);
 129		add_filter('pre_comment_content', 'mdwp_show_tags', 12);
 130		add_filter('get_comment_text',    'Markdown', 6);
 131		add_filter('get_comment_excerpt', 'Markdown', 6);
 132		add_filter('get_comment_excerpt', 'mdwp_strip_p', 7);
 133	
 134		global $mdwp_hidden_tags, $mdwp_placeholders;
 135		$mdwp_hidden_tags = explode(' ',
 136			'<p> </p> <pre> </pre> <ol> </ol> <ul> </ul> <li> </li>');
 137		$mdwp_placeholders = explode(' ', str_rot13(
 138			'pEj07ZbbBZ U1kqgh4w4p pre2zmeN6K QTi31t9pre ol0MP1jzJR '.
 139			'ML5IjmbRol ulANi1NsGY J7zRLJqPul liA8ctl16T K9nhooUHli'));
 140	}
 141	
 142	function mdwp_add_p($text) {
 143		if (!preg_match('{^$|^<(p|ul|ol|dl|pre|blockquote)>}i', $text)) {
 144			$text = '<p>'.$text.'</p>';
 145			$text = preg_replace('{\n{2,}}', "</p>\n\n<p>", $text);
 146		}
 147		return $text;
 148	}
 149	
 150	function mdwp_strip_p($t) { return preg_replace('{</?p>}i', '', $t); }
 151
 152	function mdwp_hide_tags($text) {
 153		global $mdwp_hidden_tags, $mdwp_placeholders;
 154		return str_replace($mdwp_hidden_tags, $mdwp_placeholders, $text);
 155	}
 156	function mdwp_show_tags($text) {
 157		global $mdwp_hidden_tags, $mdwp_placeholders;
 158		return str_replace($mdwp_placeholders, $mdwp_hidden_tags, $text);
 159	}
 160}
 161
 162
 163### bBlog Plugin Info ###
 164
 165function identify_modifier_markdown() {
 166	return array(
 167		'name' => 'markdown',
 168		'type' => 'modifier',
 169		'nicename' => 'PHP Markdown Extra',
 170		'description' => 'A text-to-HTML conversion tool for web writers',
 171		'authors' => 'Michel Fortin and John Gruber',
 172		'licence' => 'GPL',
 173		'version' => MARKDOWNEXTRA_VERSION,
 174		'help' => '<a href="http://daringfireball.net/projects/markdown/syntax">Markdown syntax</a> allows you to write using an easy-to-read, easy-to-write plain text format. Based on the original Perl version by <a href="http://daringfireball.net/">John Gruber</a>. <a href="http://www.michelf.com/projects/php-markdown/">More...</a>',
 175		);
 176}
 177
 178
 179### Smarty Modifier Interface ###
 180
 181function smarty_modifier_markdown($text) {
 182	return Markdown($text);
 183}
 184
 185
 186### Textile Compatibility Mode ###
 187
 188# Rename this file to "classTextile.php" and it can replace Textile everywhere.
 189
 190if (strcasecmp(substr(__FILE__, -16), "classTextile.php") == 0) {
 191	# Try to include PHP SmartyPants. Should be in the same directory.
 192	@include_once 'smartypants.php';
 193	# Fake Textile class. It calls Markdown instead.
 194	class Textile {
 195		function TextileThis($text, $lite='', $encode='') {
 196			if ($lite == '' && $encode == '')    $text = Markdown($text);
 197			if (function_exists('SmartyPants'))  $text = SmartyPants($text);
 198			return $text;
 199		}
 200		# Fake restricted version: restrictions are not supported for now.
 201		function TextileRestricted($text, $lite='', $noimage='') {
 202			return $this->TextileThis($text, $lite);
 203		}
 204		# Workaround to ensure compatibility with TextPattern 4.0.3.
 205		function blockLite($text) { return $text; }
 206	}
 207}
 208
 209
 210
 211#
 212# Markdown Parser Class
 213#
 214
 215class Markdown_Parser {
 216
 217	# Regex to match balanced [brackets].
 218	# Needed to insert a maximum bracked depth while converting to PHP.
 219	var $nested_brackets_depth = 6;
 220	var $nested_brackets_re;
 221	
 222	var $nested_url_parenthesis_depth = 4;
 223	var $nested_url_parenthesis_re;
 224
 225	# Table of hash values for escaped characters:
 226	var $escape_chars = '\`*_{}[]()>#+-.!';
 227	var $escape_chars_re;
 228
 229	# Change to ">" for HTML output.
 230	var $empty_element_suffix = MARKDOWN_EMPTY_ELEMENT_SUFFIX;
 231	var $tab_width = MARKDOWN_TAB_WIDTH;
 232	
 233	# Change to `true` to disallow markup or entities.
 234	var $no_markup = false;
 235	var $no_entities = false;
 236	
 237	# Predefined urls and titles for reference links and images.
 238	var $predef_urls = array();
 239	var $predef_titles = array();
 240
 241
	function Markdown_Parser() {
	#
	# PHP 4 style constructor. Kept as a plain method so that existing code
	# calling it explicitly (e.g. parent::Markdown_Parser() in a subclass)
	# keeps working; the real work lives in __construct() because PHP 8 no
	# longer treats a method named after its class as the constructor.
	#
		self::__construct();
	}

	function __construct() {
	#
	# Constructor. Initializes the member variables derived from the
	# configurable settings and sorts the gamut tables.
	#
		$this->_initDetab();
		$this->prepareItalicsAndBold();

		# Regex matching balanced [brackets], nested at most
		# $nested_brackets_depth levels deep.
		$this->nested_brackets_re =
			str_repeat('(?>[^\[\]]+|\[', $this->nested_brackets_depth).
			str_repeat('\])*', $this->nested_brackets_depth);

		# Same idea for parentheses inside URLs.
		$this->nested_url_parenthesis_re =
			str_repeat('(?>[^()\s]+|\(', $this->nested_url_parenthesis_depth).
			str_repeat('(?>\)))*', $this->nested_url_parenthesis_depth);

		# Character class matching any backslash-escapable character.
		$this->escape_chars_re = '['.preg_quote($this->escape_chars).']';

		# Sort document, block, and span gamut in ascending priority order.
		asort($this->document_gamut);
		asort($this->block_gamut);
		asort($this->span_gamut);
	}
 264
 265
 266	# Internal hashes used during transformation.
 267	var $urls = array();
 268	var $titles = array();
 269	var $html_hashes = array();
 270	
 271	# Status flag to avoid invalid nesting.
 272	var $in_anchor = false;
 273	
 274	
	function setup() {
	#
	# Called before the transformation process starts to reset the parser
	# state left over from any previous run.
	#
		# Start from the predefined reference links; drop old HTML hashes.
		$this->urls = $this->predef_urls;
		$this->titles = $this->predef_titles;
		$this->html_hashes = array();

		# Reset the member flag. The previous code assigned a local
		# variable here, so the flag itself was never cleared.
		$this->in_anchor = false;
	}
 287	
	function teardown() {
	#
	# Called once a transformation is finished: empties the per-document
	# hashes so they do not keep memory allocated needlessly.
	#
		$this->urls = $this->titles = $this->html_hashes = array();
	}
 297
 298
	function transform($text) {
	#
	# Main function. Normalizes the input text, runs it through every
	# method of the document gamut and returns the result followed by a
	# newline.
	#
		$this->setup();

		# Drop a leading UTF-8 BOM and every \x1A character; \x1A is the
		# marker hashPart() puts inside its tokens.
		$text = preg_replace('{^\xEF\xBB\xBF|\x1A}', '', $text);

		# Unix line endings only (converts DOS "\r\n" and Mac "\r").
		$text = preg_replace('{\r\n?}', "\n", $text);

		# Guarantee the text ends with a couple of newlines.
		$text .= "\n\n";

		# Tabs become spaces.
		$text = $this->detab($text);

		# Block-level HTML is replaced by hash entries.
		$text = $this->hashHTMLBlocks($text);

		# Empty out lines made only of spaces, so that later patterns can
		# match consecutive blank lines with /\n+/ instead of something
		# contorted like /[ ]*\n+/ .
		$text = preg_replace('/^[ ]+$/m', '', $text);

		# Apply each document gamut method in its sorted order.
		foreach (array_keys($this->document_gamut) as $method) {
			$text = $this->$method($text);
		}

		$this->teardown();

		return $text . "\n";
	}
 337	
 338	var $document_gamut = array(
 339		# Strip link definitions, store in hashes.
 340		"stripLinkDefinitions" => 20,
 341		
 342		"runBasicBlockGamut"   => 30,
 343		);
 344
 345
	function stripLinkDefinitions($text) {
	#
	# Removes link definitions from $text; the callback records their URLs
	# and titles in $this->urls and $this->titles. Returns the remaining
	# text.
	#
		$less_than_tab = $this->tab_width - 1;

		# Link defs are in the form: ^[id]: url "optional title"
		$definition_re = '{
							^[ ]{0,'.$less_than_tab.'}\[(.+)\][ ]?:	# id = $1
							  [ ]*
							  \n?				# maybe *one* newline
							  [ ]*
							<?(\S+?)>?			# url = $2
							  [ ]*
							  \n?				# maybe one newline
							  [ ]*
							(?:
								(?<=\s)			# lookbehind for whitespace
								["(]
								(.*?)			# title = $3
								[")]
								[ ]*
							)?	# title is optional
							(?:\n+|\Z)
			}xm';

		return preg_replace_callback($definition_re,
			array(&$this, '_stripLinkDefinitions_callback'),
			$text);
	}
	function _stripLinkDefinitions_callback($matches) {
		# Ids are stored lower-cased. The title group is optional and may
		# be absent from $matches, in which case null is stored.
		$id = strtolower($matches[1]);
		$this->urls[$id]   = $matches[2];
		$this->titles[$id] = isset($matches[3]) ? $matches[3] : null;

		# The whole definition is removed from the text.
		return '';
	}
 382
 383
	function hashHTMLBlocks($text) {
	#
	# Finds block-level HTML in $text (block tags, <hr>, standalone
	# comments and <? / <% processing instructions) and passes each match
	# to _hashHTMLBlocks_callback, which substitutes a hash token for it.
	# Returns $text untouched when $this->no_markup is set.
	#
		if ($this->no_markup)  return $text;

		$less_than_tab = $this->tab_width - 1;

		# Hashify HTML blocks:
		# We only want to do this for block-level HTML tags, such as headers,
		# lists, and tables. That's because we still want to wrap <p>s around
		# "paragraphs" that are wrapped in non-block-level tags, such as anchors,
		# phrase emphasis, and spans. The list of tags we're looking for is
		# hard-coded:
		#
		# *  List "a" is made of tags which can be both inline or block-level.
		#    These will be treated block-level when the start tag is alone on 
		#    its line, otherwise they're not matched here and will be taken as 
		#    inline later.
		# *  List "b" is made of tags which are always block-level;
		#
		$block_tags_a_re = 'ins|del';
		$block_tags_b_re = 'p|div|h[1-6]|blockquote|pre|table|dl|ol|ul|address|'.
						   'script|noscript|form|fieldset|iframe|math';

		# Regular expression for the content of a block tag.
		$nested_tags_level = 4;
		$attr = '
			(?>				# optional tag attributes
			  \s			# starts with whitespace
			  (?>
				[^>"/]+		# text outside quotes
			  |
				/+(?!>)		# slash not followed by ">"
			  |
				"[^"]*"		# text inside double quotes (tolerate ">")
			  |
				\'[^\']*\'	# text inside single quotes (tolerate ">")
			  )*
			)?	
			';
		$content =
			str_repeat('
				(?>
				  [^<]+			# content without tag
				|
				  <\2			# nested opening tag
					'.$attr.'	# attributes
					(?>
					  />
					|
					  >', $nested_tags_level).	# end of opening tag
					  '.*?'.					# last level nested tag content
			str_repeat('
					  </\2\s*>	# closing nested tag
					)
				  |				
					<(?!/\2\s*>	# other tags with a different name
				  )
				)*',
				$nested_tags_level);
		# Same pattern for the group "a" alternative, whose tag name is $3.
		$content2 = str_replace('\2', '\3', $content);

		# First, look for nested blocks, e.g.:
		# 	<div>
		# 		<div>
		# 		tags for inner block must be indented.
		# 		</div>
		# 	</div>
		#
		# The outermost tags must start at the left margin for this to match, and
		# the inner nested divs must be indented.
		# We need to do this before the next, more liberal match, because the next
		# match will start at the first `<div>` and stop at the first `</div>`.
		#
		# Capture groups: $1 = whole block, $2 = group "b" tag name,
		# $3 = group "a" tag name, $4 = "hr", $5 = processing instruction
		# character ("?" or "%").
		$text = preg_replace_callback('{(?>
			(?>
				(?<=\n\n)		# Starting after a blank line
				|				# or
				\A\n?			# the beginning of the doc
			)
			(						# save in $1

			  # Match from `\n<tag>` to `</tag>\n`, handling nested tags 
			  # in between.
					
						[ ]{0,'.$less_than_tab.'}
						<('.$block_tags_b_re.')# start tag = $2
						'.$attr.'>			# attributes followed by > and \n
						'.$content.'		# content, support nesting
						</\2>				# the matching end tag
						[ ]*				# trailing spaces/tabs
						(?=\n+|\Z)	# followed by a newline or end of document

			| # Special version for tags of group a.

						[ ]{0,'.$less_than_tab.'}
						<('.$block_tags_a_re.')# start tag = $3
						'.$attr.'>[ ]*\n	# attributes followed by >
						'.$content2.'		# content, support nesting
						</\3>				# the matching end tag
						[ ]*				# trailing spaces/tabs
						(?=\n+|\Z)	# followed by a newline or end of document
					
			| # Special case just for <hr />. It was easier to make a special 
			  # case than to make the other regex more complicated.
			
						[ ]{0,'.$less_than_tab.'}
						<(hr)				# start tag = $4
						'.$attr.'			# attributes
						/?>					# the matching end tag
						[ ]*
						(?=\n{2,}|\Z)		# followed by a blank line or end of document
			
			| # Special case for standalone HTML comments:
			
					[ ]{0,'.$less_than_tab.'}
					(?s:
						<!-- .*? -->
					)
					[ ]*
					(?=\n{2,}|\Z)		# followed by a blank line or end of document
			
			| # PHP and ASP-style processor instructions (<? and <%)
			
					[ ]{0,'.$less_than_tab.'}
					(?s:
						<([?%])			# $5
						.*?
						\5>				# same character as the opener
					)
					[ ]*
					(?=\n{2,}|\Z)		# followed by a blank line or end of document
					
			)
			)}Sxmi',
			array(&$this, '_hashHTMLBlocks_callback'),
			$text);

		return $text;
	}
	function _hashHTMLBlocks_callback($matches) {
		# Swap the matched block ($1) for its hash token, surrounded by
		# blank lines.
		return "\n\n" . $this->hashBlock($matches[1]) . "\n\n";
	}
 526	
 527	
	function hashPart($text, $boundary = 'X') {
	#
	# Stores $text in $this->html_hashes under a freshly generated token and
	# returns that token, which then stands in for $text in the text stream.
	#
	# $boundary is the character placed at both ends of the token. By
	# convention, "B" is used for block elements that must not be wrapped
	# into paragraph tags at the end, ":" for elements that are word
	# separators and "X" for the general case.
	#
		# Counter shared by all calls; it makes each token distinct.
		static $i = 0;

		# Resolve any token already present in $text now, so that it does
		# not have to be unhashed several times at the end.
		$text = $this->unhash($text);

		$i++;
		$key = $boundary . "\x1A" . $i . $boundary;
		$this->html_hashes[$key] = $text;

		return $key; # String that will replace the tag.
	}
 549
 550
	function hashBlock($text) {
	#
	# Convenience wrapper: hashPart() with the block-level boundary "B".
	#
		$token = $this->hashPart($text, 'B');
		return $token;
	}
 557
 558
 559	var $block_gamut = array(
 560	#
 561	# These are all the transformations that form block-level
 562	# tags like paragraphs, headers, and list items.
 563	#
 564		"doHeaders"         => 10,
 565		"doHorizontalRules" => 20,
 566		
 567		"doLists"           => 40,
 568		"doCodeBlocks"      => 50,
 569		"doBlockQuotes"     => 60,
 570		);
 571
	function runBlockGamut($text) {
	#
	# Run block gamut transformations, hashing raw HTML blocks first.
	#
		# Raw HTML has to be hashed again for each block, not only once at
		# the start of the document: hashed blocks can be part of list items
		# and may have been indented, in which case an earlier pass of
		# hashHTMLBlocks would have seen them as code blocks.
		$hashed = $this->hashHTMLBlocks($text);

		return $this->runBasicBlockGamut($hashed);
	}
 585	
	function runBasicBlockGamut($text) {
	#
	# Run block gamut transformations without hashing HTML blocks first.
	# Useful when HTML blocks are known to be hashed already, as in the
	# first whole-document pass.
	#
		foreach (array_keys($this->block_gamut) as $method) {
			$text = $this->$method($text);
		}

		# Last step: form paragraphs and restore hashed blocks.
		return $this->formParagraphs($text);
	}
 601	
 602	
	function doHorizontalRules($text) {
	#
	# Replaces every horizontal rule line (three or more "-", "*" or "_"
	# markers) with the hash token of an <hr> element.
	#
		# One token is created up front and reused for every rule found.
		$hr = "\n".$this->hashBlock("<hr$this->empty_element_suffix")."\n";

		$rule_re = '{
				^[ ]{0,3}	# Leading space
				([-*_])		# $1: First marker
				(?>			# Repeated marker group
					[ ]{0,2}	# Zero, one, or two spaces.
					\1			# Marker character
				){2,}		# Group repeated at least twice
				[ ]*		# Tailing spaces
				$			# End of line.
			}mx';

		return preg_replace($rule_re, $hr, $text);
	}
 619
 620
 621	var $span_gamut = array(
 622	#
 623	# These are all the transformations that occur *within* block-level
 624	# tags like paragraphs, headers, and list items.
 625	#
 626		# Process character escapes, code spans, and inline HTML
 627		# in one shot.
 628		"parseSpan"           => -30,
 629
 630		# Process anchor and image tags. Images must come first,
 631		# because ![foo][f] looks like an anchor.
 632		"doImages"            =>  10,
 633		"doAnchors"           =>  20,
 634		
 635		# Make links out of things like `<http://example.com/>`
 636		# Must come after doAnchors, because you can use < and >
 637		# delimiters in inline links like [this](<url>).
 638		"doAutoLinks"         =>  30,
 639		"encodeAmpsAndAngles" =>  40,
 640
 641		"doItalicsAndBold"    =>  50,
 642		"doHardBreaks"        =>  60,
 643		);
 644
	function runSpanGamut($text) {
	#
	# Run span gamut transformations: each method named in
	# $this->span_gamut is applied to $text in turn.
	#
		foreach (array_keys($this->span_gamut) as $method) {
			$text = $this->$method($text);
		}
		return $text;
	}
 655	
 656	
	function doHardBreaks($text) {
		# A hard break is two or more spaces right before a newline.
		$break_re = '/ {2,}\n/';
		return preg_replace_callback($break_re,
			array(&$this, '_doHardBreaks_callback'), $text);
	}
	function _doHardBreaks_callback($matches) {
		# The match itself is not used; every hard break becomes a hashed
		# <br> element followed by a newline.
		$br = "<br$this->empty_element_suffix\n";
		return $this->hashPart($br);
	}
 665
 666
	function doAnchors($text) {
	#
	# Turn Markdown link shortcuts into XHTML <a> tags.
	#
	# Returns $text unchanged when called while another doAnchors() call is
	# in progress ($this->in_anchor), so links are never nested.
	#
		if ($this->in_anchor) return $text;
		$this->in_anchor = true;
		
		#
		# First, handle reference-style links: [link text] [id]
		#
		$text = preg_replace_callback('{
			(					# wrap whole match in $1
			  \[
				('.$this->nested_brackets_re.')	# link text = $2
			  \]

			  [ ]?				# one optional space
			  (?:\n[ ]*)?		# one optional newline followed by spaces

			  \[
				(.*?)		# id = $3
			  \]
			)
			}xs',
			array(&$this, '_doAnchors_reference_callback'), $text);

		#
		# Next, inline-style links: [link text](url "optional title")
		#
		# The callback name now uses the same letter case as the method
		# declaration (_doAnchors_inline_callback).
		$text = preg_replace_callback('{
			(				# wrap whole match in $1
			  \[
				('.$this->nested_brackets_re.')	# link text = $2
			  \]
			  \(			# literal paren
				[ ]*
				(?:
					<(\S*)>	# href = $3
				|
					('.$this->nested_url_parenthesis_re.')	# href = $4
				)
				[ ]*
				(			# $5
				  ([\'"])	# quote char = $6
				  (.*?)		# Title = $7
				  \6		# matching quote
				  [ ]*	# ignore any spaces/tabs between closing quote and )
				)?			# title is optional
			  \)
			)
			}xs',
			array(&$this, '_doAnchors_inline_callback'), $text);

		#
		# Last, handle reference-style shortcuts: [link text]
		# These must come last in case you've also got [link test][1]
		# or [link test](/foo)
		#
		# This pass is commented out in this copy of the file, so a bare
		# [link text] is not turned into a link.
		#
//		$text = preg_replace_callback('{
//			(					# wrap whole match in $1
//			  \[
//				([^\[\]]+)		# link text = $2; can\'t contain [ or ]
//			  \]
//			)
//			}xs',
//			array(&$this, '_doAnchors_reference_callback'), $text);

		$this->in_anchor = false;
		return $text;
	}
	function _doAnchors_reference_callback($matches) {
	#
	# Builds the <a> element for a reference-style link. When the id has no
	# entry in $this->urls, the matched text is returned unchanged.
	#
		$whole_match = $matches[1];
		$link_text   = $matches[2];
		$link_id     = isset($matches[3]) ? $matches[3] : null;

		# An empty id, as in [this][], means the link text is the id.
		if ($link_id == "") {
			$link_id = $link_text;
		}

		# lower-case and turn embedded newlines into spaces
		$link_id = preg_replace('{[ ]?\n}', ' ', strtolower($link_id));

		if (!isset($this->urls[$link_id])) {
			return $whole_match;
		}

		$url  = $this->encodeAttribute($this->urls[$link_id]);
		$html = "<a href=\"$url\"";

		if (isset($this->titles[$link_id])) {
			$title = $this->encodeAttribute($this->titles[$link_id]);
			$html .=  " title=\"$title\"";
		}

		$link_text = $this->runSpanGamut($link_text);
		$html .= ">$link_text</a>";

		return $this->hashPart($html);
	}
	function _doAnchors_inline_callback($matches) {
	#
	# Builds the hashed <a> element for an inline link:
	# [link text](url "optional title").
	#
		$link_text = $this->runSpanGamut($matches[2]);

		# The href is in $3 when written as <url>, otherwise in $4.
		$url = ($matches[3] == '') ? $matches[4] : $matches[3];

		# The optional title group may be missing from $matches.
		$title = isset($matches[7]) ? $matches[7] : null;

		$url  = $this->encodeAttribute($url);
		$html = "<a href=\"$url\"";

		if (isset($title)) {
			$title = $this->encodeAttribute($title);
			$html .=  " title=\"$title\"";
		}

		# NOTE(review): the link text goes through the span gamut a second
		# time here, whereas the reference callback runs it once. This
		# looks redundant, but is kept to preserve the existing output.
		$link_text = $this->runSpanGamut($link_text);
		$html .= ">$link_text</a>";

		return $this->hashPart($html);
	}
 790
 791
	function doImages($text) {
	#
	# Turn Markdown image shortcuts into <img> tags.
	#
		# Reference-style labeled images: ![alt text][id]
		$reference_re = '{
			(				# wrap whole match in $1
			  !\[
				('.$this->nested_brackets_re.')		# alt text = $2
			  \]

			  [ ]?				# one optional space
			  (?:\n[ ]*)?		# one optional newline followed by spaces

			  \[
				(.*?)		# id = $3
			  \]

			)
			}xs';

		# Inline images:  ![alt text](url "optional title")
		# Don't forget: encode * and _
		$inline_re = '{
			(				# wrap whole match in $1
			  !\[
				('.$this->nested_brackets_re.')		# alt text = $2
			  \]
			  \s?			# One optional whitespace character
			  \(			# literal paren
				[ ]*
				(?:
					<(\S*)>	# src url = $3
				|
					('.$this->nested_url_parenthesis_re.')	# src url = $4
				)
				[ ]*
				(			# $5
				  ([\'"])	# quote char = $6
				  (.*?)		# title = $7
				  \6		# matching quote
				  [ ]*
				)?			# title is optional
			  \)
			)
			}xs';

		# References are processed first, inline images second.
		$text = preg_replace_callback($reference_re,
			array(&$this, '_doImages_reference_callback'), $text);
		$text = preg_replace_callback($inline_re,
			array(&$this, '_doImages_inline_callback'), $text);

		return $text;
	}
	function _doImages_reference_callback($matches) {
	#
	# Builds the hashed <img> element for a reference-style image. When the
	# id has no entry in $this->urls, the matched text is left intact.
	#
		$whole_match = $matches[1];
		$alt_text    = $matches[2];

		# An empty id, as in ![this][], means the alt text is the id.
		$link_id = strtolower($matches[3]);
		if ($link_id == "") {
			$link_id = strtolower($alt_text);
		}

		$alt_text = $this->encodeAttribute($alt_text);

		if (!isset($this->urls[$link_id])) {
			return $whole_match;
		}

		$url = $this->encodeAttribute($this->urls[$link_id]);
		$img = "<img src=\"$url\" alt=\"$alt_text\"";
		if (isset($this->titles[$link_id])) {
			$title = $this->encodeAttribute($this->titles[$link_id]);
			$img .=  " title=\"$title\"";
		}
		$img .= $this->empty_element_suffix;

		return $this->hashPart($img);
	}
	function _doImages_inline_callback($matches) {
	#
	# Builds the hashed <img> element for an inline image:
	# ![alt text](url "optional title").
	#
		# The src is in $3 when written as <url>, otherwise in $4.
		$src = ($matches[3] == '') ? $matches[4] : $matches[3];

		# The optional title group may be missing from $matches.
		$title = isset($matches[7]) ? $matches[7] : null;

		$alt_text = $this->encodeAttribute($matches[2]);
		$src      = $this->encodeAttribute($src);

		$img = "<img src=\"$src\" alt=\"$alt_text\"";
		if (isset($title)) {
			$title = $this->encodeAttribute($title);
			$img .=  " title=\"$title\"";
		}
		$img .= $this->empty_element_suffix;

		return $this->hashPart($img);
	}
 892
 893
	function doHeaders($text) {
	#
	# Converts both header syntaxes; the callbacks build the <hN> elements.
	#
		# Setext-style headers:
		#	  Header 1
		#	  ========
		#  
		#	  Header 2
		#	  --------
		#
		$setext_re = '{ ^(.+?)[ ]*\n(=+|-+)[ ]*\n+ }mx';

		# atx-style headers:
		#	# Header 1
		#	## Header 2
		#	## Header 2 with closing hashes ##
		#	...
		#	###### Header 6
		#
		$atx_re = '{
				^(\#{1,6})	# $1 = string of #\'s
				[ ]*
				(.+?)		# $2 = Header text
				[ ]*
				\#*			# optional closing #\'s (not counted)
				\n+
			}xm';

		# Setext headers are converted before atx headers.
		$text = preg_replace_callback($setext_re,
			array(&$this, '_doHeaders_callback_setext'), $text);
		$text = preg_replace_callback($atx_re,
			array(&$this, '_doHeaders_callback_atx'), $text);

		return $text;
	}
	function _doHeaders_callback_setext($matches) {
	#
	# Builds the hashed <h1> ("=" underline) or <h2> ("-" underline)
	# element for a setext-style header.
	#
		# Terrible hack to check we haven't found an empty list item.
		if ($matches[2] == '-' && preg_match('{^-(?: |$)}', $matches[1]))
			return $matches[0];
		
		# First character of the underline decides the level. Square
		# brackets are used because the curly brace offset syntax is
		# deprecated in PHP 7.4 and a parse error in PHP 8.
		$level = $matches[2][0] == '=' ? 1 : 2;
		$block = "<h$level>".$this->runSpanGamut($matches[1])."</h$level>";
		return "\n" . $this->hashBlock($block) . "\n\n";
	}
	function _doHeaders_callback_atx($matches) {
		# The header level is the number of leading "#" characters ($1).
		$level   = strlen($matches[1]);
		$content = $this->runSpanGamut($matches[2]);
		$token   = $this->hashBlock("<h$level>".$content."</h$level>");
		return "\n" . $token . "\n\n";
	}
 938
 939
	function doLists($text) {
	#
	# Form HTML ordered (numbered) and unordered (bulleted) lists.
	#
	# The text is scanned once per marker type (bullets first, then
	# numbers); every whole list found is handed to _doLists_callback.
	#
		$less_than_tab = $this->tab_width - 1;

		# Re-usable patterns to match list item bullets and number markers:
		$marker_ul_re  = '[*+-]';
		$marker_ol_re  = '\d+[.]';

		foreach (array($marker_ul_re, $marker_ol_re) as $marker_re) {
			# Re-usable pattern to match any entire ul or ol list:
			$whole_list_re = '
				(								# $1 = whole list
				  (								# $2
					[ ]{0,'.$less_than_tab.'}
					('.$marker_re.')			# $3 = first list item marker
					[ ]+
				  )
				  (?s:.+?)
				  (								# $4
					  \z
					|
					  \n{2,}
					  (?=\S)
					  (?!						# Negative lookahead for another list item marker
						[ ]*
						'.$marker_re.'[ ]+
					  )
				  )
				)
			'; // mx
			
			# We use a different prefix before nested lists than top-level lists.
			# See extended comment in processListItems().
		
			if ($this->list_level) {
				$text = preg_replace_callback('{
						^
						'.$whole_list_re.'
					}mx',
					array(&$this, '_doLists_callback'), $text);
			}
			else {
				$text = preg_replace_callback('{
						(?:(?<=\n)\n|\A\n?) # Must eat the newline
						'.$whole_list_re.'
					}mx',
					array(&$this, '_doLists_callback'), $text);
			}
		}

		return $text;
	}
	function _doLists_callback($matches) {
	#
	# Converts one whole list ($1) into a hashed <ul> or <ol> block. The
	# list type is decided by the first item marker ($3).
	#
		$marker_ul_re = '[*+-]';

		$is_bulleted = preg_match("/$marker_ul_re/", $matches[3]);
		$list_type   = $is_bulleted ? "ul" : "ol";

		# Items are split using only the marker style of this list type.
		$marker_any_re = $is_bulleted ? $marker_ul_re : '\d+[.]';

		$list   = $matches[1] . "\n";
		$result = $this->processListItems($list, $marker_any_re);

		$result = $this->hashBlock("<$list_type>\n" . $result . "</$list_type>");
		return "\n". $result ."\n\n";
	}
1014
1015	var $list_level = 0;
1016
1017	function processListItems($list_str, $marker_any_re) {
1018	#
1019	#	Process the contents of a single ordered or unordered list, splitting it
1020	#	into individual list items.
1021	#
1022		# The $this->list_level global keeps track of when we're inside a list.
1023		# Each time we enter a list, we increment it; when we leave a list,
1024		# we decrement. If it's zero, we're not in a list anymore.
1025		#
1026		# We do this because when we're not inside a list, we want to treat
1027		# something like this:
1028		#
1029		#		I recommend upgrading to version
1030		#		8. Oops, now this line is treated
1031		#		as a sub-list.
1032		#
1033		# As a single paragraph, despite the fact that the second line starts
1034		# with a digit-period-space sequence.
1035		#
1036		# Whereas when we're inside a list (or sub-list), that line will be
1037		# treated as the start of a sub-list. What a kludge, huh? This is
1038		# an aspect of Markdown's syntax that's hard to parse perfectly
1039		# without resorting to mind-reading. Perhaps the solution is to
1040		# change the syntax rules such that sub-lists must start with a
1041		# starting cardinal number; e.g. "1." or "a.".
1042		
1043		$this->list_level++;
1044
1045		# trim trailing blank lines:
1046		$list_str = preg_replace("/\n{2,}\\z/", "\n", $list_str);
1047
1048		$list_str = preg_replace_callback('{
1049			(\n)?							# leading line = $1
1050			(^[ ]*)							# leading whitespace = $2
1051			('.$marker_any_re.'				# list marker and space = $3
1052				(?:[ ]+|(?=\n))	# space only required if item is not empty
1053			)
1054			((?s:.*?))						# list item text   = $4
1055			(?:(\n+(?=\n))|\n)				# tailing blank line = $5
1056			(?= \n* (\z | \2 ('.$marker_any_re.') (?:[ ]+|(?=\n))))
1057			}xm',
1058			array(&$this, '_processListItems_callback'), $list_str);
1059
1060		$this->list_level--;
1061		return $list_str;
1062	}
1063	function _processListItems_callback($matches) {
1064		$item = $matches[4];
1065		$leading_line =& $matches[1];
1066		$leading_space =& $matches[2];
1067		$marker_space = $matches[3];
1068		$tailing_blank_line =& $matches[5];
1069
1070		if ($leading_line || $tailing_blank_line || 
1071			preg_match('/\n{2,}/', $item))
1072		{
1073			# Replace marker with the appropriate whitespace indentation
1074			$item = $leading_space . str_repeat(' ', strlen($marker_space)) . $item;
1075			$item = $this->runBlockGamut($this->outdent($item)."\n");
1076		}
1077		else {
1078			# Recursion for sub-lists:
1079			$item = $this->doLists($this->outdent($item));
1080			$item = preg_replace('/\n+$/', '', $item);
1081			$item = $this->runSpanGamut($item);
1082		}
1083
1084		return "<li>" . $item . "</li>\n";
1085	}
1086
1087
1088	function doCodeBlocks($text) {
1089	#
1090	#	Process Markdown `<pre><code>` blocks.
1091	#
1092		$text = preg_replace_callback('{
1093				(?:\n\n|\A\n?)
1094				(	            # $1 = the code block -- one or more lines, starting with a space/tab
1095				  (?>
1096					[ ]{'.$this->tab_width.'}  # Lines must start with a tab or a tab-width of spaces
1097					.*\n+
1098				  )+
1099				)
1100				((?=^[ ]{0,'.$this->tab_width.'}\S)|\Z)	# Lookahead for non-space at line-start, or end of doc
1101			}xm',
1102			array(&$this, '_doCodeBlocks_callback'), $text);
1103
1104		return $text;
1105	}
1106	function _doCodeBlocks_callback($matches) {
1107		$codeblock = $matches[1];
1108
1109		$codeblock = $this->outdent($codeblock);
1110		$codeblock = htmlspecialchars($codeblock, ENT_NOQUOTES);
1111
1112		# trim leading newlines and trailing newlines
1113		$codeblock = preg_replace('/\A\n+|\n+\z/', '', $codeblock);
1114
1115		$codeblock = "<pre><code>$codeblock\n</code></pre>";
1116		return "\n\n".$this->hashBlock($codeblock)."\n\n";
1117	}
1118
1119
1120	function makeCodeSpan($code) {
1121	#
1122	# Create a code span markup for $code. Called from handleSpanToken.
1123	#
1124		$code = htmlspecialchars(trim($code), ENT_NOQUOTES);
1125		return $this->hashPart("<code>$code</code>");
1126	}
1127
1128
1129	var $em_relist = array(
1130		''  => '(?:(?<!\*)\*(?!\*)|(?<!_)_(?!_))(?=\S)(?![.,:;]\s)',
1131		'*' => '(?<=\S)(?<!\*)\*(?!\*)',
1132		'_' => '(?<=\S)(?<!_)_(?!_)',
1133		);
1134	var $strong_relist = array(
1135		''   => '(?:(?<!\*)\*\*(?!\*)|(?<!_)__(?!_))(?=\S)(?![.,:;]\s)',
1136		'**' => '(?<=\S)(?<!\*)\*\*(?!\*)',
1137		'__' => '(?<=\S)(?<!_)__(?!_)',
1138		);
1139	var $em_strong_relist = array(
1140		''    => '(?:(?<!\*)\*\*\*(?!\*)|(?<!_)___(?!_))(?=\S)(?![.,:;]\s)',
1141		'***' => '(?<=\S)(?<!\*)\*\*\*(?!\*)',
1142		'___' => '(?<=\S)(?<!_)___(?!_)',
1143		);
1144	var $em_strong_prepared_relist;
1145	
1146	function prepareItalicsAndBold() {
1147	#
1148	# Prepare regular expressions for seraching emphasis tokens in any
1149	# context.
1150	#
1151		foreach ($this->em_relist as $em => $em_re) {
1152			foreach ($this->strong_relist as $strong => $strong_re) {
1153				# Construct list of allowed token expressions.
1154				$token_relist = array();
1155				if (isset($this->em_strong_relist["$em$strong"])) {
1156					$token_relist[] = $this->em_strong_relist["$em$strong"];
1157				}
1158				$token_relist[] = $em_re;
1159				$token_relist[] = $strong_re;
1160				
1161				# Construct master expression from list.
1162				$token_re = '{('. implode('|', $token_relist) .')}';
1163				$this->em_strong_prepared_relist["$em$strong"] = $token_re;
1164			}
1165		}
1166	}
1167	
1168	function doItalicsAndBold($text) {
1169		$token_stack = array('');
1170		$text_stack = array('');
1171		$em = '';
1172		$strong = '';
1173		$tree_char_em = false;
1174		
1175		while (1) {
1176			#
1177			# Get prepared regular expression for seraching emphasis tokens
1178			# in current context.
1179			#
1180			$token_re = $this->em_strong_prepared_relist["$em$strong"];
1181			
1182			#
1183			# Each loop iteration seach for the next emphasis token. 
1184			# Each token is then passed to handleSpanToken.
1185			#
1186			$parts = preg_split($token_re, $text, 2, PREG_SPLIT_DELIM_CAPTURE);
1187			$text_stack[0] .= $parts[0];
1188			$token =& $parts[1];
1189			$text =& $parts[2];
1190			
1191			if (empty($token)) {
1192				# Reached end of text span: empty stack without emitting.
1193				# any more emphasis.
1194				while ($token_stack[0]) {
1195					$text_stack[1] .= array_shift($token_stack);
1196					$text_stack[0] .= array_shift($text_stack);
1197				}
1198				break;
1199			}
1200			
1201			$token_len = strlen($token);
1202			if ($tree_char_em) {
1203				# Reached closing marker while inside a three-char emphasis.
1204				if ($token_len == 3) {
1205					# Three-char closing marker, close em and strong.
1206					array_shift($token_stack);
1207					$span = array_shift($text_stack);
1208					$span = $this->runSpanGamut($span);
1209					$span = "<strong><em>$span</em></strong>";
1210					$text_stack[0] .= $this->hashPart($span);
1211					$em = '';
1212					$strong = '';
1213				} else {
1214					# Other closing marker: close one em or strong and
1215					# change current token state to match the other
1216					$token_stack[0] = str_repeat($token{0}, 3-$token_len);
1217					$tag = $token_len == 2 ? "strong" : "em";
1218					$span = $text_stack[0];
1219					$span = $this->runSpanGamut($span);
1220					$span = "<$tag>$span</$tag>";
1221					$text_stack[0] = $this->hashPart($span);
1222					$$tag = ''; # $$tag stands for $em or $strong
1223				}
1224				$tree_char_em = false;
1225			} else if ($token_len == 3) {
1226				if ($em) {
1227					# Reached closing marker for both em and strong.
1228					# Closing strong marker:
1229					for ($i = 0; $i < 2; ++$i) {
1230						$shifted_token = array_shift($token_stack);
1231						$tag = strlen($shifted_token) == 2 ? "strong" : "em";
1232						$span = array_shift($text_stack);
1233						$span = $this->runSpanGamut($span);
1234						$span = "<$tag>$span</$tag>";
1235						$text_stack[0] .= $this->hashPart($span);
1236						$$tag = ''; # $$tag stands for $em or $strong
1237					}
1238				} else {
1239					# Reached opening three-char emphasis marker. Push on token 
1240					# stack; will be handled by the special condition above.
1241					$em = $token{0};
1242					$strong = "$em$em";
1243					array_unshift($token_stack, $token);
1244					array_unshift($text_stack, '');
1245					$tree_char_em = true;
1246				}
1247			} else if ($token_len == 2) {
1248				if ($strong) {
1249					# Unwind any dangling emphasis marker:
1250					if (strlen($token_stack[0]) == 1) {
1251						$text_stack[1] .= array_shift($token_stack);
1252						$text_stack[0] .= array_shift($text_stack);
1253					}
1254					# Closing strong marker:
1255					array_shift($token_stack);
1256					$span = array_shift($text_stack);
1257					$span = $this->runSpanGamut($span);
1258					$span = "<strong>$span</strong>";
1259					$text_stack[0] .= $this->hashPart($span);
1260					$strong = '';
1261				} else {
1262					array_unshift($token_stack, $token);
1263					array_unshift($text_stack, '');
1264					$strong = $token;
1265				}
1266			} else {
1267				# Here $token_len == 1
1268				if ($em) {
1269					if (strlen($token_stack[0]) == 1) {
1270						# Closing emphasis marker:
1271						array_shift($token_stack);
1272						$span = array_shift($text_stack);
1273						$span = $this->runSpanGamut($span);
1274						$span = "<em>$span</em>";
1275						$text_stack[0] .= $this->hashPart($span);
1276						$em = '';
1277					} else {
1278						$text_stack[0] .= $token;
1279					}
1280				} else {
1281					array_unshift($token_stack, $token);
1282					array_unshift($text_stack, '');
1283					$em = $token;
1284				}
1285			}
1286		}
1287		return $text_stack[0];
1288	}
1289
1290
1291	function doBlockQuotes($text) {
1292		$text = preg_replace_callback('/
1293			  (								# Wrap whole match in $1
1294				(?>
1295				  ^[ ]*>[ ]?			# ">" at the start of a line
1296					.+\n					# rest of the first line
1297				  (.+\n)*					# subsequent consecutive lines
1298				  \n*						# blanks
1299				)+
1300			  )
1301			/xm',
1302			array(&$this, '_doBlockQuotes_callback'), $text);
1303
1304		return $text;
1305	}
1306	function _doBlockQuotes_callback($matches) {
1307		$bq = $matches[1];
1308		# trim one level of quoting - trim whitespace-only lines
1309		$bq = preg_replace('/^[ ]*>[ ]?|^[ ]+$/m', '', $bq);
1310		$bq = $this->runBlockGamut($bq);		# recurse
1311
1312		$bq = preg_replace('/^/m', "  ", $bq);
1313		# These leading spaces cause problem with <pre> content, 
1314		# so we need to fix that:
1315		$bq = preg_replace_callback('{(\s*<pre>.+?</pre>)}sx', 
1316			array(&$this, '_DoBlockQuotes_callback2'), $bq);
1317
1318		return "\n". $this->hashBlock("<blockquote>\n$bq\n</blockquote>")."\n\n";
1319	}
1320	function _doBlockQuotes_callback2($matches) {
1321		$pre = $matches[1];
1322		$pre = preg_replace('/^  /m', '', $pre);
1323		return $pre;
1324	}
1325
1326
1327	function formParagraphs($text) {
1328	#
1329	#	Params:
1330	#		$text - string to process with html <p> tags
1331	#
1332		# Strip leading and trailing lines:
1333		$text = preg_replace('/\A\n+|\n+\z/', '', $text);
1334
1335		$grafs = preg_split('/\n{2,}/', $text, -1, PREG_SPLIT_NO_EMPTY);
1336
1337		#
1338		# Wrap <p> tags and unhashify HTML blocks
1339		#
1340		foreach ($grafs as $key => $value) {
1341			if (!preg_match('/^B\x1A[0-9]+B$/', $value)) {
1342				# Is a paragraph.
1343				$value = $this->runSpanGamut($value);
1344				$value = preg_replace('/^([ ]*)/', "<p>", $value);
1345				$value .= "</p>";
1346				$grafs[$key] = $this->unhash($value);
1347			}
1348			else {
1349				# Is a block.
1350				# Modify elements of @grafs in-place...
1351				$graf = $value;
1352				$block = $this->html_hashes[$graf];
1353				$graf = $block;
1354//				if (preg_match('{
1355//					\A
1356//					(							# $1 = <div> tag
1357//					  <div  \s+
1358//					  [^>]*
1359//					  \b
1360//					  markdown\s*=\s*  ([\'"])	#	$2 = attr quote char
1361//					  1
1362//					  \2
1363//					  [^>]*
1364//					  >
1365//					)
1366//					(							# $3 = contents
1367//					.*
1368//					)
1369//					(</div>)					# $4 = closing tag
1370//					\z
1371//					}xs', $block, $matches))
1372//				{
1373//					list(, $div_open, , $div_content, $div_close) = $matches;
1374//
1375//					# We can't call Markdown(), because that resets the hash;
1376//					# that initialization code should be pulled into its own sub, though.
1377//					$div_content = $this->hashHTMLBlocks($div_content);
1378//					
1379//					# Run document gamut methods on the content.
1380//					foreach ($this->document_gamut as $method => $priority) {
1381//						$div_content = $this->$method($div_content);
1382//					}
1383//
1384//					$div_open = preg_replace(
1385//						'{\smarkdown\s*=\s*([\'"]).+?\1}', '', $div_open);
1386//
1387//					$graf = $div_open . "\n" . $div_content . "\n" . $div_close;
1388//				}
1389				$grafs[$key] = $graf;
1390			}
1391		}
1392
1393		return implode("\n\n", $grafs);
1394	}
1395
1396
1397	function encodeAttribute($text) {
1398	#
1399	# Encode text for a double-quoted HTML attribute. This function
1400	# is *not* suitable for attributes enclosed in single quotes.
1401	#
1402		$text = $this->encodeAmpsAndAngles($text);
1403		$text = str_replace('"', '&quot;', $text);
1404		return $text;
1405	}
1406	
1407	
1408	function encodeAmpsAndAngles($text) {
1409	#
1410	# Smart processing for ampersands and angle brackets that need to 
1411	# be encoded. Valid character entities are left alone unless the
1412	# no-entities mode is set.
1413	#
1414		if ($this->no_entities) {
1415			$text = str_replace('&', '&amp;', $text);
1416		} else {
1417			# Ampersand-encoding based entirely on Nat Irons's Amputator
1418			# MT plugin: <http://bumppo.net/projects/amputator/>
1419			$text = preg_replace('/&(?!#?[xX]?(?:[0-9a-fA-F]+|\w+);)/', 
1420								'&amp;', $text);;
1421		}
1422		# Encode remaining <'s
1423		$text = str_replace('<', '&lt;', $text);
1424
1425		return $text;
1426	}
1427
1428
1429	function doAutoLinks($text) {
1430		$text = preg_replace_callback('{<((https?|ftp|dict):[^\'">\s]+)>}i', 
1431			array(&$this, '_doAutoLinks_url_callback'), $text);
1432
1433		# Email addresses: <address@domain.foo>
1434		$text = preg_replace_callback('{
1435			<
1436			(?:mailto:)?
1437			(
1438				[-.\w\x80-\xFF]+
1439				\@
1440				[-a-z0-9\x80-\xFF]+(\.[-a-z0-9\x80-\xFF]+)*\.[a-z]+
1441			)
1442			>
1443			}xi',
1444			array(&$this, '_doAutoLinks_email_callback'), $text);
1445
1446		return $text;
1447	}
1448	function _doAutoLinks_url_callback($matches) {
1449		$url = $this->encodeAttribute($matches[1]);
1450		$link = "<a href=\"$url\">$url</a>";
1451		return $this->hashPart($link);
1452	}
1453	function _doAutoLinks_email_callback($matches) {
1454		$address = $matches[1];
1455		$link = $this->encodeEmailAddress($address);
1456		return $this->hashPart($link);
1457	}
1458
1459
1460	function encodeEmailAddress($addr) {
1461	#
1462	#	Input: an email address, e.g. "foo@example.com"
1463	#
1464	#	Output: the email address as a mailto link, with each character
1465	#		of the address encoded as either a decimal or hex entity, in
1466	#		the hopes of foiling most address harvesting spam bots. E.g.:
1467	#
1468	#	  <p><a href="&#109;&#x61;&#105;&#x6c;&#116;&#x6f;&#58;&#x66;o&#111;
1469	#        &#x40;&#101;&#x78;&#97;&#x6d;&#112;&#x6c;&#101;&#46;&#x63;&#111;
1470	#        &#x6d;">&#x66;o&#111;&#x40;&#101;&#x78;&#97;&#x6d;&#112;&#x6c;
1471	#        &#101;&#46;&#x63;&#111;&#x6d;</a></p>
1472	#
1473	#	Based by a filter by Matthew Wickline, posted to BBEdit-Talk.
1474	#   With some optimizations by Milian Wolff.
1475	#
1476		$addr = "mailto:" . $addr;
1477		$chars = preg_split('/(?<!^)(?!$)/', $addr);
1478		$seed = (int)abs(crc32($addr) / strlen($addr)); # Deterministic seed.
1479		
1480		foreach ($chars as $key => $char) {
1481			$ord = ord($char);
1482			# Ignore non-ascii chars.
1483			if ($ord < 128) {
1484				$r = ($seed * (1 + $key)) % 100; # Pseudo-random function.
1485				# roughly 10% raw, 45% hex, 45% dec
1486				# '@' *must* be encoded. I insist.
1487				if ($r > 90 && $char != '@') /* do nothing */;
1488				else if ($r < 45) $chars[$key] = '&#x'.dechex($ord).';';
1489				else              $chars[$key] = '&#'.$ord.';';
1490			}
1491		}
1492		
1493		$addr = implode('', $chars);
1494		$text = implode('', array_slice($chars, 7)); # text without `mailto:`
1495		$addr = "<a href=\"$addr\">$text</a>";
1496
1497		return $addr;
1498	}
1499
1500
1501	function parseSpan($str) {
1502	#
1503	# Take the string $str and parse it into tokens, hashing embeded HTML,
1504	# escaped characters and handling code spans.
1505	#
1506		$output = '';
1507		
1508		$span_re = '{
1509				(
1510					\\\\'.$this->escape_chars_re.'
1511				|
1512					(?<![`\\\\])
1513					`+						# code span marker
1514			'.( $this->no_markup ? '' : '
1515				|
1516					<!--    .*?     -->		# comment
1517				|
1518					<\?.*?\?> | <%.*?%>		# processing instruction
1519				|
1520					<[/!$]?[-a-zA-Z0-9:]+	# regular tags
1521					(?>
1522						\s
1523						(?>[^"\'>]+|"[^"]*"|\'[^\']*\')*
1524					)?
1525					>
1526			').'
1527				)
1528				}xs';
1529
1530		while (1) {
1531			#
1532			# Each loop iteration seach for either the next tag, the next 
1533			# openning code span marker, or the next escaped character. 
1534			# Each token is then passed to handleSpanToken.
1535			#
1536			$parts = preg_split($span_re, $str, 2, PREG_SPLIT_DELIM_CAPTURE);
1537			
1538			# Create token from text preceding tag.
1539			if ($parts[0] != "") {
1540				$output .= $parts[0];
1541			}
1542			
1543			# Check if we reach the end.
1544			if (isset($parts[1])) {
1545				$output .= $this->handleSpanToken($parts[1], $parts[2]);
1546				$str = $parts[2];
1547			}
1548			else {
1549				break;
1550			}
1551		}
1552		
1553		return $output;
1554	}
1555	
1556	
1557	function handleSpanToken($token, &$str) {
1558	#
1559	# Handle $token provided by parseSpan by determining its nature and 
1560	# returning the corresponding value that should replace it.
1561	#
1562		switch ($token{0}) {
1563			case "\\":
1564				return $this->hashPart("&#". ord($token{1}). ";");
1565			case "`":
1566				# Search for end marker in remaining text.
1567				if (preg_match('/^(.*?[^`])'.preg_quote($token).'(?!`)(.*)$/sm', 
1568					$str, $matches))
1569				{
1570					$str = $matches[2];
1571					$codespan = $this->makeCodeSpan($matches[1]);
1572					return $this->hashPart($codespan);
1573				}
1574				return $token; // return as text since no ending marker found.
1575			default:
1576				return $this->hashPart($token);
1577		}
1578	}
1579
1580
1581	function outdent($text) {
1582	#
1583	# Remove one level of line-leading tabs or spaces
1584	#
1585		return preg_replace('/^(\t|[ ]{1,'.$this->tab_width.'})/m', '', $text);
1586	}
1587
1588
	# String length function for detab. `_initDetab` will create a function to 
	# handle UTF-8 if the default function does not exist.
1591	var $utf8_strlen = 'mb_strlen';
1592	
1593	function detab($text) {
1594	#
1595	# Replace tabs with the appropriate amount of space.
1596	#
1597		# For each line we separate the line in blocks delemited by
1598		# tab characters. Then we reconstruct every line by adding the 
1599		# appropriate number of space between each blocks.
1600		
1601		$text = preg_replace_callback('/^.*\t.*$/m',
1602			array(&$this, '_detab_callback'), $text);
1603
1604		return $text;
1605	}
1606	function _detab_callback($matches) {
1607		$line = $matches[0];
1608		$strlen = $this->utf8_strlen; # strlen function for UTF-8.
1609		
1610		# Split in blocks.
1611		$blocks = explode("\t", $line);
1612		# Add each blocks to the line.
1613		$line = $blocks[0];
1614		unset($blocks[0]); # Do not add first block twice.
1615		foreach ($blocks as $block) {
1616			# Calculate amount of space, insert spaces, insert block.
1617			$amount = $this->tab_width - 
1618				$strlen($line, 'UTF-8') % $this->tab_width;
1619			$line .= str_repeat(" ", $amount) . $block;
1620		}
1621		return $line;
1622	}
1623	function _initDetab() {
1624	#
1625	# Check for the availability of the function in the `utf8_strlen` property
1626	# (initially `mb_strlen`). If the function is not available, create a 
1627	# function that will loosely count the number of UTF-8 characters with a
1628	# regular expression.
1629	#
1630		if (function_exists($this->utf8_strlen)) return;
1631		$this->utf8_strlen = create_function('$text', 'return preg_match_all(
1632			"/[\\\\x00-\\\\xBF]|[\\\\xC0-\\\\xFF][\\\\x80-\\\\xBF]*/", 
1633			$text, $m);');
1634	}
1635
1636
1637	function unhash($text) {
1638	#
1639	# Swap back in all the tags hashed by _HashHTMLBlocks.
1640	#
1641		return preg_replace_callback('/(.)\x1A[0-9]+\1/', 
1642			array(&$this, '_unhash_callback'), $text);
1643	}
1644	function _unhash_callback($matches) {
1645		return $this->html_hashes[$matches[0]];
1646	}
1647
1648}
1649
1650
1651#
1652# Markdown Extra Parser Class
1653#
1654
1655class MarkdownExtra_Parser extends Markdown_Parser {
1656
1657	# Prefix for footnote ids.
1658	var $fn_id_prefix = "";
1659	
1660	# Optional title attribute for footnote links and backlinks.
1661	var $fn_link_title = MARKDOWN_FN_LINK_TITLE;
1662	var $fn_backlink_title = MARKDOWN_FN_BACKLINK_TITLE;
1663	
1664	# Optional class attribute for footnote links and backlinks.
1665	var $fn_link_class = MARKDOWN_FN_LINK_CLASS;
1666	var $fn_backlink_class = MARKDOWN_FN_BACKLINK_CLASS;
1667	
1668	# Predefined abbreviations.
1669	var $predef_abbr = array();
1670
1671
1672	function MarkdownExtra_Parser() {
1673	#
1674	# Constructor function. Initialize the parser object.
1675	#
1676		# Add extra escapable characters before parent constructor 
1677		# initialize the table.
1678		$this->escape_chars .= ':|';
1679		
1680		# Insert extra document, block, and span transformations. 
1681		# Parent constructor will do the sorting.
1682		$this->document_gamut += array(
1683			"doFencedCodeBlocks" => 5,
1684			"stripFootnotes"     => 15,
1685			"stripAbbreviations" => 25,
1686			"appendFootnotes"    => 50,
1687			);
1688		$this->block_gamut += array(
1689			"doFencedCodeBlocks" => 5,
1690			"doTables"           => 15,
1691			"doDefLists"         => 45,
1692			);
1693		$this->span_gamut += array(
1694			"doFootnotes"        => 5,
1695			"doAbbreviations"    => 70,
1696			);
1697		
1698		parent::Markdown_Parser();
1699	}
1700	
1701	
1702	# Extra variables used during extra transformations.
1703	var $footnotes = array();
1704	var $footnotes_ordered = array();
1705	var $abbr_desciptions = array();
1706	var $abbr_word_re = '';
1707	
1708	# Give the current footnote number.
1709	var $footnote_counter = 1;
1710	
1711	
1712	function setup() {
1713	#
1714	# Setting up Extra-specific variables.
1715	#
1716		parent::setup();
1717		
1718		$this->footnotes = array();
1719		$this->footnotes_ordered = array();
1720		$this->abbr_desciptions = array();
1721		$this->abbr_word_re = '';
1722		$this->footnote_counter = 1;
1723		
1724		foreach ($this->predef_abbr as $abbr_word => $abbr_desc) {
1725			if ($this->abbr_word_re)
1726				$this->abbr_word_re .= '|';
1727			$this->abbr_word_re .= preg_quote($abbr_word);
1728			$this->abbr_desciptions[$abbr_word] = trim($abbr_desc);
1729		}
1730	}
1731	
1732	function teardown() {
1733	#
1734	# Clearing Extra-specific variables.
1735	#
1736		$this->footnotes = array();
1737		$this->footnotes_ordered = array();
1738		$this->abbr_desciptions = array();
1739		$this->abbr_word_re = '';
1740		
1741		parent::teardown();
1742	}
1743	
1744	
1745	### HTML Block Parser ###
1746	
1747	# Tags that are always treated as block tags:
1748	var $block_tags_re = 'p|div|h[1-6]|blockquote|pre|table|dl|ol|ul|address|form|fieldset|iframe|hr|legend';
1749	
	# Tags treated as block tags only if the opening tag is alone on its line:
1751	var $context_block_tags_re = 'script|noscript|math|ins|del';
1752	
1753	# Tags where markdown="1" default to span mode:
1754	var $contain_span_tags_re = 'p|h[1-6]|li|dd|dt|td|th|legend|address';
1755	
1756	# Tags which must not have their contents modified, no matter where 
1757	# they appear:
1758	var $clean_tags_re = 'script|math';
1759	
1760	# Tags that do not need to be closed.
1761	var $auto_close_tags_re = 'hr|img';
1762	
1763
1764	function hashHTMLBlocks($text) {
1765	#
1766	# Hashify HTML Blocks and "clean tags".
1767	#
1768	# We only want to do this for block-level HTML tags, such as headers,
1769	# lists, and tables. That's because we still want to wrap <p>s around
1770	# "paragraphs" that are wrapped in non-block-level tags, such as anchors,
1771	# phrase emphasis, and spans. The list of tags we're looking for is
1772	# hard-coded.
1773	#
1774	# This works by calling _HashHTMLBlocks_InMarkdown, which then calls
1775	# _HashHTMLBlocks_InHTML when it encounter block tags. When the markdown="1" 
1776	# attribute is found whitin a tag, _HashHTMLBlocks_InHTML calls back
1777	#  _HashHTMLBlocks_InMarkdown to handle the Markdown syntax within the tag.
1778	# These two functions are calling each other. It'…

Large files files are truncated, but you can click here to view the full file