PageRenderTime 71ms CodeModel.GetById 11ms app.highlight 46ms RepoModel.GetById 2ms app.codeStats 0ms

/apps/wiki/lib/php_markdown/markdown.php

http://zoop.googlecode.com/
PHP | 1732 lines | 1143 code | 219 blank | 370 comment | 85 complexity | 62770c8a18c6f4c3707ebb065b1174ce MD5 | raw file
   1<?php
   2#
   3# Markdown  -  A text-to-HTML conversion tool for web writers
   4#
   5# PHP Markdown
   6# Copyright (c) 2004-2009 Michel Fortin  
   7# <http://michelf.com/projects/php-markdown/>
   8#
   9# Original Markdown
  10# Copyright (c) 2004-2006 John Gruber  
  11# <http://daringfireball.net/projects/markdown/>
  12#
  13
  14
  15define( 'MARKDOWN_VERSION',  "1.0.1n" ); # Sat 10 Oct 2009
  16
  17
  18#
  19# Global default settings:
  20#
  21
  22# Change to ">" for HTML output
  23@define( 'MARKDOWN_EMPTY_ELEMENT_SUFFIX',  " />");
  24
  25# Define the width of a tab for code blocks.
  26@define( 'MARKDOWN_TAB_WIDTH',     4 );
  27
  28
  29#
  30# WordPress settings:
  31#
  32
  33# Change to false to remove Markdown from posts and/or comments.
  34@define( 'MARKDOWN_WP_POSTS',      true );
  35@define( 'MARKDOWN_WP_COMMENTS',   true );
  36
  37
  38
  39### Standard Function Interface ###
  40
  41@define( 'MARKDOWN_PARSER_CLASS',  'Markdown_Parser' );
  42
  43function Markdown($text) {
  44#
  45# Initialize the parser and return the result of its transform method.
  46#
  47	# Setup static parser variable.
  48	static $parser;
  49	if (!isset($parser)) {
  50		$parser_class = MARKDOWN_PARSER_CLASS;
  51		$parser = new $parser_class;
  52	}
  53
  54	# Transform text using parser.
  55	return $parser->transform($text);
  56}
  57
  58
  59### WordPress Plugin Interface ###
  60
  61/*
  62Plugin Name: Markdown
  63Plugin URI: http://michelf.com/projects/php-markdown/
  64Description: <a href="http://daringfireball.net/projects/markdown/syntax">Markdown syntax</a> allows you to write using an easy-to-read, easy-to-write plain text format. Based on the original Perl version by <a href="http://daringfireball.net/">John Gruber</a>. <a href="http://michelf.com/projects/php-markdown/">More...</a>
  65Version: 1.0.1n
  66Author: Michel Fortin
  67Author URI: http://michelf.com/
  68*/
  69
# Everything in this block only runs (and the mdwp_* helper functions are
# only declared) when $wp_version is set in the including scope.
if (isset($wp_version)) {
	# More details about how it works here:
	# <http://michelf.com/weblog/2005/wordpress-text-flow-vs-markdown/>
	
	# Post content and excerpts
	# - Remove WordPress paragraph generator.
	# - Run Markdown on excerpt, then remove all tags.
	# - Add paragraph tag around the excerpt, but remove it for the excerpt rss.
	if (MARKDOWN_WP_POSTS) {
		remove_filter('the_content',     'wpautop');
        remove_filter('the_content_rss', 'wpautop');
		remove_filter('the_excerpt',     'wpautop');
		# Markdown is registered at priority 6; trim at 7 so it runs after it.
		add_filter('the_content',     'Markdown', 6);
        add_filter('the_content_rss', 'Markdown', 6);
		add_filter('get_the_excerpt', 'Markdown', 6);
		add_filter('get_the_excerpt', 'trim', 7);
		add_filter('the_excerpt',     'mdwp_add_p');
		add_filter('the_excerpt_rss', 'mdwp_strip_p');
		
		# balanceTags is taken off the save-time hooks and attached to the
		# display-time hooks instead (priority 50 for content, 9 for excerpts).
		remove_filter('content_save_pre',  'balanceTags', 50);
		remove_filter('excerpt_save_pre',  'balanceTags', 50);
		add_filter('the_content',  	  'balanceTags', 50);
		add_filter('get_the_excerpt', 'balanceTags', 9);
	}
	
	# Comments
	# - Remove WordPress paragraph generator.
	# - Remove WordPress auto-link generator.
	# - Scramble important tags before passing them to the kses filter.
	# - Run Markdown on excerpt then remove paragraph tags.
	if (MARKDOWN_WP_COMMENTS) {
		remove_filter('comment_text', 'wpautop', 30);
		remove_filter('comment_text', 'make_clickable');
		# On 'pre_comment_content': Markdown (6), then hide tags (8), then
		# restore them (12).
		add_filter('pre_comment_content', 'Markdown', 6);
		add_filter('pre_comment_content', 'mdwp_hide_tags', 8);
		add_filter('pre_comment_content', 'mdwp_show_tags', 12);
		add_filter('get_comment_text',    'Markdown', 6);
		add_filter('get_comment_excerpt', 'Markdown', 6);
		add_filter('get_comment_excerpt', 'mdwp_strip_p', 7);
	
		# Two parallel lists: the tags to hide and, at the same index, the
		# placeholder string each one is swapped with. The placeholders are
		# stored rot13-encoded and decoded here.
		global $mdwp_hidden_tags, $mdwp_placeholders;
		$mdwp_hidden_tags = explode(' ',
			'<p> </p> <pre> </pre> <ol> </ol> <ul> </ul> <li> </li>');
		$mdwp_placeholders = explode(' ', str_rot13(
			'pEj07ZbbBZ U1kqgh4w4p pre2zmeN6K QTi31t9pre ol0MP1jzJR '.
			'ML5IjmbRol ulANi1NsGY J7zRLJqPul liA8ctl16T K9nhooUHli'));
	}
	
	# Wrap $text in <p>...</p> unless it is empty or already starts with one
	# of the listed block tags; runs of two or more newlines inside the
	# wrapped text become paragraph boundaries.
	function mdwp_add_p($text) {
		if (!preg_match('{^$|^<(p|ul|ol|dl|pre|blockquote)>}i', $text)) {
			$text = '<p>'.$text.'</p>';
			$text = preg_replace('{\n{2,}}', "</p>\n\n<p>", $text);
		}
		return $text;
	}
	
	# Remove every <p> and </p> tag (case-insensitive) from $t.
	function mdwp_strip_p($t) { return preg_replace('{</?p>}i', '', $t); }

	# Replace each tag listed in $mdwp_hidden_tags with its placeholder.
	function mdwp_hide_tags($text) {
		global $mdwp_hidden_tags, $mdwp_placeholders;
		return str_replace($mdwp_hidden_tags, $mdwp_placeholders, $text);
	}
	# Inverse of mdwp_hide_tags: turn placeholders back into their tags.
	function mdwp_show_tags($text) {
		global $mdwp_hidden_tags, $mdwp_placeholders;
		return str_replace($mdwp_placeholders, $mdwp_hidden_tags, $text);
	}
}
 137
 138
 139### bBlog Plugin Info ###
 140
 141function identify_modifier_markdown() {
 142	return array(
 143		'name'			=> 'markdown',
 144		'type'			=> 'modifier',
 145		'nicename'		=> 'Markdown',
 146		'description'	=> 'A text-to-HTML conversion tool for web writers',
 147		'authors'		=> 'Michel Fortin and John Gruber',
 148		'licence'		=> 'BSD-like',
 149		'version'		=> MARKDOWN_VERSION,
 150		'help'			=> '<a href="http://daringfireball.net/projects/markdown/syntax">Markdown syntax</a> allows you to write using an easy-to-read, easy-to-write plain text format. Based on the original Perl version by <a href="http://daringfireball.net/">John Gruber</a>. <a href="http://michelf.com/projects/php-markdown/">More...</a>'
 151	);
 152}
 153
 154
 155### Smarty Modifier Interface ###
 156
 157function smarty_modifier_markdown($text) {
 158	return Markdown($text);
 159}
 160
 161
 162### Textile Compatibility Mode ###
 163
 164# Rename this file to "classTextile.php" and it can replace Textile everywhere.
 165
 166if (strcasecmp(substr(__FILE__, -16), "classTextile.php") == 0) {
 167	# Try to include PHP SmartyPants. Should be in the same directory.
 168	@include_once 'smartypants.php';
 169	# Fake Textile class. It calls Markdown instead.
 170	class Textile {
 171		function TextileThis($text, $lite='', $encode='') {
 172			if ($lite == '' && $encode == '')    $text = Markdown($text);
 173			if (function_exists('SmartyPants'))  $text = SmartyPants($text);
 174			return $text;
 175		}
 176		# Fake restricted version: restrictions are not supported for now.
 177		function TextileRestricted($text, $lite='', $noimage='') {
 178			return $this->TextileThis($text, $lite);
 179		}
 180		# Workaround to ensure compatibility with TextPattern 4.0.3.
 181		function blockLite($text) { return $text; }
 182	}
 183}
 184
 185
 186
 187#
 188# Markdown Parser Class
 189#
 190
 191class Markdown_Parser {
 192
	# Regex to match balanced [brackets].
	# Needed to insert a maximum bracket depth while converting to PHP.
	# The *_re member is built by the constructor from the depth value.
	var $nested_brackets_depth = 6;
	var $nested_brackets_re;
	
	# Same idea for balanced (parentheses) inside URLs: a maximum nesting
	# depth and the regex the constructor derives from it.
	var $nested_url_parenthesis_depth = 4;
	var $nested_url_parenthesis_re;

	# Table of hash values for escaped characters:
	# $escape_chars_re is a character class built from $escape_chars by the
	# constructor.
	var $escape_chars = '\`*_{}[]()>#+-.!';
	var $escape_chars_re;

	# Change to ">" for HTML output.
	var $empty_element_suffix = MARKDOWN_EMPTY_ELEMENT_SUFFIX;
	var $tab_width = MARKDOWN_TAB_WIDTH;
	
	# Change to `true` to disallow markup or entities.
	var $no_markup = false;
	var $no_entities = false;
	
	# Predefined urls and titles for reference links and images.
	# setup() copies these into $urls / $titles at the start of a transform.
	var $predef_urls = array();
	var $predef_titles = array();
 216
 217
	function __construct() {
	#
	# Constructor function. Initialize appropriate member variables.
	#
	# Declared as __construct() because a method named after the class is
	# no longer treated as a constructor by PHP 8; without it none of the
	# regex members below would ever be built.
	#
		$this->_initDetab();
		$this->prepareItalicsAndBold();
	
		# Balanced [brackets], nested up to $nested_brackets_depth levels.
		$this->nested_brackets_re = 
			str_repeat('(?>[^\[\]]+|\[', $this->nested_brackets_depth).
			str_repeat('\])*', $this->nested_brackets_depth);
	
		# Balanced (parentheses) inside URLs, nested up to
		# $nested_url_parenthesis_depth levels.
		$this->nested_url_parenthesis_re = 
			str_repeat('(?>[^()\s]+|\(', $this->nested_url_parenthesis_depth).
			str_repeat('(?>\)))*', $this->nested_url_parenthesis_depth);
		
		# Character class matching any one of the escapable characters.
		$this->escape_chars_re = '['.preg_quote($this->escape_chars).']';
		
		# Sort document, block, and span gamut in ascendent priority order.
		asort($this->document_gamut);
		asort($this->block_gamut);
		asort($this->span_gamut);
	}

	function Markdown_Parser() {
	#
	# Old-style constructor name, kept so that existing code calling
	# parent::Markdown_Parser() keeps working. The call is made on this
	# class explicitly so that a subclass overriding __construct() cannot
	# cause a recursive call.
	#
		Markdown_Parser::__construct();
	}
 240
 241
 242	# Internal hashes used during transformation.
	# Internal hashes used during transformation.
	# $urls / $titles are keyed by link id; $html_hashes maps a hash token
	# to the text it stands for. All three are reset by setup() and
	# emptied again by teardown().
	var $urls = array();
	var $titles = array();
	var $html_hashes = array();
	
	# Status flag to avoid invalid nesting.
	# Set to true while doAnchors() is running so that a nested call
	# returns the text untouched.
	var $in_anchor = false;
 249	
 250	
	function setup() {
	#
	# Called before the transformation process starts to setup parser 
	# states.
	#
		# Start from the predefined reference links and an empty hash table.
		$this->urls = $this->predef_urls;
		$this->titles = $this->predef_titles;
		$this->html_hashes = array();
		
		# Reset the anchor-nesting flag on the object. (This used to assign
		# a local variable, which left the member untouched.)
		$this->in_anchor = false;
	}
 263	
	function teardown() {
	#
	# Called once a transformation is finished: empties the per-document
	# tables so they do not keep holding memory.
	#
		$this->urls = $this->titles = $this->html_hashes = array();
	}
 273
 274
	function transform($text) {
	#
	# Entry point of the parser: normalizes the input text, then feeds it
	# through every method of the document gamut and returns the result
	# followed by a newline.
	#
		$this->setup();

		# Drop a UTF-8 BOM at the start and any \x1A marker character.
		$text = preg_replace('{^\xEF\xBB\xBF|\x1A}', '', $text);

		# DOS (\r\n) and Mac (\r) line endings become Unix (\n).
		$text = preg_replace('{\r\n?}', "\n", $text);

		# Append a couple of newlines, then expand tabs to spaces.
		$text = $this->detab($text . "\n\n");

		# Replace block-level HTML with hash tokens.
		$text = $this->hashHTMLBlocks($text);

		# Empty out lines made only of spaces, so that later patterns can
		# match consecutive blank lines with a plain /\n+/.
		$text = preg_replace('/^[ ]+$/m', '', $text);

		# Apply each document gamut method in its stored order.
		foreach ($this->document_gamut as $step => $rank) {
			$text = $this->$step($text);
		}

		$this->teardown();

		$text .= "\n";
		return $text;
	}
 313	
	# Methods applied to the whole document by transform(), as
	# method name => priority. The constructor sorts this array with asort().
	var $document_gamut = array(
		# Strip link definitions, store in hashes.
		"stripLinkDefinitions" => 20,
		
		"runBasicBlockGamut"   => 30,
		);
 320
 321
	function stripLinkDefinitions($text) {
	#
	# Removes link definitions from $text. Each definition found is handed
	# to _stripLinkDefinitions_callback, which records its URL and title.
	#
		$less_than_tab = $this->tab_width - 1;

		# A link definition looks like: ^[id]: url "optional title"
		$link_def_re = '{
							^[ ]{0,'.$less_than_tab.'}\[(.+)\][ ]?:	# id = $1
							  [ ]*
							  \n?				# maybe *one* newline
							  [ ]*
							(?:
							  <(.+?)>			# url = $2
							|
							  (\S+?)			# url = $3
							)
							  [ ]*
							  \n?				# maybe one newline
							  [ ]*
							(?:
								(?<=\s)			# lookbehind for whitespace
								["(]
								(.*?)			# title = $4
								[")]
								[ ]*
							)?	# title is optional
							(?:\n+|\Z)
			}xm';

		return preg_replace_callback(
			$link_def_re,
			array(&$this, '_stripLinkDefinitions_callback'),
			$text);
	}
	function _stripLinkDefinitions_callback($matches) {
	#
	# Records one link definition: the lower-cased id maps to its URL in
	# $this->urls and to its title (null when none was given) in
	# $this->titles. Returns the empty string that replaces the definition.
	#
		$id = strtolower($matches[1]);

		# The URL is in $2 when written as <url>, otherwise in $3.
		if ($matches[2] == '') {
			$this->urls[$id] = $matches[3];
		} else {
			$this->urls[$id] = $matches[2];
		}

		# $4 is absent from $matches when the definition has no title.
		$this->titles[$id] = isset($matches[4]) ? $matches[4] : null;

		return '';
	}
 363
 364
	function hashHTMLBlocks($text) {
	#
	# Replaces block-level HTML found in $text (block tags, <hr>, standalone
	# comments and <? / <% processor instructions) with hash tokens, via
	# _hashHTMLBlocks_callback. Returns $text unchanged when $no_markup is
	# set.
	#
		if ($this->no_markup)  return $text;

		$less_than_tab = $this->tab_width - 1;

		# Hashify HTML blocks:
		# We only want to do this for block-level HTML tags, such as headers,
		# lists, and tables. That's because we still want to wrap <p>s around
		# "paragraphs" that are wrapped in non-block-level tags, such as anchors,
		# phrase emphasis, and spans. The list of tags we're looking for is
		# hard-coded:
		#
		# *  List "a" is made of tags which can be both inline or block-level.
		#    These will be treated block-level when the start tag is alone on 
		#    its line, otherwise they're not matched here and will be taken as 
		#    inline later.
		# *  List "b" is made of tags which are always block-level;
		#
		$block_tags_a_re = 'ins|del';
		$block_tags_b_re = 'p|div|h[1-6]|blockquote|pre|table|dl|ol|ul|address|'.
						   'script|noscript|form|fieldset|iframe|math';

		# Regular expression for the content of a block tag.
		# $attr matches the optional attribute part of a tag; $content is
		# built by repeating the nested-tag pattern $nested_tags_level times.
		$nested_tags_level = 4;
		$attr = '
			(?>				# optional tag attributes
			  \s			# starts with whitespace
			  (?>
				[^>"/]+		# text outside quotes
			  |
				/+(?!>)		# slash not followed by ">"
			  |
				"[^"]*"		# text inside double quotes (tolerate ">")
			  |
				\'[^\']*\'	# text inside single quotes (tolerate ">")
			  )*
			)?	
			';
		$content =
			str_repeat('
				(?>
				  [^<]+			# content without tag
				|
				  <\2			# nested opening tag
					'.$attr.'	# attributes
					(?>
					  />
					|
					  >', $nested_tags_level).	# end of opening tag
					  '.*?'.					# last level nested tag content
			str_repeat('
					  </\2\s*>	# closing nested tag
					)
				  |				
					<(?!/\2\s*>	# other tags with a different name
				  )
				)*',
				$nested_tags_level);
		# Same pattern with back-reference \3 instead of \2, for the "a" tag
		# alternative whose tag name is captured in group 3.
		$content2 = str_replace('\2', '\3', $content);

		# First, look for nested blocks, e.g.:
		# 	<div>
		# 		<div>
		# 		tags for inner block must be indented.
		# 		</div>
		# 	</div>
		#
		# The outermost tags must start at the left margin for this to match, and
		# the inner nested divs must be indented.
		# We need to do this before the next, more liberal match, because the next
		# match will start at the first `<div>` and stop at the first `</div>`.
		$text = preg_replace_callback('{(?>
			(?>
				(?<=\n\n)		# Starting after a blank line
				|				# or
				\A\n?			# the beginning of the doc
			)
			(						# save in $1

			  # Match from `\n<tag>` to `</tag>\n`, handling nested tags 
			  # in between.
					
						[ ]{0,'.$less_than_tab.'}
						<('.$block_tags_b_re.')# start tag = $2
						'.$attr.'>			# attributes followed by > and \n
						'.$content.'		# content, support nesting
						</\2>				# the matching end tag
						[ ]*				# trailing spaces/tabs
						(?=\n+|\Z)	# followed by a newline or end of document

			| # Special version for tags of group a.

						[ ]{0,'.$less_than_tab.'}
						<('.$block_tags_a_re.')# start tag = $3
						'.$attr.'>[ ]*\n	# attributes followed by >
						'.$content2.'		# content, support nesting
						</\3>				# the matching end tag
						[ ]*				# trailing spaces/tabs
						(?=\n+|\Z)	# followed by a newline or end of document
					
			| # Special case just for <hr />. It was easier to make a special 
			  # case than to make the other regex more complicated.
			
						[ ]{0,'.$less_than_tab.'}
						<(hr)				# start tag = $2
						'.$attr.'			# attributes
						/?>					# the matching end tag
						[ ]*
						(?=\n{2,}|\Z)		# followed by a blank line or end of document
			
			| # Special case for standalone HTML comments:
			
					[ ]{0,'.$less_than_tab.'}
					(?s:
						<!-- .*? -->
					)
					[ ]*
					(?=\n{2,}|\Z)		# followed by a blank line or end of document
			
			| # PHP and ASP-style processor instructions (<? and <%)
			
					[ ]{0,'.$less_than_tab.'}
					(?s:
						<([?%])			# $2
						.*?
						\2>
					)
					[ ]*
					(?=\n{2,}|\Z)		# followed by a blank line or end of document
					
			)
			)}Sxmi',
			array(&$this, '_hashHTMLBlocks_callback'),
			$text);

		return $text;
	}
	function _hashHTMLBlocks_callback($matches) {
	#
	# Hashes the HTML block captured in $1 and returns its token with a
	# blank line on each side.
	#
		$token = $this->hashBlock($matches[1]);
		return "\n\n" . $token . "\n\n";
	}
 507	
 508	
	function hashPart($text, $boundary = 'X') {
	#
	# Stores $text in $this->html_hashes under a newly generated token and
	# returns that token, to be put in the text stream in place of $text.
	#
	# $boundary is the character placed at both ends of the token. By
	# convention, "B" is used for block elements that need not be wrapped
	# into paragraph tags at the end, ":" for elements that are word
	# separators and "X" in the general case.
	#
		# Counter kept across calls, used to number the tokens.
		static $serial = 0;

		# Expand any token already present in $text, so that a single
		# `unhash` pass is enough at the end.
		$text = $this->unhash($text);

		$serial++;
		$key = $boundary . "\x1A" . $serial . $boundary;
		$this->html_hashes[$key] = $text;

		return $key;
	}
 530
 531
	function hashBlock($text) {
	#
	# Same as hashPart, with the block-level boundary character 'B'.
	#
		$block_token = $this->hashPart($text, 'B');
		return $block_token;
	}
 538
 539
	var $block_gamut = array(
	#
	# These are all the transformations that form block-level
	# tags like paragraphs, headers, and list items.
	#
	# Entries are method name => priority; the constructor sorts the array
	# with asort() and runBasicBlockGamut() calls the methods in that order.
	#
		"doHeaders"         => 10,
		"doHorizontalRules" => 20,
		
		"doLists"           => 40,
		"doCodeBlocks"      => 50,
		"doBlockQuotes"     => 60,
		);
 552
	function runBlockGamut($text) {
	#
	# Run block gamut transformations on $text after hashing its raw HTML
	# blocks.
	#
	# The hashing has to be redone for each block rather than only once at
	# the beginning: HTML blocks can sit inside list items where they were
	# indented, and an indented block would have been seen as a code block
	# by an earlier pass of hashHTMLBlocks.
	#
		return $this->runBasicBlockGamut($this->hashHTMLBlocks($text));
	}
 566	
	function runBasicBlockGamut($text) {
	#
	# Run block gamut transformations without hashing HTML blocks first.
	# Meant for text whose HTML blocks are known to be hashed already, as
	# in the first whole-document pass.
	#
		foreach ($this->block_gamut as $step => $rank) {
			$text = $this->$step($text);
		}

		# Last step: form paragraphs and restore hashed blocks.
		return $this->formParagraphs($text);
	}
 582	
 583	
	function doHorizontalRules($text) {
	#
	# Replaces every horizontal-rule line in $text with the hash token of
	# an <hr> element, surrounded by newlines.
	#
		# The token is created up front, whether or not a rule is found.
		$hr_token = $this->hashBlock('<hr' . $this->empty_element_suffix);

		$rule_re = '{
				^[ ]{0,3}	# Leading space
				([-*_])		# $1: First marker
				(?>			# Repeated marker group
					[ ]{0,2}	# Zero, one, or two spaces.
					\1			# Marker character
				){2,}		# Group repeated at least twice
				[ ]*		# Tailing spaces
				$			# End of line.
			}mx';

		return preg_replace($rule_re, "\n" . $hr_token . "\n", $text);
	}
 600
 601
	var $span_gamut = array(
	#
	# These are all the transformations that occur *within* block-level
	# tags like paragraphs, headers, and list items.
	#
	# Entries are method name => priority; the constructor sorts the array
	# with asort() and runSpanGamut() calls the methods in that order.
	#
		# Process character escapes, code spans, and inline HTML
		# in one shot.
		"parseSpan"           => -30,

		# Process anchor and image tags. Images must come first,
		# because ![foo][f] looks like an anchor.
		"doImages"            =>  10,
		"doAnchors"           =>  20,
		
		# Make links out of things like `<http://example.com/>`
		# Must come after doAnchors, because you can use < and >
		# delimiters in inline links like [this](<url>).
		"doAutoLinks"         =>  30,
		"encodeAmpsAndAngles" =>  40,

		"doItalicsAndBold"    =>  50,
		"doHardBreaks"        =>  60,
		);
 625
	function runSpanGamut($text) {
	#
	# Run span gamut transformations: passes $text through each method
	# listed in $this->span_gamut, in the array's order, and returns the
	# final text.
	#
		foreach ($this->span_gamut as $step => $rank) {
			$text = $this->$step($text);
		}
		return $text;
	}
 636	
 637	
	function doHardBreaks($text) {
	#
	# Hard breaks: two or more spaces followed by a newline are replaced
	# by whatever _doHardBreaks_callback returns.
	#
		$break_re = '/ {2,}\n/';
		$handler  = array(&$this, '_doHardBreaks_callback');

		return preg_replace_callback($break_re, $handler, $text);
	}
	function _doHardBreaks_callback($matches) {
	#
	# Returns the hash token of a <br> element followed by a newline.
	#
		$br = '<br' . $this->empty_element_suffix . "\n";
		return $this->hashPart($br);
	}
 646
 647
	function doAnchors($text) {
	#
	# Turn Markdown link shortcuts into XHTML <a> tags.
	#
	# Three passes are made over $text, in this order: reference-style
	# links, inline-style links, then reference-style shortcuts.
	#
		# Refuse to run while an outer doAnchors() call is in progress (the
		# callbacks run the span gamut on link text, which calls back here).
		if ($this->in_anchor) return $text;
		$this->in_anchor = true;
		
		#
		# First, handle reference-style links: [link text] [id]
		#
		$text = preg_replace_callback('{
			(					# wrap whole match in $1
			  \[
				('.$this->nested_brackets_re.')	# link text = $2
			  \]

			  [ ]?				# one optional space
			  (?:\n[ ]*)?		# one optional newline followed by spaces

			  \[
				(.*?)		# id = $3
			  \]
			)
			}xs',
			array(&$this, '_doAnchors_reference_callback'), $text);

		#
		# Next, inline-style links: [link text](url "optional title")
		#
		$text = preg_replace_callback('{
			(				# wrap whole match in $1
			  \[
				('.$this->nested_brackets_re.')	# link text = $2
			  \]
			  \(			# literal paren
				[ \n]*
				(?:
					<(.+?)>	# href = $3
				|
					('.$this->nested_url_parenthesis_re.')	# href = $4
				)
				[ \n]*
				(			# $5
				  ([\'"])	# quote char = $6
				  (.*?)		# Title = $7
				  \6		# matching quote
				  [ \n]*	# ignore any spaces/tabs between closing quote and )
				)?			# title is optional
			  \)
			)
			}xs',
			array(&$this, '_doAnchors_inline_callback'), $text);

		#
		# Last, handle reference-style shortcuts: [link text]
		# These must come last in case you've also got [link text][1]
		# or [link text](/foo)
		#
		$text = preg_replace_callback('{
			(					# wrap whole match in $1
			  \[
				([^\[\]]+)		# link text = $2; can\'t contain [ or ]
			  \]
			)
			}xs',
			array(&$this, '_doAnchors_reference_callback'), $text);

		$this->in_anchor = false;
		return $text;
	}
	function _doAnchors_reference_callback($matches) {
	#
	# Builds the <a> element for a reference-style link and returns its
	# hash token. When the link id has no entry in $this->urls, the matched
	# text is returned as it was.
	#
		$whole_match = $matches[1];
		$link_text   = $matches[2];
		# $3 is not present for the [link text] shortcut form.
		$link_id     = isset($matches[3]) ? $matches[3] : '';

		# Shortcut links like [this][] or [this] use the text as the id.
		if ($link_id == "") {
			$link_id = $link_text;
		}

		# Lower-case the id and turn embedded newlines into spaces.
		$link_id = preg_replace('{[ ]?\n}', ' ', strtolower($link_id));

		if (!isset($this->urls[$link_id])) {
			return $whole_match;
		}

		$href = $this->encodeAttribute($this->urls[$link_id]);
		$tag  = "<a href=\"$href\"";

		if (isset($this->titles[$link_id])) {
			$tip  = $this->encodeAttribute($this->titles[$link_id]);
			$tag .= " title=\"$tip\"";
		}

		$inner = $this->runSpanGamut($link_text);
		$tag  .= ">$inner</a>";

		return $this->hashPart($tag);
	}
	function _doAnchors_inline_callback($matches) {
	#
	# Builds the <a> element for an inline-style link
	# [text](url "title") and returns its hash token.
	#
		$inner = $this->runSpanGamut($matches[2]);

		# The URL is in $3 when written as <url>, otherwise in $4.
		$href = ($matches[3] == '') ? $matches[4] : $matches[3];
		$href = $this->encodeAttribute($href);

		$tag = "<a href=\"$href\"";

		# $7 is only present in $matches when a title was given.
		if (isset($matches[7])) {
			$tip  = $this->encodeAttribute($matches[7]);
			$tag .= " title=\"$tip\"";
		}

		# NOTE(review): the link text goes through the span gamut a second
		# time here, as it always has; confirm whether this second pass is
		# needed before removing it.
		$inner = $this->runSpanGamut($inner);
		$tag  .= ">$inner</a>";

		return $this->hashPart($tag);
	}
 771
 772
	function doImages($text) {
	#
	# Turn Markdown image shortcuts into <img> tags.
	#
	# Two passes are made over $text: reference-style images first, then
	# inline images. Each match is replaced by its callback's return value.
	#
		#
		# First, handle reference-style labeled images: ![alt text][id]
		#
		$text = preg_replace_callback('{
			(				# wrap whole match in $1
			  !\[
				('.$this->nested_brackets_re.')		# alt text = $2
			  \]

			  [ ]?				# one optional space
			  (?:\n[ ]*)?		# one optional newline followed by spaces

			  \[
				(.*?)		# id = $3
			  \]

			)
			}xs', 
			array(&$this, '_doImages_reference_callback'), $text);

		#
		# Next, handle inline images:  ![alt text](url "optional title")
		# Don't forget: encode * and _
		#
		$text = preg_replace_callback('{
			(				# wrap whole match in $1
			  !\[
				('.$this->nested_brackets_re.')		# alt text = $2
			  \]
			  \s?			# One optional whitespace character
			  \(			# literal paren
				[ \n]*
				(?:
					<(\S*)>	# src url = $3
				|
					('.$this->nested_url_parenthesis_re.')	# src url = $4
				)
				[ \n]*
				(			# $5
				  ([\'"])	# quote char = $6
				  (.*?)		# title = $7
				  \6		# matching quote
				  [ \n]*
				)?			# title is optional
			  \)
			)
			}xs',
			array(&$this, '_doImages_inline_callback'), $text);

		return $text;
	}
	function _doImages_reference_callback($matches) {
	#
	# Builds the <img> element for a reference-style image ![alt][id] and
	# returns its hash token. When the id has no entry in $this->urls, the
	# matched text is returned intact.
	#
		$whole_match = $matches[1];
		$alt_text    = $matches[2];
		$link_id     = strtolower($matches[3]);

		# Shortcut form ![this][]: the alt text doubles as the id.
		if ($link_id == "") {
			$link_id = strtolower($alt_text);
		}

		$alt_text = $this->encodeAttribute($alt_text);

		if (!isset($this->urls[$link_id])) {
			return $whole_match;
		}

		$src = $this->encodeAttribute($this->urls[$link_id]);
		$tag = "<img src=\"$src\" alt=\"$alt_text\"";

		if (isset($this->titles[$link_id])) {
			$tip  = $this->encodeAttribute($this->titles[$link_id]);
			$tag .= " title=\"$tip\"";
		}

		$tag .= $this->empty_element_suffix;

		return $this->hashPart($tag);
	}
	function _doImages_inline_callback($matches) {
	#
	# Builds the <img> element for an inline image ![alt](url "title") and
	# returns its hash token.
	#
		# The URL is in $3 when written as <url>, otherwise in $4.
		$src = ($matches[3] == '') ? $matches[4] : $matches[3];

		$alt = $this->encodeAttribute($matches[2]);
		$src = $this->encodeAttribute($src);

		$tag = "<img src=\"$src\" alt=\"$alt\"";

		# $7 is only present in $matches when a title was given.
		if (isset($matches[7])) {
			$tip  = $this->encodeAttribute($matches[7]);
			$tag .= " title=\"$tip\"";
		}

		$tag .= $this->empty_element_suffix;

		return $this->hashPart($tag);
	}
 873
 874
	function doHeaders($text) {
	#
	# Converts both header syntaxes, each through its own callback:
	# Setext-style first, then atx-style.
	#
		# Setext-style headers:
		#	  Header 1
		#	  ========
		#  
		#	  Header 2
		#	  --------
		#
		$setext_re = '{ ^(.+?)[ ]*\n(=+|-+)[ ]*\n+ }mx';
		$text = preg_replace_callback(
			$setext_re,
			array(&$this, '_doHeaders_callback_setext'),
			$text);

		# atx-style headers:
		#	# Header 1
		#	## Header 2
		#	## Header 2 with closing hashes ##
		#	...
		#	###### Header 6
		#
		$atx_re = '{
				^(\#{1,6})	# $1 = string of #\'s
				[ ]*
				(.+?)		# $2 = Header text
				[ ]*
				\#*			# optional closing #\'s (not counted)
				\n+
			}xm';
		$text = preg_replace_callback(
			$atx_re,
			array(&$this, '_doHeaders_callback_atx'),
			$text);

		return $text;
	}
	function _doHeaders_callback_setext($matches) {
	#
	# Callback for Setext-style headers. $matches[1] is the header text and
	# $matches[2] the underline: "=" characters give an <h1>, anything else
	# an <h2>. Returns the hash token of the header surrounded by newlines,
	# or the matched text unchanged when it looks like an empty list item.
	#
		# Terrible hack to check we haven't found an empty list item.
		if ($matches[2] == '-' && preg_match('{^-(?: |$)}', $matches[1]))
			return $matches[0];
		
		# First character of the underline. Square brackets are used because
		# the curly-brace offset syntax is not accepted by PHP 8.
		$level = $matches[2][0] == '=' ? 1 : 2;
		$block = "<h$level>".$this->runSpanGamut($matches[1])."</h$level>";
		return "\n" . $this->hashBlock($block) . "\n\n";
	}
	function _doHeaders_callback_atx($matches) {
	#
	# Callback for atx-style headers. The header level is the number of
	# "#" characters in $matches[1]; $matches[2] is the header text.
	# Returns the hash token of the header surrounded by newlines.
	#
		$depth   = strlen($matches[1]);
		$content = $this->runSpanGamut($matches[2]);
		$token   = $this->hashBlock("<h$depth>" . $content . "</h$depth>");

		return "\n" . $token . "\n\n";
	}
 919
 920
	function doLists($text) {
	#
	# Form HTML ordered (numbered) and unordered (bulleted) lists.
	#
	# The text is scanned twice, once per marker kind (bullets, then
	# numbers); every whole list found is handed to _doLists_callback.
	#
		$less_than_tab = $this->tab_width - 1;

		# Re-usable patterns to match list item bullets and number markers:
		$marker_ul_re  = '[*+-]';
		$marker_ol_re  = '\d+[.]';
		# NOTE(review): $marker_any_re is not used anywhere in this method.
		$marker_any_re = "(?:$marker_ul_re|$marker_ol_re)";

		# Maps each marker pattern to the pattern of the *other* list kind.
		$markers_relist = array(
			$marker_ul_re => $marker_ol_re,
			$marker_ol_re => $marker_ul_re,
			);

		foreach ($markers_relist as $marker_re => $other_marker_re) {
			# Re-usable pattern to match any entire ul or ol list:
			$whole_list_re = '
				(								# $1 = whole list
				  (								# $2
					([ ]{0,'.$less_than_tab.'})	# $3 = number of spaces
					('.$marker_re.')			# $4 = first list item marker
					[ ]+
				  )
				  (?s:.+?)
				  (								# $5
					  \z
					|
					  \n{2,}
					  (?=\S)
					  (?!						# Negative lookahead for another list item marker
						[ ]*
						'.$marker_re.'[ ]+
					  )
					|
					  (?=						# Lookahead for another kind of list
					    \n
						\3						# Must have the same indentation
						'.$other_marker_re.'[ ]+
					  )
				  )
				)
			'; // mx
			
			# We use a different prefix before nested lists than top-level lists.
			# See extended comment in _ProcessListItems().
		
			# Inside a list (list_level non-zero) a list may start at the
			# beginning of any line; at top level it must follow a blank line
			# or the start of the text.
			if ($this->list_level) {
				$text = preg_replace_callback('{
						^
						'.$whole_list_re.'
					}mx',
					array(&$this, '_doLists_callback'), $text);
			}
			else {
				$text = preg_replace_callback('{
						(?:(?<=\n)\n|\A\n?) # Must eat the newline
						'.$whole_list_re.'
					}mx',
					array(&$this, '_doLists_callback'), $text);
			}
		}

		return $text;
	}
	function _doLists_callback($matches) {
	#
	# Turns one whole list ($matches[1]) into a <ul> or <ol> element,
	# chosen from its first item marker ($matches[4]), and returns the
	# element's hash token surrounded by newlines.
	#
		# Patterns matching list item bullets and number markers.
		$marker_ul_re = '[*+-]';
		$marker_ol_re = '\d+[.]';

		if (preg_match('/' . $marker_ul_re . '/', $matches[4])) {
			$list_type      = "ul";
			$item_marker_re = $marker_ul_re;
		} else {
			$list_type      = "ol";
			$item_marker_re = $marker_ol_re;
		}

		$items = $this->processListItems($matches[1] . "\n", $item_marker_re);
		$token = $this->hashBlock("<$list_type>\n" . $items . "</$list_type>");

		return "\n" . $token . "\n\n";
	}
1004
	# List nesting depth: incremented on entry to processListItems() and
	# decremented on exit; zero means we are not inside a list.
	var $list_level = 0;
1006
	function processListItems($list_str, $marker_any_re) {
	#
	#	Process the contents of a single ordered or unordered list, splitting it
	#	into individual list items.
	#
	#	$list_str is the text of the list and $marker_any_re the regex
	#	matching its item markers. Each item is replaced by the return value
	#	of _processListItems_callback; the resulting string is returned.
	#
		# The $this->list_level global keeps track of when we're inside a list.
		# Each time we enter a list, we increment it; when we leave a list,
		# we decrement. If it's zero, we're not in a list anymore.
		#
		# We do this because when we're not inside a list, we want to treat
		# something like this:
		#
		#		I recommend upgrading to version
		#		8. Oops, now this line is treated
		#		as a sub-list.
		#
		# As a single paragraph, despite the fact that the second line starts
		# with a digit-period-space sequence.
		#
		# Whereas when we're inside a list (or sub-list), that line will be
		# treated as the start of a sub-list. What a kludge, huh? This is
		# an aspect of Markdown's syntax that's hard to parse perfectly
		# without resorting to mind-reading. Perhaps the solution is to
		# change the syntax rules such that sub-lists must start with a
		# starting cardinal number; e.g. "1." or "a.".
		
		$this->list_level++;

		# trim trailing blank lines:
		$list_str = preg_replace("/\n{2,}\\z/", "\n", $list_str);

		$list_str = preg_replace_callback('{
			(\n)?							# leading line = $1
			(^[ ]*)							# leading whitespace = $2
			('.$marker_any_re.'				# list marker and space = $3
				(?:[ ]+|(?=\n))	# space only required if item is not empty
			)
			((?s:.*?))						# list item text   = $4
			(?:(\n+(?=\n))|\n)				# tailing blank line = $5
			(?= \n* (\z | \2 ('.$marker_any_re.') (?:[ ]+|(?=\n))))
			}xm',
			array(&$this, '_processListItems_callback'), $list_str);

		# Leaving the list: restore the previous nesting depth.
		$this->list_level--;
		return $list_str;
	}
1053	function _processListItems_callback($matches) {
1054		$item = $matches[4];
1055		$leading_line =& $matches[1];
1056		$leading_space =& $matches[2];
1057		$marker_space = $matches[3];
1058		$tailing_blank_line =& $matches[5];
1059
1060		if ($leading_line || $tailing_blank_line || 
1061			preg_match('/\n{2,}/', $item))
1062		{
1063			# Replace marker with the appropriate whitespace indentation
1064			$item = $leading_space . str_repeat(' ', strlen($marker_space)) . $item;
1065			$item = $this->runBlockGamut($this->outdent($item)."\n");
1066		}
1067		else {
1068			# Recursion for sub-lists:
1069			$item = $this->doLists($this->outdent($item));
1070			$item = preg_replace('/\n+$/', '', $item);
1071			$item = $this->runSpanGamut($item);
1072		}
1073
1074		return "<li>" . $item . "</li>\n";
1075	}
1076
1077
1078	function doCodeBlocks($text) {
1079	#
1080	#	Process Markdown `<pre><code>` blocks.
1081	#
1082		$text = preg_replace_callback('{
1083				(?:\n\n|\A\n?)
1084				(	            # $1 = the code block -- one or more lines, starting with a space/tab
1085				  (?>
1086					[ ]{'.$this->tab_width.'}  # Lines must start with a tab or a tab-width of spaces
1087					.*\n+
1088				  )+
1089				)
1090				((?=^[ ]{0,'.$this->tab_width.'}\S)|\Z)	# Lookahead for non-space at line-start, or end of doc
1091			}xm',
1092			array(&$this, '_doCodeBlocks_callback'), $text);
1093
1094		return $text;
1095	}
1096	function _doCodeBlocks_callback($matches) {
1097		$codeblock = $matches[1];
1098
1099		$codeblock = $this->outdent($codeblock);
1100		$codeblock = htmlspecialchars($codeblock, ENT_NOQUOTES);
1101
1102		# trim leading newlines and trailing newlines
1103		$codeblock = preg_replace('/\A\n+|\n+\z/', '', $codeblock);
1104
1105		$codeblock = "<pre><code>$codeblock\n</code></pre>";
1106		return "\n\n".$this->hashBlock($codeblock)."\n\n";
1107	}
1108
1109
1110	function makeCodeSpan($code) {
1111	#
1112	# Create a code span markup for $code. Called from handleSpanToken.
1113	#
1114		$code = htmlspecialchars(trim($code), ENT_NOQUOTES);
1115		return $this->hashPart("<code>$code</code>");
1116	}
1117
1118
1119	var $em_relist = array(
1120		''  => '(?:(?<!\*)\*(?!\*)|(?<!_)_(?!_))(?=\S|$)(?![.,:;]\s)',
1121		'*' => '(?<=\S|^)(?<!\*)\*(?!\*)',
1122		'_' => '(?<=\S|^)(?<!_)_(?!_)',
1123		);
1124	var $strong_relist = array(
1125		''   => '(?:(?<!\*)\*\*(?!\*)|(?<!_)__(?!_))(?=\S|$)(?![.,:;]\s)',
1126		'**' => '(?<=\S|^)(?<!\*)\*\*(?!\*)',
1127		'__' => '(?<=\S|^)(?<!_)__(?!_)',
1128		);
1129	var $em_strong_relist = array(
1130		''    => '(?:(?<!\*)\*\*\*(?!\*)|(?<!_)___(?!_))(?=\S|$)(?![.,:;]\s)',
1131		'***' => '(?<=\S|^)(?<!\*)\*\*\*(?!\*)',
1132		'___' => '(?<=\S|^)(?<!_)___(?!_)',
1133		);
1134	var $em_strong_prepared_relist;
1135	
1136	function prepareItalicsAndBold() {
1137	#
1138	# Prepare regular expressions for searching emphasis tokens in any
1139	# context.
1140	#
1141		foreach ($this->em_relist as $em => $em_re) {
1142			foreach ($this->strong_relist as $strong => $strong_re) {
1143				# Construct list of allowed token expressions.
1144				$token_relist = array();
1145				if (isset($this->em_strong_relist["$em$strong"])) {
1146					$token_relist[] = $this->em_strong_relist["$em$strong"];
1147				}
1148				$token_relist[] = $em_re;
1149				$token_relist[] = $strong_re;
1150				
1151				# Construct master expression from list.
1152				$token_re = '{('. implode('|', $token_relist) .')}';
1153				$this->em_strong_prepared_relist["$em$strong"] = $token_re;
1154			}
1155		}
1156	}
1157	
1158	function doItalicsAndBold($text) {
1159		$token_stack = array('');
1160		$text_stack = array('');
1161		$em = '';
1162		$strong = '';
1163		$tree_char_em = false;
1164		
1165		while (1) {
1166			#
1167			# Get prepared regular expression for seraching emphasis tokens
1168			# in current context.
1169			#
1170			$token_re = $this->em_strong_prepared_relist["$em$strong"];
1171			
1172			#
1173			# Each loop iteration search for the next emphasis token. 
1174			# Each token is then passed to handleSpanToken.
1175			#
1176			$parts = preg_split($token_re, $text, 2, PREG_SPLIT_DELIM_CAPTURE);
1177			$text_stack[0] .= $parts[0];
1178			$token =& $parts[1];
1179			$text =& $parts[2];
1180			
1181			if (empty($token)) {
1182				# Reached end of text span: empty stack without emitting.
1183				# any more emphasis.
1184				while ($token_stack[0]) {
1185					$text_stack[1] .= array_shift($token_stack);
1186					$text_stack[0] .= array_shift($text_stack);
1187				}
1188				break;
1189			}
1190			
1191			$token_len = strlen($token);
1192			if ($tree_char_em) {
1193				# Reached closing marker while inside a three-char emphasis.
1194				if ($token_len == 3) {
1195					# Three-char closing marker, close em and strong.
1196					array_shift($token_stack);
1197					$span = array_shift($text_stack);
1198					$span = $this->runSpanGamut($span);
1199					$span = "<strong><em>$span</em></strong>";
1200					$text_stack[0] .= $this->hashPart($span);
1201					$em = '';
1202					$strong = '';
1203				} else {
1204					# Other closing marker: close one em or strong and
1205					# change current token state to match the other
1206					$token_stack[0] = str_repeat($token{0}, 3-$token_len);
1207					$tag = $token_len == 2 ? "strong" : "em";
1208					$span = $text_stack[0];
1209					$span = $this->runSpanGamut($span);
1210					$span = "<$tag>$span</$tag>";
1211					$text_stack[0] = $this->hashPart($span);
1212					$$tag = ''; # $$tag stands for $em or $strong
1213				}
1214				$tree_char_em = false;
1215			} else if ($token_len == 3) {
1216				if ($em) {
1217					# Reached closing marker for both em and strong.
1218					# Closing strong marker:
1219					for ($i = 0; $i < 2; ++$i) {
1220						$shifted_token = array_shift($token_stack);
1221						$tag = strlen($shifted_token) == 2 ? "strong" : "em";
1222						$span = array_shift($text_stack);
1223						$span = $this->runSpanGamut($span);
1224						$span = "<$tag>$span</$tag>";
1225						$text_stack[0] .= $this->hashPart($span);
1226						$$tag = ''; # $$tag stands for $em or $strong
1227					}
1228				} else {
1229					# Reached opening three-char emphasis marker. Push on token 
1230					# stack; will be handled by the special condition above.
1231					$em = $token{0};
1232					$strong = "$em$em";
1233					array_unshift($token_stack, $token);
1234					array_unshift($text_stack, '');
1235					$tree_char_em = true;
1236				}
1237			} else if ($token_len == 2) {
1238				if ($strong) {
1239					# Unwind any dangling emphasis marker:
1240					if (strlen($token_stack[0]) == 1) {
1241						$text_stack[1] .= array_shift($token_stack);
1242						$text_stack[0] .= array_shift($text_stack);
1243					}
1244					# Closing strong marker:
1245					array_shift($token_stack);
1246					$span = array_shift($text_stack);
1247					$span = $this->runSpanGamut($span);
1248					$span = "<strong>$span</strong>";
1249					$text_stack[0] .= $this->hashPart($span);
1250					$strong = '';
1251				} else {
1252					array_unshift($token_stack, $token);
1253					array_unshift($text_stack, '');
1254					$strong = $token;
1255				}
1256			} else {
1257				# Here $token_len == 1
1258				if ($em) {
1259					if (strlen($token_stack[0]) == 1) {
1260						# Closing emphasis marker:
1261						array_shift($token_stack);
1262						$span = array_shift($text_stack);
1263						$span = $this->runSpanGamut($span);
1264						$span = "<em>$span</em>";
1265						$text_stack[0] .= $this->hashPart($span);
1266						$em = '';
1267					} else {
1268						$text_stack[0] .= $token;
1269					}
1270				} else {
1271					array_unshift($token_stack, $token);
1272					array_unshift($text_stack, '');
1273					$em = $token;
1274				}
1275			}
1276		}
1277		return $text_stack[0];
1278	}
1279
1280
1281	function doBlockQuotes($text) {
1282		$text = preg_replace_callback('/
1283			  (								# Wrap whole match in $1
1284				(?>
1285				  ^[ ]*>[ ]?			# ">" at the start of a line
1286					.+\n					# rest of the first line
1287				  (.+\n)*					# subsequent consecutive lines
1288				  \n*						# blanks
1289				)+
1290			  )
1291			/xm',
1292			array(&$this, '_doBlockQuotes_callback'), $text);
1293
1294		return $text;
1295	}
1296	function _doBlockQuotes_callback($matches) {
1297		$bq = $matches[1];
1298		# trim one level of quoting - trim whitespace-only lines
1299		$bq = preg_replace('/^[ ]*>[ ]?|^[ ]+$/m', '', $bq);
1300		$bq = $this->runBlockGamut($bq);		# recurse
1301
1302		$bq = preg_replace('/^/m', "  ", $bq);
1303		# These leading spaces cause problem with <pre> content, 
1304		# so we need to fix that:
1305		$bq = preg_replace_callback('{(\s*<pre>.+?</pre>)}sx', 
1306			array(&$this, '_doBlockQuotes_callback2'), $bq);
1307
1308		return "\n". $this->hashBlock("<blockquote>\n$bq\n</blockquote>")."\n\n";
1309	}
1310	function _doBlockQuotes_callback2($matches) {
1311		$pre = $matches[1];
1312		$pre = preg_replace('/^  /m', '', $pre);
1313		return $pre;
1314	}
1315
1316
1317	function formParagraphs($text) {
1318	#
1319	#	Params:
1320	#		$text - string to process with html <p> tags
1321	#
1322		# Strip leading and trailing lines:
1323		$text = preg_replace('/\A\n+|\n+\z/', '', $text);
1324
1325		$grafs = preg_split('/\n{2,}/', $text, -1, PREG_SPLIT_NO_EMPTY);
1326
1327		#
1328		# Wrap <p> tags and unhashify HTML blocks
1329		#
1330		foreach ($grafs as $key => $value) {
1331			if (!preg_match('/^B\x1A[0-9]+B$/', $value)) {
1332				# Is a paragraph.
1333				$value = $this->runSpanGamut($value);
1334				$value = preg_replace('/^([ ]*)/', "<p>", $value);
1335				$value .= "</p>";
1336				$grafs[$key] = $this->unhash($value);
1337			}
1338			else {
1339				# Is a block.
1340				# Modify elements of @grafs in-place...
1341				$graf = $value;
1342				$block = $this->html_hashes[$graf];
1343				$graf = $block;
1344//				if (preg_match('{
1345//					\A
1346//					(							# $1 = <div> tag
1347//					  <div  \s+
1348//					  [^>]*
1349//					  \b
1350//					  markdown\s*=\s*  ([\'"])	#	$2 = attr quote char
1351//					  1
1352//					  \2
1353//					  [^>]*
1354//					  >
1355//					)
1356//					(							# $3 = contents
1357//					.*
1358//					)
1359//					(</div>)					# $4 = closing tag
1360//					\z
1361//					}xs', $block, $matches))
1362//				{
1363//					list(, $div_open, , $div_content, $div_close) = $matches;
1364//
1365//					# We can't call Markdown(), because that resets the hash;
1366//					# that initialization code should be pulled into its own sub, though.
1367//					$div_content = $this->hashHTMLBlocks($div_content);
1368//					
1369//					# Run document gamut methods on the content.
1370//					foreach ($this->document_gamut as $method => $priority) {
1371//						$div_content = $this->$method($div_content);
1372//					}
1373//
1374//					$div_open = preg_replace(
1375//						'{\smarkdown\s*=\s*([\'"]).+?\1}', '', $div_open);
1376//
1377//					$graf = $div_open . "\n" . $div_content . "\n" . $div_close;
1378//				}
1379				$grafs[$key] = $graf;
1380			}
1381		}
1382
1383		return implode("\n\n", $grafs);
1384	}
1385
1386
1387	function encodeAttribute($text) {
1388	#
1389	# Encode text for a double-quoted HTML attribute. This function
1390	# is *not* suitable for attributes enclosed in single quotes.
1391	#
1392		$text = $this->encodeAmpsAndAngles($text);
1393		$text = str_replace('"', '&quot;', $text);
1394		return $text;
1395	}
1396	
1397	
1398	function encodeAmpsAndAngles($text) {
1399	#
1400	# Smart processing for ampersands and angle brackets that need to 
1401	# be encoded. Valid character entities are left alone unless the
1402	# no-entities mode is set.
1403	#
1404		if ($this->no_entities) {
1405			$text = str_replace('&', '&amp;', $text);
1406		} else {
1407			# Ampersand-encoding based entirely on Nat Irons's Amputator
1408			# MT plugin: <http://bumppo.net/projects/amputator/>
1409			$text = preg_replace('/&(?!#?[xX]?(?:[0-9a-fA-F]+|\w+);)/', 
1410								'&amp;', $text);;
1411		}
1412		# Encode remaining <'s
1413		$text = str_replace('<', '&lt;', $text);
1414
1415		return $text;
1416	}
1417
1418
1419	function doAutoLinks($text) {
1420		$text = preg_replace_callback('{<((https?|ftp|dict):[^\'">\s]+)>}i', 
1421			array(&$this, '_doAutoLinks_url_callback'), $text);
1422
1423		# Email addresses: <address@domain.foo>
1424		$text = preg_replace_callback('{
1425			<
1426			(?:mailto:)?
1427			(
1428				(?:
1429					[-!#$%&\'*+/=?^_`.{|}~\w\x80-\xFF]+
1430				|
1431					".*?"
1432				)
1433				\@
1434				(?:
1435					[-a-z0-9\x80-\xFF]+(\.[-a-z0-9\x80-\xFF]+)*\.[a-z]+
1436				|
1437					\[[\d.a-fA-F:]+\]	# IPv4 & IPv6
1438				)
1439			)
1440			>
1441			}xi',
1442			array(&$this, '_doAutoLinks_email_callback'), $text);
1443
1444		return $text;
1445	}
1446	function _doAutoLinks_url_callback($matches) {
1447		$url = $this->encodeAttribute($matches[1]);
1448		$link = "<a href=\"$url\">$url</a>";
1449		return $this->hashPart($link);
1450	}
1451	function _doAutoLinks_email_callback($matches) {
1452		$address = $matches[1];
1453		$link = $this->encodeEmailAddress($address);
1454		return $this->hashPart($link);
1455	}
1456
1457
1458	function encodeEmailAddress($addr) {
1459	#
1460	#	Input: an email address, e.g. "foo@example.com"
1461	#
1462	#	Output: the email address as a mailto link, with each character
1463	#		of the address encoded as either a decimal or hex entity, in
1464	#		the hopes of foiling most address harvesting spam bots. E.g.:
1465	#
1466	#	  <p><a href="&#109;&#x61;&#105;&#x6c;&#116;&#x6f;&#58;&#x66;o&#111;
1467	#        &#x40;&#101;&#x78;&#97;&#x6d;&#112;&#x6c;&#101;&#46;&#x63;&#111;
1468	#        &#x6d;">&#x66;o&#111;&#x40;&#101;&#x78;&#97;&#x6d;&#112;&#x6c;
1469	#        &#101;&#46;&#x63;&#111;&#x6d;</a></p>
1470	#
1471	#	Based by a filter by Matthew Wickline, posted to BBEdit-Talk.
1472	#   With some optimizations by Milian Wolff.
1473	#
1474		$addr = "mailto:" . $addr;
1475		$chars = preg_split('/(?<!^)(?!$)/', $addr);
1476		$seed = (int)abs(crc32($addr) / strlen($addr)); # Deterministic seed.
1477		
1478		foreach ($chars as $key => $char) {
1479			$ord = ord($char);
1480			# Ignore non-ascii chars.
1481			if ($ord < 128) {
1482				$r = ($seed * (1 + $key)) % 100; # Pseudo-random function.
1483				# roughly 10% raw, 45% hex, 45% dec
1484				# '@' *must* be encoded. I insist.
1485				if ($r > 90 && $char != '@') /* do nothing */;
1486				else if ($r < 45) $chars[$key] = '&#x'.dechex($ord).';';
1487				else              $chars[$key] = '&#'.$ord.';';
1488			}
1489		}
1490		
1491		$addr = implode('', $chars);
1492		$text = implode('', array_slice($chars, 7)); # text without `mailto:`
1493		$addr = "<a href=\"$addr\">$text</a>";
1494
1495		return $addr;
1496	}
1497
1498
1499	function parseSpan($str) {
1500	#
1501	# Take the string $str and parse it into tokens, hashing embeded HTML,
1502	# escaped characters and handling code spans.
1503	#
1504		$output = '';
1505		
1506		$span_re = '{
1507				(
1508					\\\\'.$this->escape_chars_re.'
1509				|
1510					(?<![`\\\\])
1511					`+						# code span marker
1512			'.( $this->no_markup ? '' : '
1513				|
1514					<!--    .*?     -->		# comment
1515				|
1516					<\?.*?\?> | <%.*?%>		# processing instruction
1517				|
1518					<[/!$]?[-a-zA-Z0-9:_]+	# regular tags
1519					(?>
1520						\s
1521						(?>[^"\'>]+|"[^"]*"|\'[^\']*\')*
1522					)?
1523					>
1524			').'
1525				)
1526				}xs';
1527
1528		while (1) {
1529			#
1530			# Each loop iteration seach for either the next tag, the next 
1531			# openning code span marker, or the next escaped character. 
1532			# Each token is then passed to handleSpanToken.
1533			#
1534			$parts = preg_split($span_re, $str, 2, PREG_SPLIT_DELIM_CAPTURE);
1535			
1536			# Create token from text preceding tag.
1537			if ($parts[0] != "") {
1538				$output .= $parts[0];
1539			}
1540			
1541			# Check if we reach the end.
1542			if (isset($parts[1])) {
1543				$output .= $this->handleSpanToken($parts[1], $parts[2]);
1544				$str = $parts[2];
1545			}
1546			else {
1547				break;
1548			}
1549		}
1550		
1551		return $output;
1552	}
1553	
1554	
1555	function handleSpanToken($token, &$str) {
1556	#
1557	# Handle $token provided by parseSpan by determining its nature and 
1558	# returning the corresponding value that should replace it.
1559	#
1560		switch ($token{0}) {
1561			case "\\":
1562				return $this->hashPart("&#". ord($token{1}). ";");
1563			case "`":
1564				# Search for end marker in remaining text.
1565				if (preg_match('/^(.*?[^`])'.preg_quote($token).'(?!`)(.*)$/sm', 
1566					$str, $matches))
1567				{
1568					$str = $matches[2];
1569					$codespan = $this->makeCodeSpan($matches[1]);
1570					return $this->hashPart($codespan);
1571				}
1572				return $token; // return as text since no ending marker found.
1573			default:
1574				return $this->hashPart($token);
1575		}
1576	}
1577
1578
1579	function outdent($text) {
1580	#
1581	# Remove one level of line-leading tabs or spaces
1582	#
1583		return preg_replace('/^(\t|[ ]{1,'.$this->tab_width.'})/m', '', $text);
1584	}
1585
1586
1587	# String length function for detab. `_initDetab` will create a function to 
1588	# hanlde UTF-8 if the default function does not exist.
1589	var $utf8_strlen = 'mb_strlen';
1590	
1591	function detab($text) {
1592	#
1593	# Replace tabs with the appropriate amount of space.
1594	#
1595		# For each line we separate the line in blocks delemited by
1596		# tab characters. Then we reconstruct every line by adding the 
1597		# appropriate number of space between each blocks.
1598		
1599		$text = preg_replace_callback('/^.*\t.*$/m',
1600			array(&$this, '_detab_callback'), $text);
1601
1602		return $text;
1603	}
1604	function _detab_callback($matches) {
1605		$line = $matches[0];
1606		$strlen = $this->utf8_strlen; # strlen function for UTF-8.
1607		
1608		# Split in blocks.
1609		$blocks = explode("\t", $line);
1610		# Add each blocks to the line.
1611		$line = $blocks[0];
1612		unset($blocks[0]); # Do not add first block twice.
1613		foreach ($blocks as $block) {
1614			# Calculate amount of space, insert spaces, insert block.
1615			$amount = $this->tab_width - 
1616				$strlen($line, 'UTF-8') % $this->tab_width;
1617			$line .= str_repeat(" ", $amount) . $block;
1618		}
1619		return $line;
1620	}
1621	function _initDetab() {
1622	#
1623	# Check for the availability of the function in the `utf8_strlen` property
1624	# (initially `mb_strlen`). If the function is not available, create a 
1625	# function that will loosely count the number of UTF-8 characters with a
1626	# regular expression.
1627	#
1628		if (function_exists($this->utf8_strlen)) return;
1629		$this->utf8_strlen = create_function('$text', 'return preg_match_all(
1630			"/[\\\\x00-\\\\xBF]|[\\\\xC0-\\\\xFF][\\\\x80-\\\\xBF]*/", 
1631			$text, $m);');
1632	}
1633
1634
1635	function unhash($text) {
1636	#
1637	# Swap back in all the tags hashed by _HashHTMLBlocks.
1638	#
1639		return preg_replace_callback('/(.)\x1A[0-9]+\1/', 
1640			array(&$this, '_unhash_callback'), $text);
1641	}
1642	function _unhash_callback($matches) {
1643		return $this->html_hashes[$matches[0]];
1644	}
1645
1646}
1647
1648/*
1649
1650PHP Markdown
1651============
1652
1653Description
1654-----------
1655
1656This is a PHP translation of the original Markdown formatter written in
1657Perl by John Gruber.
1658
1659Markdown is a text-to-HTML filter; it translates an easy-to-read /
1660easy-to-write structured text format into HTML. Markdown's text format
1661is most similar to that of plain text email, and supports features such
1662as headers, *emphasis*, code blocks, blockquotes, and links.
1663
1664Markdown's syntax is designed not as a generic markup language, but
1665specifically to serve as a front-end to (X)HTML. You can use span-level
1666HTML tags anywhere in a Markdown document, and you can use block level
1667HTML tags (like <div> and <table> as well).
1668
1669For more information about Markdown's syntax, see:
1670
1671<http://daringfireball.net/projects/markdown/>
1672
1673
1674Bugs
1675----
1676
1677To file bug reports please send email to:
1678
1679<michel.fortin@michelf.com>
1680
1681Please include with your report: (1) the example input; (2) the output you
1682expected; (3) the output Markdown actually produced.
1683
1684
1685Version History
1686--------------- 
1687
1688See the readme file for detailed release notes for this version.
1689
1690
1691Copyright and License
1692---------------------
1693
1694PHP Markdown
1695Copyright (c) 2004-2009 Michel Fortin  
1696<http://michelf.com/>  
1697All rights reserved.
1698
1699Based on Markdown
1700Copyright (c) 2003-2006 John Gruber   
1701<http://daringfireball.net/>   
1702All rights reserved.
1703
1704Redistribution and use in source and binary forms, with or without
1705modification, are permitted provided that the following conditions are
1706met:
1707
1708*	Redistributions of source code must retain the above copyright notice,
1709	this list of conditions and the following disclaimer.
1710
1711*	Redistributions in binary form must reproduce the above copyright
1712	notice, this list of conditions and the following disclaimer in the
1713	documentation and/or other materials provided with the distribution.
1714
1715*	Neither the name "Markdown" nor the names of its contributors may
1716	be used to endorse or promote products derived from this software
1717	without specific prior written permission.
1718
1719This software is provided by the copyright holders and contributors "as
1720is" and any express or implied warranties, including, but not limited
1721to, the implied warranties of merchantability and fitness for a
1722particular purpose are disclaimed. In no event shall the copyright owner
1723or contributors be liable for any direct, indirect, incidental, special,
1724exemplary, or consequential damages (including, but not limited to,
1725procurement of substitute goods or services; loss of use, data, or
1726profits; or business interruption) however caused and on any theory of
1727liability, whether in contract, strict liability, or tort (including
1728negligence or otherwise) arising in any way out of the use of this
1729software, even if advised of the possibility of such damage.
1730
1731*/
1732?>