/parser/wp-mediawiki.php
PHP | 719 lines | 489 code | 94 blank | 136 comment | 56 complexity | a4501046d86c20aab423bb350f47d095 MD5 | raw file
- <?php
- namespace QuestPC;
- /*
- Plugin Name: MediaWiki Markup for WordPress
- Plugin URI: http://zechs.dyndns.org/wordpress/?page_id=126
- Description: Add a subset of MediaWiki markups to WordPress
- Version: 0.0.8
- Author: Ming-Hsien Tsai
- Author URI: http://zechs.dyndns.org/wordpress/
- */
- /* Copyright 2006 Ming-Hsien Tsai (email : mhtsai208@gmail.com)
- This program is free software; you can redistribute it and/or modify
- it under the terms of the GNU General Public License as published by
- the Free Software Foundation; either version 2 of the License, or
- (at your option) any later version.
- This program is distributed in the hope that it will be useful,
- but WITHOUT ANY WARRANTY; without even the implied warranty of
- MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
- GNU General Public License for more details.
- You should have received a copy of the GNU General Public License
- along with this program; if not, write to the Free Software
- Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
- */
/**
 * Converts a subset of MediaWiki markup to HTML by running an ordered
 * table of regular-expression rules (see $regex) over the input text.
 */
class WikiText {

// Marker characters that introduce a list line.
const LIST_TYPE_OL = "#";
const LIST_TYPE_UL = "*";
const LIST_TYPE_INDENT = ":";
const LIST_TYPE_DEFINITION = ";";

// Default option values; the constructor merges caller overrides on top.
private $options = array(
    // Must be strings after the merge (the constructor checks both).
    'wpwiki_wiki_site' => null,
    'wpwiki_page_title' => null,
    // When true, sanitize_id() prepends "_" to generated heading IDs.
    'wpwiki_add_prefix_for_id' => true,
    // NOTE(review): not read by any method in this class - confirm it is still needed.
    'wpwiki_toc_mode' => 'tag',
    // Caption shown in the generated table of contents.
    'wpwiki_toc_title' => 'Table of Content',
    # next setting enables <a> and <img> in <nowiki> and <pre>
    'wpwiki_url_backward' => true,
    # use a string value to override the default "<hr/>" emitted by horizontal_callback()
    'wpwiki_hr_tag' => false
);

// Headings collected by heading_callback(); generate_toc() renders them.
private $headings = array();

// Occurrence count per heading text, used to suffix the IDs of duplicate headings.
private $id_suffix = array();

// Counter used to label anonymous external links.
private $auto_number = 0;

// Nesting offset adjusted by table_callback() while it processes nested tables.
private $table_level = 0;

// Page number of a multipaged post; passed to append_pagenum_anchor().
private $pagenum = 1;

// Stack of currently open list marker characters, maintained by list_callback().
private $list_stack = array();

// Map of special characters to their md5 hashes; filled by the constructor.
private $rmap = array();

// Rule table: name => array( pattern, callback method name ).
// transform() applies the rules in exactly this order, so the order matters.
private $regex = array (
    'newline' => array ('/(\r\n|\r)/', 'newline_callback'),
    'encode' => array ('/.*/sm', 'encode'),
    'horizontal' => array ('/^----$/m', 'horizontal_callback'),
    'headings' => array ('/^(={1,6})(.*?)\1(?:\s|$)$/m', 'heading_callback'),
    'lists' => array ('/\n((?:\*|#|\;|\:)+.*?\n(?!(?:\*|#|\;|\:)+))/s', 'list_callback'),
    'preformatted' => array ('/((\n .*)+)/', 'preformatted_callback'),
    'tables' => array ('#^\{\|(.*?)(?:^\|\+(.*?))?(^(?:((?R))|.)*?)^\|}#msi', 'table_callback'),
    'external_links' => array ("/(\[)?((http\:\/\/|https\:\/\/|ftp\:\/\/|gopher\:\/\/|news\:\/\/)[\w|\d|\.|_|\-]+[A-Za-z0-9\/?=&%~_\-\.\:#;',]*)(?(1)([ ]+[^\]]+)?\])/i", 'url_callback'),
    'email' => array ('/(\[)?mailto:([\w|\d|\.|_|\-]+@[\w|\d|\.|_|-]+)(?(1)\])/i', 'email_callback'),
    'wikilinks' => array ('/\[{2}([^\||^\]|^\[]+)(?:\|([^\||^\[|^\]]+))?\]{2}/', 'wikilink_callback'),
    'emphasis' => array ("/(?<!')'('{1,4})(.*?)\\1'(?!')/", 'emphasis_callback'),
    // The paragraph rule is disabled; paragraph_callback() is kept but not registered.
    // 'paragraph' => array ("/^(.*)\n\n+/Ums", 'paragraph_callback'),
    'decode' => array ('/.*/sm', 'decode'),
    'raw' => array ('/<pre>(.*)<\/pre>/Ums', 'raw_callback'),
    'nowiki' => array ('/<nowiki>|<\/nowiki>/i', 'nowiki_callback')
);
/**
 * Create a converter.
 *
 * This is declared as __construct() because PHP does not treat a method
 * named after the class as a constructor when the class lives in a
 * namespace (and PHP 8 never does). With the old name, `new WikiText(...)`
 * silently skipped the option merge, the validation and the $rmap setup.
 *
 * @param array $options Overrides merged over the default option values.
 *                       'wpwiki_page_title' and 'wpwiki_wiki_site' must end
 *                       up as strings, otherwise SdvException::throwError()
 *                       is invoked.
 */
public function __construct( array $options ) {
    $this->options = array_merge( $this->options, $options );
    if ( !is_string( $this->options['wpwiki_page_title'] ) ) {
        SdvException::throwError( '\'wpwiki_page_title\' option must be initialized as current page name', __METHOD__, $this->options );
    }
    if ( !is_string( $this->options['wpwiki_wiki_site'] ) ) {
        SdvException::throwError( '\'wpwiki_wiki_site\' option must be initialized to SERVER_NAME', __METHOD__, $this->options );
    }
    // Pre-compute the md5 placeholder for every markup-significant character.
    $this->rmap = array();
    $special = array( '[', ']', '<', '>', ':', '/', '=', '*', '#', ';', "'", '|', '!', '-', "\n", ' ' );
    foreach ( $special as $char ) {
        $this->rmap[$char] = md5( $char );
    }
}

/**
 * Legacy alias kept for code that calls the old-style constructor by name.
 *
 * @deprecated Use `new WikiText( $options )`.
 * @param array $options Same as for __construct().
 */
public function WikiText( array $options ) {
    $this->__construct( $options );
}
/**
 * Build the protocol-relative URL "//<site>/blog/<page>" for a page name.
 *
 * @param string $pagename Page name; passed through Gl::hsc() before use
 *                         (presumably HTML escaping - confirm against Gl).
 * @return string
 */
protected function get_permalink( $pagename ) {
    $site = $this->options['wpwiki_wiki_site'];
    $page = Gl::hsc( $pagename );
    return '//' . $site . '/blog/' . $page;
}
/**
 * Translate MediaWiki markup to HTML.
 *
 * Every rule in $this->regex is applied in declaration order, logging the
 * callback name and the intermediate text after each step. The table of
 * contents built from the collected headings is then prepended.
 *
 * @param string $ret Wiki text to convert.
 * @return string Converted text, preceded by the table of contents (if any).
 */
public function transform($ret) {
    foreach ( $this->regex as $rule ) {
        $pattern = $rule[0];
        $callback = $rule[1];
        $ret = preg_replace_callback( $pattern, array( $this, $callback ), $ret );
        Dbg\log( __METHOD__.':callback', $callback );
        Dbg\log( __METHOD__.':ret', $ret );
    }
    return $this->generate_toc() . $ret;
}
/**
 * Drop the first byte of a string.
 *
 * @param string $str
 * @return string Whatever substr( $str, 1 ) yields for the input.
 */
private function remove_leading_char($str) {
    $rest = substr( $str, 1 );
    return $rest;
}
/**
 * Generate the table of contents from the headings collected so far.
 *
 * Each heading becomes a nested <ul>/<li> entry numbered "1", "1.1", ...
 * Depth is measured relative to the level of the first heading.
 *
 * @return string HTML of the table of contents, or '' when there are no
 *                headings (the original returned null here).
 */
private function generate_toc() {
    if ( empty( $this->headings ) ) {
        return '';
    }
    $ret = "<div>\n"
        . "<table id='_toc' class='toc' summary='toc'>\n"
        . "<tr><td id='_tochead'><span id='_toctitle'>" . $this->options['wpwiki_toc_title'] . "</span> [<a href='javascript:toggle_toc()'><span id='_toctoggle'>Hide</span></a>]</td></tr>\n"
        . "<tr><td><div id='_toclist' class='toclist'>";
    $min = $this->headings[0]['level'];
    // One counter per open nesting level; imploded with '.' for the number.
    $level = array();
    $prev = 0;
    foreach ( $this->headings as $h ) {
        // Headings shallower than the first one are clamped to depth 1.
        $depth = max( 1, $h['level'] - $min + 1 );
        if ( $depth > $prev ) {
            // Deeper than the previous heading: open exactly one new level.
            $toclevel = count( $level ) + 1;
            $ret .= "<ul>\n<li class='toclevel-$toclevel'>";
            array_push( $level, 1 );
        } else if ( $depth == $prev || $depth >= count( $level ) ) {
            // Same level: close the previous item and bump its counter.
            $toclevel = count( $level );
            $ret .= "</li>\n<li class='toclevel-$toclevel'>";
            $level[count( $level ) - 1]++;
        } else {
            // Shallower: close levels until we are back at $depth.
            $toclevel = $depth;
            while ( count( $level ) > $depth ) {
                $ret .= "</li>\n</ul>";
                array_pop( $level );
            }
            $level[count( $level ) - 1]++;
            $ret .= "</li>\n<li class='toclevel-$toclevel'>";
        }
        $prev = $depth;
        $ret .= "<a href='" . $h['link'] . "'><span class='tocnumber'>" . implode( '.', $level ) . "</span> <span class='toctext'>" . $h['text'] . "</span></a>";
    }
    // Close whatever levels are still open.
    while ( count( $level ) > 0 ) {
        $ret .= "</li></ul>";
        array_pop( $level );
    }
    $ret .= "</div></td></tr></table></div>\n";
    return $ret;
}
/**
 * Turn heading text into a string usable as an element ID.
 *
 * Spaces become underscores, the result is rawurlencode()d with every "%"
 * replaced by ".", and "_" is prepended when the
 * 'wpwiki_add_prefix_for_id' option is truthy.
 *
 * @param string $id Raw heading text.
 * @return string
 */
private function sanitize_id($id) {
    $encoded = rawurlencode( str_replace( ' ', '_', $id ) );
    $clean = str_replace( '%', '.', $encoded );
    if ( $this->options['wpwiki_add_prefix_for_id'] ) {
        return '_' . $clean;
    }
    return $clean;
}
/**
 * Return the opening tag of the list container for a marker character.
 *
 * @param string $type One of the LIST_TYPE_* markers.
 * @return string "<ul>", "<ol>", "<dl>", or "" for an unknown marker.
 */
private function open_list($type) {
    if ( $type == self::LIST_TYPE_UL ) {
        return "<ul>";
    }
    if ( $type == self::LIST_TYPE_OL ) {
        return "<ol>";
    }
    if ( $type == self::LIST_TYPE_DEFINITION || $type == self::LIST_TYPE_INDENT ) {
        return "<dl>";
    }
    return "";
}
/**
 * Return the closing tag of the list container for a marker character.
 *
 * @param string $type One of the LIST_TYPE_* markers.
 * @return string "</ul>", "</ol>", "</dl>", or "" for an unknown marker.
 */
private function close_list($type) {
    if ( $type == self::LIST_TYPE_UL ) {
        return "</ul>";
    }
    if ( $type == self::LIST_TYPE_OL ) {
        return "</ol>";
    }
    if ( $type == self::LIST_TYPE_DEFINITION || $type == self::LIST_TYPE_INDENT ) {
        return "</dl>";
    }
    return "";
}
/**
 * Return the opening tag of a list item for a marker character.
 *
 * @param string $type One of the LIST_TYPE_* markers.
 * @return string "<li>" for bullet/numbered lists, "<dt>" for ";",
 *                "<dd>" for ":", or "" for an unknown marker.
 */
private function open_list_item($type) {
    if ( $type == self::LIST_TYPE_UL || $type == self::LIST_TYPE_OL ) {
        return "<li>";
    }
    if ( $type == self::LIST_TYPE_DEFINITION ) {
        return "<dt>";
    }
    if ( $type == self::LIST_TYPE_INDENT ) {
        return "<dd>";
    }
    return "";
}
/**
 * Return the closing tag of a list item for a marker character.
 *
 * Each closing tag is preceded by a newline.
 *
 * @param string $type One of the LIST_TYPE_* markers.
 * @return string "\n</li>", "\n</dt>", "\n</dd>", or "" for an unknown marker.
 */
private function close_list_item($type) {
    if ( $type == self::LIST_TYPE_UL || $type == self::LIST_TYPE_OL ) {
        return "\n</li>";
    }
    if ( $type == self::LIST_TYPE_DEFINITION ) {
        return "\n</dt>";
    }
    if ( $type == self::LIST_TYPE_INDENT ) {
        return "\n</dd>";
    }
    return "";
}
/**
 * Tell whether two list markers belong to the same kind of list.
 *
 * "*" only pairs with "*" and "#" only with "#"; ";" and ":" are
 * interchangeable because both live inside a <dl>.
 *
 * @param string $t1 First marker character.
 * @param string $t2 Second marker character.
 * @return bool
 */
private function list_type_eq($t1, $t2) {
    $compatible = array(
        self::LIST_TYPE_UL.self::LIST_TYPE_UL,
        self::LIST_TYPE_OL.self::LIST_TYPE_OL,
        self::LIST_TYPE_DEFINITION.self::LIST_TYPE_DEFINITION,
        self::LIST_TYPE_DEFINITION.self::LIST_TYPE_INDENT,
        self::LIST_TYPE_INDENT.self::LIST_TYPE_INDENT,
        self::LIST_TYPE_INDENT.self::LIST_TYPE_DEFINITION
    );
    // Loose comparison on purpose: it mirrors the matching rules of a switch.
    return in_array( $t1.$t2, $compatible );
}
/**
 * Protect literal regions from the wiki rules.
 *
 * <pre> and <nowiki> blocks are always handed to encode_callback(); when
 * the 'wpwiki_url_backward' option is truthy, <a ...>...</a> and
 * <img ... /> elements are protected as well.
 *
 * @param array $matches Regex match; only $matches[0] (the text) is used.
 * @return string Text with the protected regions encoded.
 */
private function encode($matches) {
    $pattern = $this->options['wpwiki_url_backward']
        ? '/<(pre|nowiki)>.*<\/\1>|<a\s+[^>]*>(.*)<\/a>|<img\s+[^>]*\/>/Ums'
        : '/<(pre|nowiki)>.*<\/\1>/Ums';
    $handler = array( $this, 'encode_callback' );
    return preg_replace_callback( $pattern, $handler, $matches[0] );
}
/**
 * Restore the regions that encode() protected.
 *
 * Everything between the md5('[') and md5(']') markers is passed to
 * decode_callback().
 *
 * @param array $matches Regex match; only $matches[0] (the text) is used.
 * @return string Text with the protected regions restored.
 */
private function decode($matches) {
    $open = md5( '[' );
    $close = md5( ']' );
    $pattern = '/' . $open . '(.*)' . $close . '/Ums';
    $handler = array( $this, 'decode_callback' );
    return preg_replace_callback( $pattern, $handler, $matches[0] );
}
/**
 * Encode one protected region found by encode().
 *
 * "%" and "-" are swapped for their md5 hashes, the text is
 * rawurlencode()d and wrapped in md5('[') ... md5(']') markers. A <pre>
 * block additionally gets a leading newline.
 *
 * @param array $matches Regex match. $matches[1] holds the tag name for
 *                       <pre>/<nowiki> and is absent when the <img>
 *                       alternative matched, so it is checked with isset().
 * @return string Encoded region.
 */
private function encode_callback($matches) {
    $ret = str_replace( array( '%', '-' ), array( md5( '%' ), md5( '-' ) ), $matches[0] );
    $ret = rawurlencode( $ret );
    $ret = md5( '[' ) . $ret . md5( ']' );
    // PCRE drops trailing unmatched groups, so index 1 may not exist.
    if ( isset( $matches[1] ) && $matches[1] === 'pre' ) {
        $ret = "\n" . $ret;
    }
    return $ret;
}
/**
 * Decode one protected region found by decode().
 *
 * Reverses encode_callback(): rawurldecode() first, then the md5 hashes of
 * "%" and "-" are turned back into the characters.
 *
 * @param array $matches Regex match; $matches[1] is the encoded payload.
 * @return string Original text of the region.
 */
private function decode_callback($matches) {
    $hashes = array( md5( '%' ), md5( '-' ) );
    $chars = array( '%', '-' );
    return str_replace( $hashes, $chars, rawurldecode( $matches[1] ) );
}
/**
 * Replacement for the 'newline' rule: every matched line ending
 * ("\r\n" or a lone "\r") becomes a single "\n".
 *
 * @param array $matches Regex match (unused).
 * @return string Always "\n".
 */
private function newline_callback($matches) {
    $line_feed = "\n";
    return $line_feed;
}
/**
 * Escape HTML special characters inside a <pre> block.
 *
 * The content is decoded first so that entities already present are not
 * escaped a second time.
 *
 * @param array $matches Regex match; $matches[1] is the text between the tags.
 * @return string The block rebuilt as "<pre>escaped text</pre>".
 */
function raw_callback($matches) {
    $plain = htmlspecialchars_decode( $matches[1] );
    $escaped = htmlspecialchars( $plain );
    return "<pre>" . $escaped . "</pre>";
}
/**
 * Replacement for a "----" line.
 *
 * @param array $matches Regex match (unused).
 * @return string The 'wpwiki_hr_tag' option when it is truthy, else '<hr/>'.
 */
private function horizontal_callback($matches) {
    $custom = $this->options['wpwiki_hr_tag'];
    if ( !$custom ) {
        return '<hr/>';
    }
    return $custom;
}
/**
 * Hook consulted by heading_callback() to decide whether headings get IDs
 * and table-of-contents entries. This implementation always says yes;
 * being protected, a subclass may override it.
 *
 * @return bool Always true.
 */
protected function is_single() {
    return true;
}
/**
 * Second hook consulted by heading_callback(), checked only when
 * is_single() returns false. This implementation always says yes;
 * being protected, a subclass may override it.
 *
 * @return bool Always true.
 */
protected function is_page() {
    return true;
}
/**
 * Replacement for a "== Heading ==" line.
 *
 * On a single post or page the heading gets an ID (suffixed "_2", "_3", ...
 * when the same text occurs again) and is recorded in $this->headings for
 * the table of contents. Otherwise a plain heading element is returned.
 *
 * @param array $matches Regex match: [1] the run of "=" (its length is the
 *                       heading level), [2] the heading text.
 * @return string The <hN> element.
 */
private function heading_callback($matches) {
    $level = strlen( $matches[1] );
    $text = trim( $matches[2] );

    if ( !$this->is_single() && !$this->is_page() ) {
        return "<h{$level}>{$text}</h{$level}>";
    }

    // Count occurrences so that repeated headings get distinct IDs.
    if ( array_key_exists( $text, $this->id_suffix ) ) {
        $this->id_suffix[$text]++;
        $suffix = '_' . $this->id_suffix[$text];
    } else {
        $this->id_suffix[$text] = 1;
        $suffix = '';
    }

    $id = $this->sanitize_id( $text ) . $suffix;
    $page_url = $this->get_permalink( $this->options['wpwiki_page_title'] );
    $link = $this->append_pagenum_anchor( $page_url, $this->pagenum, $id );
    $this->headings[] = array (
        'level' => $level,
        'link' => $link,
        'text' => $text
    );
    return "<h{$level} id=\"$id\">{$text}</h{$level}>";
}
/**
 * Replacement for a block of consecutive list lines.
 *
 * Every line starts with a run of marker characters ("*", "#", ";", ":");
 * the run length is the nesting depth. A trailing "^" after the previous
 * line's markers continues the previous item on a new line.
 *
 * @param array $matches Regex match; $matches[1] is the list block.
 * @return string HTML for the block, wrapped in newlines.
 */
private function list_callback($matches) {
    $list = array();
    $prev = "";
    $ret = "";
    preg_match_all('/^((\*|#|\;|\:|\^)+)(.*?)$/ms', $matches[1], $list, PREG_SET_ORDER);
    foreach ($list as $val) {
        $type = $val[1];      // full marker run, e.g. "*#"
        $last_type = $val[2]; // last marker of the run
        $text = $val[3];
        if (substr($text, 0, 1) == " ")
            $text = substr($text, 1);
        $size = strlen($type);
        if ($type == $prev) {
            // same list level: close the previous item, open a new one
            $ret .= $this->close_list_item($last_type).$this->open_list_item($last_type).$text;
        } else if ($type == $prev."^") {
            // a continuation of previous list item
            $ret .= "\n".$text;
        } else {
            // different list level
            $prev_size = strlen($prev);
            $min_size = min($size, $prev_size);
            // Length of the common prefix. The bounds check has to come first:
            // indexing past the end of either string raises
            // "Uninitialized string offset" (always, for the first line,
            // because $prev is still empty).
            $index = 0;
            while ($index < $min_size && $this->list_type_eq($type[$index], $prev[$index]))
                $index++;
            // close previous non-common suffix
            while(count($this->list_stack) > $index) {
                $close_tag = array_pop($this->list_stack);
                $ret .= $this->close_list_item($close_tag).$this->close_list($close_tag);
            }
            // open current non-common suffix
            $open = false;
            for ($i = $index; $i < $size; $i++) {
                $open_tag = $type[$i];
                array_push($this->list_stack, $open_tag);
                $open = true;
                $ret .= $this->open_list($open_tag).$this->open_list_item($open_tag);
            }
            if (!$open) {
                // Nothing new was opened: this line continues an already
                // open level, possibly switching between ";" and ":".
                $close_tag = $prev[$index - 1];
                $open_tag = $type[$index - 1];
                // exchange stack item
                array_pop($this->list_stack);
                array_push($this->list_stack, $open_tag);
                $ret .= $this->close_list_item($close_tag).$this->open_list_item($open_tag);
            }
            $ret .= $text;
            $prev = $type;
        }
    }
    // close remainder
    while (count($this->list_stack) > 0) {
        $close_tag = array_pop($this->list_stack);
        $ret .= $this->close_list_item($close_tag).$this->close_list($close_tag);
    }
    return "\n".$ret."\n";
}
/**
 * Replacement for a "{| ... |}" table.
 *
 * Nested tables are converted first by re-applying the 'tables' rule to
 * the row text; the rows are then converted by rows_callback().
 *
 * @param array $matches Regex match: [1] table attributes, [3] row text,
 *                       [4] present only when a nested table was matched.
 * @return string The <table> element followed by a newline.
 */
private function table_callback($matches) {
    $attrs = trim($matches[1]);
    $rows = $matches[3];
    if (array_key_exists(4, $matches)) {
        // The rule is registered as 'tables'. The former lookup of 'table'
        // produced a null pattern and an invalid callback for nested tables.
        $rule = $this->regex['tables'];
        $this->table_level += 3;
        $rows = preg_replace_callback($rule[0], array($this, $rule[1]), $rows);
        $this->table_level -= 3;
    }
    $rregex = '#(?:^(\||!)-|\G)(.*?)^(.*?)(?=(?:\|-|!-|\z))#msi';
    $rows = preg_replace_callback($rregex, array($this, 'rows_callback'), $rows);
    $start = $attrs == "" ? "<table>" : "<table {$attrs}>";
    $end = "</table>";
    return $start."\n".$rows.$end."\n";
}
/**
 * Replacement for one table row.
 *
 * The cell text is split and converted by cells_callback().
 *
 * @param array $matches Regex match: [0] whole row, [2] row attributes,
 *                       [3] cell text.
 * @return string The <tr> element followed by a newline, or the match
 *                itself when it is empty.
 */
private function rows_callback($matches) {
    $whole = $matches[0];
    $attrs = trim( $matches[2] );
    $cells = $matches[3];
    if ( $whole == "" ) {
        return $whole;
    }
    $cregex = '#((?:\||!|\|\||!!|\G))(?:([^|\n]*?)\|(?!\|))?(.+?)(?=\||!|\|\||!!|\z)#msi';
    $cells = preg_replace_callback( $cregex, array( $this, 'cells_callback' ), $cells );
    if ( $attrs == "" ) {
        $start = "<tr>";
    } else {
        $start = "<tr {$attrs}>";
    }
    return $start . "\n" . $cells . "</tr>" . "\n";
}
/**
 * Replacement for one table cell.
 *
 * @param array $matches Regex match: [0] whole cell, [1] the cell marker
 *                       ("!" produces a header cell), [2] cell attributes,
 *                       [3] cell content.
 * @return string The <th> or <td> element followed by a newline, or the
 *                match itself when it is empty.
 */
private function cells_callback($matches) {
    $whole = $matches[0];
    $marker = $matches[1];
    $attrs = trim( $matches[2] );
    $content = trim( $matches[3] );
    if ( $whole == "" ) {
        return $whole;
    }
    $tag = ( $marker == '!' ) ? "th" : "td";
    $start = ( $attrs == "" ) ? "<{$tag}>" : "<{$tag} {$attrs}>";
    $end = "</{$tag}>";
    return $start . "\n" . $content . "\n" . $end . "\n";
}
/**
 * Replacement for an external link: a bare URL, "[url]" or "[url text]".
 *
 * - "[url text]" is labelled with the trimmed text.
 * - "[url]" gets an automatic label "[1]", "[2]", ... (the original counted
 *   from "[0]" because it post-incremented a counter that starts at 0).
 * - A bare URL is labelled with the URL itself.
 *
 * @param array $matches Regex match: [1] the opening "[" if any, [2] the
 *                       URL, [4] the description. Index 4 is absent when
 *                       no description was given, so it is tested with
 *                       isset() before being read.
 * @return string The <a> element.
 */
private function url_callback($matches) {
    $explicit = $matches[1];
    $url = $matches[2];
    if ( isset( $matches[4] ) ) {
        $desc = $matches[4];
    } else if ( $explicit ) {
        $desc = "[" . ( ++$this->auto_number ) . "]";
    } else {
        $desc = $url;
    }
    $desc = trim( $desc );
    return "<a href=\"{$url}\">{$desc}</a>";
}
/**
 * Replacement for a "mailto:" link, with or without surrounding brackets.
 *
 * @param array $matches Regex match; $matches[2] is the e-mail address.
 * @return string An <a> element whose target and label are the address.
 */
private function email_callback($matches) {
    $address = $matches[2];
    return "<a href=\"mailto:" . $address . "\">" . $address . "</a>";
}
/**
 * Split an anchor into its text and its numeric index.
 *
 * The index is whatever follows the last "_" when that tail is numeric.
 * Otherwise the whole anchor is the text and the index defaults to 1. An
 * underscore at position 0 is not treated as a separator (the position is
 * compared loosely against false).
 *
 * @param string $anchor
 * @return array array( text, index ); the index is a numeric string when it
 *               was parsed from the anchor and the integer 1 otherwise.
 */
private function split_anchor($anchor) {
    $pos = strrpos( $anchor, '_' );
    if ( $pos != false && $pos < strlen( $anchor ) ) {
        $tail = substr( $anchor, $pos + 1 );
        if ( is_numeric( $tail ) ) {
            return array ( substr( $anchor, 0, $pos ), $tail );
        }
    }
    return array ( $anchor, 1 );
}
/**
 * Append a page number and an anchor to a URL.
 *
 * "page=N" is added only for N > 1, using "&" when the URL already has a
 * query string and "?" otherwise. The anchor, when not empty, is appended
 * as "#anchor".
 *
 * @param string $url     Base URL.
 * @param int    $pagenum Page number of a multipaged post.
 * @param string $anchor  Fragment identifier without the leading "#".
 * @return string
 */
private function append_pagenum_anchor($url, $pagenum = 0, $anchor = '') {
    if ( $pagenum > 1 ) {
        // Compare against false explicitly: strpos() returns 0, which is
        // falsy, when the URL starts with "?".
        $separator = ( strpos( $url, '?' ) !== false ) ? '&' : '?';
        $url .= $separator . 'page=' . $pagenum;
    }
    // A plain truthiness test would append the anchor "0" without its "#".
    if ( (string)$anchor !== '' ) {
        $url .= '#' . $anchor;
    }
    return $url;
}
/**
 * Tell whether an ID is one of the IDs listed below, which are among those
 * used in the generated table of contents markup.
 *
 * @param string $id
 * @return bool
 */
private function is_toc_id($id) {
    $reserved = array( "_toc", "_tochead", "_toctoggle", "_toclist" );
    // Loose comparison on purpose: it mirrors the matching rules of a switch.
    return in_array( $id, $reserved );
}
/**
 * Replacement for a "[[Title#anchor|text]]" wikilink.
 *
 * The link is split at "#" into the post title and an optional anchor.
 * The label is the text after "|" when present, else the link itself.
 * A link without a title ("[[#anchor]]") points to the anchor on the
 * current page.
 *
 * Changes from the original: split() (removed in PHP 7) is replaced by
 * explode(); the optional description and anchor are no longer read when
 * absent; $url is always defined.
 *
 * NOTE(review): the target is built as <wpwiki_wiki_site><title>, whereas
 * get_permalink() builds "//<site>/blog/<page>" - confirm which is intended.
 *
 * @param array $matches Regex match: [1] the link, [2] the description
 *                       (absent when no "|text" part was given).
 * @return string The <a> element.
 */
private function wikilink_callback($matches) {
    $link = $matches[1];
    $desc = ( isset( $matches[2] ) && $matches[2] !== '' ) ? $matches[2] : $link;

    // Like the former split(): only the first two "#"-separated parts are used.
    $parts = explode( '#', $link );
    $title = $parts[0];
    $anchor = isset( $parts[1] ) ? $parts[1] : '';
    $fragment = ( $anchor !== '' ) ? '#' . $anchor : '';

    if ( $title !== '' ) {
        $url = $this->options['wpwiki_wiki_site'] . $title . $fragment;
    } else {
        $url = $fragment;
    }
    return "<a href=\"{$url}\">{$desc}</a>";
}
/**
 * Replacement for text wrapped in runs of apostrophes (bold and italic).
 *
 * The pattern consumes one apostrophe outside the captured run, so the
 * captured length is the total number of apostrophes minus one:
 * 1 gives <em>, 2 gives <strong>, 3 gives <strong> with one literal
 * apostrophe kept on each side, and 4 gives <em><strong>.
 *
 * @param array $matches Regex match: [1] the captured apostrophes, [2] the text.
 * @return string The emphasised markup.
 */
private function emphasis_callback($matches) {
    $quotes = strlen( $matches[1] );
    $text = $matches[2];
    if ( $quotes == 1 ) {
        $ret = "<em>{$text}</em>";
    } else if ( $quotes == 2 ) {
        $ret = "<strong>{$text}</strong>";
    } else if ( $quotes == 4 ) {
        $ret = "<em><strong>{$text}</strong></em>";
    } else if ( $quotes == 3 ) {
        $delim = "'";
        $ret = "<strong>{$delim}{$text}{$delim}</strong>";
    }
    return $ret;
}
/**
 * Wrap a block of text in <p> tags.
 *
 * The rule that would call this method is commented out in the rule table.
 * Text containing an encoded <pre>/<nowiki> pair (recognised through the
 * $rmap hashes of "<", ">" and "/") is returned unwrapped.
 *
 * @param array $matches Regex match; $matches[1] is the paragraph text.
 * @return string "" for blank text, otherwise the text with a trailing
 *                newline, wrapped in <p> unless it holds a protected block.
 */
private function paragraph_callback($matches) {
    $text = trim( $matches[1] );
    if ( $text == "" ) {
        return "";
    }
    $lt = $this->rmap['<'];
    $gt = $this->rmap['>'];
    $slash = $this->rmap['/'];
    // skip <pre></pre>
    $regex = '/' . $lt . '(pre|nowiki)' . $gt . '(.*)' . $lt . $slash . '(\1)' . $gt . '/s';
    if ( preg_match( $regex, $text ) ) {
        return "$text\n";
    }
    return "<p>\n$text\n</p>\n";
}
/**
 * Replacement for <nowiki> and </nowiki> tags: they are removed from the
 * output.
 *
 * @param array $matches Regex match (unused).
 * @return string Always the empty string.
 */
private function nowiki_callback($matches) {
    $nothing = "";
    return $nothing;
}
/**
 * Replacement for a run of lines that each start with a space.
 *
 * The first character of every line is stripped via remove_leading_char(),
 * the lines are wrapped in <pre>, and the block is passed through encode()
 * so that later rules leave it alone.
 *
 * @param array $matches Regex match; $matches[0] is the run of lines.
 * @return string The encoded <pre> block.
 */
private function preformatted_callback($matches) {
    $lines = explode( "\n", $matches[0] );
    $stripped = array();
    foreach ( $lines as $line ) {
        $stripped[] = $this->remove_leading_char( $line );
    }
    $body = implode( "\n", $stripped );
    return $this->encode( array( "\n<pre>$body\n</pre>" ) );
}
- }