/tools/wikify.py
Python | 944 lines | 785 code | 64 blank | 95 comment | 54 complexity | 590b7bd86fa179c785505b756205dbeb MD5
Possible License(s): GPL-3.0
- #!/usr/bin/python
- #
- # wikify.py - Convert from wikitext to HTML
- # Based on large portions of JeremyRuston's TiddlyWiki JS Wikifier
- # Changed to GoogleCode wiki syntax, python by Michael Crawford <mike@dataunity.com>
- # Fixed space errors, allowed # in wikiwords and removed external
- # links by Richard Quirk <richard.quirk at gmail.com>
- """ Convert wikitext to HTML """
- # Jeremy's license:
- # Copyright (c) UnaMesa Association 2004-2007
- #
- # Redistribution and use in source and binary forms, with or without modification,
- # are permitted provided that the following conditions are met:
- #
- # Redistributions of source code must retain the above copyright notice, this
- # list of conditions and the following disclaimer.
- #
- # Redistributions in binary form must reproduce the above copyright notice, this
- # list of conditions and the following disclaimer in the documentation and/or other
- # materials provided with the distribution.
- #
- # Neither the name of the UnaMesa Association nor the names of its contributors may be
- # used to endorse or promote products derived from this software without specific
- # prior written permission.
- #
- # THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
- # AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
- # IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
- # ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
- # LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
- # CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
- # SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
- # INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
- # CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
- # ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
- # POSSIBILITY OF SUCH DAMAGE.
- #
- # My license:
- # Copyright (c) Data Unity 2007
- #
- # Redistribution and use in source and binary forms, with or without modification,
- # are permitted provided that the following conditions are met:
- #
- # Redistributions of source code must retain the above copyright notice, this
- # list of conditions and the following disclaimer.
- #
- # Redistributions in binary form must reproduce the above copyright notice, this
- # list of conditions and the following disclaimer in the documentation and/or other
- # materials provided with the distribution.
- #
- # Neither the name of the Data Unity nor the names of its contributors may be
- # used to endorse or promote products derived from this software without
- # specific prior written permission.
- #
- # THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
- # AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
- # IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
- # ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
- # LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
- # CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
- # SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
- # INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
- # CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
- # ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
- # POSSIBILITY OF SUCH DAMAGE.
- import re, os, os.path, htmlentitydefs, urllib
- class _HTML:
- """ An HTML node factory factory. """
- class Node:
- """ An HTML element. """
- def __init__(self, parent, tagname, text="", attribs={}, empty=False, **kwargs):
- self.tagname = tagname
- self.attribs = dict(attribs)
- self.children = list()
- self.empty = empty
- if text != "":
- self.appendText(text)
- if parent is not None:
- parent.children.append(self)
- self.parent = parent
- def appendText(self, text):
- if text == "": return
- _HTML.Text(self, text)
- def __str__(self):
- attrs = " ".join([ '%s="%s"' % i for i in self.attribs.iteritems() ])
- if attrs: attrs = " " + attrs
- if self.empty:
- return "<%s%s/>" % (self.tagname, attrs)
- children = "".join([str(c) for c in self.children])
- return "<%s%s>%s</%s>" % (self.tagname, attrs, children, self.tagname)
- def isInside(self, tagname):
- k = self
- while k is not None:
- if k.tagname == tagname:
- return True
- k = k.parent
- return False
- class Text:
- """ Simple text node. """
- entities = [ (k,v)
- for k,v in htmlentitydefs.entitydefs.iteritems()
- if k != "amp" and k[0] != "#" ]
- def __init__(self, parent, text=""):
- self.text = self._clean(text)
- if parent is not None:
- parent.children.append(self)
- def _clean(self, text):
- text = text.replace("&", "&amp;")
- for k,v in self.entities:
- text = text.replace(v, "&%s;" % k)
- return text
- def __str__(self):
- return self.text
- def __getattr__(self, attr):
- """ Return an element constructor using the attribute as the tagname """
- def factory(parent=None, **kwargs):
- return self.Node(parent, attr, **kwargs)
- return factory
- HTML = _HTML()
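- # Usage sketch (illustrative, not part of the original docs): attribute
- # access on HTML returns an element constructor, so a tree can be built
- # and serialized with str():
- #   d = HTML.div(None)
- #   HTML.a(d, text="home", attribs={"href": "index.html"})
- #   str(d)  # -> '<div><a href="index.html">home</a></div>'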
- URLSTR = r"(?:file|http|https|mailto|ftp|irc|news|data):[^\s'\"]+(?:/|\b)"
- URL = re.compile(URLSTR, re.M)
- IMGURLSTR = r".+((\.[Pp][Nn][Gg])|(\.[Gg][Ii][Ff])|(\.[Jj][Pp][Ee]?[Gg]))"
- IMGURL = re.compile(IMGURLSTR, re.M)
- YOUTUBESTR = r"http://www.youtube.com/watch\?v=([A-Za-z0-9_-]+)"
- YOUTUBEURL = re.compile(YOUTUBESTR, re.M)
- YOUTUBEREPL = r'<object width="425" height="355"><param name="movie" value="http://www.youtube.com/v/%(id)s&rel=1"></param><param name="wmode" value="transparent"></param><embed src="http://www.youtube.com/v/%(id)s&rel=1" type="application/x-shockwave-flash" wmode="transparent" width="425" height="355"></embed></object>'
- VIDEOURLSTR = r".+((\.[Aa][Vv][Ii])|(\.[Mm][Oo][Vv])|(\.[Mm][Pp][Ee]?[Gg]))"
- VIDEOURL = re.compile(VIDEOURLSTR, re.M)
- VIDEOREPL = r'<embed src = "%s" width="400" height="350" hidden=false autostart=true loop=1>'
- CODEURLSTR = r"http://([^\.]+).googlecode.com/svn/trunk/([^#]+)#((?:(?:(?:[\d]+)?\-)?[\d]+)|(?:[\d]+\-?))((?:\:(?:[\:]|[^\W])+))?"
- CODEURL = re.compile(CODEURLSTR, re.M)
- CODEREPL = r'<a href="%(url)s">svn://%(site)s/trunk/%(file)s</a><pre name="code" class="%(class)s">%(lines)s</pre>'
- def GoogleCode_ReadSVNFile(wikifier, domain, path, start, end):
- """ Try to read a file from subversion for inclusion in the wiki. """
- gcurl = "http://%s.googlecode.com/svn/trunk/%s" % (domain,path)
- fdata = urllib.urlopen(gcurl).readlines()
- return gcurl, fdata[start-1:end]
- def GoogleCode_IsExternalLink(wikifier, link):
- """ See if the link points outside of the wiki. """
- if GoogleCode_Exists(wikifier, link):
- return False
- if URL.match(link):
- return True
- if '.' in link or '\\' in link or '/' in link or '#' in link:
- return True
- return False
- def GoogleCode_Exists(wikifier, wikipage):
- """ See if a wiki page exists inside this wiki. """
- path = os.path.join(wikifier.srcdir, "%s.wiki" % wikipage)
- if os.path.exists(path):
- return True
- return False
- def GoogleCode_Heading(wikifier, termRegExp=None, **kwargs):
- termMatch = termRegExp.search(wikifier.source, wikifier.nextMatch)
- if termMatch is None: return
- # Pop up to two trailing <br> elements so blank lines do not
- # precede the heading.
- if (len(wikifier.output.children) and
- "br" == getattr(wikifier.output.children[-1], 'tagname', '')):
- wikifier.output.children.pop(-1)
- if (len(wikifier.output.children) and
- "br" == getattr(wikifier.output.children[-1], 'tagname', '')):
- wikifier.output.children.pop(-1)
- output = HTML.Node(wikifier.output, "h%i" % wikifier.matchLength)
- wikifier.outputText(output, wikifier.nextMatch, termMatch.start())
- wikifier.nextMatch = termMatch.end()
- def GoogleCode_SimpleElement(wikifier, termRegExp=None, tagName=None, **kwargs):
- if wikifier.output.isInside(tagName):
- wikifier.outputText(wikifier.output, wikifier.matchStart, wikifier.nextMatch)
- return
- elif wikifier.source[wikifier.nextMatch-1] == "_":
- wikifier.outputText(wikifier.output, wikifier.matchStart, wikifier.nextMatch-1)
- if termRegExp.search(wikifier.source, wikifier.nextMatch) is None: return
- output = HTML.Node(wikifier.output, tagName, **kwargs)
- wikifier.subWikifyTerm(output, termRegExp)
- #if wikifier.source[wikifier.nextMatch-2] == "_":
- # wikifier.nextMatch -= 1
- def GoogleCode_Blockquote(wikifier, termRegExp=None, **kwargs):
- sibs = wikifier.output.children
- if len(sibs) and getattr(sibs[-1], 'tagname', None) == "blockquote":
- wikifier.subWikifyTerm(sibs[-1], termRegExp)
- else:
- output = HTML.blockquote(wikifier.output, **kwargs)
- wikifier.subWikifyTerm(output, termRegExp)
- def GoogleCode_Codeblock(wikifier, tagName=None, termRegExp=None, initRegExp=None, **kwargs):
- if 'attribs' not in kwargs:
- kwargs['attribs'] = {}
- kwargs['attribs']['name'] = 'code'
- if 'class' not in kwargs['attribs']:
- kwargs['attribs']['class'] = wikifier.defaultHiLang.lower()
- else:
- kwargs['attribs']['class'] += " " + wikifier.defaultHiLang.lower()
- output = HTML.Node(wikifier.output, tagName, **kwargs)
- tcount = 1
- matchStart = wikifier.nextMatch
- # Find the matching terminator
- while tcount > 0:
- nextTermMatch = termRegExp.search(wikifier.source, wikifier.nextMatch)
- nextInitMatch = initRegExp.search(wikifier.source, wikifier.nextMatch)
- if not nextTermMatch:
- # No terminator. Syntax error, just ignore it.
- matchEnd = matchStart
- tcount = 0
- elif not nextInitMatch or nextTermMatch.start() <= nextInitMatch.start():
- # Terminator goes first.
- nextMatch = nextTermMatch
- tcount -= 1
- if tcount > 0:
- matchEnd = nextMatch.end()
- else:
- matchEnd = nextMatch.start()
- else:
- nextMatch = nextInitMatch
- tcount += 1
- matchEnd = nextMatch.end()
- wikifier.nextMatch = nextMatch.end()
- # Copy the content
- wikifier.outputText(output, matchStart, matchEnd)
- if "\n" not in wikifier.source[matchStart:matchEnd]:
- output.tagname = "code"
- def GoogleCode_WikiWord(wikifier, **kwargs):
- if wikifier.matchStart > 0:
- # Reject the match when preceded by an alphanumeric character or '!'
- preRegExp = re.compile("[!A-Za-z0-9]", re.M)
- preMatch = preRegExp.search(wikifier.source, wikifier.matchStart-1)
- if (preMatch is not None and
- preMatch.start() == wikifier.matchStart-1):
- wikifier.outputText(wikifier.output,wikifier.matchStart,wikifier.nextMatch)
- return
- if wikifier.source[wikifier.matchStart] == "!":
- wikifier.outputText(wikifier.output,wikifier.matchStart+1,wikifier.nextMatch)
- elif GoogleCode_Exists(wikifier, wikifier.matchText):
- # Full link, everybody sees it
- HTML.a(wikifier.output, text=wikifier.matchText, attribs={"href": wikifier.matchText + wikifier.suffix})
- elif wikifier.autolink:
- # Partial link - only authorized users
- wikifier.outputText(wikifier.output,wikifier.matchStart,wikifier.nextMatch)
- link = HTML.a(wikifier.output, text="?", attribs={"href": wikifier.matchText + wikifier.suffix})
- else:
- wikifier.outputText(wikifier.output,wikifier.matchStart,wikifier.nextMatch)
- def GoogleCode_LineBreak(wikifier, **kwargs):
- sibs = wikifier.output.children
- if wikifier.multibreak:
- HTML.br(wikifier.output, **kwargs)
- elif len(sibs) and (not hasattr(sibs[-1], 'tagname') or
- sibs[-1].tagname == "img"):
- # Only after an inline or header block.
- HTML.br(wikifier.output, **kwargs)
- HTML.br(wikifier.output, **kwargs)
- def GoogleCode_PrettyLink(wikifier, lookaheadRegExp=None, **kwargs):
- lookMatch = lookaheadRegExp.search(wikifier.source, wikifier.matchStart)
- if lookMatch and lookMatch.start() == wikifier.matchStart:
- text = lookMatch.group(1)
- if lookMatch.group(2):
- # Pretty bracketted link
- link = text
- text = lookMatch.group(2)
- if GoogleCode_IsExternalLink(wikifier, link):
- # External link
- attribs={"href":link, "target": "_blank" }
- else:
- # Internal link
- attribs={"href":link + wikifier.suffix}
- e = HTML.a(wikifier.output, attribs=attribs)
- if URL.match(text):
- HTML.img(e, attribs={'src':text,
- 'border': '0'})
- HTML.br(wikifier.output)
- else:
- HTML.Text(e, text)
- else:
- if GoogleCode_IsExternalLink(wikifier, text):
- # External link
- attribs={"href":link, "target": "_blank" }
- else:
- # Internal link
- attribs={"href":text + wikifier.suffix}
- # Simple bracketted link
- e = HTML.a(wikifier.output, text=text, attribs=attribs)
- wikifier.nextMatch = lookMatch.end()
- def GoogleCode_UrlLink(wikifier, **kwargs):
- attribs = {"href": wikifier.matchText}
- if GoogleCode_IsExternalLink(wikifier, wikifier.matchText):
- attribs["target"] = "_blank"
- if IMGURL.match(wikifier.matchText):
- HTML.img(wikifier.output, attribs={'src':wikifier.matchText})
- HTML.br(wikifier.output)
- elif YOUTUBEURL.match(wikifier.matchText):
- match = YOUTUBEURL.match(wikifier.matchText)
- # Raw html ;)
- wikifier.output.children.append(YOUTUBEREPL % {"id": match.group(1)})
- elif VIDEOURL.match(wikifier.matchText):
- # Raw html ;)
- wikifier.output.children.append(VIDEOREPL % wikifier.matchText)
- elif CODEURL.match(wikifier.matchText):
- # Raw html ;)
- # http://([^\.]+).googlecode.com/svn/trunk/([^\#]+)#([^\:]+)(?:\:([^\W]+))?
- codeMatch = CODEURL.match(wikifier.matchText)
- parts = { "class": (codeMatch.group(4) or "").lower()[1:],
- "file": codeMatch.group(2),
- "site": codeMatch.group(1)}
- lines = codeMatch.group(3)
- if '-' in lines:
- # Open-ended ranges like "10-" or "-20" default to file start/end
- lines = lines.split('-')
- lines[0] = int(lines[0] or 1)
- lines[1] = int(lines[1]) if lines[1] else None
- else:
- lines = [int(lines), int(lines)]
- parts['class'] += ":firstline[%i]" % lines[0]
- url, parts['lines'] = GoogleCode_ReadSVNFile(wikifier, parts['site'],
- parts['file'], *lines)
- parts['url'] = url
- parts['lines'] = "".join(parts['lines'])
- wikifier.output.children.append(CODEREPL % parts)
- else:
- HTML.a(wikifier.output, text=wikifier.matchText, attribs=attribs)
- def GoogleCode_Table(wikifier, sepRegExp=None, termRegExp=None, **kwargs):
- sibs = wikifier.output.children
- if len(sibs) and getattr(sibs[-1], 'tagname', None) == "table":
- table = sibs[-1]
- else:
- table = HTML.table(wikifier.output)
- row = HTML.tr(table)
- termMatch = termRegExp.search(wikifier.source, wikifier.matchStart)
- if termMatch is None:
- termEnd = termStart = len(wikifier.source)
- else:
- termStart, termEnd = termMatch.start(), termMatch.end()
- # Skip over the leading separator
- sepMatch = sepRegExp.search(wikifier.source, wikifier.matchStart)
- wikifier.nextMatch = wikifier.matchStart = sepMatch.end()
- sepMatch = sepRegExp.search(wikifier.source, wikifier.matchStart)
- attribs = { "style": "border: 1px solid #aaa; padding: 5px;" }
- while sepMatch and sepMatch.end() <= termStart:
- cell = HTML.td(row, attribs=attribs)
- wikifier.subWikifyTerm(cell, sepRegExp)
- wikifier.nextMatch = sepMatch.end()
- sepMatch = sepRegExp.search(wikifier.source, wikifier.nextMatch)
- wikifier.nextMatch = termEnd
- def GoogleCode_List(wikifier, lookaheadRegExp=None, termRegExp=None, **kwargs):
- currLevel = 0
- currType = None
- stack = [wikifier.output]
- indents = [currLevel]
- wikifier.nextMatch = wikifier.matchStart
- lookMatch = lookaheadRegExp.search(wikifier.source, wikifier.nextMatch)
- while lookMatch and lookMatch.start() == wikifier.nextMatch:
- # See what kind of list it is
- if lookMatch.group(1):
- listType = "ul"
- itemType = "li"
- elif lookMatch.group(2):
- listType = "ol"
- itemType = "li"
- listLevel = len(lookMatch.group(0))
- wikifier.nextMatch += len(lookMatch.group(0))
- # Check for any changes in list type or indentation
- if listLevel > currLevel:
- # Indent further
- indents.append(listLevel)
- if currLevel == 0:
- target = stack[-1]
- else:
- target = stack[-1].children[-1]
- stack.append(HTML.Node(target, listType))
- elif listLevel < currLevel:
- # Indent less
- while indents[-1] > listLevel:
- stack.pop(-1)
- indents.pop(-1)
- elif listLevel == currLevel and listType != currType:
- # Same level, different kind of list
- stack.pop(-1)
- stack.append(HTML.Node(stack[-1].children[-1], listType))
- currLevel = listLevel
- currType = listType
- # Output the item
- output = HTML.Node(stack[-1],itemType)
- wikifier.subWikifyTerm(output,termRegExp)
- # Roll again
- lookMatch = lookaheadRegExp.search(wikifier.source, wikifier.nextMatch)
- GoogleCodeWikiFormat = [
- {
- "name": "tablerow",
- "match": r"^(?:\|\|.+\|\|)",
- "termRegExp": re.compile(r"(\n)", re.M),
- "sepRegExp": re.compile(r"(\|\|)", re.M),
- "handler": GoogleCode_Table
- },
- { "name": "heading",
- "match": r"^={1,6}",
- "termRegExp": re.compile(r"([=]+)", re.M),
- "handler": GoogleCode_Heading
- },
- { "name": "list",
- "match": r"^(?:[ ]+)(?:[\*#])",
- "lookaheadRegExp": re.compile(r"^(?:[ ]+)(?:(\*)|(#))",re.M),
- "termRegExp": re.compile(r"(\n)", re.M),
- "handler": GoogleCode_List
- },
- { "name": "blockquote",
- "match": r"^(?:[ ]+)",
- "termRegExp": re.compile(r"(\n)", re.M),
- "handler": GoogleCode_Blockquote,
- "tagName": "blockquote"
- },
- { "name": "codeword",
- "match": r"\`",
- "initRegExp": re.compile(r"(\`)", re.M),
- "termRegExp": re.compile(r"(\`)", re.M),
- "handler": GoogleCode_Codeblock,
- "tagName": "code"
- },
- { "name": "codeblock",
- "match": r"\{\{\{",
- "initRegExp": re.compile(r"(\{\{\{)", re.M),
- "termRegExp": re.compile(r"(\}\}\})", re.M),
- "handler": GoogleCode_Codeblock,
- "tagName": "pre",
- "attribs": { "class": "codeblock" }
- },
- { "name": "bold",
- "match": r"[\*]",
- "termRegExp": re.compile(r"([\*])", re.M),
- "handler": GoogleCode_SimpleElement,
- "tagName": "b"
- },
- { "name": "italic",
- "match": r"(?:[^\w\b]|^)[\_]",
- "termRegExp": re.compile(r"([\_])[^\w\b]", re.M),
- "handler": GoogleCode_SimpleElement,
- "tagName": "i"
- },
- { "name": "strike",
- "match": r"\~\~",
- "termRegExp": re.compile(r"(\~\~)", re.M),
- "handler": GoogleCode_SimpleElement,
- "tagName": "strike"
- },
- { "name": "superscript",
- "match": r"\^",
- "termRegExp": re.compile(r"(\^)", re.M),
- "handler": GoogleCode_SimpleElement,
- "tagName": "sup"
- },
- { "name": "subscript",
- "match": r",,",
- "termRegExp": re.compile(r"(,,)", re.M),
- "handler": GoogleCode_SimpleElement,
- "tagName": "sub"
- },
- { "name": "prettyLink",
- "match": r"\[(?:(?:[A-Za-z][A-Za-z0-9#\_\-]+)|(?:(?:file|http|https|mailto|ftp|irc|news|data):[^\s'\"]+(?:/|\b)))(?: .*?)?\]",
- "lookaheadRegExp": re.compile(r'\[(.*?)(?: (.*?))?\]', re.M),
- "handler": GoogleCode_PrettyLink
- },
- { "name": "wikiword",
- "match": r"(?:\!?(?:[A-Z]+[a-z]+[A-Z][A-Za-z]*)|(?:[A-Z]{2,}[a-z]+))",
- "handler": GoogleCode_WikiWord
- },
- { "name": "urlLink",
- "match": URLSTR,
- "handler": GoogleCode_UrlLink
- },
- { "name": "linebreak",
- "match": r"\n\n",
- "handler": GoogleCode_LineBreak,
- "empty": True
- },
- ]
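- # Markup accepted by the table above, in brief (illustrative):
- #   = Heading = ... ====== h6 ======      headings
- #   *bold*  _italic_  ~~strike~~  ^sup^  ,,sub,,
- #   `inline code`  and  {{{ code blocks }}}
- #   || table || cells ||,  indented * / # list items
- #   [WikiPage label], [http://example.com label], CamelCase wikiwords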
- class Wikifier:
- def __init__(self, formatters, autolink=False, srcdir=os.getcwd(),
- multibreak=False, tabwidth=8, suffix=".html",
- hiLang="Python"):
- # Create the master regex
- forms = [ "(%s)" % r['match'] for r in formatters ]
- self.formatterRegExp = re.compile("|".join(forms), re.M)
- # Save the individual format handlers
- self.formatters = formatters
- self.autolink = autolink
- self.srcdir = srcdir
- self.multibreak = bool(multibreak)
- self.tabwidth = tabwidth
- self.suffix = suffix
- self.defaultHiLang = hiLang
- def _clean(self, text):
- text = text.replace("\r\n", "\n")
- # Out, out, damned tabs
- text = text.replace("\t", " " * self.tabwidth)
- if not self.multibreak:
- # Remove redundant line breaks
- tlen = len(text) + 1
- while tlen > len(text):
- tlen = len(text)
- text = text.replace("\n\n\n", "\n\n")
- while text.startswith("#"):
- # Process any wiki-headers
- line, _, text = text.partition("\n")
- self._header(line)
- return text
- def _header(self, line):
- tagname, content = line.split(" ", 1)
- if tagname == "#summary":
- self.summary = content
- elif tagname == "#labels":
- self.labels = tuple(content.split(","))
- def wikify(self, source, labels=None, summary=None):
- self.labels = labels
- self.summary = summary
- # Clean up the content
- self.source = self._clean(source)
- self.nextMatch = 0
- # Do it
- self.output = HTML.div(None)
- self.subWikifyUnterm()
- return "".join([str(c) for c in self.output.children])
- def findMatch(self, source, start):
- return self.formatterRegExp.search(source, start)
- def subWikifyUnterm(self, output=None):
- oldOutput = self.output
- if output is not None:
- self.output = output
- match = self.findMatch(self.source, self.nextMatch)
- while match:
- # Output any text before the match
- if match.start() > self.nextMatch:
- self.outputText(self.output, self.nextMatch, match.start())
- # Set the match parameters for the handler
- self.matchStart = match.start()
- self.matchLength = len(match.group(0))
- self.matchText = match.group(0)
- self.nextMatch = match.end()
- # Figure out which sub-group matched (zero-indexed)
- t,submatch = [ (t,s) for t, s in enumerate(match.groups()) if s ][0]
- # Handle it
- self.formatters[t]['handler'](self, **self.formatters[t])
- # Go back for more matches
- match = self.findMatch(self.source, self.nextMatch)
- if self.nextMatch < len(self.source):
- self.outputText(self.output, self.nextMatch, len(self.source))
- self.nextMatch = len(self.source)
- # Restore the destination node
- self.output = oldOutput
- def subWikifyTerm(self, output, termRegExp):
- oldOutput = self.output
- if output is not None:
- self.output = output
- # Get the first matches for the formatter and terminator RegExps
- termMatch = termRegExp.search(self.source, self.nextMatch)
- if termMatch:
- match = self.findMatch(self.source[:termMatch.start()], self.nextMatch)
- else:
- match = self.findMatch(self.source, self.nextMatch)
- while termMatch or match:
- # If the terminator comes before the next formatter match, we're done
- if termMatch and (not match or termMatch.start() <= match.start()):
- if termMatch.start() > self.nextMatch:
- self.outputText(self.output,self.nextMatch,termMatch.start())
- self.matchText = termMatch.group(1)
- self.matchLength = len(self.matchText)
- self.matchStart = termMatch.start()
- self.nextMatch = self.matchStart + self.matchLength
- self.output = oldOutput
- return
- # Output any text before the match
- if match.start() > self.nextMatch:
- self.outputText(self.output, self.nextMatch, match.start())
- # Set the match parameters for the handler
- self.matchStart = match.start()
- self.matchLength = len(match.group(0))
- self.matchText = match.group(0)
- self.nextMatch = match.end()
- # Figure out which sub-group matched (zero-indexed)
- t,submatch = [ (t,s) for t, s in enumerate(match.groups()) if s ][0]
- # Handle it
- self.formatters[t]['handler'](self, **self.formatters[t])
- termMatch = termRegExp.search(self.source, self.nextMatch)
- if termMatch:
- match = self.findMatch(self.source[:termMatch.start()], self.nextMatch)
- else:
- match = self.findMatch(self.source, self.nextMatch)
- if self.nextMatch < len(self.source):
- self.outputText(self.output, self.nextMatch,len(self.source))
- self.nextMatch = len(self.source)
- self.output = oldOutput
- def outputText(self, output, startPos, endPos):
- HTML.Text(output, self.source[startPos:endPos])
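- # Usage sketch (illustrative): wikify() returns an HTML fragment built
- # from the formatter table, e.g.
- #   w = Wikifier(GoogleCodeWikiFormat)
- #   fragment = w.wikify("= Title =\nSome *bold* and _italic_ text.")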
- DEFAULT_TEMPLATE = '''
- <!DOCTYPE HTML PUBLIC "-//W3C//DTD HTML 4.01//EN">
- <html>
- <body>
- <div id="page">
- <div id='header'>
- <br style="clear: both" /><br/>
- </div>
- <div id="pagecontent">
- <div class="index">
- <!-- This is a (PRE) block. Make sure it's left aligned or your toc title will be off. -->
- %(toc)s
- </div>
- <h1>%(title)s</h1>
- <div class="summary">
- %(summary)s
- </div>
- <div class="narrow">
- %(wiki)s
- </div>
- </div>
- </div>
- </body>
- </html>
- '''
- def wikify(pages, options=None):
- # See options definition below.
- # Pass any object with those (potential) attributes
- srcdir = getattr(options, 'srcdir', os.getcwd())
- destdir = getattr(options, 'destdir', None)
- # Find all requested files
- onlyStale = False
- if getattr(options, 'all', False):
- pages = [ k for k in os.listdir(srcdir)
- if k.endswith(".wiki") ]
- onlyStale = True
- if destdir is None:
- destdir = os.getcwd()
- # Create the magic 8-ball
- w = Wikifier(GoogleCodeWikiFormat,
- autolink=getattr(options, 'autolink', False),
- tabwidth=getattr(options, 'tabwidth', 8),
- multibreak=getattr(options, 'multibreak', False),
- srcdir=srcdir,
- suffix=".html")
- rets = []
- for wikiname in pages:
- # Clean up the page name
- if wikiname.endswith(".wiki"):
- wikiname = wikiname[:-5]
- wikifilename = os.path.join(srcdir, "%s.wiki" % wikiname)
- if onlyStale:
- # See if the output is fresh, and if so, skip it
- wikidestname = os.path.join(destdir, "%s.html" % wikiname)
- try:
- sstat = os.stat(wikifilename)
- except OSError:
- continue
- try:
- dstat = os.stat(wikidestname)
- except OSError:
- pass
- else:
- if dstat.st_mtime > sstat.st_mtime:
- continue
- # Load the wiki content
- wikisrc = file(wikifilename).read()
- # Ask a question
- wikified = w.wikify(wikisrc)
- reFind = re.compile(r'<h(\d)>\s*([^\<]*[\S])\s*</h\d>')
- strRepl = r'<h\g<1>><a name="\g<2>">\g<2></a></h\g<1>>'
- # Number the sections
- if getattr(options, 'number', True):
- sectstack = []
- matches = []
- curLevel = 0
- match = reFind.search(wikified)
- while match is not None:
- level = int(match.group(1))
- while level > len(sectstack):
- sectstack.append(1)
- while len(sectstack) > level:
- sectstack.pop(-1)
- if curLevel >= level:
- sectstack[-1] += 1
- curLevel = len(sectstack)
- sectnum = ".".join([str(n) for n in sectstack]) + "."
- matches.append((sectnum, match))
- match = reFind.search(wikified, match.end())
- matches.reverse()
- for sectnum, match in matches:
- wikified = wikified[:match.start()+4] + sectnum + " " + wikified[match.start()+4:]
- # Generate the TOC
- if getattr(options, 'toc', True):
- matches = [ '<b>%s: Contents</b>' % wikiname ]
- for match in reFind.findall(wikified):
- if int(match[0]) > getattr(options, 'levels', 3): continue
- indent = " " * ((int(match[0])) * 2)
- href = "#" + match[1]
- anchor = '%s<a href="%s">%s</a>' % (indent, href, match[1])
- matches.append(anchor)
- toc = "<br>".join(matches)
- else:
- toc = "" #-e -d /home/adam/src/CSpaceWiki/
- # Generate the body links
- if getattr(options, 'links', True):
- wikified = reFind.sub(strRepl, wikified)
- # Find a summary
- summary = ""
- if w.summary is not None:
- summary = w.summary
- if not getattr(options, 'raw', False):
- # Fill the template
- wikified = options.template % {
- "toc": toc,
- "title": wikiname,
- "wiki": wikified,
- "summary": summary }
- # Save it or write it
- if destdir is not None:
- outputname = os.path.join(destdir, "%s.html" % wikiname)
- file(outputname,"w").write(wikified)
- mainpage = getattr(options, 'mainpage', 'MainPage')
- if wikiname == mainpage:
- rets.append((wikiname, outputname))
- outputname = os.path.join(destdir, "index.html")
- file(outputname,"w").write(wikified)
- wikified = outputname
- rets.append((wikiname, wikified))
- return rets
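- # Usage sketch (illustrative): any object exposing the attributes read
- # with getattr() above can serve as "options"; Opts here is hypothetical:
- #   class Opts: raw = True; srcdir = "wiki"; destdir = None
- #   results = wikify(["MainPage"], Opts())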
- if __name__ == "__main__":
- from optparse import OptionParser
- import sys
- parser = OptionParser()
- # Output format options
- parser.add_option("-t", "--template", dest="template",
- help="use page template to wrap wiki output", metavar="TPLTFILE")
- parser.add_option("-n", "--number", dest="number",
- help="number the headings in the body and table of contents")
- parser.add_option("-l", "--levels", dest="levels", type="int",
- help="create toc to depth LEVELS", metavar="LEVELS")
- parser.add_option("-c", "--skiptoc", dest="toc", action="store_false",
- help="leave toc out, even if template has slot")
- parser.add_option("-u", "--unlink", dest="links", action="store_false",
- help="don't create named anchors for toc links")
- parser.add_option("-a", "--autolink", dest="autolink", action="store_false",
- help="autolink wiki words that don't exist")
- parser.add_option("-w", "--tabwidth", dest="tabwidth", type="int",
- help="replace tabs by WIDTH spaces", metavar="WIDTH")
- parser.add_option("-m", "--multibreak", dest="multibreak", action="store_true",
- help="don't collapse multiple line breaks")
- parser.add_option("-r", "--raw", dest="raw", action="store_true",
- help="raw wiki translation -- no wrapping, no toc, no links")
- parser.add_option("-p", "--mainpage", dest="mainpage", metavar="PAGENAME",
- help="set main page to PAGENAME")
- # Batch / Location options
- parser.add_option("-s", "--srcdir", dest="srcdir",
- help="wiki format sources in SRCDIR", metavar="SRCDIR")
- parser.add_option("-d", "--destdir", dest="destdir",
- help="write html output into DESTDIR", metavar="DESTDIR")
- parser.add_option("-e", "--stale", dest="all", action="store_true",
- help="convert all wiki files that are stale or missing from DESTDIR")
- parser.set_default('toc', True)
- parser.set_default('links', True)
- parser.set_default('template', None)
- parser.set_default('number', False)
- parser.set_default('levels', 3)
- parser.set_default('tabwidth', 8)
- parser.set_default('multibreak', False)
- parser.set_default('mainpage', "MainPage") # Identity of index
- parser.set_default('srcdir', os.getcwd())
- parser.set_default('destdir', None)
- parser.set_default('all', False)
- # Parse the command line
- (options, args) = parser.parse_args()
- if options.template is None:
- options.template = DEFAULT_TEMPLATE
- elif os.path.exists(options.template):
- options.template = file(options.template).read()
- else:
- print "Template not found: %s" % options.template
- parser.print_usage()
- sys.exit()
- for wikiname, htmldata in wikify(args, options):
- if options.destdir:
- print wikiname + ":",
- if htmldata is not None:
- print htmldata
- else:
- print "Complete."
- elif htmldata is not None:
- print htmldata