wikify.py | searchcode

/luads/docs/wikify.py

http://microlua.googlecode.com/
Python | 989 lines | 847 code | 47 blank | 95 comment | 58 complexity | 3d991cd6add66bfcc8677f34dd7f00c1 MD5 | raw file
Possible License(s): GPL-3.0

#!/usr/bin/python
#
# wikify.py - Convert from wikitext to HTML
# Based on large portions of JeremyRuston's TiddlyWiki JS Wikifier
# Changed to GoogleCode wiki syntax, python by Michael Crawford <mike@dataunity.com>
# Tweaked to suit the needs of the Micro Lua project by Christophe Gragnic
# november 2009 (Grahack).
""" Convert wikitext to HTML """

# Jeremy's license:
#   Copyright (c) UnaMesa Association 2004-2007
#
#   Redistribution and use in source and binary forms, with or without modification,
#   are permitted provided that the following conditions are met:
#
#   Redistributions of source code must retain the above copyright notice, this
#   list of conditions and the following disclaimer.
#
#   Redistributions in binary form must reproduce the above copyright notice, this
#   list of conditions and the following disclaimer in the documentation and/or other
#   materials provided with the distribution.
#
#   Neither the name of the UnaMesa Association nor the names of its contributors may be
#   used to endorse or promote products derived from this software without specific
#   prior written permission.
#
#   THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
#   AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
#   IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
#   ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
#   LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
#   CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
#   SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
#   INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
#   CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
#   ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
#   POSSIBILITY OF SUCH DAMAGE.
#
# My license:
#   Copyright (c) Data Unity 2007
#
#   Redistribution and use in source and binary forms, with or without modification,
#   are permitted provided that the following conditions are met:
#
#   Redistributions of source code must retain the above copyright notice, this
#   list of conditions and the following disclaimer.
#
#   Redistributions in binary form must reproduce the above copyright notice, this
#   list of conditions and the following disclaimer in the documentation and/or other
#   materials provided with the distribution.
#
#   Neither the name of the Data Unity nor the names of its contributors may be
#   used to endorse or promote products derived from this software without
#   specific prior written permission.
#
#   THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
#   AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
#   IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
#   ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
#   LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
#   CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
#   SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
#   INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
#   CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
#   ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
#   POSSIBILITY OF SUCH DAMAGE.

import re, os, os.path, htmlentitydefs, urllib

def spacedwikiword(wikiword):
    ret = ''
    first = True
    for l in wikiword:
        if not first and l.isupper():
            ret += ' '
        ret += l
        first = False
    return ret

class _HTML:
    """ An HTML node factory factory. """
    
    class Node:
        """ An HTML element. """
        def __init__(self, parent, tagname, text="", attribs={}, empty=False, **kwargs):
            self.tagname = tagname
            self.attribs = dict(attribs)
            self.children = list()
            self.empty = empty
            if text != "":
                self.appendText(text)
            if parent is not None:
                parent.children.append(self)
            self.parent = parent
        
        def appendText(self, text):
            if text == "": return
            _HTML.Text(self, text)
        
        def __str__(self):
            attrs = " ".join([ '%s="%s"' % i for i in self.attribs.iteritems() ])
            if attrs: attrs = " " + attrs
            if self.empty:
                return "<%s%s/>" % (self.tagname, attrs)
            
            children = "".join([str(c) for c in self.children])
            return "<%s%s>%s</%s>" % (self.tagname, attrs, children, self.tagname)
        
        def isInside(self, tagname):
            k = self
            while k is not None:
                if k.tagname == tagname:
                    return True
                k = k.parent
            return False

    class Text:
        """ Simple text node. """
        entities = [ (k,v)
             for k,v in htmlentitydefs.entitydefs.iteritems()
             if k != "amp" and k[0] != "#" ]

        def __init__(self, parent, text=""):
            self.text = self._clean(text)
            if parent is not None:
                parent.children.append(self)

        def _clean(self, text):
            text = text.replace("&", "&amp;")
            return text

        def __str__(self):
            return self.text
    
    
    def __getattr__(self, attr):
        """ Return an element constructor using the attribute as the tagname """
        def factory(parent=None, **kwargs):
            return self.Node(parent, attr, **kwargs)
        return factory
    
HTML = _HTML()

URLSTR = r"(?:file|http|https|mailto|ftp|irc|news|data):[^\s'\"]+(?:/|\b)"
URL = re.compile(URLSTR, re.M)
IMGURLSTR = r".+((\.[Pp][Nn][Gg])|(\.[Gg][Ii][Ff])|(\.[Jj][Pp][Ee]?[Gg]))"
IMGURL = re.compile(IMGURLSTR, re.M)
YOUTUBESTR = r"http://www.youtube.com/watch\?v=([A-Za-z0-9_-]+)"
YOUTUBEURL = re.compile(YOUTUBESTR, re.M)
YOUTUBEREPL = r'<object width="425" height="355"><param name="movie" value="http://www.youtube.com/v/%s&rel=1"></param><param name="wmode" value="transparent"></param><embed src="http://www.youtube.com/v/hQPHf_8J8Eg&rel=1" type="application/x-shockwave-flash" wmode="transparent" width="425" height="355"></embed></object>'
VIDEOURLSTR = r".+((\.[Aa][Vv][Ii])|(\.[Mm][Oo][Vv])|(\.[Mm][Pp][Ee]?[Gg]))"
VIDEOURL = re.compile(VIDEOURLSTR, re.M)
VIDEOREPL = r'<embed src = "%s"  width="400" height="350"  hidden=false autostart=true loop=1>'
CODEURLSTR = r"http://([^\.]+).googlecode.com/svn/trunk/([^#]+)#((?:(?:(?:[\d]+)?\-)?[\d]+)|(?:[\d]+\-?))((?:\:(?:[\:]|[^\W])+))?"
CODEURL = re.compile(CODEURLSTR, re.M)
CODEREPL = r'<a href="%(url)s">svn://%(site)s/trunk/%(file)s</a><pre name="code" class="%(class)s">%(lines)s</pre>'

def GoogleCode_ReadSVNFile(wikifier, domain, path, start, end):
    """ Try to read a file from subversion for inclusion in the wiki. """
    
    gcurl = "http://%s.googlecode.com/svn/trunk/%s" % (domain,path)
    fdata = urllib.urlopen(gcurl).readlines()
    return gcurl, fdata[start-1:end]

def GoogleCode_IsExternalLink(wikifier, link):
    """ See if the link points outside of the wiki. """
    
    if GoogleCode_Exists(wikifier, link):
        return False;
    
    if URL.match(link):
        return True

    if '.' in link or '\\' in link or '/' in link or '#' in link:
        return True
    
    return False

def GoogleCode_Exists(wikifier, wikipage):
    """ See if a wiki page exists inside this wiki. """
    path = os.path.join(wikifier.srcdir, "%s.wiki" % wikipage)
    if os.path.exists(path):
        return True
    return False

def GoogleCode_WikiComment(wikifier, lookaheadRegExp=None, termRegExp=None, **kwargs):
    termMatch = termRegExp.search(wikifier.source, wikifier.nextMatch)
    if termMatch is None: return
    output = HTML.Node(wikifier.output, 'div', **kwargs)
    wikifier.outputText(output, wikifier.nextMatch, termMatch.start())
    wikifier.nextMatch = termMatch.end()

def GoogleCode_WikiToc(wikifier, lookaheadRegExp=None, termRegExp=None, **kwargs):
    pass

def GoogleCode_Heading(wikifier, termRegExp=None, **kwargs):
    termMatch = termRegExp.search(wikifier.source, wikifier.nextMatch)
    if termMatch is None: return
    if (len(wikifier.output.children) and
        "br" == getattr(wikifier.output.children[-1], 'tagname', '')):
        wikifier.output.children.pop(-1)
        if (len(wikifier.output.children) and
            "br" == getattr(wikifier.output.children[-1], 'tagname', '')):
            wikifier.output.children.pop(-1)
    output = HTML.Node(wikifier.output, "h%i" % wikifier.matchLength)
    wikifier.outputText(output, wikifier.nextMatch, termMatch.start())
    wikifier.nextMatch = termMatch.end()

def GoogleCode_SimpleElement(wikifier, termRegExp=None, tagName=None, **kwargs):
    if wikifier.output.isInside(tagName):
        wikifier.outputText(wikifier.output, wikifier.matchStart, wikifier.nextMatch)
        return
    elif wikifier.source[wikifier.nextMatch-1] == "_":
        wikifier.outputText(wikifier.output, wikifier.matchStart, wikifier.nextMatch-1)
        
    if termRegExp.search(wikifier.source, wikifier.nextMatch) is None: return
    output = HTML.Node(wikifier.output, tagName, **kwargs)
    wikifier.subWikifyTerm(output, termRegExp)
    
def GoogleCode_Blockquote(wikifier, termRegExp=None, **kwargs):
    sibs = wikifier.output.children
    if len(sibs) and getattr(sibs[-1], 'tagname', None) == "blockquote":
        wikifier.subWikifyTerm(sibs[-1], termRegExp)
    else:
        output = HTML.blockquote(wikifier.output, **kwargs)
        wikifier.subWikifyTerm(output, termRegExp)
    
def GoogleCode_Codeblock(wikifier, tagName=None, termRegExp=None, initRegExp=None, **kwargs):
    if 'attribs' not in kwargs:
        kwargs['attribs'] = {}
    
    kwargs['attribs']['name'] = 'code'
    if 'class' not in kwargs['attribs']:
        kwargs['attribs']['class'] = wikifier.defaultHiLang.lower()
    else:
        kwargs['attribs']['class'] += " " + wikifier.defaultHiLang.lower()
        
    output = HTML.Node(wikifier.output, tagName, **kwargs)
    tcount = 1
    matchStart = wikifier.nextMatch
    # Find the matching terminator
    while tcount > 0:
        nextTermMatch = termRegExp.search(wikifier.source, wikifier.nextMatch)
        nextInitMatch = initRegExp.search(wikifier.source, wikifier.nextMatch)
        
        if not nextTermMatch:
            # No terminator. Syntax error, just ignore it.
            matchEnd = matchStart
            tcount = 0
        elif not nextInitMatch or nextTermMatch.start() <= nextInitMatch.start():
            # Terminator goes first.
            nextMatch = nextTermMatch
            tcount -= 1
            if tcount > 0:
                matchEnd = nextMatch.end()
            else:
                matchEnd = nextMatch.start()
        else:
            nextMatch = nextInitMatch
            tcount += 1
            matchEnd = nextMatch.end()
            wikifier.nextMatch = nextMatch.end()
        
    # Copy the content
    wikifier.outputText(output, matchStart, matchEnd)
    
    if "\n" not in wikifier.source[matchStart:matchEnd]:
        output.tagname = "code"

def GoogleCode_WikiWord(wikifier, **kwargs):
    if wikifier.matchStart > 0:
        # Make sure we're at the start of a word?
        preRegExp = re.compile("[!A-Za-z0-9]", re.M)
        preMatch = preRegExp.search(wikifier.source, wikifier.matchStart-1)
        if (preMatch is not None and
            preMatch.start() == wikifier.matchStart-1):
            wikifier.outputText(wikifier.output,wikifier.matchStart,wikifier.nextMatch)
            return

    if wikifier.source[wikifier.matchStart] == "!":
        wikifier.outputText(wikifier.output,wikifier.matchStart+1,wikifier.nextMatch)
    elif GoogleCode_Exists(wikifier, wikifier.matchText):
        # Full link, everybody sees it
        HTML.a(wikifier.output, text=wikifier.matchText, attribs={"href": wikifier.matchText + wikifier.suffix})
    elif wikifier.autolink:
        # Partial link - only authorized users
        wikifier.outputText(wikifier.output,wikifier.matchStart,wikifier.nextMatch)
        link = HTML.a(wikifier.output, text="?", attribs={"href": wikifier.matchText + wikifier.suffix})
    else:
        wikifier.outputText(wikifier.output,wikifier.matchStart,wikifier.nextMatch)

def GoogleCode_LineBreak(wikifier, **kwargs):
    sibs = wikifier.output.children
    if wikifier.multibreak:
        HTML.br(wikifier.output, **kwargs)
    elif len(sibs) and (not hasattr(sibs[-1], 'tagname') or
                        sibs[-1].tagname == "img"):
        # Only after an inline or header block.
        HTML.br(wikifier.output, **kwargs)
        HTML.br(wikifier.output, **kwargs)
        
def GoogleCode_PrettyLink(wikifier, lookaheadRegExp=None, **kwargs):
    lookMatch = lookaheadRegExp.search(wikifier.source, wikifier.matchStart)
    if lookMatch and lookMatch.start() == wikifier.matchStart:
        text = lookMatch.group(1)
        link = text
        if lookMatch.group(2):
            # Pretty bracketted link
            text = lookMatch.group(2)
            if GoogleCode_IsExternalLink(wikifier, link):
                # External link
                attribs={"href":link, "target": "_blank" }
            else:
                # Internal link
                attribs={"href":link + wikifier.suffix}
                
            e = HTML.a(wikifier.output, attribs=attribs)
            
            if URL.match(text):
                HTML.img(e, attribs={'src':text,
                                     'border': '0'})
                HTML.br(wikifier.output)
            else:
                HTML.Text(e, text)
        else:
            if GoogleCode_IsExternalLink(wikifier, text):
                # External link
                attribs={"href":link, "target": "_blank" }
            else:
                # Internal link
                attribs={"href":text + wikifier.suffix}
            
            # Simple bracketted link
            e = HTML.a(wikifier.output, text=text, attribs=attribs)
        wikifier.nextMatch = lookMatch.end()

def GoogleCode_UrlLink(wikifier, **kwargs):
    attribs = {"href": wikifier.matchText}
    if GoogleCode_IsExternalLink(wikifier, wikifier.matchText):
        attribs["target"] = "_blank"
    
    if IMGURL.match(wikifier.matchText):
        HTML.img(wikifier.output, attribs={'src':wikifier.matchText})
        HTML.br(wikifier.output)
    elif YOUTUBEURL.match(wikifier.matchText):
        match = YOUTUBEURL.match(wikifier.matchText)
        # Raw html ;)
        wikifier.output.children.append(YOUTUBEREPL % match.group(1))
    elif VIDEOURL.match(wikifier.matchText):
        # Raw html ;)
        wikifier.output.children.append(VIDEOREPL % wikifier.matchText)
    elif CODEURL.match(wikifier.matchText):
        # Raw html ;)
        # http://([^\.]+).googlecode.com/svn/trunk/([^\#]+)#([^\:]+)(?:\:([^\W]+))?
        
        codeMatch = CODEURL.match(wikifier.matchText)
        parts = { "class": (codeMatch.group(4) or "").lower()[1:],
                  "file": codeMatch.group(2),
                  "site": codeMatch.group(1)}

        lines = codeMatch.group(3)
        if '-' in lines:
            lines = lines.split('-')
            lines[0] = int(lines[0])
            lines[1] = int(lines[1])
        else:
            lines = [int(lines), int(lines)]
        
        parts['class'] += ":firstline[%i]" % lines[0]
        url, parts['lines'] = GoogleCode_ReadSVNFile(wikifier, parts['site'],
                                                parts['file'], *lines)
        parts['url'] = url
        parts['lines'] = "".join(parts['lines'])

        wikifier.output.children.append(CODEREPL % parts)
    else:
        HTML.a(wikifier.output, text=wikifier.matchText, attribs=attribs)


def GoogleCode_Table(wikifier, sepRegExp=None, termRegExp=None, **kwargs):
    sibs = wikifier.output.children
    if len(sibs) and getattr(sibs[-1], 'tagname', None) == "table":
        table = sibs[-1]
    else:
        table = HTML.table(wikifier.output)
    row = HTML.tr(table)
    
    termMatch = termRegExp.search(wikifier.source, wikifier.matchStart)
    if termMatch is None:
        termEnd = termStart = len(wikifier.source)
    else:
        termStart, termEnd = termMatch.start(), termMatch.end()
    
    # Skip over the leading separator
    sepMatch = sepRegExp.search(wikifier.source, wikifier.matchStart)
    wikifier.nextMatch = wikifier.matchStart = sepMatch.end()
    sepMatch = sepRegExp.search(wikifier.source, wikifier.matchStart)
    attribs = { "style": "border: 1px solid #aaa; padding: 5px;" }

    while sepMatch and sepMatch.end() <= termStart:
        cell = HTML.td(row, attribs=attribs)
        wikifier.subWikifyTerm(cell, sepRegExp)
        wikifier.nextMatch = sepMatch.end()
        sepMatch = sepRegExp.search(wikifier.source, wikifier.nextMatch)
    
    wikifier.nextMatch = termEnd


def GoogleCode_List(wikifier, lookaheadRegExp=None, termRegExp=None, **kwargs):
    currLevel = 0
    currType = None
    stack = [wikifier.output]
    indents = [currLevel]
    wikifier.nextMatch = wikifier.matchStart
    
    lookMatch = lookaheadRegExp.search(wikifier.source, wikifier.nextMatch)
    while lookMatch and lookMatch.start() == wikifier.nextMatch:
        # See what kind of list it is
        if lookMatch.group(1):
            listType = "ul"
            itemType = "li"
        elif lookMatch.group(2):
            listType = "ol"
            itemType = "li"
        
        listLevel = len(lookMatch.group(0))
        wikifier.nextMatch += len(lookMatch.group(0))

        # Check for any changes in list type or indentation
        if listLevel > currLevel:
            # Indent further
            indents.append(listLevel)
            if currLevel == 0:
                target = stack[-1]
            else:
                target = stack[-1].children[-1]
                
            stack.append(HTML.Node(target, listType))
                
        elif listLevel < currLevel:
            # Indent less
            while indents[-1] > listLevel:
                stack.pop(-1)
                indents.pop(-1)

        elif listLevel == currLevel and listType != currType:
            # Same level, different kind of list
            stack.pop(-1)
            stack.append(HTML.Node(stack[-1].children[-1], listType))
        
        currLevel = listLevel
        currType = listType

        # Output the item
        output = HTML.Node(stack[-1], itemType)
        wikifier.subWikifyTerm(output, termRegExp)

        # Roll again
        lookMatch = lookaheadRegExp.search(wikifier.source, wikifier.nextMatch)
        
GoogleCodeWikiFormat = [
    { "name": "tablerow",
      "match": r"^(?:\|\|.+\|\|)",
      "termRegExp": re.compile(r"(\n)", re.M),
      "sepRegExp": re.compile(r"(\|\|)", re.M),
      "handler": GoogleCode_Table
    },
    { "name": "heading",
      "match": r"^={1,6}",
      "termRegExp": re.compile(r"([=]+)", re.M),
      "handler": GoogleCode_Heading
    },
    { "name": "list",
      "match": r"^(?:[ ]+)(?:[\*#])",
      "lookaheadRegExp": re.compile(r"^(?:[ ]+)(?:(\*)|(#))",re.M),
      "termRegExp": re.compile(r"(\n)", re.M),
      "handler": GoogleCode_List
    },
    { "name": "blockquote",
      "match": r"^(?:[ ]+)",
      "termRegExp": re.compile(r"(\n)", re.M),
      "handler": GoogleCode_Blockquote,
      "tagName": "blockquote"
    },
    { "name": "codeword",
      "match": r"\`",
      "initRegExp": re.compile(r"(\`)", re.M),
      "termRegExp": re.compile(r"(\`)", re.M),
      "handler": GoogleCode_Codeblock,
      "tagName": "code"
    },
    { "name": "codeblock",
      "match": r"\{\{\{",
      "initRegExp": re.compile(r"(\{\{\{)", re.M),
      "termRegExp": re.compile(r"(\}\}\})", re.M),
      "handler": GoogleCode_Codeblock,
      "tagName": "pre",
      "attribs": { "class": "codeblock" }
    },
    { "name": "bold",
      "match": r"[\*]",
      "termRegExp": re.compile(r"([\*])", re.M),
      "handler": GoogleCode_SimpleElement,
      "tagName": "b"
    },
    { "name": "italic",
      "match": r"(?:[^\w\b]|^)[\_]",
      "termRegExp": re.compile(r"([\_])[^\w\b]", re.M),
      "handler": GoogleCode_SimpleElement,
      "tagName": "i"
    },
    { "name": "strike",
      "match": r"\~\~",
      "termRegExp": re.compile(r"(\~\~)", re.M),
      "handler": GoogleCode_SimpleElement,
      "tagName": "strike"
    },
    { "name": "superscript",
      "match": r"\^",
      "termRegExp": re.compile(r"(\^)", re.M),
      "handler": GoogleCode_SimpleElement,
      "tagName": "sup"
    },
    { "name": "subscript",
      "match": r",,",
      "termRegExp": re.compile(r"(,,)", re.M),
      "handler": GoogleCode_SimpleElement,
      "tagName": "sub"
    },
    { "name": "prettyLink",
      "match": r"\[(?:(?:[A-Za-z][A-Za-z0-9\_\-]+)|(?:(?:file|http|https|mailto|ftp|irc|news|data):[^\s'\"]+(?:/|\b)))(?: .*?)?\]",
      "lookaheadRegExp": re.compile(r'\[(.*?)(?: (.*?))?\]', re.M),
      "handler": GoogleCode_PrettyLink
    },
    { "name": "wikiword",
      "match": r"(?:\!?(?:[A-Z]+[a-z]+[A-Z][A-Za-z]*)|(?:[A-Z]{2,}[a-z]+))",
      "handler": GoogleCode_WikiWord
    },
    { "name": "urlLink",
      "match": URLSTR,
      "handler": GoogleCode_UrlLink
    },
    { "name": "linebreak",
      "match": r"\n\n",
      "handler": GoogleCode_LineBreak,
      "empty": True
    },
    { "name": "wikicomment",
      "match": r"<wiki:comment>",
      "initRegExp": re.compile(r"(<wiki:comment>)", re.M),
      "termRegExp": re.compile(r"(</wiki:comment>)", re.M),
      "handler": GoogleCode_WikiComment,
      "tagName": "div",
      "attribs": { "class": "comment" }
    },
    { "name": "wikitoc",
      "match": r"<wiki:toc />",
      "handler": GoogleCode_WikiToc
    },
]

class Wikifier:
    
    def __init__(self, formatters, autolink=False, srcdir=os.getcwd(),
                       multibreak=False, tabwidth=8, suffix=".html",
                       hiLang="Python"):
        # Create the master regex
        forms = [ "(%s)" % r['match'] for r in formatters ]
        self.formatterRegExp = re.compile("|".join(forms), re.M)
        # Save the individual format handlers
        self.formatters = formatters
        self.autolink = autolink
        self.srcdir = srcdir
        self.multibreak = multibreak and True or False
        self.tabwidth = tabwidth
        self.suffix = suffix
        self.defaultHiLang = hiLang

    def _clean(self, text):
        text = text.replace("\r\n", "\n")
        
        # Out, out, damned tabs
        text = text.replace("\t", " " * self.tabwidth)
        
        if not self.multibreak:
            # Remove redundant line breaks
            tlen = len(text) + 1
            while tlen > len(text):
                tlen = len(text)
                text = text.replace("\n\n\n", "\n\n")
        
        while text.startswith("#"):
            # Process any wiki-headers
            line, text = text.split("\n", 1)
            self._header(line)
            
        return text
    
    def _header(self, line):
        tagname, content = line.split(" ", 1)
        if tagname == "#summary":
            self.summary = content
        elif tagname == "#labels":
            self.labels = tuple(content.split(","))
            
    def wikify(self, source, labels=None, summary=None):
        self.labels = labels
        self.summary = summary
        # Clean up the content
        self.source = self._clean(source)
        self.nextMatch = 0
        # Do it
        self.output = HTML.div(None)
        self.subWikifyUnterm()

        return "".join([str(c) for c in self.output.children])

    def findMatch(self, source, start):
        return self.formatterRegExp.search(source, start)

    def subWikifyUnterm(self, output=None):
        oldOutput = self.output
        if output is not None:
            self.output = output
        
        match = self.findMatch(self.source, self.nextMatch)
        while match:
            # Output any text before the match
            if match.start() > self.nextMatch:
                self.outputText(self.output, self.nextMatch, match.start())
            
            # Set the match parameters for the handler
            self.matchStart = match.start()
            self.matchLength = len(match.group(0))
            self.matchText = match.group(0)
            self.nextMatch = match.end()
            
            # Figure out which sub-group matched (zero-indexed)
            t, submatch = [(t,s) for t, s in enumerate(match.groups()) if s][0]
            
            # Handle it
            self.formatters[t]['handler'](self, **self.formatters[t])

            # Go back for more matches
            match = self.findMatch(self.source, self.nextMatch)

        if self.nextMatch < len(self.source):
            self.outputText(self.output, self.nextMatch, len(self.source))
            self.nextMatch = len(self.source)

        # Restore the destination node
        self.output = oldOutput

    def subWikifyTerm(self, output, termRegExp):
        oldOutput = self.output
        if output is not None:
            self.output = output
        
        # Get the first matches for the formatter and terminator RegExps
        termMatch = termRegExp.search(self.source, self.nextMatch)
        if termMatch:
            match = self.findMatch(self.source[:termMatch.start()], self.nextMatch)
        else:
            match = self.findMatch(self.source, self.nextMatch)

        while termMatch or match:
            # If the terminator comes before the next formatter match, we're done
            if termMatch and (not match or termMatch.start() <= match.start()):
                if termMatch.start() > self.nextMatch:
                    self.outputText(self.output,self.nextMatch,termMatch.start())
                self.matchText = termMatch.group(1)
                self.matchLength = len(self.matchText)
                self.matchStart = termMatch.start()
                self.nextMatch = self.matchStart + self.matchLength
                self.output = oldOutput
                return
            
            # Output any text before the match
            if match.start() > self.nextMatch:
                self.outputText(self.output, self.nextMatch, match.start())
                
            # Set the match parameters for the handler
            self.matchStart = match.start()
            self.matchLength = len(match.group(0))
            self.matchText = match.group(0)
            self.nextMatch = match.end()
            
            # Figure out which sub-group matched (zero-indexed)
            t,submatch = [ (t,s) for t, s in enumerate(match.groups()) if s ][0]
            
            # Handle it
            self.formatters[t]['handler'](self, **self.formatters[t])
            
            termMatch = termRegExp.search(self.source, self.nextMatch)
            if termMatch:
                match = self.findMatch(self.source[:termMatch.start()], self.nextMatch)
            else:
                match = self.findMatch(self.source, self.nextMatch)
        
        if self.nextMatch < len(self.source):
            self.outputText(self.output, self.nextMatch,len(self.source))
            self.nextMatch = len(self.source)
        
        self.output = oldOutput

    def outputText(self, output, startPos, endPos):
        HTML.Text(output, self.source[startPos:endPos])


DEFAULT_TEMPLATE = '''
<!DOCTYPE HTML PUBLIC "-//W3C//DTD HTML 4.01//EN">
<html>
    <head>
        <link type="text/css" rel="stylesheet" href="http://cptnahab.dataunity.com/cspace/images/cspace.css" />
        <link type="text/css" rel="stylesheet" href="http://cptnahab.dataunity.com/cspace/Styles/SyntaxHighlighter.css"></link>
        <script language="javascript" type="text/javascript" src="http://cptnahab.dataunity.com/cspace/Scripts/shCore.js"></script>
        <script language="javascript" type="text/javascript" src="http://cptnahab.dataunity.com/cspace/Scripts/shBrushPython.js"></script>
    </head>
    <body>
        <div id="page">
        
            <div id='header'>
                <img id='explogo' src='http://cptnahab.dataunity.com/cspace/images/experimental.png'/>
                <img id='logo' src='http://cptnahab.dataunity.com/cspace/images/cspacelogo.png'/>
                <br style="clear: both" /><br/>
                
                <img src="http://cptnahab.dataunity.com/cspace/images/headerline.png" height="4" />
                
            </div>
        
            <div id="pagecontent">
                <div class="index">
<!-- This is a (PRE) block.  Make sure it's left aligned or your toc title will be off. -->
%(toc)s
                </div>
                
                <h1>%(title)s</h1>

                <div class="summary">
                    %(summary)s
                </div>

                <div class="narrow">
                    %(wiki)s
                </div>
        
            </div>
        </div>
<script language="javascript">
dp.SyntaxHighlighter.ClipboardSwf = 'http://cptnahab.dataunity.com/cspace/Scripts/clipboard.swf';
dp.SyntaxHighlighter.HighlightAll('code');
</script>
    </body>
</html>
'''

def wikify(pages, options=None):
    # See options definition below.
    # Pass any object with those (potential) attributes
    srcdir = getattr(options, 'srcdir', os.getcwd())
    destdir = getattr(options, 'destdir', None)
    
    # Find all requested files
    onlyStale = False
    if getattr(options, 'all', False):
        pages = [ k for k in os.listdir(srcdir)
                 if k.endswith(".wiki") ]
        onlyStale = True
        if destdir is None:
            destdir = os.getcwd()

    # Create the magic 8-ball
    w = Wikifier(GoogleCodeWikiFormat,
                 autolink=getattr(options, 'autolink', False),
                 tabwidth=getattr(options, 'tabwidth', 8),
                 multibreak=getattr(options, 'multibreak', False),
                 srcdir=srcdir,
                 suffix=".html")

    rets = []
    for wikiname in pages:
        # Clean up the page name
        if wikiname.endswith(".wiki"):
            wikiname = wikiname[:-5]
        
        wikifilename = os.path.join(srcdir, "%s.wiki" % wikiname)
        if onlyStale:
            # See if the output is fresh, and if so, skip it
            wikidestname = os.path.join(destdir, "%s.html" % wikiname)
            try:
                sstat = os.stat(wikifilename)
            except:
                continue
            try:
                dstat = os.stat(wikidestname)
            except:
                pass
            else:
                if dstat.st_mtime > sstat.st_mtime:
                    continue
        
        # Load the wiki content
        wikifilename = os.path.join(srcdir, "%s.wiki" % wikiname)
        try:
            wikisrc = file(wikifilename).read()
        except IOError:
            wikisrc = wikifilename + ' file not found!'
            print '**** ' + wikisrc
        
        # Ask a question
        wikified = w.wikify(wikisrc)

        reFind = re.compile(r'<h(\d)>\s*([^\<]*[\S])\s*</h\d>')
        strRepl = r'<h\g<1>><a name="\g<2>">\g<2></a></h\g<1>>'
        
        # Number the sections
        if getattr(options, 'number', True):
            sectstack = []
            matches = []
            curLevel = 0
            match = reFind.search(wikified)
            while match is not None:
                level = int(match.group(1))

                while level > len(sectstack):
                    sectstack.append(1)
                
                while len(sectstack) > level:
                    sectstack.pop(-1)
                    
                if curLevel >= level:
                    sectstack[-1] += 1
                curLevel = len(sectstack)
                
                sectnum = ".".join([str(n) for n in sectstack]) + "."
                matches.append((sectnum, match))
                match = reFind.search(wikified, match.end())
            
            matches.reverse()
            for sectnum, match in matches:
                wikified = wikified[:match.start()+4] + sectnum + " " + wikified[match.start()+4:]
        

        # Generate the TOC
        if getattr(options, 'toc', True):
            matches = []
            for match in reFind.findall(wikified):
                if not matches:
                     matches.append('<b>%s: Contents</b>' % wikiname)
                if int(match[0]) > getattr(options, 'levels', 3): continue
                indent = "&nbsp;" * ((int(match[0])) * 2)
                
                href = "#" + match[1]
                anchor = '%s<a href="%s">%s</a>' % (indent, href, match[1])
                matches.append(anchor)
            toc = "<br>".join(matches)
        else:
            toc = ""
        
        # Generate the body links
        if getattr(options, 'links', True):
            wikified = reFind.sub(strRepl, wikified)

        # Find a summary
        summary = ""
        if w.summary is not None:
            summary = w.summary
            
        if not getattr(options, 'raw', False):
            # Fill the template
            wikified = options.template % {
                    "toc": toc,
                    "title": spacedwikiword(wikiname),
                    "wiki": wikified,
                    "summary": summary }
        
        # Save it or write it
        if destdir is not None:
            outputname = os.path.join(destdir, "%s.html" % wikiname)
            file(outputname,"w").write(wikified)

            mainpage = getattr(options, 'mainpage', 'MainPage')
            if wikiname == mainpage:
                rets.append((wikiname, outputname))
                outputname = os.path.join(destdir, "index.html")
                file(outputname,"w").write(wikified)

            wikified = outputname
        rets.append((wikiname, wikified))
    return rets

if __name__ == "__main__":
    from optparse import OptionParser
    import sys
    
    parser = OptionParser()
    
    # Output format options
    parser.add_option("-t", "--template", dest="template",
                        help="use TPLTFILE to wrap wiki output", metavar="TPLTFILE")
    parser.add_option("-n", "--number", dest="number", metavar="NUMSTART",
                        help="number the headings in the body and table of contents starting with level NUMSTART")
    parser.add_option("-l", "--levels", dest="levels", type="int",
                        help="create toc to depth LEVELS", metavar="LEVELS")
    parser.add_option("-c", "--skiptoc", dest="toc", action="store_false",
                        help="leave toc out, even if template has slot")
    parser.add_option("-u", "--unlink", dest="links", action="store_false",
                        help="don't create named anchors for toc links")
    parser.add_option("-a", "--autolink", dest="autolink", action="store_false",
                        help="autolink wiki words that don't exist")
    parser.add_option("-w", "--tabwidth", dest="tabwidth", type="int",
                        help="replace tabs by WIDTH spaces", metavar="WIDTH")
    parser.add_option("-m", "--multibreak", dest="multibreak", action="store_true",
                        help="don't collapse multiple line breaks")
    parser.add_option("-r", "--raw", dest="raw", action="store_true",
                        help="raw wiki translation -- no wrapping, no toc, no links")
    parser.add_option("-p", "--mainpage", dest="mainpage", metavar="PAGENAME",
                        help="set main page to PAGENAME")
    parser.add_option("-P", "--gcproject", dest="gcproject",
                        help="name of the Google Code project")
    
    # Batch / Location options
    parser.add_option("-s", "--srcdir", dest="srcdir",
                        help="wiki format sources in SRCDIR", metavar="SRCDIR")
    parser.add_option("-d", "--destdir", dest="destdir",
                        help="write html output into DESTDIR", metavar="DESTDIR")
    parser.add_option("-e", "--stale", dest="all", action="store_true",
                        help="convert all wiki files that are stale or missing from DESTDIR")
    
    
    parser.set_default('toc', True)
    parser.set_default('links', True)
    parser.set_default('template', None)
    parser.set_default('number', False)
    parser.set_default('levels', 3)
    parser.set_default('tabwidth', 8)
    parser.set_default('multibreak', False)
    parser.set_default('mainpage', "MainPage")  # Identity of index
    
    parser.set_default('srcdir', os.getcwd())
    parser.set_default('destdir', None)
    parser.set_default('all', False)

    # Parse the command line
    (options, args) = parser.parse_args()
    
    if options.template is None:
        options.template = DEFAULT_TEMPLATE
    elif os.path.exists(options.template):
        options.template = file(options.template).read()
    else:
        print "Template not found: %s" % options.template
        parser.print_usage()
        sys.exit()
    
    wiki_url = 'http://code.google.com/p/%s/wiki/' % options.gcproject
    not_offline_message = 'Please see %(page1)s <a href="%(url)s%(page2)s">online</a>.'
    not_offline_message_raw = 'Please see %(page1)s online: %(url)s%(page2)s.'
    # [:-5] to remove the .wiki extension
    for wikiname in [fname[:-5] for fname in os.listdir(options.srcdir)
                     if os.path.isfile(os.path.join(options.srcdir, fname))]:
        if wikiname not in args:
            if not getattr(options, 'raw', False):
                # Fill the template
                content = options.template % {
                        "toc": '',
                        "title": spacedwikiword(wikiname),
                        "wiki": not_offline_message % {'url':wiki_url,
                                                       'page1': wikiname,
                                                       'page2': wikiname},
                        "summary": 'unavailable' }
            else:
                content = not_offline_message_raw % {'url':wiki_url,
                                                     'page1': wikiname,
                                                     'page2': wikiname}
            if options.destdir is not None:
                outputname = os.path.join(options.destdir, "%s.html" % wikiname)
                file(outputname,"w").write(content)
    
    for wikiname, htmldata in wikify(args, options):
        if options.destdir:
            print wikiname + ":",
            if htmldata is not None:
                print htmldata
            else:
                print "Complete."
        elif htmldata is not None:
            print htmldata