PageRenderTime 67ms CodeModel.GetById 29ms RepoModel.GetById 1ms app.codeStats 0ms

/luads/docs/wikify.py

http://microlua.googlecode.com/
Python | 989 lines | 847 code | 47 blank | 95 comment | 58 complexity | 3d991cd6add66bfcc8677f34dd7f00c1 MD5 | raw file
Possible License(s): GPL-3.0
  1. #!/usr/bin/python
  2. #
  3. # wikify.py - Convert from wikitext to HTML
  4. # Based on large portions of JeremyRuston's TiddlyWiki JS Wikifier
  5. # Changed to GoogleCode wiki syntax, python by Michael Crawford <mike@dataunity.com>
  6. # Tweaked to suit the needs of the Micro Lua project by Christophe Gragnic
  7. # november 2009 (Grahack).
  8. """ Convert wikitext to HTML """
  9. # Jeremy's license:
  10. # Copyright (c) UnaMesa Association 2004-2007
  11. #
  12. # Redistribution and use in source and binary forms, with or without modification,
  13. # are permitted provided that the following conditions are met:
  14. #
  15. # Redistributions of source code must retain the above copyright notice, this
  16. # list of conditions and the following disclaimer.
  17. #
  18. # Redistributions in binary form must reproduce the above copyright notice, this
  19. # list of conditions and the following disclaimer in the documentation and/or other
  20. # materials provided with the distribution.
  21. #
  22. # Neither the name of the UnaMesa Association nor the names of its contributors may be
  23. # used to endorse or promote products derived from this software without specific
  24. # prior written permission.
  25. #
  26. # THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
  27. # AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
  28. # IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
  29. # ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
  30. # LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
  31. # CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
  32. # SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
  33. # INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
  34. # CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
  35. # ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
  36. # POSSIBILITY OF SUCH DAMAGE.
  37. #
  38. # My license:
  39. # Copyright (c) Data Unity 2007
  40. #
  41. # Redistribution and use in source and binary forms, with or without modification,
  42. # are permitted provided that the following conditions are met:
  43. #
  44. # Redistributions of source code must retain the above copyright notice, this
  45. # list of conditions and the following disclaimer.
  46. #
  47. # Redistributions in binary form must reproduce the above copyright notice, this
  48. # list of conditions and the following disclaimer in the documentation and/or other
  49. # materials provided with the distribution.
  50. #
  51. # Neither the name of the Data Unity nor the names of its contributors may be
  52. # used to endorse or promote products derived from this software without
  53. # specific prior written permission.
  54. #
  55. # THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
  56. # AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
  57. # IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
  58. # ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
  59. # LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
  60. # CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
  61. # SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
  62. # INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
  63. # CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
  64. # ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
  65. # POSSIBILITY OF SUCH DAMAGE.
  66. import re, os, os.path, htmlentitydefs, urllib
  67. def spacedwikiword(wikiword):
  68. ret = ''
  69. first = True
  70. for l in wikiword:
  71. if not first and l.isupper():
  72. ret += ' '
  73. ret += l
  74. first = False
  75. return ret
class _HTML:
    """ An HTML node factory factory.

    Attribute access on an instance (e.g. HTML.div, HTML.a) returns a
    constructor for an element with that tag name; see __getattr__.
    """
    class Node:
        """ An HTML element: tag name, attributes and child nodes. """
        def __init__(self, parent, tagname, text="", attribs={}, empty=False, **kwargs):
            # attribs is copied via dict() below, so the mutable default is
            # not shared between instances.  Extra **kwargs (formatter
            # bookkeeping like 'handler'/'match') are deliberately ignored.
            self.tagname = tagname
            self.attribs = dict(attribs)
            self.children = list()
            self.empty = empty  # True -> render as a self-closing tag
            if text != "":
                self.appendText(text)
            if parent is not None:
                parent.children.append(self)
            self.parent = parent
        def appendText(self, text):
            # Wrap non-empty text in a Text child node.
            if text == "": return
            _HTML.Text(self, text)
        def __str__(self):
            # Serialize this node and all children to HTML markup.
            attrs = " ".join([ '%s="%s"' % i for i in self.attribs.iteritems() ])
            if attrs: attrs = " " + attrs
            if self.empty:
                return "<%s%s/>" % (self.tagname, attrs)
            children = "".join([str(c) for c in self.children])
            return "<%s%s>%s</%s>" % (self.tagname, attrs, children, self.tagname)
        def isInside(self, tagname):
            # Walk up the parent chain (including self) looking for tagname.
            k = self
            while k is not None:
                if k.tagname == tagname:
                    return True
                k = k.parent
            return False
    class Text:
        """ Simple text node; only '&' is escaped (raw markup passes through). """
        # NOTE(review): this entity table is computed but never used anywhere
        # in this file -- apparently leftover from the TiddlyWiki original.
        entities = [ (k,v)
                     for k,v in htmlentitydefs.entitydefs.iteritems()
                     if k != "amp" and k[0] != "#" ]
        def __init__(self, parent, text=""):
            self.text = self._clean(text)
            if parent is not None:
                parent.children.append(self)
        def _clean(self, text):
            # Escape ampersands only; '<' and '>' are left alone so that
            # handlers can append raw HTML strings as children.
            text = text.replace("&", "&amp;")
            return text
        def __str__(self):
            return self.text
    def __getattr__(self, attr):
        """ Return an element constructor using the attribute as the tagname """
        def factory(parent=None, **kwargs):
            return self.Node(parent, attr, **kwargs)
        return factory
# Singleton element factory: HTML.div(parent), HTML.a(parent, ...), etc.
HTML = _HTML()
# Bare URL recognition: scheme whitelist, runs to whitespace or quote.
URLSTR = r"(?:file|http|https|mailto|ftp|irc|news|data):[^\s'\"]+(?:/|\b)"
URL = re.compile(URLSTR, re.M)
# URLs that should be rendered inline as <img> (by file extension).
IMGURLSTR = r".+((\.[Pp][Nn][Gg])|(\.[Gg][Ii][Ff])|(\.[Jj][Pp][Ee]?[Gg]))"
IMGURL = re.compile(IMGURLSTR, re.M)
# YouTube watch URLs; group(1) is the video id.
YOUTUBESTR = r"http://www.youtube.com/watch\?v=([A-Za-z0-9_-]+)"
YOUTUBEURL = re.compile(YOUTUBESTR, re.M)
# NOTE(review): the <param name="movie"> uses %s for the video id, but the
# <embed src=...> hardcodes id hQPHf_8J8Eg -- looks like a copy/paste bug
# (every embedded player would play that one video in some browsers).
# Fixing it requires changing both this template and its call site together.
YOUTUBEREPL = r'<object width="425" height="355"><param name="movie" value="http://www.youtube.com/v/%s&rel=1"></param><param name="wmode" value="transparent"></param><embed src="http://www.youtube.com/v/hQPHf_8J8Eg&rel=1" type="application/x-shockwave-flash" wmode="transparent" width="425" height="355"></embed></object>'
# Video file URLs rendered through a generic <embed> player.
VIDEOURLSTR = r".+((\.[Aa][Vv][Ii])|(\.[Mm][Oo][Vv])|(\.[Mm][Pp][Ee]?[Gg]))"
VIDEOURL = re.compile(VIDEOURLSTR, re.M)
VIDEOREPL = r'<embed src = "%s" width="400" height="350" hidden=false autostart=true loop=1>'
# Google Code SVN source links with a "#start-end:Lang" fragment:
# group(1)=project, group(2)=path, group(3)=line range, group(4)=":Language".
CODEURLSTR = r"http://([^\.]+).googlecode.com/svn/trunk/([^#]+)#((?:(?:(?:[\d]+)?\-)?[\d]+)|(?:[\d]+\-?))((?:\:(?:[\:]|[^\W])+))?"
CODEURL = re.compile(CODEURLSTR, re.M)
CODEREPL = r'<a href="%(url)s">svn://%(site)s/trunk/%(file)s</a><pre name="code" class="%(class)s">%(lines)s</pre>'
  140. def GoogleCode_ReadSVNFile(wikifier, domain, path, start, end):
  141. """ Try to read a file from subversion for inclusion in the wiki. """
  142. gcurl = "http://%s.googlecode.com/svn/trunk/%s" % (domain,path)
  143. fdata = urllib.urlopen(gcurl).readlines()
  144. return gcurl, fdata[start-1:end]
  145. def GoogleCode_IsExternalLink(wikifier, link):
  146. """ See if the link points outside of the wiki. """
  147. if GoogleCode_Exists(wikifier, link):
  148. return False;
  149. if URL.match(link):
  150. return True
  151. if '.' in link or '\\' in link or '/' in link or '#' in link:
  152. return True
  153. return False
  154. def GoogleCode_Exists(wikifier, wikipage):
  155. """ See if a wiki page exists inside this wiki. """
  156. path = os.path.join(wikifier.srcdir, "%s.wiki" % wikipage)
  157. if os.path.exists(path):
  158. return True
  159. return False
  160. def GoogleCode_WikiComment(wikifier, lookaheadRegExp=None, termRegExp=None, **kwargs):
  161. termMatch = termRegExp.search(wikifier.source, wikifier.nextMatch)
  162. if termMatch is None: return
  163. output = HTML.Node(wikifier.output, 'div', **kwargs)
  164. wikifier.outputText(output, wikifier.nextMatch, termMatch.start())
  165. wikifier.nextMatch = termMatch.end()
def GoogleCode_WikiToc(wikifier, lookaheadRegExp=None, termRegExp=None, **kwargs):
    # Intentional no-op: the <wiki:toc /> pragma is consumed and dropped;
    # the actual table of contents is assembled later from the rendered
    # headings (see the toc logic in wikify()).
    pass
  168. def GoogleCode_Heading(wikifier, termRegExp=None, **kwargs):
  169. termMatch = termRegExp.search(wikifier.source, wikifier.nextMatch)
  170. if termMatch is None: return
  171. if (len(wikifier.output.children) and
  172. "br" == getattr(wikifier.output.children[-1], 'tagname', '')):
  173. wikifier.output.children.pop(-1)
  174. if (len(wikifier.output.children) and
  175. "br" == getattr(wikifier.output.children[-1], 'tagname', '')):
  176. wikifier.output.children.pop(-1)
  177. output = HTML.Node(wikifier.output, "h%i" % wikifier.matchLength)
  178. wikifier.outputText(output, wikifier.nextMatch, termMatch.start())
  179. wikifier.nextMatch = termMatch.end()
def GoogleCode_SimpleElement(wikifier, termRegExp=None, tagName=None, **kwargs):
    # Render a simple paired markup span (bold, italic, strike, sup, sub).
    # Nested identical tags are not allowed: emit the marker as plain text.
    if wikifier.output.isInside(tagName):
        wikifier.outputText(wikifier.output, wikifier.matchStart, wikifier.nextMatch)
        return
    elif wikifier.source[wikifier.nextMatch-1] == "_":
        # The italic pattern matches a leading non-word character before the
        # underscore; re-emit that prefix, then fall through to build the
        # element (presumably intentional -- the underscore itself is consumed).
        wikifier.outputText(wikifier.output, wikifier.matchStart, wikifier.nextMatch-1)
    # Without a terminator there is nothing to wrap; leave input untouched.
    if termRegExp.search(wikifier.source, wikifier.nextMatch) is None: return
    output = HTML.Node(wikifier.output, tagName, **kwargs)
    wikifier.subWikifyTerm(output, termRegExp)
  189. def GoogleCode_Blockquote(wikifier, termRegExp=None, **kwargs):
  190. sibs = wikifier.output.children
  191. if len(sibs) and getattr(sibs[-1], 'tagname', None) == "blockquote":
  192. wikifier.subWikifyTerm(sibs[-1], termRegExp)
  193. else:
  194. output = HTML.blockquote(wikifier.output, **kwargs)
  195. wikifier.subWikifyTerm(output, termRegExp)
  196. def GoogleCode_Codeblock(wikifier, tagName=None, termRegExp=None, initRegExp=None, **kwargs):
  197. if 'attribs' not in kwargs:
  198. kwargs['attribs'] = {}
  199. kwargs['attribs']['name'] = 'code'
  200. if 'class' not in kwargs['attribs']:
  201. kwargs['attribs']['class'] = wikifier.defaultHiLang.lower()
  202. else:
  203. kwargs['attribs']['class'] += " " + wikifier.defaultHiLang.lower()
  204. output = HTML.Node(wikifier.output, tagName, **kwargs)
  205. tcount = 1
  206. matchStart = wikifier.nextMatch
  207. # Find the matching terminator
  208. while tcount > 0:
  209. nextTermMatch = termRegExp.search(wikifier.source, wikifier.nextMatch)
  210. nextInitMatch = initRegExp.search(wikifier.source, wikifier.nextMatch)
  211. if not nextTermMatch:
  212. # No terminator. Syntax error, just ignore it.
  213. matchEnd = matchStart
  214. tcount = 0
  215. elif not nextInitMatch or nextTermMatch.start() <= nextInitMatch.start():
  216. # Terminator goes first.
  217. nextMatch = nextTermMatch
  218. tcount -= 1
  219. if tcount > 0:
  220. matchEnd = nextMatch.end()
  221. else:
  222. matchEnd = nextMatch.start()
  223. else:
  224. nextMatch = nextInitMatch
  225. tcount += 1
  226. matchEnd = nextMatch.end()
  227. wikifier.nextMatch = nextMatch.end()
  228. # Copy the content
  229. wikifier.outputText(output, matchStart, matchEnd)
  230. if "\n" not in wikifier.source[matchStart:matchEnd]:
  231. output.tagname = "code"
  232. def GoogleCode_WikiWord(wikifier, **kwargs):
  233. if wikifier.matchStart > 0:
  234. # Make sure we're at the start of a word?
  235. preRegExp = re.compile("[!A-Za-z0-9]", re.M)
  236. preMatch = preRegExp.search(wikifier.source, wikifier.matchStart-1)
  237. if (preMatch is not None and
  238. preMatch.start() == wikifier.matchStart-1):
  239. wikifier.outputText(wikifier.output,wikifier.matchStart,wikifier.nextMatch)
  240. return
  241. if wikifier.source[wikifier.matchStart] == "!":
  242. wikifier.outputText(wikifier.output,wikifier.matchStart+1,wikifier.nextMatch)
  243. elif GoogleCode_Exists(wikifier, wikifier.matchText):
  244. # Full link, everybody sees it
  245. HTML.a(wikifier.output, text=wikifier.matchText, attribs={"href": wikifier.matchText + wikifier.suffix})
  246. elif wikifier.autolink:
  247. # Partial link - only authorized users
  248. wikifier.outputText(wikifier.output,wikifier.matchStart,wikifier.nextMatch)
  249. link = HTML.a(wikifier.output, text="?", attribs={"href": wikifier.matchText + wikifier.suffix})
  250. else:
  251. wikifier.outputText(wikifier.output,wikifier.matchStart,wikifier.nextMatch)
def GoogleCode_LineBreak(wikifier, **kwargs):
    # Handle a blank line ("\n\n") in the source.
    sibs = wikifier.output.children
    if wikifier.multibreak:
        # multibreak mode: every blank line yields a single <br/>.
        HTML.br(wikifier.output, **kwargs)
    elif len(sibs) and (not hasattr(sibs[-1], 'tagname') or
                        sibs[-1].tagname == "img"):
        # Only after an inline or header block.
        # NOTE(review): reconstructed indentation -- both <br/>s appear to
        # belong to this branch (a paragraph break after inline content);
        # confirm against the original layout.
        HTML.br(wikifier.output, **kwargs)
        HTML.br(wikifier.output, **kwargs)
def GoogleCode_PrettyLink(wikifier, lookaheadRegExp=None, **kwargs):
    # Handle [Target] and [Target display text] bracketed links.
    lookMatch = lookaheadRegExp.search(wikifier.source, wikifier.matchStart)
    if lookMatch and lookMatch.start() == wikifier.matchStart:
        text = lookMatch.group(1)
        link = text
        if lookMatch.group(2):
            # Pretty bracketted link: group(1) is the target, group(2) the
            # display text (which may itself be an image URL).
            text = lookMatch.group(2)
            if GoogleCode_IsExternalLink(wikifier, link):
                # External link
                attribs={"href":link, "target": "_blank" }
            else:
                # Internal link
                attribs={"href":link + wikifier.suffix}
            e = HTML.a(wikifier.output, attribs=attribs)
            if URL.match(text):
                # Display text is itself a URL: render as a linked image.
                HTML.img(e, attribs={'src':text,
                                     'border': '0'})
                HTML.br(wikifier.output)
            else:
                HTML.Text(e, text)
        else:
            # Simple bracketted link: target doubles as the display text.
            if GoogleCode_IsExternalLink(wikifier, text):
                # External link
                attribs={"href":link, "target": "_blank" }
            else:
                # Internal link
                attribs={"href":text + wikifier.suffix}
            e = HTML.a(wikifier.output, text=text, attribs=attribs)
        wikifier.nextMatch = lookMatch.end()
def GoogleCode_UrlLink(wikifier, **kwargs):
    # Render a bare URL: image, YouTube embed, video embed, source-code
    # inclusion, or a plain <a> link, depending on which pattern matches.
    attribs = {"href": wikifier.matchText}
    if GoogleCode_IsExternalLink(wikifier, wikifier.matchText):
        attribs["target"] = "_blank"
    if IMGURL.match(wikifier.matchText):
        HTML.img(wikifier.output, attribs={'src':wikifier.matchText})
        HTML.br(wikifier.output)
    elif YOUTUBEURL.match(wikifier.matchText):
        match = YOUTUBEURL.match(wikifier.matchText)
        # Raw html ;)  (plain strings appended as children bypass escaping)
        wikifier.output.children.append(YOUTUBEREPL % match.group(1))
    elif VIDEOURL.match(wikifier.matchText):
        # Raw html ;)
        wikifier.output.children.append(VIDEOREPL % wikifier.matchText)
    elif CODEURL.match(wikifier.matchText):
        # Raw html ;)
        # http://([^\.]+).googlecode.com/svn/trunk/([^\#]+)#([^\:]+)(?:\:([^\W]+))?
        codeMatch = CODEURL.match(wikifier.matchText)
        parts = { "class": (codeMatch.group(4) or "").lower()[1:],
                  "file": codeMatch.group(2),
                  "site": codeMatch.group(1)}
        # group(3) is "N" or "N-M"; NOTE(review): forms like "#10-" or "#-10"
        # are accepted by CODEURL but would make int('') raise here -- confirm.
        lines = codeMatch.group(3)
        if '-' in lines:
            lines = lines.split('-')
            lines[0] = int(lines[0])
            lines[1] = int(lines[1])
        else:
            lines = [int(lines), int(lines)]
        parts['class'] += ":firstline[%i]" % lines[0]
        # Fetch the requested line range from the project's SVN over HTTP.
        url, parts['lines'] = GoogleCode_ReadSVNFile(wikifier, parts['site'],
                                                     parts['file'], *lines)
        parts['url'] = url
        parts['lines'] = "".join(parts['lines'])
        wikifier.output.children.append(CODEREPL % parts)
    else:
        HTML.a(wikifier.output, text=wikifier.matchText, attribs=attribs)
def GoogleCode_Table(wikifier, sepRegExp=None, termRegExp=None, **kwargs):
    # Render one "||cell||cell||" row; consecutive rows merge into the same
    # <table> by reusing a trailing table sibling.
    sibs = wikifier.output.children
    if len(sibs) and getattr(sibs[-1], 'tagname', None) == "table":
        table = sibs[-1]
    else:
        table = HTML.table(wikifier.output)
    row = HTML.tr(table)
    # The row ends at the newline; without one it runs to end of source.
    termMatch = termRegExp.search(wikifier.source, wikifier.matchStart)
    if termMatch is None:
        termEnd = termStart = len(wikifier.source)
    else:
        termStart, termEnd = termMatch.start(), termMatch.end()
    # Skip over the leading separator
    sepMatch = sepRegExp.search(wikifier.source, wikifier.matchStart)
    wikifier.nextMatch = wikifier.matchStart = sepMatch.end()
    sepMatch = sepRegExp.search(wikifier.source, wikifier.matchStart)
    attribs = { "style": "border: 1px solid #aaa; padding: 5px;" }
    # Each separator found before the row terminator closes one cell.
    while sepMatch and sepMatch.end() <= termStart:
        cell = HTML.td(row, attribs=attribs)
        wikifier.subWikifyTerm(cell, sepRegExp)
        wikifier.nextMatch = sepMatch.end()
        sepMatch = sepRegExp.search(wikifier.source, wikifier.nextMatch)
    # Resume scanning after the row terminator.
    wikifier.nextMatch = termEnd
def GoogleCode_List(wikifier, lookaheadRegExp=None, termRegExp=None, **kwargs):
    # Build nested <ul>/<ol> lists from consecutive indented '*'/'#' lines.
    # 'stack' holds the chain of open list containers; 'indents' the
    # indentation depth that opened each one.
    currLevel = 0
    currType = None
    stack = [wikifier.output]
    indents = [currLevel]
    wikifier.nextMatch = wikifier.matchStart
    lookMatch = lookaheadRegExp.search(wikifier.source, wikifier.nextMatch)
    # Consume lines for as long as each one starts exactly at the cursor.
    while lookMatch and lookMatch.start() == wikifier.nextMatch:
        # See what kind of list it is
        if lookMatch.group(1):
            listType = "ul"
            itemType = "li"
        elif lookMatch.group(2):
            listType = "ol"
            itemType = "li"
        # Indentation depth = length of the matched prefix (spaces + bullet).
        listLevel = len(lookMatch.group(0))
        wikifier.nextMatch += len(lookMatch.group(0))
        # Check for any changes in list type or indentation
        if listLevel > currLevel:
            # Indent further: open a new list, nested inside the last item
            # of the current list (or the top-level output at depth 0).
            indents.append(listLevel)
            if currLevel == 0:
                target = stack[-1]
            else:
                target = stack[-1].children[-1]
            stack.append(HTML.Node(target, listType))
        elif listLevel < currLevel:
            # Indent less: close lists until depths match.
            while indents[-1] > listLevel:
                stack.pop(-1)
                indents.pop(-1)
        elif listLevel == currLevel and listType != currType:
            # Same level, different kind of list
            stack.pop(-1)
            stack.append(HTML.Node(stack[-1].children[-1], listType))
        currLevel = listLevel
        currType = listType
        # Output the item
        output = HTML.Node(stack[-1], itemType)
        wikifier.subWikifyTerm(output, termRegExp)
        # Roll again
        lookMatch = lookaheadRegExp.search(wikifier.source, wikifier.nextMatch)
# Formatter table for Google Code wiki syntax.  Order matters: Wikifier
# joins each 'match' pattern into one alternation regex, and the FIRST
# group that matches wins.  Every entry's full dict is passed to its
# handler as keyword arguments.
GoogleCodeWikiFormat = [
    { "name": "tablerow",
      "match": r"^(?:\|\|.+\|\|)",
      "termRegExp": re.compile(r"(\n)", re.M),
      "sepRegExp": re.compile(r"(\|\|)", re.M),
      "handler": GoogleCode_Table
    },
    { "name": "heading",
      "match": r"^={1,6}",
      "termRegExp": re.compile(r"([=]+)", re.M),
      "handler": GoogleCode_Heading
    },
    { "name": "list",
      "match": r"^(?:[ ]+)(?:[\*#])",
      "lookaheadRegExp": re.compile(r"^(?:[ ]+)(?:(\*)|(#))",re.M),
      "termRegExp": re.compile(r"(\n)", re.M),
      "handler": GoogleCode_List
    },
    { "name": "blockquote",
      "match": r"^(?:[ ]+)",
      "termRegExp": re.compile(r"(\n)", re.M),
      "handler": GoogleCode_Blockquote,
      "tagName": "blockquote"
    },
    { "name": "codeword",
      "match": r"\`",
      "initRegExp": re.compile(r"(\`)", re.M),
      "termRegExp": re.compile(r"(\`)", re.M),
      "handler": GoogleCode_Codeblock,
      "tagName": "code"
    },
    { "name": "codeblock",
      "match": r"\{\{\{",
      "initRegExp": re.compile(r"(\{\{\{)", re.M),
      "termRegExp": re.compile(r"(\}\}\})", re.M),
      "handler": GoogleCode_Codeblock,
      "tagName": "pre",
      "attribs": { "class": "codeblock" }
    },
    { "name": "bold",
      "match": r"[\*]",
      "termRegExp": re.compile(r"([\*])", re.M),
      "handler": GoogleCode_SimpleElement,
      "tagName": "b"
    },
    { "name": "italic",
      # The match deliberately includes the preceding non-word character;
      # GoogleCode_SimpleElement re-emits it before opening the <i> tag.
      "match": r"(?:[^\w\b]|^)[\_]",
      "termRegExp": re.compile(r"([\_])[^\w\b]", re.M),
      "handler": GoogleCode_SimpleElement,
      "tagName": "i"
    },
    { "name": "strike",
      "match": r"\~\~",
      "termRegExp": re.compile(r"(\~\~)", re.M),
      "handler": GoogleCode_SimpleElement,
      "tagName": "strike"
    },
    { "name": "superscript",
      "match": r"\^",
      "termRegExp": re.compile(r"(\^)", re.M),
      "handler": GoogleCode_SimpleElement,
      "tagName": "sup"
    },
    { "name": "subscript",
      "match": r",,",
      "termRegExp": re.compile(r"(,,)", re.M),
      "handler": GoogleCode_SimpleElement,
      "tagName": "sub"
    },
    { "name": "prettyLink",
      "match": r"\[(?:(?:[A-Za-z][A-Za-z0-9\_\-]+)|(?:(?:file|http|https|mailto|ftp|irc|news|data):[^\s'\"]+(?:/|\b)))(?: .*?)?\]",
      "lookaheadRegExp": re.compile(r'\[(.*?)(?: (.*?))?\]', re.M),
      "handler": GoogleCode_PrettyLink
    },
    { "name": "wikiword",
      "match": r"(?:\!?(?:[A-Z]+[a-z]+[A-Z][A-Za-z]*)|(?:[A-Z]{2,}[a-z]+))",
      "handler": GoogleCode_WikiWord
    },
    { "name": "urlLink",
      "match": URLSTR,
      "handler": GoogleCode_UrlLink
    },
    { "name": "linebreak",
      "match": r"\n\n",
      "handler": GoogleCode_LineBreak,
      "empty": True
    },
    { "name": "wikicomment",
      "match": r"<wiki:comment>",
      "initRegExp": re.compile(r"(<wiki:comment>)", re.M),
      "termRegExp": re.compile(r"(</wiki:comment>)", re.M),
      "handler": GoogleCode_WikiComment,
      "tagName": "div",
      "attribs": { "class": "comment" }
    },
    { "name": "wikitoc",
      "match": r"<wiki:toc />",
      "handler": GoogleCode_WikiToc
    },
]
class Wikifier:
    """Drives the formatter table over a wiki source string, producing HTML."""
    def __init__(self, formatters, autolink=False, srcdir=os.getcwd(),
                 multibreak=False, tabwidth=8, suffix=".html",
                 hiLang="Python"):
        # NOTE(review): srcdir=os.getcwd() is evaluated once at import time,
        # not per call -- callers who chdir later may be surprised.
        # Create the master regex: one alternation group per formatter, in
        # table order, so group index N maps back to formatters[N].
        forms = [ "(%s)" % r['match'] for r in formatters ]
        self.formatterRegExp = re.compile("|".join(forms), re.M)
        # Save the individual format handlers
        self.formatters = formatters
        self.autolink = autolink
        self.srcdir = srcdir
        self.multibreak = multibreak and True or False
        self.tabwidth = tabwidth
        self.suffix = suffix
        self.defaultHiLang = hiLang
    def _clean(self, text):
        # Normalize the raw wiki text before scanning.
        text = text.replace("\r\n", "\n")
        # Out, out, damned tabs
        text = text.replace("\t", " " * self.tabwidth)
        if not self.multibreak:
            # Remove redundant line breaks: collapse runs of blank lines
            # until the text stops shrinking.
            tlen = len(text) + 1
            while tlen > len(text):
                tlen = len(text)
                text = text.replace("\n\n\n", "\n\n")
        while text.startswith("#"):
            # Process any wiki-headers (#summary / #labels pragma lines)
            line, text = text.split("\n", 1)
            self._header(line)
        return text
    def _header(self, line):
        # Parse one "#pragma content" header line.
        tagname, content = line.split(" ", 1)
        if tagname == "#summary":
            self.summary = content
        elif tagname == "#labels":
            self.labels = tuple(content.split(","))
    def wikify(self, source, labels=None, summary=None):
        """Convert *source* wikitext to an HTML string."""
        self.labels = labels
        self.summary = summary
        # Clean up the content
        self.source = self._clean(source)
        self.nextMatch = 0
        # Do it: build into a detached <div>, then serialize its children.
        self.output = HTML.div(None)
        self.subWikifyUnterm()
        return "".join([str(c) for c in self.output.children])
    def findMatch(self, source, start):
        # Next formatter match at or after *start*.
        return self.formatterRegExp.search(source, start)
    def subWikifyUnterm(self, output=None):
        # Scan from nextMatch to end of source (no terminator), dispatching
        # each formatter match and copying intervening text verbatim.
        oldOutput = self.output
        if output is not None:
            self.output = output
        match = self.findMatch(self.source, self.nextMatch)
        while match:
            # Output any text before the match
            if match.start() > self.nextMatch:
                self.outputText(self.output, self.nextMatch, match.start())
            # Set the match parameters for the handler
            self.matchStart = match.start()
            self.matchLength = len(match.group(0))
            self.matchText = match.group(0)
            self.nextMatch = match.end()
            # Figure out which sub-group matched (zero-indexed)
            t, submatch = [(t,s) for t, s in enumerate(match.groups()) if s][0]
            # Handle it: the handler receives its whole formatter dict as
            # keyword arguments and may move self.nextMatch.
            self.formatters[t]['handler'](self, **self.formatters[t])
            # Go back for more matches
            match = self.findMatch(self.source, self.nextMatch)
        if self.nextMatch < len(self.source):
            self.outputText(self.output, self.nextMatch, len(self.source))
            self.nextMatch = len(self.source)
        # Restore the destination node
        self.output = oldOutput
    def subWikifyTerm(self, output, termRegExp):
        # Like subWikifyUnterm, but stop at (and consume) the first
        # termRegExp match; formatter matches are only honoured if they
        # occur before the terminator (hence the source[:termMatch.start()]
        # slice when searching).
        oldOutput = self.output
        if output is not None:
            self.output = output
        # Get the first matches for the formatter and terminator RegExps
        termMatch = termRegExp.search(self.source, self.nextMatch)
        if termMatch:
            match = self.findMatch(self.source[:termMatch.start()], self.nextMatch)
        else:
            match = self.findMatch(self.source, self.nextMatch)
        while termMatch or match:
            # If the terminator comes before the next formatter match, we're done
            if termMatch and (not match or termMatch.start() <= match.start()):
                if termMatch.start() > self.nextMatch:
                    self.outputText(self.output,self.nextMatch,termMatch.start())
                self.matchText = termMatch.group(1)
                self.matchLength = len(self.matchText)
                self.matchStart = termMatch.start()
                self.nextMatch = self.matchStart + self.matchLength
                self.output = oldOutput
                return
            # Output any text before the match
            if match.start() > self.nextMatch:
                self.outputText(self.output, self.nextMatch, match.start())
            # Set the match parameters for the handler
            self.matchStart = match.start()
            self.matchLength = len(match.group(0))
            self.matchText = match.group(0)
            self.nextMatch = match.end()
            # Figure out which sub-group matched (zero-indexed)
            t,submatch = [ (t,s) for t, s in enumerate(match.groups()) if s ][0]
            # Handle it
            self.formatters[t]['handler'](self, **self.formatters[t])
            termMatch = termRegExp.search(self.source, self.nextMatch)
            if termMatch:
                match = self.findMatch(self.source[:termMatch.start()], self.nextMatch)
            else:
                match = self.findMatch(self.source, self.nextMatch)
        if self.nextMatch < len(self.source):
            self.outputText(self.output, self.nextMatch,len(self.source))
            self.nextMatch = len(self.source)
        self.output = oldOutput
    def outputText(self, output, startPos, endPos):
        # Copy a raw slice of the source into *output* as a text node.
        HTML.Text(output, self.source[startPos:endPos])
# Page template with %(toc)s / %(title)s / %(summary)s / %(wiki)s slots.
# NOTE(review): this constant is never referenced -- wikify() formats
# options.template directly.  Presumably it was meant to be the fallback
# when no template option is supplied; confirm against the CLI handling.
DEFAULT_TEMPLATE = '''
<!DOCTYPE HTML PUBLIC "-//W3C//DTD HTML 4.01//EN">
<html>
<head>
<link type="text/css" rel="stylesheet" href="http://cptnahab.dataunity.com/cspace/images/cspace.css" />
<link type="text/css" rel="stylesheet" href="http://cptnahab.dataunity.com/cspace/Styles/SyntaxHighlighter.css"></link>
<script language="javascript" type="text/javascript" src="http://cptnahab.dataunity.com/cspace/Scripts/shCore.js"></script>
<script language="javascript" type="text/javascript" src="http://cptnahab.dataunity.com/cspace/Scripts/shBrushPython.js"></script>
</head>
<body>
<div id="page">
<div id='header'>
<img id='explogo' src='http://cptnahab.dataunity.com/cspace/images/experimental.png'/>
<img id='logo' src='http://cptnahab.dataunity.com/cspace/images/cspacelogo.png'/>
<br style="clear: both" /><br/>
<img src="http://cptnahab.dataunity.com/cspace/images/headerline.png" height="4" />
</div>
<div id="pagecontent">
<div class="index">
<!-- This is a (PRE) block. Make sure it's left aligned or your toc title will be off. -->
%(toc)s
</div>
<h1>%(title)s</h1>
<div class="summary">
%(summary)s
</div>
<div class="narrow">
%(wiki)s
</div>
</div>
</div>
<script language="javascript">
dp.SyntaxHighlighter.ClipboardSwf = 'http://cptnahab.dataunity.com/cspace/Scripts/clipboard.swf';
dp.SyntaxHighlighter.HighlightAll('code');
</script>
</body>
</html>
'''
def wikify(pages, options=None):
    # See options definition below.
    # Pass any object with those (potential) attributes
    # Converts each named wiki page to an HTML file (or raw HTML string)
    # and returns a list of (wikiname, output) pairs.
    srcdir = getattr(options, 'srcdir', os.getcwd())
    destdir = getattr(options, 'destdir', None)
    # Find all requested files
    onlyStale = False
    if getattr(options, 'all', False):
        # --all mode: process every .wiki file, skipping up-to-date output.
        pages = [ k for k in os.listdir(srcdir)
                  if k.endswith(".wiki") ]
        onlyStale = True
        if destdir is None:
            destdir = os.getcwd()
    # Create the magic 8-ball
    w = Wikifier(GoogleCodeWikiFormat,
                 autolink=getattr(options, 'autolink', False),
                 tabwidth=getattr(options, 'tabwidth', 8),
                 multibreak=getattr(options, 'multibreak', False),
                 srcdir=srcdir,
                 suffix=".html")
    rets = []
    for wikiname in pages:
        # Clean up the page name
        if wikiname.endswith(".wiki"):
            wikiname = wikiname[:-5]
        wikifilename = os.path.join(srcdir, "%s.wiki" % wikiname)
        if onlyStale:
            # See if the output is fresh, and if so, skip it
            wikidestname = os.path.join(destdir, "%s.html" % wikiname)
            # NOTE(review): bare except clauses below swallow everything,
            # including KeyboardInterrupt -- should be "except OSError:".
            try:
                sstat = os.stat(wikifilename)
            except:
                continue
            try:
                dstat = os.stat(wikidestname)
            except:
                pass
            else:
                if dstat.st_mtime > sstat.st_mtime:
                    continue
        # Load the wiki content
        wikifilename = os.path.join(srcdir, "%s.wiki" % wikiname)
        try:
            wikisrc = file(wikifilename).read()
        except IOError:
            # On a missing file the error message itself is wikified so it
            # shows up in the output page.
            wikisrc = wikifilename + ' file not found!'
            print '**** ' + wikisrc
        # Ask a question
        wikified = w.wikify(wikisrc)
        # Matches rendered headings: group(1)=level digit, group(2)=text.
        reFind = re.compile(r'<h(\d)>\s*([^\<]*[\S])\s*</h\d>')
        strRepl = r'<h\g<1>><a name="\g<2>">\g<2></a></h\g<1>>'
        # Number the sections
        if getattr(options, 'number', True):
            sectstack = []
            matches = []
            curLevel = 0
            match = reFind.search(wikified)
            while match is not None:
                level = int(match.group(1))
                # Grow/shrink the counter stack to the heading's depth.
                while level > len(sectstack):
                    sectstack.append(1)
                while len(sectstack) > level:
                    sectstack.pop(-1)
                if curLevel >= level:
                    sectstack[-1] += 1
                curLevel = len(sectstack)
                sectnum = ".".join([str(n) for n in sectstack]) + "."
                matches.append((sectnum, match))
                match = reFind.search(wikified, match.end())
            # Insert back-to-front so earlier match offsets stay valid.
            matches.reverse()
            for sectnum, match in matches:
                wikified = wikified[:match.start()+4] + sectnum + " " + wikified[match.start()+4:]
        # Generate the TOC
        if getattr(options, 'toc', True):
            matches = []
            for match in reFind.findall(wikified):
                if not matches:
                    matches.append('<b>%s: Contents</b>' % wikiname)
                if int(match[0]) > getattr(options, 'levels', 3): continue
                indent = "&nbsp;" * ((int(match[0])) * 2)
                href = "#" + match[1]
                anchor = '%s<a href="%s">%s</a>' % (indent, href, match[1])
                matches.append(anchor)
            toc = "<br>".join(matches)
        else:
            toc = ""
        # Generate the body links
        if getattr(options, 'links', True):
            wikified = reFind.sub(strRepl, wikified)
        # Find a summary
        summary = ""
        if w.summary is not None:
            summary = w.summary
        if not getattr(options, 'raw', False):
            # Fill the template
            # NOTE(review): options.template is used without a default, so
            # options=None or a missing -t option raises AttributeError /
            # TypeError here; DEFAULT_TEMPLATE above is never consulted.
            wikified = options.template % {
                "toc": toc,
                "title": spacedwikiword(wikiname),
                "wiki": wikified,
                "summary": summary }
        # Save it or write it
        if destdir is not None:
            outputname = os.path.join(destdir, "%s.html" % wikiname)
            file(outputname,"w").write(wikified)
            mainpage = getattr(options, 'mainpage', 'MainPage')
            if wikiname == mainpage:
                # The main page is also written as index.html.
                rets.append((wikiname, outputname))
                outputname = os.path.join(destdir, "index.html")
                file(outputname,"w").write(wikified)
            # When writing to disk, report the filename instead of the HTML.
            wikified = outputname
        rets.append((wikiname, wikified))
    return rets
  760. if __name__ == "__main__":
  761. from optparse import OptionParser
  762. import sys
  763. parser = OptionParser()
  764. # Output format options
  765. parser.add_option("-t", "--template", dest="template",
  766. help="use TPLTFILE to wrap wiki output", metavar="TPLTFILE")
  767. parser.add_option("-n", "--number", dest="number", metavar="NUMSTART",
  768. help="number the headings in the body and table of contents starting with level NUMSTART")
  769. parser.add_option("-l", "--levels", dest="levels", type="int",
  770. help="create toc to depth LEVELS", metavar="LEVELS")
  771. parser.add_option("-c", "--skiptoc", dest="toc", action="store_false",
  772. help="leave toc out, even if template has slot")
  773. parser.add_option("-u", "--unlink", dest="links", action="store_false",
  774. help="don't create named anchors for toc links")
  775. parser.add_option("-a", "--autolink", dest="autolink", action="store_false",
  776. help="autolink wiki words that don't exist")
  777. parser.add_option("-w", "--tabwidth", dest="tabwidth", type="int",
  778. help="replace tabs by WIDTH spaces", metavar="WIDTH")
  779. parser.add_option("-m", "--multibreak", dest="multibreak", action="store_true",
  780. help="don't collapse multiple line breaks")
  781. parser.add_option("-r", "--raw", dest="raw", action="store_true",
  782. help="raw wiki translation -- no wrapping, no toc, no links")
  783. parser.add_option("-p", "--mainpage", dest="mainpage", metavar="PAGENAME",
  784. help="set main page to PAGENAME")
  785. parser.add_option("-P", "--gcproject", dest="gcproject",
  786. help="name of the Google Code project")
  787. # Batch / Location options
  788. parser.add_option("-s", "--srcdir", dest="srcdir",
  789. help="wiki format sources in SRCDIR", metavar="SRCDIR")
  790. parser.add_option("-d", "--destdir", dest="destdir",
  791. help="write html output into DESTDIR", metavar="DESTDIR")
  792. parser.add_option("-e", "--stale", dest="all", action="store_true",
  793. help="convert all wiki files that are stale or missing from DESTDIR")
  794. parser.set_default('toc', True)
  795. parser.set_default('links', True)
  796. parser.set_default('template', None)
  797. parser.set_default('number', False)
  798. parser.set_default('levels', 3)
  799. parser.set_default('tabwidth', 8)
  800. parser.set_default('multibreak', False)
  801. parser.set_default('mainpage', "MainPage") # Identity of index
  802. parser.set_default('srcdir', os.getcwd())
  803. parser.set_default('destdir', None)
  804. parser.set_default('all', False)
  805. # Parse the command line
  806. (options, args) = parser.parse_args()
  807. if options.template is None:
  808. options.template = DEFAULT_TEMPLATE
  809. elif os.path.exists(options.template):
  810. options.template = file(options.template).read()
  811. else:
  812. print "Template not found: %s" % options.template
  813. parser.print_usage()
  814. sys.exit()
  815. wiki_url = 'http://code.google.com/p/%s/wiki/' % options.gcproject
  816. not_offline_message = 'Please see %(page1)s <a href="%(url)s%(page2)s">online</a>.'
  817. not_offline_message_raw = 'Please see %(page1)s online: %(url)s%(page2)s.'
  818. # [:-5] to remove the .wiki extension
  819. for wikiname in [fname[:-5] for fname in os.listdir(options.srcdir)
  820. if os.path.isfile(os.path.join(options.srcdir, fname))]:
  821. if wikiname not in args:
  822. if not getattr(options, 'raw', False):
  823. # Fill the template
  824. content = options.template % {
  825. "toc": '',
  826. "title": spacedwikiword(wikiname),
  827. "wiki": not_offline_message % {'url':wiki_url,
  828. 'page1': wikiname,
  829. 'page2': wikiname},
  830. "summary": 'unavailable' }
  831. else:
  832. content = not_offline_message_raw % {'url':wiki_url,
  833. 'page1': wikiname,
  834. 'page2': wikiname}
  835. if options.destdir is not None:
  836. outputname = os.path.join(options.destdir, "%s.html" % wikiname)
  837. file(outputname,"w").write(content)
  838. for wikiname, htmldata in wikify(args, options):
  839. if options.destdir:
  840. print wikiname + ":",
  841. if htmldata is not None:
  842. print htmldata
  843. else:
  844. print "Complete."
  845. elif htmldata is not None:
  846. print htmldata