PageRenderTime 46ms CodeModel.GetById 16ms RepoModel.GetById 1ms app.codeStats 0ms

/gluon/contrib/markmin/markmin2latex.py

https://code.google.com/p/web2py/
Python | 291 lines | 286 code | 2 blank | 3 comment | 1 complexity | a872567d1daded8dd23ea3497c1ff001 MD5 | raw file
Possible License(s): LGPL-2.1, BSD-2-Clause, MIT, BSD-3-Clause, Apache-2.0
  1. #!/usr/bin/env python
  2. # created by Massimo Di Pierro
  3. # license MIT/BSD/GPL
  4. import re
  5. import cgi
  6. import sys
  7. import doctest
  8. from optparse import OptionParser
  9. __all__ = ['render','markmin2latex']
  10. META = 'META'
  11. regex_newlines = re.compile('(\n\r)|(\r\n)')
  12. regex_dd=re.compile('\$\$(?P<latex>.*?)\$\$')
  13. regex_code = re.compile('('+META+')|(``(?P<t>.*?)``(:(?P<c>\w+))?)',re.S)
  14. regex_title = re.compile('^#{1} (?P<t>[^\n]+)',re.M)
  15. regex_maps = [
  16. (re.compile('[ \t\r]+\n'),'\n'),
  17. (re.compile('\*\*(?P<t>[^\s\*]+( +[^\s\*]+)*)\*\*'),'{\\\\bf \g<t>}'),
  18. (re.compile("''(?P<t>[^\s']+( +[^\s']+)*)''"),'{\\it \g<t>}'),
  19. (re.compile('^#{5,6}\s*(?P<t>[^\n]+)',re.M),'\n\n{\\\\bf \g<t>}\n'),
  20. (re.compile('^#{4}\s*(?P<t>[^\n]+)',re.M),'\n\n\\\\goodbreak\\subsubsection{\g<t>}\n'),
  21. (re.compile('^#{3}\s*(?P<t>[^\n]+)',re.M),'\n\n\\\\goodbreak\\subsection{\g<t>}\n'),
  22. (re.compile('^#{2}\s*(?P<t>[^\n]+)',re.M),'\n\n\\\\goodbreak\\section{\g<t>}\n'),
  23. (re.compile('^#{1}\s*(?P<t>[^\n]+)',re.M),''),
  24. (re.compile('^\- +(?P<t>.*)',re.M),'\\\\begin{itemize}\n\\item \g<t>\n\\end{itemize}'),
  25. (re.compile('^\+ +(?P<t>.*)',re.M),'\\\\begin{itemize}\n\\item \g<t>\n\\end{itemize}'),
  26. (re.compile('\\\\end\{itemize\}\s+\\\\begin\{itemize\}'),'\n'),
  27. (re.compile('\n\s+\n'),'\n\n')]
  28. regex_table = re.compile('^\-{4,}\n(?P<t>.*?)\n\-{4,}(:(?P<c>\w+))?\n',re.M|re.S)
  29. regex_anchor = re.compile('\[\[(?P<t>\S+)\]\]')
  30. regex_bibitem = re.compile('\-\s*\[\[(?P<t>\S+)\]\]')
  31. regex_image_width = re.compile('\[\[(?P<t>[^\]]*?) +(?P<k>\S+) +(?P<p>left|right|center) +(?P<w>\d+px)\]\]')
  32. regex_image = re.compile('\[\[(?P<t>[^\]]*?) +(?P<k>\S+) +(?P<p>left|right|center)\]\]')
  33. #regex_video = re.compile('\[\[(?P<t>[^\]]*?) +(?P<k>\S+) +video\]\]')
  34. #regex_audio = re.compile('\[\[(?P<t>[^\]]*?) +(?P<k>\S+) +audio\]\]')
  35. regex_link = re.compile('\[\[(?P<t>[^\]]*?) +(?P<k>\S+)\]\]')
  36. regex_auto = re.compile('(?<!["\w])(?P<k>\w+://[\w\.\-\?&%\:]+)',re.M)
  37. regex_commas = re.compile('[ ]+(?P<t>[,;\.])')
  38. regex_noindent = re.compile('\n\n(?P<t>[a-z])')
  39. #regex_quote_left = re.compile('"(?=\w)')
  40. #regex_quote_right = re.compile('(?=\w\.)"')
  41. def latex_escape(text,pound=True):
  42. text=text.replace('\\','{\\textbackslash}')
  43. for c in '^_&$%{}': text=text.replace(c,'\\'+c)
  44. text=text.replace('\\{\\textbackslash\\}','{\\textbackslash}')
  45. if pound: text=text.replace('#','\\#')
  46. return text
  47. def render(text,
  48. extra={},
  49. allowed={},
  50. sep='p',
  51. image_mapper=lambda x:x,
  52. chapters=False):
  53. #############################################################
  54. # replace all blocks marked with ``...``:class with META
  55. # store them into segments they will be treated as code
  56. #############################################################
  57. text = str(text or '')
  58. segments, i = [], 0
  59. text = regex_dd.sub('``\g<latex>``:latex ',text)
  60. text = regex_newlines.sub('\n',text)
  61. while True:
  62. item = regex_code.search(text,i)
  63. if not item: break
  64. if item.group()==META:
  65. segments.append((None,None))
  66. text = text[:item.start()]+META+text[item.end():]
  67. else:
  68. c = item.group('c') or ''
  69. if 'code' in allowed and not c in allowed['code']: c = ''
  70. code = item.group('t').replace('!`!','`')
  71. segments.append((code,c))
  72. text = text[:item.start()]+META+text[item.end():]
  73. i=item.start()+3
  74. #############################################################
  75. # do h1,h2,h3,h4,h5,h6,b,i,ol,ul and normalize spaces
  76. #############################################################
  77. title = regex_title.search(text)
  78. if not title: title='Title'
  79. else: title=title.group('t')
  80. text = latex_escape(text,pound=False)
  81. texts = text.split('## References',1)
  82. text = regex_anchor.sub('\\label{\g<t>}', texts[0])
  83. if len(texts)==2:
  84. text += '\n\\begin{thebibliography}{999}\n'
  85. text += regex_bibitem.sub('\n\\\\bibitem{\g<t>}', texts[1])
  86. text += '\n\\end{thebibliography}\n'
  87. text = '\n'.join(t.strip() for t in text.split('\n'))
  88. for regex, sub in regex_maps:
  89. text = regex.sub(sub,text)
  90. text=text.replace('#','\\#')
  91. text=text.replace('`',"'")
  92. #############################################################
  93. # process tables and blockquotes
  94. #############################################################
  95. while True:
  96. item = regex_table.search(text)
  97. if not item: break
  98. c = item.group('c') or ''
  99. if 'table' in allowed and not c in allowed['table']: c = ''
  100. content = item.group('t')
  101. if ' | ' in content:
  102. rows = content.replace('\n','\\\\\n').replace(' | ',' & ')
  103. row0,row2 = rows.split('\\\\\n',1)
  104. cols=row0.count(' & ')+1
  105. cal='{'+''.join('l' for j in range(cols))+'}'
  106. tabular = '\\begin{center}\n{\\begin{tabular}'+cal+'\\hline\n' + row0+'\\\\ \\hline\n'+row2 + ' \\\\ \\hline\n\\end{tabular}}\n\\end{center}'
  107. if row2.count('\n')>20: tabular='\\newpage\n'+tabular
  108. text = text[:item.start()] + tabular + text[item.end():]
  109. else:
  110. text = text[:item.start()] + '\\begin{quote}' + content + '\\end{quote}' + text[item.end():]
  111. #############################################################
  112. # deal with images, videos, audios and links
  113. #############################################################
  114. def sub(x):
  115. f=image_mapper(x.group('k'))
  116. if not f: return None
  117. return '\n\\begin{center}\\includegraphics[width=8cm]{%s}\\end{center}\n' % (f)
  118. text = regex_image_width.sub(sub,text)
  119. text = regex_image.sub(sub,text)
  120. text = regex_link.sub('{\\\\footnotesize\\href{\g<k>}{\g<t>}}', text)
  121. text = regex_commas.sub('\g<t>',text)
  122. text = regex_noindent.sub('\n\\\\noindent \g<t>',text)
  123. ### fix paths in images
  124. regex=re.compile('\\\\_\w*\.(eps|png|jpg|gif)')
  125. while True:
  126. match=regex.search(text)
  127. if not match: break
  128. text=text[:match.start()]+text[match.start()+1:]
  129. #text = regex_quote_left.sub('``',text)
  130. #text = regex_quote_right.sub("''",text)
  131. if chapters:
  132. text=text.replace(r'\section*{',r'\chapter*{')
  133. text=text.replace(r'\section{',r'\chapter{')
  134. text=text.replace(r'subsection{',r'section{')
  135. #############################################################
  136. # process all code text
  137. #############################################################
  138. parts = text.split(META)
  139. text = parts[0]
  140. authors = []
  141. for i,(code,b) in enumerate(segments):
  142. if code==None:
  143. html = META
  144. else:
  145. if b=='hidden':
  146. html=''
  147. elif b=='author':
  148. author = latex_escape(code.strip())
  149. authors.append(author)
  150. html=''
  151. elif b=='inxx':
  152. html='\inxx{%s}' % latex_escape(code)
  153. elif b=='cite':
  154. html='~\cite{%s}' % latex_escape(code.strip())
  155. elif b=='ref':
  156. html='~\ref{%s}' % latex_escape(code.strip())
  157. elif b=='latex':
  158. if '\n' in code:
  159. html='\n\\begin{equation}\n%s\n\\end{equation}\n' % code.strip()
  160. else:
  161. html='$%s$' % code.strip()
  162. elif b=='latex_eqnarray':
  163. code=code.strip()
  164. code='\\\\'.join(x.replace('=','&=&',1) for x in code.split('\\\\'))
  165. html='\n\\begin{eqnarray}\n%s\n\\end{eqnarray}\n' % code
  166. elif b.startswith('latex_'):
  167. key=b[6:]
  168. html='\\begin{%s}%s\\end{%s}' % (key,code,key)
  169. elif b in extra:
  170. if code[:1]=='\n': code=code[1:]
  171. if code[-1:]=='\n': code=code[:-1]
  172. html = extra[b](code)
  173. elif code[:1]=='\n' or code[:-1]=='\n':
  174. if code[:1]=='\n': code=code[1:]
  175. if code[-1:]=='\n': code=code[:-1]
  176. if code.startswith('<') or code.startswith('{{') or code.startswith('http'):
  177. html = '\\begin{lstlisting}[keywords={}]\n%s\n\\end{lstlisting}' % code
  178. else:
  179. html = '\\begin{lstlisting}\n%s\n\\end{lstlisting}' % code
  180. else:
  181. if code[:1]=='\n': code=code[1:]
  182. if code[-1:]=='\n': code=code[:-1]
  183. html = '{\\ft %s}' % latex_escape(code)
  184. try:
  185. text = text+html+parts[i+1]
  186. except:
  187. text = text + '... WIKI PROCESSING ERROR ...'
  188. break
  189. text = text.replace(' ~\\cite','~\\cite')
  190. return text, title, authors
  191. WRAPPER = """
  192. \\documentclass[12pt]{article}
  193. \\usepackage{hyperref}
  194. \\usepackage{listings}
  195. \\usepackage{upquote}
  196. \\usepackage{color}
  197. \\usepackage{graphicx}
  198. \\usepackage{grffile}
  199. \\usepackage[utf8x]{inputenc}
  200. \\definecolor{lg}{rgb}{0.9,0.9,0.9}
  201. \\definecolor{dg}{rgb}{0.3,0.3,0.3}
  202. \\def\\ft{\\small\\tt}
  203. \\lstset{
  204. basicstyle=\\footnotesize,
  205. breaklines=true, basicstyle=\\ttfamily\\color{black}\\footnotesize,
  206. keywordstyle=\\bf\\ttfamily,
  207. commentstyle=\\it\\ttfamily,
  208. stringstyle=\\color{dg}\\it\\ttfamily,
  209. numbers=left, numberstyle=\\color{dg}\\tiny, stepnumber=1, numbersep=5pt,
  210. backgroundcolor=\\color{lg}, tabsize=4, showspaces=false,
  211. showstringspaces=false
  212. }
  213. \\title{%(title)s}
  214. \\author{%(author)s}
  215. \\begin{document}
  216. \\maketitle
  217. \\tableofcontents
  218. \\newpage
  219. %(body)s
  220. \\end{document}
  221. """
  222. def markmin2latex(data, image_mapper=lambda x:x, extra={},
  223. wrapper=WRAPPER):
  224. body, title, authors = render(data, extra=extra, image_mapper=image_mapper)
  225. author = '\n\\and\n'.join(a.replace('\n','\\\\\n\\footnotesize ') for a in authors)
  226. return wrapper % dict(title=title, author=author, body=body)
  227. if __name__ == '__main__':
  228. parser = OptionParser()
  229. parser.add_option("-i", "--info", dest="info",
  230. help="markmin help")
  231. parser.add_option("-t", "--test", dest="test", action="store_true",
  232. default=False)
  233. parser.add_option("-n", "--no_wrapper", dest="no_wrapper",
  234. action="store_true",default=False)
  235. parser.add_option("-c", "--chapters", dest="chapters",action="store_true",
  236. default=False,help="switch section for chapter")
  237. parser.add_option("-w", "--wrapper", dest="wrapper", default=False,
  238. help="latex file containing header and footer")
  239. (options, args) = parser.parse_args()
  240. if options.info:
  241. import markmin2html
  242. markmin2latex(markmin2html.__doc__)
  243. elif options.test:
  244. doctest.testmod()
  245. else:
  246. if options.wrapper:
  247. fwrapper = open(options.wrapper,'rb')
  248. try:
  249. wrapper = fwrapper.read()
  250. finally:
  251. fwrapper.close()
  252. elif options.no_wrapper:
  253. wrapper = '%(body)s'
  254. else:
  255. wrapper = WRAPPER
  256. for f in args:
  257. fargs = open(f,'r')
  258. content_data = []
  259. try:
  260. content_data.append(fargs.read())
  261. finally:
  262. fargs.close()
  263. content = '\n'.join(content_data)
  264. output= markmin2latex(content,
  265. wrapper=wrapper,
  266. chapters=options.chapters)
  267. print output