PageRenderTime 1560ms CodeModel.GetById 30ms RepoModel.GetById 0ms app.codeStats 0ms

/graphingwiki/graphingwiki/plugin/parser/link_collect.py

https://bitbucket.org/clarifiednetworks/graphingwiki/
Python | 331 lines | 324 code | 3 blank | 4 comment | 9 complexity | 2222ddbb42b9d10cc7c9b1f64bcb4d59 MD5 | raw file
  1. # -*- coding: utf-8 -*-
  2. """
  3. Link harvesting "parser"
  4. """
  5. from MoinMoin.wikiutil import resolve_interwiki, join_wiki
  6. from MoinMoin.parser.text_moin_wiki import Parser as WikiParser
  7. from MoinMoin import macro, wikiutil
  8. from string import rsplit
  9. from graphingwiki.util import category_regex
  10. from wiki_form import Parser as listParser
  11. Dependencies = []
  12. class Parser(WikiParser):
  def __init__(self, raw, request, **kw):
      """Set up the link-collection state and initialise the base parser.

      raw, request and any extra keyword arguments are handed on
      unchanged to WikiParser.__init__.
      """
      self.pagename = request.page.page_name
      # Collected items, keyed by definition key ('_notype' holds the
      # items that do not belong to any definition)
      self.definitions = {}
      # Key of the definition being filled, and the one before it
      self.curdef = ''
      self.prevdef = ''
      # Line number on which the latest definition was opened
      self.ddline = 0

      # Cannot use super as the Moin classes are old-style. The base
      # initialiser is called directly rather than through apply(),
      # which is deprecated and does not exist in Python 3.
      WikiParser.__init__(self, raw, request, **kw)

      # Items gathered since the last flush into self.definitions
      self.currentitems = []
      # Non-zero while inside a definition (set by _dl_repl)
      self.in_dd = 0
      self.cat_re = category_regex(request)
  24. def __add_textmeta(self, word, groups):
  25. val = ''
  26. if self.in_dd:
  27. for type, value in self.__nonempty_groups(groups):
  28. val += self.__add_meta(value, groups)
  29. return val
  30. def __add_meta(self, word, groups):
  31. if not word.strip():
  32. return ''
  33. if self.in_dd:
  34. ## FIXME? for now, only accept entries on the same line for
  35. ## meta value to minimise surprise
  36. if not self.ddline == self.lineno:
  37. return ''
  38. self.formatter.textstorage.append(word)
  39. return ''
  def _interwiki_repl(self, word, groups):
      """Handle InterWiki links.

      The link is recorded as an ('interwiki', (name, name)) item where
      name is "<wiki>:<page>" built from the matched groups. Returns u''.
      """
      wiki_name = groups.get('interwiki_wiki')
      page_name = groups.get('interwiki_page')
      link = "%s:%s" % (wiki_name, page_name)
      self.__add_meta(link, groups)

      # Accept and store all interwiki-style links at this time, so
      # that changing the interwiki list does not require rehashing
      # the wikis. This means you need to check the validity of iw
      # links at runtime.
      # NOTE(review): the resolved wiki/page and the joined URL are
      # computed but never used below.
      _tag, target_wiki, target_page, _err = resolve_interwiki(
          self.request, wiki_name, page_name)
      join_wiki(target_wiki, target_page)

      self.currentitems.append(('interwiki', (link, link)))
      self.new_item = False
      return u''

  _interwiki_wiki_repl = _interwiki_repl
  _interwiki_page_repl = _interwiki_repl
  def _word_repl(self, word, groups):
      """Handle WikiNames.

      Records a 'category' item when the page name matches the category
      regex, otherwise a 'wikilink' item of (name with anchor, name
      without anchor). Returns u'' except for banged words when
      cfg.bang_meta is set, where the formatter's nowikiword output is
      returned and nothing is recorded.
      """
      if groups.get('word_bang'):
          if self.cfg.bang_meta:
              return self.formatter.nowikiword("!%s" % word)
          self.formatter.text('!')

      here = self.formatter.page.page_name
      target = wikiutil.AbsPageName(here, groups.get('word_name'))

      if target == here:
          # The word points at the page being parsed
          self.currentitems.append(('wikilink', (target, target)))
          self.__add_meta(target, groups)
          return u''

      # handle anchors: split on the last '#', if there is one
      pieces = target.rsplit("#", 1)
      if len(pieces) == 2:
          target, anchor = pieces
      else:
          anchor = ""

      if self.cat_re.match(target):
          # NOTE(review): the category payload is the bare page name,
          # not a pair like the other item types - confirm that the
          # consumers of currentitems expect this.
          self.currentitems.append(('category', target))
          self.__add_meta(target, groups)
          return u''

      if anchor:
          full_name = "%s#%s" % (target, anchor)
      else:
          full_name = target
      self.currentitems.append(('wikilink', (full_name, target)))
      self.__add_meta(full_name, groups)
      return u''

  _word_bang_repl = _word_repl
  _word_name_repl = _word_repl
  _word_anchor_repl = _word_repl
  93. def _url_repl(self, word, groups):
  94. """Handle literal URLs."""
  95. scheme = groups.get('url_scheme', 'http')
  96. target = groups.get('url_target', '')
  97. self.__add_meta(target, groups)
  98. self.currentitems.append(('url', (target, target)))
  99. return u''
  100. _url_target_repl = _url_repl
  101. _url_scheme_repl = _url_repl
  102. def _macro_repl(self, word, groups):
  103. """Handle macros.
  104. All that really seems to be needed is to pass the raw markup. """
  105. macro_name = groups.get('macro_name')
  106. macro_args = groups.get('macro_args')
  107. macro = groups.get('macro')
  108. if macro_name == 'Include':
  109. # Add includes
  110. page_args = word.split(',')[0]
  111. self.currentitems.append(('include', (page_args, word)))
  112. return self.__add_meta(macro, {})
  113. _macro_name_repl = _macro_repl
  114. _macro_args_repl = _macro_repl
  115. def __nonempty_groups(self, groups):
  116. # Only handle x_off and x_off, not x (groups will have both x
  117. # and x_[on|off] with identical content)
  118. return [(x, y) for x, y in groups.iteritems() if y and
  119. x not in ['strike', 'small', 'big', 'remark']]
  120. def _fix_attach_uri(self, target):
  121. split = target.split(":", 1)
  122. if len(split) != 2:
  123. return target
  124. scheme, att = split
  125. if scheme in ('attachment', 'inline', 'drawing'):
  126. if len(att.split('/')) == 1:
  127. target = "%s:%s/%s" % (scheme, self.pagename, att)
  128. return target
  129. def _link_repl(self, word, groups):
  130. raw = groups.get('link', '')
  131. target = groups.get('link_target', '')
  132. desc = groups.get('link_desc', '')
  133. self.__add_meta(raw, groups)
  134. target = self._fix_attach_uri(target)
  135. # Add extended links, where applicable
  136. if desc and ': ' in desc and not self.in_dd:
  137. key = desc.split(': ')[0]
  138. self.definitions.setdefault(key, list()).append(('wikilink',
  139. (raw, target)))
  140. else:
  141. self.currentitems.append(('wikilink', (raw, target)))
  142. return u''
  143. _link_target_repl = _link_repl
  144. _link_desc_repl = _link_repl
  145. _link_params_repl = _link_repl
  146. def _transclude_repl(self, word, groups):
  147. raw = groups.get('transclude', '')
  148. target = groups.get('transclude_target', '')
  149. self.__add_meta(raw, groups)
  150. target = self._fix_attach_uri(target)
  151. self.currentitems.append(('wikilink', (raw, target)))
  152. return u''
  153. _transclude_target_repl = _transclude_repl
  154. _transclude_desc_repl = _transclude_repl
  155. _transclude_params_repl = _transclude_repl
  156. def _email_repl(self, word, groups):
  157. self.__add_meta(word, groups)
  158. self.currentitems.append(('wikilink', (word, 'mailto:%s' % word)))
  159. return u''
  # Handlers that record every non-empty group value as meta text
  _big_repl = _big_on_repl = _big_off_repl = __add_textmeta
  _emph_ibb_repl = _emph_ibi_repl = _emph_ib_or_bi_repl = __add_textmeta
  _emph_repl = __add_textmeta
  _small_repl = _small_on_repl = _small_off_repl = __add_textmeta
  _smiley_repl = __add_textmeta
  _strike_repl = _strike_on_repl = _strike_off_repl = __add_textmeta

  # Handlers that record the matched word itself as meta text
  _entity_repl = _sgml_entity_repl = __add_meta
  _remark_repl = _remark_on_repl = _remark_off_repl = __add_meta
  _sub_repl = _sub_text_repl = __add_meta
  _sup_repl = _sup_text_repl = __add_meta
  _tt_bt_repl = _tt_bt_text_repl = __add_meta
  _tt_repl = _tt_text_repl = __add_meta
  _u_repl = __add_meta
  188. def _dl_repl(self, match, groups):
  189. """Handle definition lists."""
  190. if self.in_pre:
  191. return u''
  192. # Flush pre-dd links and previous dd:s not undented
  193. if self.currentitems and not self.curdef:
  194. self.definitions.setdefault('_notype',
  195. list()).extend(self.currentitems)
  196. elif self.currentitems:
  197. self.definitions.setdefault(self.curdef,
  198. list()).extend(self.currentitems)
  199. self.currentitems=[]
  200. self.ddline = self.lineno
  201. result = []
  202. self._close_item(result)
  203. self.in_dd = 1
  204. self.formatter.textstorage = list()
  205. definition = match[1:-3].strip(' ')
  206. if definition != "":
  207. self.curdef=definition
  208. else:
  209. self.curdef=self.prevdef
  210. return u''
  211. def _undent(self):
  212. if self.in_dd:
  213. curkey = self.definitions.setdefault(self.curdef, list())
  214. # Only account for non-empty text
  215. if ''.join(self.formatter.textstorage).strip():
  216. # Add the metas, prepare to populate next key
  217. curkey.append(('meta', ''.join(self.formatter.textstorage)))
  218. self.formatter.textstorage = list()
  219. if self.currentitems:
  220. curkey.extend(self.currentitems)
  221. else:
  222. self.definitions.setdefault('_notype',
  223. list()).extend(self.currentitems)
  224. # self.ddline is not reset here, as the last
  225. # items on line may be added after the undent
  226. self.in_dd = 0
  227. self.prevdef = self.curdef
  228. self.curdef = '_notype'
  229. self.currentitems = []
  230. return u''
  231. def _close_item(self, result):
  232. if self.in_dd:
  233. self._undent()
  234. def _parser_repl(self, word, groups):
  235. parser_name = groups.get('parser_name', None)
  236. self.in_pre = 'search_parser'
  237. # If there's a parser on the begin of the parser line, stop
  238. # searching for parsers. If wiki parser, process metas.
  239. if parser_name:
  240. if parser_name == 'wiki':
  241. self.in_pre = False
  242. elif parser_name.strip():
  243. self.in_pre = True
  244. return self.__add_meta(word, groups)
  245. # Catch the wiki parser within the parsed content
  246. def _parser_content(self, line):
  247. if self.in_pre == 'search_parser' and line.strip():
  248. line = line.strip()
  249. if line.startswith("#!"):
  250. parser_name = line[2:].split()
  251. if parser_name and parser_name[0] == 'wiki':
  252. self.in_pre = False
  253. return ''
  254. # If the first line with content is not a parser spec -> no parser
  255. self.in_pre = True
  256. return ''
  # Every parser-related group is handled by _parser_repl
  _parser_unique_repl = _parser_line_repl = _parser_name_repl = _parser_repl
  _parser_args_repl = _parser_nothing_repl = _parser_repl
  262. def _parser_end_repl(self, word, groups):
  263. self.in_pre = False
  264. return self.__add_meta(word, groups)