/py/lib/pywikipedia/titletranslate.py

https://github.com/malectro/Project-OPEN · Python · 162 lines · 131 code · 7 blank · 24 comment · 44 complexity · 50111c94884f8e4858c3f58c573c1275 MD5 · raw file

  1. # -*- coding: utf-8 -*-
  2. #
  3. # (C) Rob W.W. Hooft, 2003
  4. # (C) Yuri Astrakhan, 2005
  5. # (C) Pywikipedia bot team, 2003-2010
  6. #
  7. # Distributed under the terms of the MIT license.
  8. #
  9. __version__ = '$Id: titletranslate.py 9042 2011-03-13 10:14:47Z xqt $'
  10. #
  11. import re
  12. import wikipedia as pywikibot
  13. import date
  14. def _join_to_(result, join):
  15. for x in join:
  16. if x not in result:
  17. result.append(x)
  18. def translate(page, hints = None, auto = True, removebrackets = False, site = None, family = None):
  19. """
  20. Please comment your source code! --Daniel
  21. Does some magic stuff. Returns a list of pages.
  22. Goes through all entries in 'hints'. Returns a list of pages.
  23. Entries for single page titles list those pages. Page titles for entries
  24. such as "all:" or "xyz:" or "20:" are first built from the page title of
  25. 'page' and then listed. When 'removebrackets' is True, a trailing pair of
  26. brackets and the text between them is removed from the page title.
  27. If 'auto' is true, known year and date page titles are autotranslated
  28. to all known target languages and inserted into the list.
  29. """
  30. result = []
  31. if site is None and page:
  32. site = page.site()
  33. if family is None and site:
  34. family = site.family
  35. if site:
  36. sitelang = site.language()
  37. if hints:
  38. for h in hints:
  39. if ':' not in h:
  40. # argument given as -hint:xy where xy is a language code
  41. codes = h
  42. newname = ''
  43. else:
  44. codes, newname = h.split(':', 1)
  45. if newname == '':
  46. # if given as -hint:xy or -hint:xy:, assume that there should
  47. # be a page in language xy with the same title as the page
  48. # we're currently working on ...
  49. if page is None:
  50. continue
  51. ns = page.namespace()
  52. if ns:
  53. newname = u'%s:%s' % (family.namespace('_default', ns),
  54. page.titleWithoutNamespace())
  55. else:
  56. # article in the main namespace
  57. newname = page.title()
  58. # ... unless we do want brackets
  59. if removebrackets:
  60. newname = re.sub(re.compile(ur"\W*?\(.*?\)\W*?", re.UNICODE), u" ", newname)
  61. codesplit = codes.split(',')
  62. codes = []
  63. for code in codesplit:
  64. try:
  65. number = int(code)
  66. _join_to_(codes, family.languages_by_size[:number] )
  67. except ValueError:
  68. if code == 'all':
  69. _join_to_(codes, family.languages_by_size )
  70. elif code in family.language_groups:
  71. _join_to_(codes, family.language_groups[code] )
  72. elif code:
  73. _join_to_(codes, [ code ] )
  74. for newcode in codes:
  75. x = None
  76. if newcode in family.langs.keys():
  77. if page is None or \
  78. (newcode != sitelang and
  79. pywikibot.getSite().family.name
  80. not in family.interwiki_forwarded_from):
  81. x = pywikibot.Page(pywikibot.getSite(fam=family, code=newcode), newname)
  82. elif newcode in family.interwiki_forwarded_from:
  83. x = pywikibot.Page(pywikibot.getSite(fam=newcode, code=newcode), newname)
  84. else:
  85. if pywikibot.verbose:
  86. pywikibot.output(u"Ignoring the unknown language code %s" % newcode)
  87. if x:
  88. _join_to_(result, [ x ] )
  89. # Autotranslate dates into all other languages, the rest will come from
  90. # existing interwiki links.
  91. if auto and page:
  92. # search inside all dictionaries for this link
  93. dictName, value = date.getAutoFormat(sitelang, page.title())
  94. if dictName:
  95. if not (dictName == 'yearsBC' and
  96. sitelang in date.maxyearBC and
  97. value > date.maxyearBC[sitelang]) or \
  98. (dictName == 'yearsAD' and
  99. sitelang in date.maxyearAD and
  100. value > date.maxyearAD[sitelang]):
  101. pywikibot.output(
  102. u'TitleTranslate: %s was recognized as %s with value %d'
  103. % (page.title(), dictName, value))
  104. for entryLang, entry in date.formats[dictName].iteritems():
  105. if entryLang != sitelang:
  106. if dictName == 'yearsBC' and \
  107. entryLang in date.maxyearBC and \
  108. value > date.maxyearBC[entryLang]:
  109. pass
  110. elif dictName == 'yearsAD' and \
  111. entryLang in date.maxyearAD and \
  112. value > date.maxyearAD[entryLang]:
  113. pass
  114. else:
  115. newname = entry(value)
  116. x = pywikibot.Page(
  117. pywikibot.getSite(code=entryLang,
  118. fam=family), newname)
  119. _join_to_(result, [ x ] )
  120. return result
# Link templates that getPoisonedLinks() fills with the recognized date value
# (via '%') and reports as known-bad links whenever the recognized date
# dictionary is listed in date.bcFormats.
bcDateErrors = [u'[[ko:%d년]]']
  122. def appendFormatedDates( result, dictName, value ):
  123. for code, func in date.formats[dictName].iteritems():
  124. result.append( u'[[%s:%s]]' % (code,func(value)) )
  125. def getPoisonedLinks(pl):
  126. """Returns a list of known corrupted links that should be removed if seen
  127. """
  128. result = []
  129. pywikibot.output(u'getting poisoned links for %s' % pl.title())
  130. dictName, value = date.getAutoFormat(pl.site().language(), pl.title())
  131. if dictName is not None:
  132. pywikibot.output( u'date found in %s' % dictName )
  133. # errors in year BC
  134. if dictName in date.bcFormats:
  135. for fmt in bcDateErrors:
  136. result.append( fmt % value )
  137. # i guess this is like friday the 13th for the years
  138. if value == 398 and dictName == 'yearsBC':
  139. appendFormatedDates(result, dictName, 399)
  140. if dictName == 'yearsBC':
  141. appendFormatedDates(result, 'decadesBC', value)
  142. appendFormatedDates(result, 'yearsAD', value)
  143. if dictName == 'yearsAD':
  144. appendFormatedDates(result, 'decadesAD', value)
  145. appendFormatedDates(result, 'yearsBC', value)
  146. if dictName == 'centuriesBC':
  147. appendFormatedDates(result, 'decadesBC', value * 100 + 1)
  148. if dictName == 'centuriesAD':
  149. appendFormatedDates(result, 'decadesAD', value * 100 + 1)
  150. return result