PageRenderTime 46ms CodeModel.GetById 20ms RepoModel.GetById 0ms app.codeStats 0ms

/src/ikazuchi/core/translator/__init__.py

https://bitbucket.org/t2y/ikazuchi/
Python | 89 lines | 84 code | 4 blank | 1 comment | 0 complexity | 20f4b702aa1e67c3ed779b7d453df02d MD5 | raw file
Possible License(s): Apache-2.0
  1. # -*- coding: utf-8 -*-
  2. import abc
  3. import re
  4. import utils
  5. from collections import defaultdict
  6. from google import GoogleTranslator
  7. from microsoft import MicrosoftTranslator
  8. from yahoo import YahooTranslator
  9. from ikazuchi.conf import show_how_to_get_apikey
  10. __all__ = [
  11. "TRANSLATE_API",
  12. ]
  13. class BaseTranslator(object):
  14. """Base class for Translator"""
  15. __metaclass__ = abc.ABCMeta
  16. api = lambda klass: klass.__class__.__name__.replace("Translating", "")
  17. # FIXME: span tag only, cannot match for minimal when html tag is nested
  18. notrans_tag = re.compile(
  19. r"<span class=[\"']?notranslate[\"']?>(.*?)</span>", re.IGNORECASE)
  20. whitespaces = re.compile(r"\s+", re.UNICODE)
  21. zerowidth = re.compile(u"\u200b|\u200c|\u200d|\ufeff", re.UNICODE)
  22. colon = re.compile(u"\s+(:+)$", re.UNICODE)
  23. @abc.abstractmethod
  24. def __init__(self, lang_from, lang_to, handler):
  25. """Overridden by MixIn class
  26. must be set "self.handler = handler"
  27. """
  28. @abc.abstractmethod
  29. def translate(self, text): pass
  30. def call_method_with_handler(self):
  31. method = getattr(self, self.handler.method_name)
  32. self.handler._call_method(method)
  33. def parse_html(self, html):
  34. from StringIO import StringIO
  35. from formatter import (AbstractFormatter, DumbWriter)
  36. from htmllib import HTMLParser
  37. _html = re.sub(self.notrans_tag, r" \1 ", html)
  38. buf = StringIO()
  39. p = HTMLParser(AbstractFormatter(DumbWriter(buf)))
  40. p.feed(_html)
  41. _sub = re.sub(self.whitespaces, " ", buf.getvalue())
  42. # FIXME: how can zerowidth be removed more simply?
  43. _sub = re.sub(self.zerowidth, "", _sub)
  44. _sub = re.sub(self.colon, r"\1", _sub)
  45. return _sub
  46. def set_apikey(self, key):
  47. self.apikey = key
  48. def set_apikey_from_conf(self, conf):
  49. _key = conf.get(self.api().lower(), "apikey")
  50. if _key:
  51. self.apikey = _key
  52. else:
  53. show_how_to_get_apikey()
  54. def set_parameter_from_conf(self, conf):
  55. import os
  56. from functools import partial
  57. general = partial(conf.get, "general")
  58. # urllib2.build_opener() skips ProxyHandler for Python 2.6/2.7
  59. # see also: http://bugs.python.org/issue7152#msg94150
  60. # so, the proxy settings set environment variable as workaround
  61. if general("http_proxy"):
  62. os.environ["http_proxy"] = general("http_proxy")
  63. if general("https_proxy"):
  64. os.environ["https_proxy"] = general("https_proxy")
  65. self.set_apikey_from_conf(conf)
  66. # MixIn each implemented Translator
  67. class TranslatingGoogle(GoogleTranslator, BaseTranslator): pass
  68. class TranslatingMicrosoft(MicrosoftTranslator, BaseTranslator): pass
  69. class TranslatingYahoo(YahooTranslator, BaseTranslator): pass # is obsoleted
  70. TRANSLATE_API = defaultdict(lambda: TranslatingMicrosoft,
  71. {
  72. "google": TranslatingGoogle,
  73. "microsoft": TranslatingMicrosoft,
  74. }
  75. )