/distribution/libraries/Babel-1.0dev-py3.2/babel/messages/pofile.py

https://github.com/tictactatic/Superdesk · Python · 487 lines · 456 code · 8 blank · 23 comment · 1 complexity · 486a75113d10bfa385db520731e976ca MD5 · raw file

  1. # -*- coding: utf-8 -*-
  2. #
  3. # Copyright (C) 2007-2011 Edgewall Software
  4. # All rights reserved.
  5. #
  6. # This software is licensed as described in the file COPYING, which
  7. # you should have received as part of this distribution. The terms
  8. # are also available at http://babel.edgewall.org/wiki/License.
  9. #
  10. # This software consists of voluntary contributions made by many
  11. # individuals. For the exact contribution history, see the revision
  12. # history and logs, available at http://babel.edgewall.org/log/.
  13. """Reading and writing of files in the ``gettext`` PO (portable object)
  14. format.
  15. :see: `The Format of PO Files
  16. <http://www.gnu.org/software/gettext/manual/gettext.html#PO-Files>`_
  17. """
  18. from datetime import datetime
  19. import os
  20. import re
  21. from babel.compat import u, text_type
  22. from babel.messages.catalog import Catalog, Message
  23. from babel.util import wraptext
  24. __all__ = ['read_po', 'write_po']
  25. __docformat__ = 'restructuredtext en'
  def unescape(string):
      r"""Reverse `escape` for the given string.

      The surrounding double quotes are removed and the escape sequences
      ``\\``, ``\t``, ``\r``, ``\n`` and ``\"`` are decoded in a single
      left-to-right pass.  Decoding in one pass matters: with successive
      ``replace()`` calls an escaped backslash followed by ``n``, ``t`` or ``r``
      would first be collapsed to a single backslash and then be mistaken for
      a control character escape.

      >>> unescape('"Say:\\n  \\"hello, world!\\"\\n"')
      'Say:\n  "hello, world!"\n'
      >>> unescape('"C:\\\\new"')
      'C:\\new'

      :param string: the escaped string, including its enclosing double quotes
      :return: the unescaped string
      :rtype: `str` or `unicode`
      """
      control_chars = {'n': '\n', 't': '\t', 'r': '\r'}

      def _decode(match):
          char = match.group(1)
          # A backslash or a double quote stands for itself once the escaping
          # backslash has been dropped
          return control_chars.get(char, char)

      # string[1:-1] strips the enclosing double quotes
      return re.sub(r'\\([\\trn"])', _decode, string[1:-1])
  def denormalize(string):
      r"""Reverse the normalization done by the `normalize` function.

      >>> print(denormalize(r'''""
      ... "Say:\n"
      ... "  \"hello, world!\"\n"'''))
      Say:
        "hello, world!"
      <BLANKLINE>

      >>> print(denormalize(r'''""
      ... "Say:\n"
      ... "  \"Lorem ipsum dolor sit "
      ... "amet, consectetur adipisicing"
      ... " elit, \"\n"'''))
      Say:
        "Lorem ipsum dolor sit amet, consectetur adipisicing elit, "
      <BLANKLINE>

      A multi-line value does not have to start with the empty ``""`` literal;
      every line is a quoted literal of its own and is unescaped separately:

      >>> print(denormalize(r'''"Lorem ipsum "
      ... "dolor sit amet"'''))
      Lorem ipsum dolor sit amet

      :param string: the string to denormalize
      :return: the denormalized string
      :rtype: `unicode` or `str`
      """
      if '\n' not in string:
          # A single quoted literal
          return unescape(string)
      escaped_lines = string.splitlines()
      if string.startswith('""'):
          # The leading empty literal only introduces the continuation lines
          escaped_lines = escaped_lines[1:]
      return ''.join([unescape(line) for line in escaped_lines])
  def read_po(fileobj, locale=None, domain=None, ignore_obsolete=False):
      """Read messages from a ``gettext`` PO (portable object) file from the given
      file-like object and return a `Catalog`.

      >>> from babel.compat import StringIO
      >>> buf = StringIO('''
      ... #: main.py:1
      ... #, fuzzy, python-format
      ... msgid "foo %(name)s"
      ... msgstr ""
      ...
      ... # A user comment
      ... #. An auto comment
      ... #: main.py:3
      ... msgid "bar"
      ... msgid_plural "baz"
      ... msgstr[0] ""
      ... msgstr[1] ""
      ... ''')
      >>> catalog = read_po(buf)
      >>> catalog.revision_date = datetime(2007, 4, 1)

      >>> for message in catalog:
      ...     if message.id:
      ...         print('id(s): %s' % (isinstance(message.id, tuple) and u(',').join(message.id) or message.id))
      ...         print('strings(s): %s' % (isinstance(message.string, tuple) and u(',').join(message.string) or message.string))
      ...         for loc in message.locations:
      ...             print('file: %s line: %d' % loc)
      ...         print('flags: %s' % ' '.join(sorted(message.flags)))
      ...         print('user comments: %s' % ','.join(message.user_comments))
      ...         print('auto comments: %s' % ','.join(message.auto_comments))
      id(s): foo %(name)s
      strings(s):
      file: main.py line: 1
      flags: fuzzy python-format
      user comments:
      auto comments:
      id(s): bar,baz
      strings(s): ,
      file: main.py line: 3
      flags:
      user comments: A user comment
      auto comments: An auto comment

      :param fileobj: the file-like object to read the PO file from
      :param locale: the locale identifier or `Locale` object, or `None`
                     if the catalog is not bound to a locale (which basically
                     means it's a template)
      :param domain: the message domain
      :param ignore_obsolete: whether to ignore obsolete messages in the input
      :return: a catalog object representing the parsed PO file
      :rtype: `Catalog`
      """
      catalog = Catalog(locale=locale, domain=domain)

      # Parser state shared with the nested helpers.  Scalars live in
      # one-element lists so that the helpers can update them in place.
      counter = [0]           # number of entries added so far
      offset = [0]            # 0-based line index of the current ``msgid`` line
      messages = []           # raw msgid (and msgid_plural) of the current entry
      translations = []       # [plural index, raw msgstr] pairs
      locations = []          # (filename, lineno) pairs from ``#:`` comments
      flags = []              # flags from ``#,`` comments
      user_comments = []
      auto_comments = []
      obsolete = [False]      # whether the current entry is made of ``#~`` lines
      context = []            # raw msgctxt lines of the current entry
      in_msgid = [False]
      in_msgstr = [False]
      in_msgctxt = [False]

      def _add_message():
          """Build a `Message` from the collected state, store it, and reset."""
          translations.sort()
          if len(messages) > 1:
              msgid = tuple([denormalize(m) for m in messages])
          else:
              msgid = denormalize(messages[0])
          if isinstance(msgid, (list, tuple)):
              string = []
              for idx in range(catalog.num_plurals):
                  try:
                      string.append(translations[idx])
                  except IndexError:
                      # Fewer plural forms given than the catalog expects
                      string.append((idx, ''))
              string = tuple([denormalize(t[1]) for t in string])
          else:
              string = denormalize(translations[0][1])
          if context:
              msgctxt = denormalize('\n'.join(context))
          else:
              msgctxt = None
          message = Message(msgid, string, list(locations), set(flags),
                            auto_comments, user_comments, lineno=offset[0] + 1,
                            context=msgctxt)
          if obsolete[0]:
              if not ignore_obsolete:
                  catalog.obsolete[msgid] = message
          else:
              catalog[msgid] = message
          del messages[:]
          del translations[:]
          del context[:]
          del locations[:]
          del flags[:]
          del auto_comments[:]
          del user_comments[:]
          obsolete[0] = False
          counter[0] += 1

      def _process_message_line(lineno, line):
          """Handle a keyword line or a quoted continuation line."""
          if line.startswith('msgid_plural'):
              in_msgid[0] = True
              in_msgctxt[0] = False
              messages.append(line[12:].lstrip())
          elif line.startswith('msgid'):
              # Flush the previous entry *before* recording the position of
              # the new one, otherwise the previous entry would be given the
              # line number of this ``msgid`` line
              if messages:
                  _add_message()
              in_msgid[0] = True
              in_msgctxt[0] = False
              offset[0] = lineno
              messages.append(line[5:].lstrip())
          elif line.startswith('msgstr'):
              in_msgid[0] = in_msgctxt[0] = False
              in_msgstr[0] = True
              msg = line[6:].lstrip()
              if msg.startswith('['):
                  idx, msg = msg[1:].split(']', 1)
                  translations.append([int(idx), msg.lstrip()])
              else:
                  translations.append([0, msg])
          elif line.startswith('msgctxt'):
              if messages:
                  _add_message()
              in_msgid[0] = in_msgstr[0] = False
              # Remember that continuation lines now belong to the context
              in_msgctxt[0] = True
              context.append(line[7:].lstrip())
          elif line.startswith('"'):
              if in_msgid[0]:
                  messages[-1] += u('\n') + line.rstrip()
              elif in_msgstr[0]:
                  translations[-1][1] += u('\n') + line.rstrip()
              elif in_msgctxt[0]:
                  context.append(line.rstrip())

      for lineno, line in enumerate(fileobj.readlines()):
          line = line.strip()
          if not isinstance(line, text_type):
              line = line.decode(catalog.charset)

          if not line.startswith('#'):
              _process_message_line(lineno, line)
              continue

          if line[1:].startswith('~'):
              if not obsolete[0]:
                  # First ``#~`` line after regular content: finish whatever
                  # regular entry is pending
                  in_msgid[0] = in_msgstr[0] = in_msgctxt[0] = False
                  if messages and translations:
                      _add_message()
              # Within an obsolete entry the state is kept between lines so
              # that continuation lines and further ``msgstr[n]`` lines are
              # attached to it instead of being dropped
              _process_message_line(lineno, line[2:].lstrip())
              # Set the marker afterwards: the call above may have flushed
              # the previous entry, which resets the marker
              obsolete[0] = True
              continue

          # Any other comment ends the entry that is currently being read
          in_msgid[0] = in_msgstr[0] = in_msgctxt[0] = False
          if messages and translations:
              _add_message()

          if line[1:].startswith(':'):
              for location in line[2:].lstrip().split():
                  pos = location.rfind(':')
                  if pos >= 0:
                      try:
                          location_lineno = int(location[pos + 1:])
                      except ValueError:
                          continue
                      locations.append((location[:pos], location_lineno))
          elif line[1:].startswith(','):
              for flag in line[2:].lstrip().split(','):
                  flags.append(flag.strip())
          elif line[1:].startswith('.'):
              # These are called auto-comments
              comment = line[2:].strip()
              if comment:  # Just check that we're not adding empty comments
                  auto_comments.append(comment)
          else:
              # These are called user comments
              user_comments.append(line[1:].strip())

      if messages:
          _add_message()

      # No actual messages found, but there was some info in comments, from which
      # we'll construct an empty header message
      elif not counter[0] and (flags or user_comments or auto_comments):
          messages.append(u(''))
          translations.append([0, u('')])
          _add_message()

      return catalog
  # Pattern whose matches mark the places where `normalize` may break a line.
  # The alternatives are wrapped in one capturing group, so ``split()`` keeps
  # the separators in its result.
  WORD_SEP = re.compile('(%s)' % '|'.join([
      r'\s+',                                  # any whitespace
      r'[^\s\w]*\w+[a-zA-Z]-(?=\w+[a-zA-Z])',  # hyphenated words
      r'(?<=[\w\!\"\'\&\.\,\?])-{2,}(?=\w)',   # em-dash
  ]))
  def escape(string):
      r"""Quote the given string for use as a double-quoted literal in a
      ``PO`` file.

      Backslashes, tabs, carriage returns, newlines and double quotes are
      replaced by their backslash escapes and the result is wrapped in double
      quotes.

      >>> escape('Say:\n  "hello, world!"\n')
      '"Say:\\n  \\"hello, world!\\"\\n"'

      :param string: the string to escape
      :return: the escaped string
      :rtype: `str` or `unicode`
      """
      # The backslash must come first so that the backslashes introduced by
      # the later replacements are not doubled
      substitutions = (
          ('\\', '\\\\'),
          ('\t', '\\t'),
          ('\r', '\\r'),
          ('\n', '\\n'),
          ('\"', '\\"'),
      )
      escaped = string
      for raw, replacement in substitutions:
          escaped = escaped.replace(raw, replacement)
      return '"%s"' % escaped
  def normalize(string, prefix='', width=76):
      r"""Convert a string into the quoted form used in ``.po`` files.

      A value that fits on a single line is returned as one escaped literal.
      Otherwise the result starts with an empty ``""`` literal followed by one
      escaped literal per line, each preceded by `prefix`.

      >>> print(normalize('''Say:
      ...   "hello, world!"
      ... ''', width=None))
      ""
      "Say:\n"
      "  \"hello, world!\"\n"

      >>> print(normalize('''Say:
      ...   "Lorem ipsum dolor sit amet, consectetur adipisicing elit, "
      ... ''', width=32))
      ""
      "Say:\n"
      "  \"Lorem ipsum dolor sit "
      "amet, consectetur adipisicing"
      " elit, \"\n"

      :param string: the string to normalize
      :param prefix: a string that should be prepended to every line
      :param width: the maximum line width; use `None`, 0, or a negative number
                    to completely disable line wrapping
      :return: the normalized string
      :rtype: `unicode`
      """
      if not (width and width > 0):
          # Wrapping disabled: only the line breaks already present count
          pieces = string.splitlines(True)
      else:
          indent = len(prefix)
          pieces = []
          for raw_line in string.splitlines(True):
              if len(escape(raw_line)) + indent <= width:
                  pieces.append(raw_line)
                  continue
              # Too long: refill the line chunk by chunk.  The chunk list is
              # reversed so that the next chunk can be popped off the end.
              pending = WORD_SEP.split(raw_line)
              pending.reverse()
              while pending:
                  current = []
                  used = 2  # the two enclosing quotes
                  while pending:
                      cost = len(escape(pending[-1])) - 2 + indent
                      if used + cost >= width:
                          if not current:
                              # handle long chunks by putting them on a
                              # separate line
                              current.append(pending.pop())
                          break
                      current.append(pending.pop())
                      used += cost
                  pieces.append(u('').join(current))

      if len(pieces) <= 1:
          return escape(string)

      # Remove empty trailing line
      if not pieces[-1]:
          del pieces[-1]
          pieces[-1] += '\n'
      quoted = [prefix + escape(piece) for piece in pieces]
      return u('""\n') + u('\n').join(quoted)
  def write_po(fileobj, catalog, width=76, no_location=False, omit_header=False,
               sort_output=False, sort_by_file=False, ignore_obsolete=False,
               include_previous=False):
      r"""Write a ``gettext`` PO (portable object) template file for a given
      message catalog to the provided file-like object.

      >>> catalog = Catalog()
      >>> catalog.add(u('foo %(name)s'), locations=[('main.py', 1)],
      ...             flags=('fuzzy',))
      <Message...>
      >>> catalog.add((u('bar'), u('baz')), locations=[('main.py', 3)])
      <Message...>
      >>> from babel.compat import BytesIO
      >>> buf = BytesIO()
      >>> write_po(buf, catalog, omit_header=True)
      >>> print(buf.getvalue().decode('latin-1'))
      #: main.py:1
      #, fuzzy, python-format
      msgid "foo %(name)s"
      msgstr ""
      <BLANKLINE>
      #: main.py:3
      msgid "bar"
      msgid_plural "baz"
      msgstr[0] ""
      msgstr[1] ""
      <BLANKLINE>
      <BLANKLINE>

      :param fileobj: the file-like object to write to
      :param catalog: the `Catalog` instance
      :param width: the maximum line width for the generated output; use `None`,
                    0, or a negative number to completely disable line wrapping
      :param no_location: do not emit a location comment for every message
      :param omit_header: do not include the ``msgid ""`` entry at the top of the
                          output
      :param sort_output: whether to sort the messages in the output by msgid
      :param sort_by_file: whether to sort the messages in the output by their
                           locations
      :param ignore_obsolete: whether to ignore obsolete messages and not include
                              them in the output; by default they are included as
                              comments
      :param include_previous: include the old msgid as a comment when
                               updating the catalog
      """
      def _normalize(key, prefix=''):
          return normalize(key, prefix=prefix, width=width)

      def _write(text):
          # Text is encoded with the catalog charset before it is written
          if isinstance(text, text_type):
              text = text.encode(catalog.charset, 'backslashreplace')
          fileobj.write(text)

      def _write_comment(comment, prefix=''):
          # xgettext always wraps comments even if --no-wrap is passed;
          # provide the same behaviour
          if width and width > 0:
              _width = width
          else:
              _width = 76
          for line in wraptext(comment, _width):
              _write('#%s %s\n' % (prefix, line.strip()))

      def _write_message(message, prefix=''):
          if isinstance(message.id, (list, tuple)):
              if message.context:
                  _write('%smsgctxt %s\n' % (prefix,
                                             _normalize(message.context, prefix)))
              _write('%smsgid %s\n' % (prefix, _normalize(message.id[0], prefix)))
              _write('%smsgid_plural %s\n' % (
                  prefix, _normalize(message.id[1], prefix)
              ))

              for idx in range(catalog.num_plurals):
                  try:
                      string = message.string[idx]
                  except IndexError:
                      # Missing plural forms are written as empty strings
                      string = ''
                  _write('%smsgstr[%d] %s\n' % (
                      prefix, idx, _normalize(string, prefix)
                  ))
          else:
              if message.context:
                  _write('%smsgctxt %s\n' % (prefix,
                                             _normalize(message.context, prefix)))
              _write('%smsgid %s\n' % (prefix, _normalize(message.id, prefix)))
              _write('%smsgstr %s\n' % (
                  prefix, _normalize(message.string or '', prefix)
              ))

      messages = list(catalog)
      if sort_output:
          messages.sort()
      elif sort_by_file:
          # A key function works on both Python 2 and Python 3; the latter has
          # neither ``cmp()`` nor comparison-function sorting
          messages.sort(key=lambda message: message.locations)

      for message in messages:
          if not message.id:  # This is the header "message"
              if omit_header:
                  continue
              comment_header = catalog.header_comment
              if width and width > 0:
                  lines = []
                  for line in comment_header.splitlines():
                      lines += wraptext(line, width=width,
                                        subsequent_indent='# ')
                  comment_header = u('\n').join(lines)
              # Terminate the header comment whether or not it was wrapped
              _write(comment_header + u('\n'))

          for comment in message.user_comments:
              _write_comment(comment)
          for comment in message.auto_comments:
              _write_comment(comment, prefix='.')

          if not no_location:
              locs = u(' ').join([u('%s:%d') % (filename.replace(os.sep, '/'), lineno)
                                  for filename, lineno in message.locations])
              _write_comment(locs, prefix=':')
          if message.flags:
              # The leading empty item yields the "#, flag, flag" form
              _write('#%s\n' % ', '.join([''] + list(message.flags)))

          if message.previous_id and include_previous:
              _write_comment('msgid %s' % _normalize(message.previous_id[0]),
                             prefix='|')
              if len(message.previous_id) > 1:
                  _write_comment('msgid_plural %s' % _normalize(
                      message.previous_id[1]
                  ), prefix='|')

          _write_message(message)
          _write('\n')

      if not ignore_obsolete:
          for message in catalog.obsolete.values():
              for comment in message.user_comments:
                  _write_comment(comment)
              _write_message(message, prefix='#~ ')
              _write('\n')