/ttkit/aresource.py
Python | 326 lines | 264 code | 15 blank | 47 comment | 20 complexity | 3b148563a47b6e87e60d61d7c50b9475 MD5 | raw file
- #!/usr/bin/env python
- # -*- coding: utf-8 -*-
- #
- # Copyright 2012 Michal Čihař
- #
- # This file is part of the Translate Toolkit.
- #
- # This program is free software; you can redistribute it and/or modify
- # it under the terms of the GNU General Public License as published by
- # the Free Software Foundation; either version 2 of the License, or
- # (at your option) any later version.
- #
- # This program is distributed in the hope that it will be useful,
- # but WITHOUT ANY WARRANTY; without even the implied warranty of
- # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
- # GNU General Public License for more details.
- #
- # You should have received a copy of the GNU General Public License
- # along with this program; if not, see <http://www.gnu.org/licenses/>.
- """module for handling Android resource files"""
- from lxml import etree
- import re
- from translate.storage import lisa
- from translate.storage import base
- from translate.lang import data
# Sentinel appended to the character list in AndroidResourceUnit.unescape()
# so the loop gets one extra iteration at end-of-input.
EOF = None
WHITESPACE = ' \n\t' # Whitespace that we collapse
# Matches any two consecutive characters from WHITESPACE; escape() uses it to
# decide whether a string must be wrapped in double quotes.
MULTIWHITESPACE = re.compile('[ \n\t]{2}')
class AndroidResourceUnit(base.TranslationUnit):
    """A single term in the Android resource file.

    The unit wraps one ``<string>`` XML element. The element's ``name``
    attribute serves as both the unit id and its context, the element
    content is the target, and developer notes are kept as XML comments
    placed immediately before the element.
    """

    rootNode = "string"
    languageNode = "string"

    def __init__(self, source, empty=False, xmlelement=None, **kwargs):
        """Create a unit, wrapping ``xmlelement`` or building a new element.

        :param source: identifier of the unit; when not None it is stored
            in the element's ``name`` attribute.
        :param empty: accepted for interface compatibility; not used here.
        :param xmlelement: existing element to wrap. When None, a fresh
            ``rootNode`` element followed by a newline is created.
        :param kwargs: accepted for interface compatibility; not used here.
        """
        if xmlelement is not None:
            self.xmlelement = xmlelement
        else:
            self.xmlelement = etree.Element(self.rootNode)
            self.xmlelement.tail = '\n'
        if source is not None:
            self.setid(source)
        super(AndroidResourceUnit, self).__init__(source)

    def getid(self):
        """Return the value of the element's ``name`` attribute."""
        return self.xmlelement.get("name")

    def getcontext(self):
        """Return the context, which is the same ``name`` attribute as the id."""
        return self.xmlelement.get("name")

    def setid(self, newid):
        """Store ``newid`` in the element's ``name`` attribute."""
        return self.xmlelement.set("name", newid)

    def unescape(self, text):
        """Remove escaping from Android resource.

        Code stolen from android2po
        <https://github.com/miracle2k/android2po>

        Collapses insignificant whitespace, strips unescaped double quotes
        and resolves backslash escapes (newline, tab, space, quote,
        apostrophe, at-sign, double backslash and four-digit unicode
        sequences). Any other escaped character is dropped together with
        its backslash.

        :param text: raw element text, or None for an empty element.
        :returns: the unescaped string; ``''`` when ``text`` is None.
        :raises ValueError: on a malformed unicode escape sequence.
        """
        # Return text for empty elements
        if text is None:
            return ''

        # We need to collapse multiple whitespace while paying
        # attention to Android's quoting and escaping.
        space_count = 0
        active_quote = False
        active_escape = False
        i = 0
        # Work on a mutable list of characters; the trailing EOF marker
        # lets the loop flush pending whitespace/escape state at the end.
        text = list(text) + [EOF]
        while i < len(text):
            c = text[i]

            # Handle whitespace collapsing
            if c is not EOF and c in WHITESPACE:
                space_count += 1
            elif space_count > 1:
                # Remove duplicate whitespace; Pay attention: We
                # don't do this if we are currently inside a quote,
                # except for one special case: If we have unbalanced
                # quotes, e.g. we reach eof while a quote is still
                # open, we *do* collapse that trailing part; this is
                # how Android does it, for some reason.
                if not active_quote or c is EOF:
                    # Replace by a single space, will get rid of
                    # non-significant newlines/tabs etc.
                    text[i-space_count : i] = ' '
                    i -= space_count - 1
                space_count = 0
            elif space_count == 1:
                # At this point we have a single whitespace character,
                # but it might be a newline or tab. If we write this
                # kind of insignificant whitespace into the .po file,
                # it will be considered significant on import. So,
                # make sure that this kind of whitespace is always a
                # standard space.
                text[i-1] = ' '
                space_count = 0
            else:
                space_count = 0

            # Handle quotes
            if c == '"' and not active_escape:
                active_quote = not active_quote
                del text[i]
                i -= 1

            # Handle escapes
            if c == '\\':
                if not active_escape:
                    active_escape = True
                else:
                    # A double-backslash represents a single;
                    # simply deleting the current char will do.
                    del text[i]
                    i -= 1
                    active_escape = False
            else:
                if active_escape:
                    # Handle the limited amount of escape codes
                    # that we support.
                    # TODO: What about \r, or \r\n?
                    if c is EOF:
                        # Basically like any other char, but put
                        # this first so we can use the ``in`` operator
                        # in the clauses below without issue.
                        pass
                    elif c == 'n' or c == 'N':
                        text[i-1 : i+1] = '\n'  # an actual newline
                        i -= 1
                    elif c == 't' or c == 'T':
                        text[i-1 : i+1] = '\t'  # an actual tab
                        i -= 1
                    elif c == ' ':
                        text[i-1 : i+1] = ' '  # an actual space
                        i -= 1
                    elif c in '"\'@':
                        text[i-1 : i] = ''  # remove the backslash
                        i -= 1
                    elif c == 'u':
                        # Unicode sequence. Android is nice enough to deal
                        # with those in a way which lets us just capture
                        # the next 4 characters and raise an error if they
                        # are not valid (rather than having to use a new
                        # state to parse the unicode sequence).
                        # Exception: In case we are at the end of the
                        # string, we support incomplete sequences by
                        # prefixing the missing digits with zeros.
                        # Note: min(..., len(text)-1) is needed in the
                        # slice due to the trailing EOF element.
                        max_slice = min(i+5, len(text)-1)
                        codepoint_str = "".join(text[i+1 : max_slice])
                        if len(codepoint_str) < 4:
                            codepoint_str = u"0" * (4-len(codepoint_str)) + codepoint_str
                        try:
                            # We can't trust int() to raise a ValueError,
                            # it will ignore leading/trailing whitespace.
                            if not codepoint_str.isalnum():
                                raise ValueError(codepoint_str)
                            codepoint = unichr(int(codepoint_str, 16))
                        except ValueError:
                            raise ValueError('bad unicode escape sequence')

                        text[i-1 : max_slice] = codepoint
                        i -= 1
                    else:
                        # All others, remove, like Android does as well.
                        text[i-1 : i+1] = ''
                        i -= 1
                    active_escape = False

            i += 1

        # Join the string together again, but w/o EOF marker
        return "".join(text[:-1])

    def escape(self, text):
        """Escape all the characters which need to be escaped in an Android
        XML file.

        :param text: plain string to escape, or None.
        :returns: None when ``text`` is None, ``''`` for an empty string,
            otherwise the escaped string. The result is wrapped in double
            quotes when it starts or ends with whitespace or contains two
            consecutive whitespace characters.
        """
        if text is None:
            return None
        if len(text) == 0:
            return ''
        # Backslashes go first so the escapes added below are not doubled.
        text = text.replace('\\', '\\\\')
        text = text.replace('\n', '\\n')
        # This will add non intrusive real newlines to
        # ones in translation improving readability of result
        text = text.replace(' \\n', '\n\\n')
        text = text.replace('\t', '\\t')
        text = text.replace('\'', '\\\'')
        text = text.replace('"', '\\"')

        # @ needs to be escaped at start
        if text.startswith('@'):
            text = '\\@' + text[1:]
        # Quote strings with more whitespace
        if (text[0] in WHITESPACE or text[-1] in WHITESPACE
                or MULTIWHITESPACE.search(text) is not None):
            return '"%s"' % text
        return text

    def setsource(self, source):
        """Delegate storing the source to the base class."""
        super(AndroidResourceUnit, self).setsource(source)

    def getsource(self, lang=None):
        """Return the base-class source, or the target when it is None.

        :param lang: accepted for interface compatibility; not used here.
        """
        source = super(AndroidResourceUnit, self).source
        if source is None:
            return self.target
        return source

    source = property(getsource, setsource)

    def settarget(self, target):
        """Write ``target`` into the wrapped XML element.

        A target containing ``<`` is treated as possible markup: it is
        parsed as XML and its text and child elements replace the element
        content. If it is not well-formed, the ``<`` characters are
        entity-escaped and the result is stored as plain text. Any other
        target is stored as Android-escaped text. Existing child elements
        are always removed first so no stale markup survives.

        :param target: new target string; None clears the element text.
        """
        if target is not None and '<' in target:
            # Handle text with possible markup. Bare ampersands would make
            # the XML parser fail, so turn them into entities first.
            markup = target.replace('&', '&amp;')
            try:
                # Try as XML
                newstring = etree.fromstring('<string>%s</string>' % markup)
            except etree.XMLSyntaxError:
                # Fallback to string with XML escaping
                markup = markup.replace('<', '&lt;')
                newstring = etree.fromstring('<string>%s</string>' % markup)
            # NOTE(review): text taken from parsed markup is stored without
            # going through escape(), unlike the text-only branch - confirm
            # whether that asymmetry is intended.
            if newstring.text is None:
                new_text = ''
            else:
                new_text = newstring.text
            # Snapshot the children: appending them below moves them out
            # of ``newstring``.
            new_children = list(newstring)
        else:
            # Handle text only
            new_text = self.escape(target)
            new_children = []

        # Update text
        self.xmlelement.text = new_text
        # Remove old elements (iterate over a copy, we mutate the parent)
        for child in list(self.xmlelement):
            self.xmlelement.remove(child)
        # Add new elements
        for child in new_children:
            self.xmlelement.append(child)

        super(AndroidResourceUnit, self).settarget(target)

    def gettarget(self, lang=None):
        """Return the unescaped element text followed by serialized children.

        :param lang: accepted for interface compatibility; not used here.
        """
        # Grab inner text
        target = self.unescape(self.xmlelement.text or u'')
        # Include markup as well
        target += u''.join([
            data.forceunicode(etree.tostring(child, encoding='utf-8'))
            for child in self.xmlelement.iterchildren()
        ])
        return target

    target = property(gettarget, settarget)

    def getlanguageNode(self, lang=None, index=None):
        """Return the wrapped element; ``lang`` and ``index`` are ignored."""
        return self.xmlelement

    @classmethod
    def createfromxmlElement(cls, element):
        """Build a unit that wraps the existing XML ``element``."""
        term = cls(None, xmlelement=element)
        return term

    # Notes are handled as previous sibling comments.
    def addnote(self, text, origin=None, position="append"):
        """Add a note to the unit.

        Developer-origin notes (or notes with no origin) are inserted as
        an XML comment directly before the element; ``position`` is not
        consulted for them. Other origins are passed to the base class.
        """
        if origin in ['programmer', 'developer', 'source code', None]:
            self.xmlelement.addprevious(etree.Comment(text))
        else:
            return super(AndroidResourceUnit, self).addnote(text, origin=origin,
                                                            position=position)

    def getnotes(self, origin=None):
        """Return the notes for ``origin``.

        For developer origins (or None) this is the text of the run of XML
        comments immediately preceding the element, in document order,
        joined with newlines. Other origins are answered by the base class.
        """
        if origin in ['programmer', 'developer', 'source code', None]:
            comments = []
            if self.xmlelement is not None:
                prevSibling = self.xmlelement.getprevious()
                # Walk backwards, prepending so the result stays in
                # document order.
                while (prevSibling is not None
                       and prevSibling.tag is etree.Comment):
                    comments.insert(0, prevSibling.text)
                    prevSibling = prevSibling.getprevious()
            return u'\n'.join(comments)
        else:
            return super(AndroidResourceUnit, self).getnotes(origin)

    def removenotes(self):
        """Remove the XML comments directly preceding the element, then let
        the base class drop its own notes."""
        # getparent() must be *called*: comments can only be detached
        # through the parent, so skip this when the element has none.
        if (self.xmlelement is not None
                and self.xmlelement.getparent() is not None):
            prevSibling = self.xmlelement.getprevious()
            while (prevSibling is not None
                   and prevSibling.tag is etree.Comment):
                prevSibling.getparent().remove(prevSibling)
                # Re-read: the previous sibling changed after the removal.
                prevSibling = self.xmlelement.getprevious()

        super(AndroidResourceUnit, self).removenotes()

    def __str__(self):
        """Return the element serialized as pretty-printed UTF-8 XML."""
        return etree.tostring(self.xmlelement, pretty_print=True,
                              encoding='utf-8')

    def __eq__(self, other):
        """Compare units by their string representations."""
        return (str(self) == str(other))
class AndroidResourceFile(lisa.LISAfile):
    """Class representing a Android resource file store.

    The ``<resources>`` root element is used both as the document root
    and as the body that holds the units.
    """

    UnitClass = AndroidResourceUnit
    Name = _("Android String Resource")
    Mimetypes = ["application/xml"]
    Extensions = ["xml"]
    rootNode = "resources"
    bodyNode = "resources"
    XMLskeleton = '''<?xml version="1.0" encoding="utf-8"?>
<resources></resources>'''

    def initbody(self):
        """Initialises self.body so it never needs to be retrieved from the
        XML again.

        Also records the document root's default namespace (the ``None``
        key of its nsmap) in ``self.namespace``.
        """
        root = self.document.getroot()
        self.namespace = root.nsmap.get(None)
        self.body = root