escaping.py - This Python code is a custom unescaper for XM…

/src/robot/utils/escaping.py

https://code.google.com/p/robotframework/ · Python · 116 lines · 78 code · 23 blank · 15 comment · 22 complexity · aba662bc58c6525f1df50ceb0b939b9e MD5 · raw file


#  Copyright 2008-2014 Nokia Solutions and Networks
#
#  Licensed under the Apache License, Version 2.0 (the "License");
#  you may not use this file except in compliance with the License.
#  You may obtain a copy of the License at
#
#      http://www.apache.org/licenses/LICENSE-2.0
#
#  Unless required by applicable law or agreed to in writing, software
#  distributed under the License is distributed on an "AS IS" BASIS,
#  WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
#  See the License for the specific language governing permissions and
#  limitations under the License.

import re


# Sequences that escape() prefixes with a backslash. The backslash itself is
# listed first, so backslashes inserted for the later entries are not
# themselves escaped again.
_SEQS_TO_BE_ESCAPED = ('\\', '${', '@{', '%{', '&{', '*{', '=')


def escape(item):
    """Return `item` with each sequence in `_SEQS_TO_BE_ESCAPED` backslash-prefixed.

    Anything that is not a string is handed back untouched.
    """
    if isinstance(item, basestring):
        for seq in _SEQS_TO_BE_ESCAPED:
            if seq in item:
                escaped_seq = '\\' + seq
                item = item.replace(seq, escaped_seq)
    return item


def unescape(item):
    """Resolve backslash escapes in `item` and return the result.

    Only strings that actually contain a backslash are processed; every
    other value (non-strings included) is returned as-is.
    """
    if isinstance(item, basestring) and '\\' in item:
        return Unescaper().unescape(item)
    return item


class Unescaper(object):
    r"""Resolve backslash escapes in a string.

    Supported escapes are ``\n``, ``\r``, ``\t``, ``\xhh``, ``\uhhhh`` and
    ``\Uhhhhhhhh``. A backslash followed by anything else is simply removed,
    and each pair of backslashes collapses to one literal backslash.
    """

    # Not referenced by any method of this class; retained so that the
    # attribute stays available to code that may look it up.
    _escaped = re.compile(r'(\\+)([^\\]*)')
    # Only these characters may appear in \x, \u and \U escapes.
    _hex_digits = frozenset('0123456789abcdefABCDEF')

    def unescape(self, string):
        """Return `string` with all escapes resolved."""
        return ''.join(self._yield_unescaped(string))

    def _yield_unescaped(self, string):
        """Yield the unescaped pieces of `string`, one backslash run at a time."""
        while '\\' in string:
            finder = EscapeFinder(string)
            yield finder.before + finder.backslashes
            if finder.escaped and finder.text:
                yield self._unescape(finder.text)
            else:
                yield finder.text
            string = finder.after
        yield string

    def _unescape(self, text):
        """Unescape `text`, whose first character followed an escaping backslash.

        The first character selects a ``_unescaper_for_<char>`` method. When
        no such method exists, `text` is returned unchanged.
        """
        try:
            marker = str(text[0])
        except UnicodeError:
            # The character cannot be converted with str(), so it cannot be
            # one of the supported escape characters.
            return text
        try:
            unescaper = getattr(self, '_unescaper_for_' + marker)
        except AttributeError:
            return text
        else:
            return unescaper(text[1:])

    def _unescaper_for_n(self, text):
        # A single space directly after the newline escape is dropped.
        if text.startswith(' '):
            text = text[1:]
        return '\n' + text

    def _unescaper_for_r(self, text):
        return '\r' + text

    def _unescaper_for_t(self, text):
        return '\t' + text

    def _unescaper_for_x(self, text):
        return self._unescape_character(text, 2, 'x')

    def _unescaper_for_u(self, text):
        return self._unescape_character(text, 4, 'u')

    def _unescaper_for_U(self, text):
        return self._unescape_character(text, 8, 'U')

    def _unescape_character(self, text, length, escape):
        """Replace the first `length` hex digits of `text` with their character.

        If those characters do not form a valid code point, the escape
        character `escape` is put back in front of `text` and the text is
        otherwise left untouched.
        """
        try:
            char = self._get_character(text[:length], length)
        except ValueError:
            return escape + text
        else:
            return char + text[length:]

    def _get_character(self, text, length):
        """Return the character whose hexadecimal ordinal is `text`.

        Raises ValueError when `text` is shorter than `length`, contains
        anything other than ASCII hex digits, or exceeds 0x10FFFF.
        """
        # int(text, 16) also accepts a '0x' prefix and non-ASCII decimal
        # digits, so the digits are validated explicitly instead of relying
        # on isalnum().
        if len(text) < length or not self._hex_digits.issuperset(text):
            raise ValueError
        ordinal = int(text, 16)
        # No Unicode code points above 0x10FFFF
        if ordinal > 0x10FFFF:
            raise ValueError
        # unichr only supports ordinals up to 0xFFFF with narrow Python builds
        if ordinal > 0xFFFF:
            # NOTE: eval only ever sees a literal built from the validated
            # integer above, never raw input text.
            return eval("u'\\U%08x'" % ordinal)
        return unichr(ordinal)


class EscapeFinder(object):
    """Split a string around its first run of backslashes.

    After construction the instance exposes:

    - ``before``: everything preceding the backslash run
    - ``backslashes``: one literal backslash per pair in the run
    - ``escaped``: True when the run has an odd number of backslashes
    - ``text``: the non-backslash characters following the run
    - ``after``: the remainder, starting at the next backslash if any

    The given string must contain at least one backslash.
    """

    _escaped = re.compile(r'(\\+)([^\\]*)')

    def __init__(self, string):
        match = self._escaped.search(string)
        slashes, text = match.groups()
        # Each full pair is a literal backslash; a leftover one escapes `text`.
        pairs, leftover = divmod(len(slashes), 2)
        self.before = string[:match.start()]
        self.backslashes = '\\' * pairs
        self.escaped = bool(leftover)
        self.text = text
        self.after = string[match.end():]

Summary ✨

This Python module escapes and unescapes special sequences in strings. `escape` prefixes backslashes, the variable-like sequences `${`, `@{`, `%{`, `&{` and `*{`, and `=` with a backslash. `unescape` walks the string one backslash run at a time: each pair of backslashes becomes a single literal backslash, and an odd trailing backslash marks the following text as escaped. The supported escape sequences are newline (\n), carriage return (\r), tab (\t), and hexadecimal character escapes (\xhh, \uhhhh, \Uhhhhhhhh).

Tech Fingerprint

Standard Library: String & Text

Alerts (5)

'def' Ensure functions have docstrings for documentation
21 30 39
'isinstance(' Overuse may indicate design issues; consider polymorphism
22
'eval(' Avoid due to security risks; use ast.literal_eval for safer evaluation of literals
102