literals.py | searchcode

/documentor/libraries/Sphinx-1.1.3-py3.2/sphinx/pycode/pgen2/literals.py

https://github.com/tictactatic/Superdesk
Python | 96 lines | 86 code | 6 blank | 4 comment | 3 complexity | 77d61b57a3d667686c1101ae2c4484e3 MD5 | raw file
Possible License(s): BSD-3-Clause, GPL-3.0, GPL-2.0

# Copyright 2004-2005 Elemental Security, Inc. All Rights Reserved.
# Licensed to PSF under a Contributor Agreement.

# Extended to handle raw and unicode literals by Georg Brandl.

"""Safely evaluate Python string literals without using eval()."""

import re

# Single-character escapes: maps the character that follows the backslash
# to the character the escape sequence denotes.
simple_escapes = {
    "a": "\a",
    "b": "\b",
    "f": "\f",
    "n": "\n",
    "r": "\r",
    "t": "\t",
    "v": "\v",
    "'": "'",
    '"': '"',
    "\\": "\\",
}

def convert_hex(x, n):
    """Return the integer value of the hex digits in an escape body.

    ``x`` is the text of the escape after the backslash: one marker
    character (for example ``x``, ``u`` or ``U``) followed by the hex
    digits.  ``n`` is the minimum number of hex digits required.

    Raises ValueError if there are fewer than ``n`` characters after the
    marker, or if any of them is not a hexadecimal digit.
    """
    digits = x[1:]
    # int(..., 16) on its own is too lenient for this job: it also accepts
    # a leading sign and surrounding whitespace, so a malformed escape such
    # as "x+1" or "x 1" would be decoded instead of rejected.  Check each
    # character explicitly before converting.
    well_formed = (
        len(x) >= n + 1
        and digits != ""
        and all(ch in "0123456789abcdefABCDEF" for ch in digits)
    )
    if not well_formed:
        raise ValueError("invalid hex string escape ('\\%s')" % x)
    return int(digits, 16)

def escape(m):
    """Return the text that one matched backslash escape stands for.

    ``m`` is a regex match whose group 0 is the whole escape sequence
    (starting with the backslash) and whose group 1 is the part after the
    backslash.  Single-character escapes are looked up in
    ``simple_escapes``; ``x``/``u``/``U`` escapes are decoded through
    ``convert_hex`` with 2/4/8 digits; ``N{name}`` is resolved with
    ``unicodedata.lookup``; anything else is read as an octal number.

    Raises ValueError for an unknown character name or an invalid octal
    escape.  ValueErrors raised by ``convert_hex`` or ``chr`` propagate.
    """
    whole, tail = m.group(0, 1)
    assert whole.startswith("\\")

    simple = simple_escapes.get(tail)
    if simple is not None:
        return simple

    if tail.startswith("x"):
        return chr(convert_hex(tail, 2))
    if tail.startswith("u"):
        return chr(convert_hex(tail, 4))
    if tail.startswith("U"):
        return chr(convert_hex(tail, 8))

    if tail.startswith("N"):
        import unicodedata
        # tail has the form "N{NAME}": strip the two-character "N{" prefix
        # and the closing "}" so that only NAME is looked up.
        name = tail[2:-1]
        try:
            return unicodedata.lookup(name)
        except KeyError as exc:
            raise ValueError("undefined character name %r" % name) from exc

    # Whatever is left is treated as an octal escape.
    try:
        return chr(int(tail, 8))
    except ValueError as exc:
        raise ValueError(
            "invalid octal string escape ('\\%s')" % tail) from exc

def escaperaw(m):
    """Return the replacement for one escape matched inside a raw literal.

    ``m`` is a regex match whose group 0 is the whole escape sequence and
    whose group 1 is the part after the backslash.  Only ``u`` and ``U``
    escapes are decoded (as 4 and 8 hex digits through ``convert_hex``);
    every other match is returned exactly as it was written.
    """
    whole = m.group(0)
    tail = m.group(1)
    for marker, width in (("u", 4), ("U", 8)):
        if tail.startswith(marker):
            return chr(convert_hex(tail, width))
    return whole

# Escapes recognised in plain literals: quote characters, backslash, the
# single-letter escapes, "x" followed by up to two characters, and one to
# three octal digits.  Group 1 is the text after the backslash.
escape_re = re.compile(
    r"\\(\'|\"|\\|[abfnrtv]|x.{0,2}|[0-7]{1,3})"
)

# The same alternatives, extended with "u" (up to 4 hex digits), "U" (up
# to 8 hex digits) and "N{...}" escapes.
uni_escape_re = re.compile(
    r"\\("
    r"\'|\"|\\|[abfnrtv]|x.{0,2}|[0-7]{1,3}|"
    r"u[0-9a-fA-F]{0,4}|U[0-9a-fA-F]{0,8}|N\{.+?\}"
    r")"
)

def evalString(s, encoding=None):
    """Return the value of the Python string literal whose source is ``s``.

    If ``encoding`` is given and ``s`` is not a ``str``, ``s`` is decoded
    with it first.  A leading ``u``/``U`` selects ``uni_escape_re`` instead
    of ``escape_re``; a following ``r``/``R`` selects ``escaperaw`` instead
    of ``escape`` as the replacement function.  The surrounding quotes
    (single or triple) are removed and the escapes in between are
    substituted.

    The literal's shape (opening quote, matching closing quote, minimum
    length) is checked with ``assert`` statements.
    """
    if encoding and not isinstance(s, str):
        s = s.decode(encoding)

    pattern, handler = escape_re, escape
    if s.startswith(("u", "U")):
        pattern = uni_escape_re
        s = s[1:]
    if s.startswith(("r", "R")):
        handler = escaperaw
        s = s[1:]

    assert s.startswith(("'", '"')), repr(s[:1])
    quote = s[0]
    if s[:3] == quote * 3:
        # Triple-quoted literal: the delimiter is three quote characters.
        quote = quote * 3
    width = len(quote)
    assert s.endswith(quote), repr(s[-width:])
    assert len(s) >= 2 * width

    body = s[width:-width]
    return pattern.sub(handler, body)

def test():
    """Round-trip the first 256 code points through repr() and evalString().

    Prints the code point, the character, its repr() and the evaluated
    result for every character that does not survive the round trip.
    """
    for code in range(256):
        char = chr(code)
        literal = repr(char)
        value = evalString(literal)
        if value == char:
            continue
        print(code, char, literal, value)


# Run the round-trip self-check when the module is executed as a script.
if __name__ == "__main__":
    test()