PageRenderTime 67ms CodeModel.GetById 30ms RepoModel.GetById 0ms app.codeStats 0ms

/pypy/objspace/std/strutil.py

https://bitbucket.org/dac_io/pypy
Python | 188 lines | 144 code | 22 blank | 22 comment | 46 complexity | 33cf1ba7a8666dce446b56fb37c05ed7 MD5 | raw file
  1. """
  2. Pure Python implementation of string utilities.
  3. """
  4. from pypy.rlib.rarithmetic import ovfcheck
  5. from pypy.rlib.rfloat import rstring_to_float, INFINITY, NAN
  6. from pypy.rlib.rbigint import rbigint, parse_digit_string
  7. from pypy.interpreter.error import OperationError
  8. import math
  9. # XXX factor more functions out of stringobject.py.
  10. # This module is independent from PyPy.
  11. def strip_spaces(s):
  12. # XXX this is not locale-dependent
  13. p = 0
  14. q = len(s)
  15. while p < q and s[p] in ' \f\n\r\t\v':
  16. p += 1
  17. while p < q and s[q-1] in ' \f\n\r\t\v':
  18. q -= 1
  19. assert q >= p # annotator hint, don't remove
  20. return s[p:q]
  21. class ParseStringError(Exception):
  22. def __init__(self, msg):
  23. self.msg = msg
  24. class ParseStringOverflowError(Exception):
  25. def __init__(self, parser):
  26. self.parser = parser
  27. # iterator-like class
  28. class NumberStringParser:
  29. def error(self):
  30. raise ParseStringError("invalid literal for %s() with base %d: '%s'" %
  31. (self.fname, self.original_base, self.literal))
  32. def __init__(self, s, literal, base, fname):
  33. self.literal = literal
  34. self.fname = fname
  35. sign = 1
  36. if s.startswith('-'):
  37. sign = -1
  38. s = strip_spaces(s[1:])
  39. elif s.startswith('+'):
  40. s = strip_spaces(s[1:])
  41. self.sign = sign
  42. self.original_base = base
  43. if base == 0:
  44. if s.startswith('0x') or s.startswith('0X'):
  45. base = 16
  46. elif s.startswith('0b') or s.startswith('0B'):
  47. base = 2
  48. elif s.startswith('0'): # also covers the '0o' case
  49. base = 8
  50. else:
  51. base = 10
  52. elif base < 2 or base > 36:
  53. raise ParseStringError, "%s() base must be >= 2 and <= 36" % (fname,)
  54. self.base = base
  55. if base == 16 and (s.startswith('0x') or s.startswith('0X')):
  56. s = s[2:]
  57. if base == 8 and (s.startswith('0o') or s.startswith('0O')):
  58. s = s[2:]
  59. if base == 2 and (s.startswith('0b') or s.startswith('0B')):
  60. s = s[2:]
  61. if not s:
  62. self.error()
  63. self.s = s
  64. self.n = len(s)
  65. self.i = 0
  66. def rewind(self):
  67. self.i = 0
  68. def next_digit(self): # -1 => exhausted
  69. if self.i < self.n:
  70. c = self.s[self.i]
  71. digit = ord(c)
  72. if '0' <= c <= '9':
  73. digit -= ord('0')
  74. elif 'A' <= c <= 'Z':
  75. digit = (digit - ord('A')) + 10
  76. elif 'a' <= c <= 'z':
  77. digit = (digit - ord('a')) + 10
  78. else:
  79. self.error()
  80. if digit >= self.base:
  81. self.error()
  82. self.i += 1
  83. return digit
  84. else:
  85. return -1
  86. def string_to_int(s, base=10):
  87. """Utility to converts a string to an integer.
  88. If base is 0, the proper base is guessed based on the leading
  89. characters of 's'. Raises ParseStringError in case of error.
  90. Raises ParseStringOverflowError in case the result does not fit.
  91. """
  92. s = literal = strip_spaces(s)
  93. p = NumberStringParser(s, literal, base, 'int')
  94. base = p.base
  95. result = 0
  96. while True:
  97. digit = p.next_digit()
  98. if digit == -1:
  99. return result
  100. if p.sign == -1:
  101. digit = -digit
  102. try:
  103. result = ovfcheck(result * base)
  104. result = ovfcheck(result + digit)
  105. except OverflowError:
  106. raise ParseStringOverflowError(p)
  107. def string_to_bigint(s, base=10, parser=None):
  108. """As string_to_int(), but ignores an optional 'l' or 'L' suffix
  109. and returns an rbigint."""
  110. if parser is None:
  111. s = literal = strip_spaces(s)
  112. if (s.endswith('l') or s.endswith('L')) and base < 22:
  113. # in base 22 and above, 'L' is a valid digit! try: long('L',22)
  114. s = s[:-1]
  115. p = NumberStringParser(s, literal, base, 'long')
  116. else:
  117. p = parser
  118. return parse_digit_string(p)
  119. # Tim's comment:
  120. # 57 bits are more than needed in any case.
  121. # to allow for some rounding, we take one
  122. # digit more.
  123. # In the PyPy case, we can compute everything at compile time:
  124. # XXX move this stuff to some central place, it is now also
  125. # in _float_formatting.
  126. def calc_mantissa_bits():
  127. bits = 1 # I know it is almost always 53, but let it compute...
  128. while 1:
  129. pattern = (1L << bits) - 1
  130. comp = long(float(pattern))
  131. if comp != pattern:
  132. return bits - 1
  133. bits += 1
  134. MANTISSA_BITS = calc_mantissa_bits()
  135. del calc_mantissa_bits
  136. MANTISSA_DIGITS = len(str( (1L << MANTISSA_BITS)-1 )) + 1
  137. def string_to_float(s):
  138. """
  139. Conversion of string to float.
  140. This version tries to only raise on invalid literals.
  141. Overflows should be converted to infinity whenever possible.
  142. Expects an unwrapped string and return an unwrapped float.
  143. """
  144. s = strip_spaces(s)
  145. if not s:
  146. raise ParseStringError("empty string for float()")
  147. low = s.lower()
  148. if low == "-inf" or low == "-infinity":
  149. return -INFINITY
  150. elif low == "inf" or low == "+inf":
  151. return INFINITY
  152. elif low == "infinity" or low == "+infinity":
  153. return INFINITY
  154. elif low == "nan" or low == "+nan":
  155. return NAN
  156. elif low == "-nan":
  157. return -NAN
  158. try:
  159. return rstring_to_float(s)
  160. except ValueError:
  161. raise ParseStringError("invalid literal for float()")