PageRenderTime 47ms CodeModel.GetById 18ms RepoModel.GetById 1ms app.codeStats 0ms

/pypy/objspace/std/formatting.py

https://bitbucket.org/pypy/pypy/
Python | 576 lines | 533 code | 24 blank | 19 comment | 58 complexity | 3e7a13424db7c51e1db0c7a9f46f5ad9 MD5 | raw file
Possible License(s): AGPL-3.0, BSD-3-Clause, Apache-2.0
  1. """String formatting routines"""
  2. import sys
  3. from rpython.rlib import jit
  4. from rpython.rlib.rarithmetic import INT_MAX
  5. from rpython.rlib.rfloat import DTSF_ALT, formatd, isnan, isinf
  6. from rpython.rlib.rstring import StringBuilder, UnicodeBuilder
  7. from rpython.rlib.unroll import unrolling_iterable
  8. from rpython.tool.sourcetools import func_with_new_name
  9. from pypy.interpreter.error import OperationError, oefmt
  10. class BaseStringFormatter(object):
  11. def __init__(self, space, values_w, w_valuedict):
  12. self.space = space
  13. self.fmtpos = 0
  14. self.values_w = values_w
  15. self.values_pos = 0
  16. self.w_valuedict = w_valuedict
  17. def forward(self):
  18. # move current position forward
  19. self.fmtpos += 1
  20. def nextinputvalue(self):
  21. # return the next value in the tuple of input arguments
  22. try:
  23. w_result = self.values_w[self.values_pos]
  24. except IndexError:
  25. raise oefmt(self.space.w_TypeError,
  26. "not enough arguments for format string")
  27. else:
  28. self.values_pos += 1
  29. return w_result
  30. def checkconsumed(self):
  31. if self.values_pos < len(self.values_w) and self.w_valuedict is None:
  32. raise oefmt(self.space.w_TypeError,
  33. "not all arguments converted during string formatting")
  34. def std_wp_int(self, r, prefix='', keep_zero=False):
  35. # use self.prec to add some '0' on the left of the number
  36. if self.prec >= 0:
  37. if self.prec > 1000:
  38. raise oefmt(self.space.w_OverflowError,
  39. "formatted integer is too long (precision too "
  40. "large?)")
  41. sign = r[0] == '-'
  42. padding = self.prec - (len(r)-int(sign))
  43. if padding > 0:
  44. if sign:
  45. r = '-' + '0'*padding + r[1:]
  46. else:
  47. r = '0'*padding + r
  48. elif self.prec == 0 and r == '0' and not keep_zero:
  49. r = ''
  50. self.std_wp_number(r, prefix)
  51. def fmt_d(self, w_value):
  52. "int formatting"
  53. r = int_num_helper(self.space, w_value)
  54. self.std_wp_int(r)
  55. def fmt_x(self, w_value):
  56. "hex formatting"
  57. r = hex_num_helper(self.space, w_value)
  58. if self.f_alt:
  59. prefix = '0x'
  60. else:
  61. prefix = ''
  62. self.std_wp_int(r, prefix)
  63. def fmt_X(self, w_value):
  64. "HEX formatting"
  65. r = hex_num_helper(self.space, w_value)
  66. if self.f_alt:
  67. prefix = '0X'
  68. else:
  69. prefix = ''
  70. self.std_wp_int(r.upper(), prefix)
  71. def fmt_o(self, w_value):
  72. "oct formatting"
  73. r = oct_num_helper(self.space, w_value)
  74. keep_zero = False
  75. if self.f_alt:
  76. if r == '0':
  77. keep_zero = True
  78. elif r.startswith('-'):
  79. r = '-0' + r[1:]
  80. else:
  81. r = '0' + r
  82. self.std_wp_int(r, keep_zero=keep_zero)
  83. fmt_i = fmt_d
  84. fmt_u = fmt_d
  85. def fmt_e(self, w_value):
  86. self.format_float(w_value, 'e')
  87. def fmt_f(self, w_value):
  88. self.format_float(w_value, 'f')
  89. def fmt_g(self, w_value):
  90. self.format_float(w_value, 'g')
  91. def fmt_E(self, w_value):
  92. self.format_float(w_value, 'E')
  93. def fmt_F(self, w_value):
  94. self.format_float(w_value, 'F')
  95. def fmt_G(self, w_value):
  96. self.format_float(w_value, 'G')
  97. def format_float(self, w_value, char):
  98. space = self.space
  99. x = space.float_w(maybe_float(space, w_value))
  100. if isnan(x):
  101. if char in 'EFG':
  102. r = 'NAN'
  103. else:
  104. r = 'nan'
  105. elif isinf(x):
  106. if x < 0:
  107. if char in 'EFG':
  108. r = '-INF'
  109. else:
  110. r = '-inf'
  111. else:
  112. if char in 'EFG':
  113. r = 'INF'
  114. else:
  115. r = 'inf'
  116. else:
  117. prec = self.prec
  118. if prec < 0:
  119. prec = 6
  120. if char in 'fF' and x/1e25 > 1e25:
  121. char = chr(ord(char) + 1) # 'f' => 'g'
  122. flags = 0
  123. if self.f_alt:
  124. flags |= DTSF_ALT
  125. r = formatd(x, char, prec, flags)
  126. self.std_wp_number(r)
  127. def std_wp_number(self, r, prefix=''):
  128. raise NotImplementedError
  129. def make_formatter_subclass(do_unicode):
  130. # to build two subclasses of the BaseStringFormatter class,
  131. # each one getting its own subtle differences and RPython types.
  132. if do_unicode:
  133. const = unicode
  134. else:
  135. const = str
  136. class StringFormatter(BaseStringFormatter):
  137. def __init__(self, space, fmt, values_w, w_valuedict):
  138. BaseStringFormatter.__init__(self, space, values_w, w_valuedict)
  139. self.fmt = fmt # either a string or a unicode
  140. def peekchr(self):
  141. # return the 'current' character
  142. try:
  143. return self.fmt[self.fmtpos]
  144. except IndexError:
  145. raise oefmt(self.space.w_ValueError, "incomplete format")
  146. # Only shows up if we've already started inlining format(), so just
  147. # unconditionally unroll this.
  148. @jit.unroll_safe
  149. def getmappingkey(self):
  150. # return the mapping key in a '%(key)s' specifier
  151. fmt = self.fmt
  152. i = self.fmtpos + 1 # first character after '('
  153. i0 = i
  154. pcount = 1
  155. while 1:
  156. try:
  157. c = fmt[i]
  158. except IndexError:
  159. space = self.space
  160. raise oefmt(space.w_ValueError, "incomplete format key")
  161. if c == ')':
  162. pcount -= 1
  163. if pcount == 0:
  164. break
  165. elif c == '(':
  166. pcount += 1
  167. i += 1
  168. self.fmtpos = i + 1 # first character after ')'
  169. return fmt[i0:i]
  170. def getmappingvalue(self, key):
  171. # return the value corresponding to a key in the input dict
  172. space = self.space
  173. if self.w_valuedict is None:
  174. raise oefmt(space.w_TypeError, "format requires a mapping")
  175. w_key = space.wrap(key)
  176. return space.getitem(self.w_valuedict, w_key)
  177. def parse_fmt(self):
  178. if self.peekchr() == '(':
  179. w_value = self.getmappingvalue(self.getmappingkey())
  180. else:
  181. w_value = None
  182. self.peel_flags()
  183. self.width = self.peel_num('width', sys.maxint)
  184. if self.width < 0:
  185. # this can happen: '%*s' % (-5, "hi")
  186. self.f_ljust = True
  187. self.width = -self.width
  188. if self.peekchr() == '.':
  189. self.forward()
  190. self.prec = self.peel_num('prec', INT_MAX)
  191. if self.prec < 0:
  192. self.prec = 0 # this can happen: '%.*f' % (-5, 3)
  193. else:
  194. self.prec = -1
  195. c = self.peekchr()
  196. if c == 'h' or c == 'l' or c == 'L':
  197. self.forward()
  198. return w_value
  199. # Same as getmappingkey
  200. @jit.unroll_safe
  201. def peel_flags(self):
  202. self.f_ljust = False
  203. self.f_sign = False
  204. self.f_blank = False
  205. self.f_alt = False
  206. self.f_zero = False
  207. while True:
  208. c = self.peekchr()
  209. if c == '-':
  210. self.f_ljust = True
  211. elif c == '+':
  212. self.f_sign = True
  213. elif c == ' ':
  214. self.f_blank = True
  215. elif c == '#':
  216. self.f_alt = True
  217. elif c == '0':
  218. self.f_zero = True
  219. else:
  220. break
  221. self.forward()
  222. # Same as getmappingkey
  223. @jit.unroll_safe
  224. def peel_num(self, name, maxval):
  225. space = self.space
  226. c = self.peekchr()
  227. if c == '*':
  228. self.forward()
  229. w_value = self.nextinputvalue()
  230. if name == 'width':
  231. return space.int_w(w_value)
  232. elif name == 'prec':
  233. return space.c_int_w(w_value)
  234. else:
  235. assert False
  236. result = 0
  237. while True:
  238. digit = ord(c) - ord('0')
  239. if not (0 <= digit <= 9):
  240. break
  241. if result > (maxval - digit) / 10:
  242. raise oefmt(space.w_ValueError, "%s too big", name)
  243. result = result * 10 + digit
  244. self.forward()
  245. c = self.peekchr()
  246. return result
  247. @jit.look_inside_iff(lambda self: jit.isconstant(self.fmt))
  248. def format(self):
  249. lgt = len(self.fmt) + 4 * len(self.values_w) + 10
  250. if do_unicode:
  251. result = UnicodeBuilder(lgt)
  252. else:
  253. result = StringBuilder(lgt)
  254. self.result = result
  255. while True:
  256. # fast path: consume as many characters as possible
  257. fmt = self.fmt
  258. i = i0 = self.fmtpos
  259. while i < len(fmt):
  260. if fmt[i] == '%':
  261. break
  262. i += 1
  263. else:
  264. result.append_slice(fmt, i0, len(fmt))
  265. break # end of 'fmt' string
  266. result.append_slice(fmt, i0, i)
  267. self.fmtpos = i + 1
  268. # interpret the next formatter
  269. w_value = self.parse_fmt()
  270. c = self.peekchr()
  271. self.forward()
  272. if c == '%':
  273. self.std_wp(const('%'))
  274. continue
  275. if w_value is None:
  276. w_value = self.nextinputvalue()
  277. # dispatch on the formatter
  278. # (this turns into a switch after translation)
  279. for c1 in FORMATTER_CHARS:
  280. if c == c1:
  281. # 'c1' is an annotation constant here,
  282. # so this getattr() is ok
  283. do_fmt = getattr(self, 'fmt_' + c1)
  284. do_fmt(w_value)
  285. break
  286. else:
  287. self.unknown_fmtchar()
  288. self.checkconsumed()
  289. return result.build()
  290. def unknown_fmtchar(self):
  291. space = self.space
  292. c = self.fmt[self.fmtpos - 1]
  293. if do_unicode:
  294. w_defaultencoding = space.call_function(
  295. space.sys.get('getdefaultencoding'))
  296. w_s = space.call_method(space.wrap(c),
  297. "encode",
  298. w_defaultencoding,
  299. space.wrap('replace'))
  300. s = space.str_w(w_s)
  301. else:
  302. s = c
  303. raise oefmt(space.w_ValueError,
  304. "unsupported format character '%s' (%s) at index %d",
  305. s, hex(ord(c)), self.fmtpos - 1)
  306. def std_wp(self, r):
  307. length = len(r)
  308. if do_unicode and isinstance(r, str):
  309. # convert string to unicode using the default encoding
  310. r = self.space.unicode_w(self.space.wrap(r))
  311. prec = self.prec
  312. if prec == -1 and self.width == 0:
  313. # fast path
  314. self.result.append(const(r))
  315. return
  316. if prec >= 0 and prec < length:
  317. length = prec # ignore the end of the string if too long
  318. result = self.result
  319. padding = self.width - length
  320. if padding < 0:
  321. padding = 0
  322. assert padding >= 0
  323. if not self.f_ljust and padding > 0:
  324. result.append_multiple_char(const(' '), padding)
  325. # add any padding at the left of 'r'
  326. padding = 0
  327. result.append_slice(r, 0, length) # add 'r' itself
  328. if padding > 0:
  329. result.append_multiple_char(const(' '), padding)
  330. # add any remaining padding at the right
  331. std_wp._annspecialcase_ = 'specialize:argtype(1)'
  332. def std_wp_number(self, r, prefix=''):
  333. result = self.result
  334. if len(prefix) == 0 and len(r) >= self.width:
  335. # this is strictly a fast path: no prefix, and no padding
  336. # needed. It is more efficient code both in the non-jit
  337. # case (less testing stuff) and in the jit case (uses only
  338. # result.append(), and no startswith() if not f_sign and
  339. # not f_blank).
  340. if self.f_sign and not r.startswith('-'):
  341. result.append(const('+'))
  342. elif self.f_blank and not r.startswith('-'):
  343. result.append(const(' '))
  344. result.append(const(r))
  345. return
  346. # add a '+' or ' ' sign if necessary
  347. sign = r.startswith('-')
  348. if not sign:
  349. if self.f_sign:
  350. r = '+' + r
  351. sign = True
  352. elif self.f_blank:
  353. r = ' ' + r
  354. sign = True
  355. # do the padding requested by self.width and the flags,
  356. # without building yet another RPython string but directly
  357. # by pushing the pad character into self.result
  358. padding = self.width - len(r) - len(prefix)
  359. if padding <= 0:
  360. padding = 0
  361. if self.f_ljust:
  362. padnumber = '<'
  363. elif self.f_zero:
  364. padnumber = '0'
  365. else:
  366. padnumber = '>'
  367. assert padding >= 0
  368. if padnumber == '>':
  369. result.append_multiple_char(const(' '), padding)
  370. # pad with spaces on the left
  371. if sign:
  372. result.append(const(r[0])) # the sign
  373. result.append(const(prefix)) # the prefix
  374. if padnumber == '0':
  375. result.append_multiple_char(const('0'), padding)
  376. # pad with zeroes
  377. result.append_slice(const(r), int(sign), len(r))
  378. # the rest of the number
  379. if padnumber == '<': # spaces on the right
  380. result.append_multiple_char(const(' '), padding)
  381. def string_formatting(self, w_value):
  382. space = self.space
  383. w_impl = space.lookup(w_value, '__str__')
  384. if w_impl is None:
  385. raise oefmt(space.w_TypeError,
  386. "operand does not support unary str")
  387. w_result = space.get_and_call_function(w_impl, w_value)
  388. if space.isinstance_w(w_result,
  389. space.w_unicode):
  390. raise NeedUnicodeFormattingError
  391. return space.str_w(w_result)
  392. def fmt_s(self, w_value):
  393. space = self.space
  394. got_unicode = space.isinstance_w(w_value,
  395. space.w_unicode)
  396. if not do_unicode:
  397. if got_unicode:
  398. raise NeedUnicodeFormattingError
  399. s = self.string_formatting(w_value)
  400. else:
  401. if not got_unicode:
  402. w_value = space.call_function(space.w_unicode, w_value)
  403. else:
  404. from pypy.objspace.std.unicodeobject import unicode_from_object
  405. w_value = unicode_from_object(space, w_value)
  406. s = space.unicode_w(w_value)
  407. self.std_wp(s)
  408. def fmt_r(self, w_value):
  409. self.std_wp(self.space.str_w(self.space.repr(w_value)))
  410. def fmt_c(self, w_value):
  411. self.prec = -1 # just because
  412. space = self.space
  413. if space.isinstance_w(w_value, space.w_str):
  414. s = space.str_w(w_value)
  415. if len(s) != 1:
  416. raise oefmt(space.w_TypeError, "%c requires int or char")
  417. self.std_wp(s)
  418. elif space.isinstance_w(w_value, space.w_unicode):
  419. if not do_unicode:
  420. raise NeedUnicodeFormattingError
  421. ustr = space.unicode_w(w_value)
  422. if len(ustr) != 1:
  423. raise oefmt(space.w_TypeError, "%c requires int or unichar")
  424. self.std_wp(ustr)
  425. else:
  426. n = space.int_w(w_value)
  427. if do_unicode:
  428. try:
  429. c = unichr(n)
  430. except ValueError:
  431. raise oefmt(space.w_OverflowError,
  432. "unicode character code out of range")
  433. self.std_wp(c)
  434. else:
  435. try:
  436. s = chr(n)
  437. except ValueError:
  438. raise oefmt(space.w_OverflowError,
  439. "character code not in range(256)")
  440. self.std_wp(s)
  441. return StringFormatter
  442. class NeedUnicodeFormattingError(Exception):
  443. pass
  444. StringFormatter = make_formatter_subclass(do_unicode=False)
  445. UnicodeFormatter = make_formatter_subclass(do_unicode=True)
  446. UnicodeFormatter.__name__ = 'UnicodeFormatter'
  447. # an "unrolling" list of all the known format characters,
  448. # collected from which fmt_X() functions are defined in the class
  449. FORMATTER_CHARS = unrolling_iterable(
  450. [_name[-1] for _name in dir(StringFormatter)
  451. if len(_name) == 5 and _name.startswith('fmt_')])
  452. def format(space, w_fmt, values_w, w_valuedict, do_unicode):
  453. "Entry point"
  454. if not do_unicode:
  455. fmt = space.str_w(w_fmt)
  456. formatter = StringFormatter(space, fmt, values_w, w_valuedict)
  457. try:
  458. result = formatter.format()
  459. except NeedUnicodeFormattingError:
  460. # fall through to the unicode case
  461. pass
  462. else:
  463. return space.wrap(result)
  464. fmt = space.unicode_w(w_fmt)
  465. formatter = UnicodeFormatter(space, fmt, values_w, w_valuedict)
  466. result = formatter.format()
  467. return space.wrap(result)
  468. def mod_format(space, w_format, w_values, do_unicode=False):
  469. if space.isinstance_w(w_values, space.w_tuple):
  470. values_w = space.fixedview(w_values)
  471. return format(space, w_format, values_w, None, do_unicode)
  472. else:
  473. # we check directly for dict to avoid obscure checking
  474. # in simplest case
  475. if space.isinstance_w(w_values, space.w_dict) or \
  476. (space.lookup(w_values, '__getitem__') and
  477. not space.isinstance_w(w_values, space.w_basestring)):
  478. return format(space, w_format, [w_values], w_values, do_unicode)
  479. else:
  480. return format(space, w_format, [w_values], None, do_unicode)
  481. # ____________________________________________________________
  482. # Formatting helpers
  483. def maybe_int(space, w_value):
  484. # make sure that w_value is a wrapped integer
  485. return space.int(w_value)
  486. def maybe_float(space, w_value):
  487. # make sure that w_value is a wrapped float
  488. return space.float(w_value)
  489. def format_num_helper_generator(fmt, digits):
  490. def format_num_helper(space, w_value):
  491. try:
  492. w_value = maybe_int(space, w_value)
  493. except OperationError:
  494. try:
  495. w_value = space.long(w_value)
  496. except OperationError as operr:
  497. if operr.match(space, space.w_TypeError):
  498. raise oefmt(
  499. space.w_TypeError,
  500. "%s format: a number is required, not %T", fmt, w_value)
  501. else:
  502. raise
  503. try:
  504. value = space.int_w(w_value)
  505. return fmt % (value,)
  506. except OperationError as operr:
  507. if not operr.match(space, space.w_OverflowError):
  508. raise
  509. num = space.bigint_w(w_value)
  510. return num.format(digits)
  511. return func_with_new_name(format_num_helper,
  512. 'base%d_num_helper' % len(digits))
  513. int_num_helper = format_num_helper_generator('%d', '0123456789')
  514. oct_num_helper = format_num_helper_generator('%o', '01234567')
  515. hex_num_helper = format_num_helper_generator('%x', '0123456789abcdef')