PageRenderTime 44ms CodeModel.GetById 12ms RepoModel.GetById 0ms app.codeStats 0ms

/pypy/objspace/std/newformat.py

https://bitbucket.org/pypy/pypy/
Python | 1159 lines | 1115 code | 26 blank | 18 comment | 81 complexity | 809c4260a045e135bda6ab2803e37f5d MD5 | raw file
Possible License(s): AGPL-3.0, BSD-3-Clause, Apache-2.0
  1. """The unicode/str format() method"""
  2. import sys
  3. import string
  4. from pypy.interpreter.error import OperationError, oefmt
  5. from rpython.rlib import rstring, runicode, rlocale, rfloat, jit
  6. from rpython.rlib.objectmodel import specialize
  7. from rpython.rlib.rfloat import copysign, formatd
  8. from rpython.rlib.rarithmetic import r_uint, intmask
  9. @specialize.argtype(1)
  10. @jit.look_inside_iff(lambda space, s, start, end:
  11. jit.isconstant(s) and
  12. jit.isconstant(start) and
  13. jit.isconstant(end))
  14. def _parse_int(space, s, start, end):
  15. """Parse a number and check for overflows"""
  16. result = 0
  17. i = start
  18. while i < end:
  19. digit = ord(s[i]) - ord('0')
  20. if 0 <= digit <= 9:
  21. if result > (sys.maxint - digit) / 10:
  22. raise oefmt(space.w_ValueError,
  23. "too many decimal digits in format string")
  24. result = result * 10 + digit
  25. else:
  26. break
  27. i += 1
  28. if i == start:
  29. result = -1
  30. return result, i
# Auto number state
# Values stored in TemplateFormatter.auto_numbering_state by _get_argument():
ANS_INIT = 1    # no numeric/empty field name has been seen yet
ANS_AUTO = 2    # the first such field was empty ("{}"): automatic numbering
ANS_MANUAL = 3  # the first such field was an explicit index ("{0}")
def make_template_formatting_class():
    """Build and return a fresh TemplateFormatter class.

    Every call creates a new class object; the module calls this twice to
    obtain separate classes for str and unicode templates.
    """
    class TemplateFormatter(object):
        """Implements str.format()-style template expansion and parsing.

        The same scanning code serves two purposes:

        * build(): render the template against call arguments;
        * formatter_parser() / formatter_field_name_split(): only parse,
          collecting wrapped tuples in self.parser_list_w.

        "Parser mode" is active whenever self.parser_list_w is not None.
        """

        # None while rendering; a list of wrapped tuples in parser mode.
        parser_list_w = None

        def __init__(self, space, is_unicode, template):
            """Remember the object space, the string flavour and the template."""
            self.space = space
            self.is_unicode = is_unicode
            # Empty string of the matching type (unicode or str).
            self.empty = u"" if is_unicode else ""
            self.template = template

        def build(self, args):
            """Render the whole template using `args` and return the result.

            `args` must provide unpack(), returning the positional arguments
            and the keyword arguments.
            """
            self.args, self.kwargs = args.unpack()
            self.auto_numbering = 0
            self.auto_numbering_state = ANS_INIT
            # Level 2 allows the top-level string plus one nested level
            # (replacement fields inside a format spec).
            return self._build_string(0, len(self.template), 2)

        def _build_string(self, start, end, level):
            """Render template[start:end] with `level` nesting levels left.

            Raises an app-level ValueError once `level` has reached 0.
            """
            space = self.space
            if self.is_unicode:
                out = rstring.UnicodeBuilder()
            else:
                out = rstring.StringBuilder()
            if not level:
                raise oefmt(space.w_ValueError, "Recursion depth exceeded")
            level -= 1
            s = self.template
            return self._do_build_string(start, end, level, out, s)

        @jit.look_inside_iff(lambda self, start, end, level, out, s: jit.isconstant(s))
        def _do_build_string(self, start, end, level, out, s):
            """Scan s[start:end], copying literals and rendering fields.

            "{{" and "}}" are escapes for a single brace.  A lone "}" or an
            unterminated "{" raises an app-level ValueError.  The rendered
            text is accumulated in `out` and returned via out.build().
            """
            space = self.space
            last_literal = i = start
            while i < end:
                c = s[i]
                i += 1
                if c == "{" or c == "}":
                    at_end = i == end
                    # Find escaped "{" and "}"
                    markup_follows = True
                    if c == "}":
                        if at_end or s[i] != "}":
                            raise oefmt(space.w_ValueError, "Single '}'")
                        i += 1
                        markup_follows = False
                    if c == "{":
                        if at_end:
                            raise oefmt(space.w_ValueError, "Single '{'")
                        if s[i] == "{":
                            i += 1
                            markup_follows = False
                    # Attach literal data, ending with { or }
                    out.append_slice(s, last_literal, i - 1)
                    if not markup_follows:
                        if self.parser_list_w is not None:
                            # Parser mode: record the literal (including the
                            # single unescaped brace) as its own entry.
                            end_literal = i - 1
                            assert end_literal > last_literal
                            literal = self.template[last_literal:end_literal]
                            w_entry = space.newtuple([
                                space.wrap(literal),
                                space.w_None, space.w_None, space.w_None])
                            self.parser_list_w.append(w_entry)
                            self.last_end = i
                        last_literal = i
                        continue
                    # A replacement field starts here: find its matching "}",
                    # keeping track of nested braces.
                    nested = 1
                    field_start = i
                    recursive = False
                    while i < end:
                        c = s[i]
                        if c == "{":
                            # A "{" inside the field means nested fields.
                            recursive = True
                            nested += 1
                        elif c == "}":
                            nested -= 1
                            if not nested:
                                break
                        i += 1
                    if nested:
                        raise oefmt(space.w_ValueError, "Unmatched '{'")
                    rendered = self._render_field(field_start, i, recursive, level)
                    out.append(rendered)
                    i += 1
                    last_literal = i

            # Trailing literal text after the last field/escape.
            out.append_slice(s, last_literal, end)
            return out.build()

        # This is only ever called if we're already unrolling _do_build_string
        @jit.unroll_safe
        def _parse_field(self, start, end):
            """Split the field template[start:end] at the first ":" or "!".

            Returns (name, conversion, spec_start): `conversion` is the
            single character after "!" or None; `spec_start` is the index at
            which the format spec begins (== end when there is none).
            """
            s = self.template
            # Find ":" or "!"
            i = start
            while i < end:
                c = s[i]
                if c == ":" or c == "!":
                    end_name = i
                    if c == "!":
                        i += 1
                        if i == end:
                            raise oefmt(self.space.w_ValueError,
                                        "expected conversion")
                        conversion = s[i]
                        i += 1
                        if i < end:
                            # Anything after the conversion char must be ":".
                            if s[i] != ':':
                                raise oefmt(self.space.w_ValueError,
                                            "expected ':' after format "
                                            "specifier")
                            i += 1
                    else:
                        conversion = None
                        i += 1
                    return s[start:end_name], conversion, i
                i += 1
            return s[start:end], None, end

        @jit.unroll_safe
        def _get_argument(self, name):
            """Return the wrapped object that the field name `name` refers to.

            The part before the first "." or "[" selects a positional
            argument (an integer or empty for auto-numbering) or a keyword
            argument; the rest is resolved by _resolve_lookups().
            """
            # First, find the argument.
            space = self.space
            i = 0
            end = len(name)
            while i < end:
                c = name[i]
                if c == "[" or c == ".":
                    break
                i += 1
            empty = not i
            if empty:
                index = -1
            else:
                index, stop = _parse_int(self.space, name, 0, i)
                if stop != i:
                    # Not entirely digits: treat it as a keyword name.
                    index = -1
            use_numeric = empty or index != -1
            if self.auto_numbering_state == ANS_INIT and use_numeric:
                # The first numeric/empty field decides the numbering mode.
                if empty:
                    self.auto_numbering_state = ANS_AUTO
                else:
                    self.auto_numbering_state = ANS_MANUAL
            if use_numeric:
                if self.auto_numbering_state == ANS_MANUAL:
                    if empty:
                        raise oefmt(space.w_ValueError,
                                    "switching from manual to automatic "
                                    "numbering")
                elif not empty:
                    raise oefmt(space.w_ValueError,
                                "switching from automatic to manual numbering")
            if empty:
                index = self.auto_numbering
                self.auto_numbering += 1
            if index == -1:
                kwarg = name[:i]
                if self.is_unicode:
                    try:
                        arg_key = kwarg.encode("latin-1")
                    except UnicodeEncodeError:
                        # Not going to be found in a dict of strings.
                        raise OperationError(space.w_KeyError, space.wrap(kwarg))
                else:
                    arg_key = kwarg
                try:
                    w_arg = self.kwargs[arg_key]
                except KeyError:
                    raise OperationError(space.w_KeyError, space.wrap(arg_key))
            else:
                try:
                    w_arg = self.args[index]
                except IndexError:
                    raise oefmt(space.w_IndexError, "out of range")
            return self._resolve_lookups(w_arg, name, i, end)

        @jit.unroll_safe
        def _resolve_lookups(self, w_obj, name, start, end):
            """Apply the ".attr" / "[item]" chain in name[start:end] to w_obj.

            If w_obj is None, nothing is looked up: instead a wrapped tuple
            (True, attr) or (False, item) is appended to self.parser_list_w
            for every step.  Returns the final object (None in that case).
            """
            # Resolve attribute and item lookups.
            space = self.space
            i = start
            while i < end:
                c = name[i]
                if c == ".":
                    i += 1
                    start = i
                    while i < end:
                        c = name[i]
                        if c == "[" or c == ".":
                            break
                        i += 1
                    if start == i:
                        raise oefmt(space.w_ValueError,
                                    "Empty attribute in format string")
                    w_attr = space.wrap(name[start:i])
                    if w_obj is not None:
                        w_obj = space.getattr(w_obj, w_attr)
                    else:
                        self.parser_list_w.append(space.newtuple([
                            space.w_True, w_attr]))
                elif c == "[":
                    got_bracket = False
                    i += 1
                    start = i
                    while i < end:
                        c = name[i]
                        if c == "]":
                            got_bracket = True
                            break
                        i += 1
                    if not got_bracket:
                        raise oefmt(space.w_ValueError, "Missing ']'")
                    # An all-digit key is used as an integer index, anything
                    # else as a string key.
                    index, reached = _parse_int(self.space, name, start, i)
                    if index != -1 and reached == i:
                        w_item = space.wrap(index)
                    else:
                        w_item = space.wrap(name[start:i])
                    i += 1 # Skip "]"
                    if w_obj is not None:
                        w_obj = space.getitem(w_obj, w_item)
                    else:
                        self.parser_list_w.append(space.newtuple([
                            space.w_False, w_item]))
                else:
                    raise oefmt(space.w_ValueError,
                                "Only '[' and '.' may follow ']'")
            return w_obj

        def formatter_field_name_split(self):
            """Split self.template, taken as a field name, into its parts.

            Returns a wrapped tuple (first, rest_iterator): `first` is the
            leading integer index or name, `rest_iterator` iterates over the
            (is_attribute, key) tuples collected by _resolve_lookups().
            """
            space = self.space
            name = self.template
            i = 0
            end = len(name)
            while i < end:
                c = name[i]
                if c == "[" or c == ".":
                    break
                i += 1
            if i == 0:
                index = -1
            else:
                index, stop = _parse_int(self.space, name, 0, i)
                if stop != i:
                    index = -1
            if index >= 0:
                w_first = space.wrap(index)
            else:
                w_first = space.wrap(name[:i])
            #
            # Switch to parser mode; passing None as the object makes
            # _resolve_lookups() record the steps instead of executing them.
            self.parser_list_w = []
            self._resolve_lookups(None, name, i, end)
            #
            return space.newtuple([w_first,
                                   space.iter(space.newlist(self.parser_list_w))])

        def _convert(self, w_obj, conversion):
            """Apply the "!r" or "!s" conversion to w_obj.

            Any other conversion character raises an app-level ValueError.
            """
            space = self.space
            conv = conversion[0]
            if conv == "r":
                return space.repr(w_obj)
            elif conv == "s":
                if self.is_unicode:
                    return space.call_function(space.w_unicode, w_obj)
                return space.str(w_obj)
            else:
                raise oefmt(space.w_ValueError, "invalid conversion")

        def _render_field(self, start, end, recursive, level):
            """Render the replacement field template[start:end].

            In parser mode the field is only recorded (for level == 1) and
            the empty string is returned.  Otherwise the argument is looked
            up, converted if requested, and formatted with the field's spec;
            if `recursive` is true the spec itself is expanded first.
            """
            name, conversion, spec_start = self._parse_field(start, end)
            spec = self.template[spec_start:end]
            #
            if self.parser_list_w is not None:
                # used from formatter_parser()
                if level == 1:    # ignore recursive calls
                    space = self.space
                    # start - 1 is the position of the field's opening "{".
                    startm1 = start - 1
                    assert startm1 >= self.last_end
                    w_entry = space.newtuple([
                        space.wrap(self.template[self.last_end:startm1]),
                        space.wrap(name),
                        space.wrap(spec),
                        space.wrap(conversion)])
                    self.parser_list_w.append(w_entry)
                    # end is the position of the closing "}".
                    self.last_end = end + 1
                return self.empty
            #
            w_obj = self._get_argument(name)
            if conversion is not None:
                w_obj = self._convert(w_obj, conversion)
            if recursive:
                spec = self._build_string(spec_start, end, level)
            w_rendered = self.space.format(w_obj, self.space.wrap(spec))
            # Unwrap with the accessor matching this formatter's string type.
            unwrapper = "unicode_w" if self.is_unicode else "str_w"
            to_interp = getattr(self.space, unwrapper)
            return to_interp(w_rendered)

        def formatter_parser(self):
            """Parse the template and return a wrapped iterator of entries.

            Each entry is a wrapped 4-tuple
            (literal_text, field_name, format_spec, conversion); the last
            three are None for entries that hold only literal text.
            """
            self.parser_list_w = []
            self.last_end = 0
            self._build_string(0, len(self.template), 2)
            #
            space = self.space
            if self.last_end < len(self.template):
                # Literal text remaining after the last recorded entry.
                w_lastentry = space.newtuple([
                    space.wrap(self.template[self.last_end:]),
                    space.w_None,
                    space.w_None,
                    space.w_None])
                self.parser_list_w.append(w_lastentry)
            return space.iter(space.newlist(self.parser_list_w))

    return TemplateFormatter
# Two independent class objects built by the same factory: one is used for
# str templates, the other for unicode templates.
# NOTE(review): presumably kept separate so the RPython annotator never sees
# str and unicode mixed in the attributes of one class -- confirm.
StrTemplateFormatter = make_template_formatting_class()
UnicodeTemplateFormatter = make_template_formatting_class()
  334. def str_template_formatter(space, template):
  335. return StrTemplateFormatter(space, False, template)
  336. def unicode_template_formatter(space, template):
  337. return UnicodeTemplateFormatter(space, True, template)
  338. def format_method(space, w_string, args, is_unicode):
  339. if is_unicode:
  340. template = unicode_template_formatter(space,
  341. space.unicode_w(w_string))
  342. return space.wrap(template.build(args))
  343. else:
  344. template = str_template_formatter(space, space.str_w(w_string))
  345. return space.wrap(template.build(args))
  346. class NumberSpec(object):
  347. pass
  348. class BaseFormatter(object):
  349. def format_int_or_long(self, w_num, kind):
  350. raise NotImplementedError
  351. def format_float(self, w_num):
  352. raise NotImplementedError
  353. def format_complex(self, w_num):
  354. raise NotImplementedError
# Values for the `kind` argument of format_int_or_long().
INT_KIND = 1
LONG_KIND = 2

# Locale selection constants.
# NOTE(review): not referenced in the part of the file visible here.
NO_LOCALE = 1
DEFAULT_LOCALE = 2
CURRENT_LOCALE = 3

# Digit alphabet "0-9a-z"; _long_to_base() passes its first `base` characters
# to the bigint's format() method.
LONG_DIGITS = string.digits + string.ascii_lowercase
  361. def make_formatting_class():
  362. class Formatter(BaseFormatter):
  363. """__format__ implementation for builtin types."""
  364. _grouped_digits = None
  365. def __init__(self, space, is_unicode, spec):
  366. self.space = space
  367. self.is_unicode = is_unicode
  368. self.empty = u"" if is_unicode else ""
  369. self.spec = spec
  370. def _is_alignment(self, c):
  371. return (c == "<" or
  372. c == ">" or
  373. c == "=" or
  374. c == "^")
  375. def _is_sign(self, c):
  376. return (c == " " or
  377. c == "+" or
  378. c == "-")
  379. def _parse_spec(self, default_type, default_align):
  380. space = self.space
  381. self._fill_char = self._lit(" ")[0]
  382. self._align = default_align
  383. self._alternate = False
  384. self._sign = "\0"
  385. self._thousands_sep = False
  386. self._precision = -1
  387. the_type = default_type
  388. spec = self.spec
  389. if not spec:
  390. return True
  391. length = len(spec)
  392. i = 0
  393. got_align = True
  394. got_fill_char = False
  395. if length - i >= 2 and self._is_alignment(spec[i + 1]):
  396. self._align = spec[i + 1]
  397. self._fill_char = spec[i]
  398. got_fill_char = True
  399. i += 2
  400. elif length - i >= 1 and self._is_alignment(spec[i]):
  401. self._align = spec[i]
  402. i += 1
  403. else:
  404. got_align = False
  405. if length - i >= 1 and self._is_sign(spec[i]):
  406. self._sign = spec[i]
  407. i += 1
  408. if length - i >= 1 and spec[i] == "#":
  409. self._alternate = True
  410. i += 1
  411. if not got_fill_char and length - i >= 1 and spec[i] == "0":
  412. self._fill_char = self._lit("0")[0]
  413. if not got_align:
  414. self._align = "="
  415. i += 1
  416. self._width, i = _parse_int(self.space, spec, i, length)
  417. if length != i and spec[i] == ",":
  418. self._thousands_sep = True
  419. i += 1
  420. if length != i and spec[i] == ".":
  421. i += 1
  422. self._precision, i = _parse_int(self.space, spec, i, length)
  423. if self._precision == -1:
  424. raise oefmt(space.w_ValueError, "no precision given")
  425. if length - i > 1:
  426. raise oefmt(space.w_ValueError, "invalid format spec")
  427. if length - i == 1:
  428. presentation_type = spec[i]
  429. if self.is_unicode:
  430. try:
  431. the_type = spec[i].encode("ascii")[0]
  432. except UnicodeEncodeError:
  433. raise oefmt(space.w_ValueError,
  434. "invalid presentation type")
  435. else:
  436. the_type = presentation_type
  437. i += 1
  438. self._type = the_type
  439. if self._thousands_sep:
  440. tp = self._type
  441. if (tp == "d" or
  442. tp == "e" or
  443. tp == "f" or
  444. tp == "g" or
  445. tp == "E" or
  446. tp == "G" or
  447. tp == "%" or
  448. tp == "F" or
  449. tp == "\0"):
  450. # ok
  451. pass
  452. else:
  453. raise oefmt(space.w_ValueError, "invalid type with ','")
  454. return False
  455. def _calc_padding(self, string, length):
  456. """compute left and right padding, return total width of string"""
  457. if self._width != -1 and length < self._width:
  458. total = self._width
  459. else:
  460. total = length
  461. align = self._align
  462. if align == ">":
  463. left = total - length
  464. elif align == "^":
  465. left = (total - length) / 2
  466. elif align == "<" or align == "=":
  467. left = 0
  468. else:
  469. raise AssertionError("shouldn't be here")
  470. right = total - length - left
  471. self._left_pad = left
  472. self._right_pad = right
  473. return total
  474. def _lit(self, s):
  475. if self.is_unicode:
  476. return s.decode("latin-1")
  477. else:
  478. return s
  479. def _pad(self, string):
  480. builder = self._builder()
  481. builder.append_multiple_char(self._fill_char, self._left_pad)
  482. builder.append(string)
  483. builder.append_multiple_char(self._fill_char, self._right_pad)
  484. return builder.build()
  485. def _builder(self):
  486. if self.is_unicode:
  487. return rstring.UnicodeBuilder()
  488. else:
  489. return rstring.StringBuilder()
  490. def _unknown_presentation(self, tp):
  491. raise oefmt(self.space.w_ValueError,
  492. "unknown presentation for %s: '%s'", tp, self._type)
  493. def format_string(self, string):
  494. space = self.space
  495. if self._parse_spec("s", "<"):
  496. return space.wrap(string)
  497. if self._type != "s":
  498. self._unknown_presentation("string")
  499. if self._sign != "\0":
  500. raise oefmt(space.w_ValueError,
  501. "Sign not allowed in string format specifier")
  502. if self._alternate:
  503. raise oefmt(space.w_ValueError,
  504. "Alternate form (#) not allowed in string format "
  505. "specifier")
  506. if self._align == "=":
  507. raise oefmt(space.w_ValueError,
  508. "'=' alignment not allowed in string format "
  509. "specifier")
  510. length = len(string)
  511. precision = self._precision
  512. if precision != -1 and length >= precision:
  513. assert precision >= 0
  514. length = precision
  515. string = string[:precision]
  516. self._calc_padding(string, length)
  517. return space.wrap(self._pad(string))
  518. def _get_locale(self, tp):
  519. if tp == "n":
  520. dec, thousands, grouping = rlocale.numeric_formatting()
  521. elif self._thousands_sep:
  522. dec = "."
  523. thousands = ","
  524. grouping = "\3"
  525. else:
  526. dec = "."
  527. thousands = ""
  528. grouping = "\xFF" # special value to mean 'stop'
  529. if self.is_unicode:
  530. self._loc_dec = dec.decode("latin-1")
  531. self._loc_thousands = thousands.decode("latin-1")
  532. else:
  533. self._loc_dec = dec
  534. self._loc_thousands = thousands
  535. self._loc_grouping = grouping
  536. def _calc_num_width(self, n_prefix, sign_char, to_number, n_number,
  537. n_remainder, has_dec, digits):
  538. """Calculate widths of all parts of formatted number.
  539. Output will look like:
  540. <lpadding> <sign> <prefix> <spadding> <grouped_digits> <decimal>
  541. <remainder> <rpadding>
  542. sign is computed from self._sign, and the sign of the number
  543. prefix is given
  544. digits is known
  545. """
  546. spec = NumberSpec()
  547. spec.n_digits = n_number - n_remainder - has_dec
  548. spec.n_prefix = n_prefix
  549. spec.n_lpadding = 0
  550. spec.n_decimal = int(has_dec)
  551. spec.n_remainder = n_remainder
  552. spec.n_spadding = 0
  553. spec.n_rpadding = 0
  554. spec.n_min_width = 0
  555. spec.n_total = 0
  556. spec.sign = "\0"
  557. spec.n_sign = 0
  558. sign = self._sign
  559. if sign == "+":
  560. spec.n_sign = 1
  561. spec.sign = "-" if sign_char == "-" else "+"
  562. elif sign == " ":
  563. spec.n_sign = 1
  564. spec.sign = "-" if sign_char == "-" else " "
  565. elif sign_char == "-":
  566. spec.n_sign = 1
  567. spec.sign = "-"
  568. extra_length = (spec.n_sign + spec.n_prefix + spec.n_decimal +
  569. spec.n_remainder) # Not padding or digits
  570. if self._fill_char == "0" and self._align == "=":
  571. spec.n_min_width = self._width - extra_length
  572. if self._loc_thousands:
  573. self._group_digits(spec, digits[to_number:])
  574. n_grouped_digits = len(self._grouped_digits)
  575. else:
  576. n_grouped_digits = spec.n_digits
  577. n_padding = self._width - (extra_length + n_grouped_digits)
  578. if n_padding > 0:
  579. align = self._align
  580. if align == "<":
  581. spec.n_rpadding = n_padding
  582. elif align == ">":
  583. spec.n_lpadding = n_padding
  584. elif align == "^":
  585. spec.n_lpadding = n_padding // 2
  586. spec.n_rpadding = n_padding - spec.n_lpadding
  587. elif align == "=":
  588. spec.n_spadding = n_padding
  589. else:
  590. raise AssertionError("shouldn't reach")
  591. spec.n_total = spec.n_lpadding + spec.n_sign + spec.n_prefix + \
  592. spec.n_spadding + n_grouped_digits + \
  593. spec.n_decimal + spec.n_remainder + spec.n_rpadding
  594. return spec
  595. def _fill_digits(self, buf, digits, d_state, n_chars, n_zeros,
  596. thousands_sep):
  597. if thousands_sep:
  598. for c in thousands_sep:
  599. buf.append(c)
  600. for i in range(d_state - 1, d_state - n_chars - 1, -1):
  601. buf.append(digits[i])
  602. for i in range(n_zeros):
  603. buf.append("0")
  604. def _group_digits(self, spec, digits):
  605. buf = []
  606. grouping = self._loc_grouping
  607. min_width = spec.n_min_width
  608. grouping_state = 0
  609. left = spec.n_digits
  610. n_ts = len(self._loc_thousands)
  611. need_separator = False
  612. done = False
  613. previous = 0
  614. while True:
  615. if grouping_state >= len(grouping):
  616. group = previous # end of string
  617. else:
  618. # else, get the next value from the string
  619. group = ord(grouping[grouping_state])
  620. if group == 0xFF: # special value to mean 'stop'
  621. break
  622. grouping_state += 1
  623. previous = group
  624. #
  625. final_grouping = min(group, max(left, max(min_width, 1)))
  626. n_zeros = max(0, final_grouping - left)
  627. n_chars = max(0, min(left, final_grouping))
  628. ts = self._loc_thousands if need_separator else None
  629. self._fill_digits(buf, digits, left, n_chars, n_zeros, ts)
  630. need_separator = True
  631. left -= n_chars
  632. min_width -= final_grouping
  633. if left <= 0 and min_width <= 0:
  634. done = True
  635. break
  636. min_width -= n_ts
  637. if not done:
  638. group = max(max(left, min_width), 1)
  639. n_zeros = max(0, group - left)
  640. n_chars = max(0, min(left, group))
  641. ts = self._loc_thousands if need_separator else None
  642. self._fill_digits(buf, digits, left, n_chars, n_zeros, ts)
  643. buf.reverse()
  644. self._grouped_digits = self.empty.join(buf)
  645. def _upcase_string(self, s):
  646. buf = []
  647. for c in s:
  648. index = ord(c)
  649. if ord("a") <= index <= ord("z"):
  650. c = chr(index - 32)
  651. buf.append(c)
  652. return self.empty.join(buf)
  653. def _fill_number(self, spec, num, to_digits, to_prefix, fill_char,
  654. to_remainder, upper, grouped_digits=None):
  655. out = self._builder()
  656. if spec.n_lpadding:
  657. out.append_multiple_char(fill_char[0], spec.n_lpadding)
  658. if spec.n_sign:
  659. if self.is_unicode:
  660. sign = spec.sign.decode("latin-1")
  661. else:
  662. sign = spec.sign
  663. out.append(sign)
  664. if spec.n_prefix:
  665. pref = num[to_prefix:to_prefix + spec.n_prefix]
  666. if upper:
  667. pref = self._upcase_string(pref)
  668. out.append(pref)
  669. if spec.n_spadding:
  670. out.append_multiple_char(fill_char[0], spec.n_spadding)
  671. if spec.n_digits != 0:
  672. if self._loc_thousands:
  673. if grouped_digits is not None:
  674. digits = grouped_digits
  675. else:
  676. digits = self._grouped_digits
  677. assert digits is not None
  678. else:
  679. stop = to_digits + spec.n_digits
  680. assert stop >= 0
  681. digits = num[to_digits:stop]
  682. if upper:
  683. digits = self._upcase_string(digits)
  684. out.append(digits)
  685. if spec.n_decimal:
  686. out.append(self._lit(".")[0])
  687. if spec.n_remainder:
  688. out.append(num[to_remainder:])
  689. if spec.n_rpadding:
  690. out.append_multiple_char(fill_char[0], spec.n_rpadding)
  691. #if complex, need to call twice - just retun the buffer
  692. return out.build()
  693. def _format_int_or_long(self, w_num, kind):
  694. space = self.space
  695. if self._precision != -1:
  696. raise oefmt(space.w_ValueError,
  697. "precision not allowed in integer type")
  698. sign_char = "\0"
  699. tp = self._type
  700. if tp == "c":
  701. if self._sign != "\0":
  702. raise oefmt(space.w_ValueError,
  703. "sign not allowed with 'c' presentation type")
  704. value = space.int_w(w_num)
  705. if self.is_unicode:
  706. result = runicode.UNICHR(value)
  707. else:
  708. result = chr(value)
  709. n_digits = 1
  710. n_remainder = 1
  711. to_remainder = 0
  712. n_prefix = 0
  713. to_prefix = 0
  714. to_numeric = 0
  715. else:
  716. if tp == "b":
  717. base = 2
  718. skip_leading = 2
  719. elif tp == "o":
  720. base = 8
  721. skip_leading = 2
  722. elif tp == "x" or tp == "X":
  723. base = 16
  724. skip_leading = 2
  725. elif tp == "n" or tp == "d":
  726. base = 10
  727. skip_leading = 0
  728. else:
  729. raise AssertionError("shouldn't reach")
  730. if kind == INT_KIND:
  731. result = self._int_to_base(base, space.int_w(w_num))
  732. else:
  733. result = self._long_to_base(base, space.bigint_w(w_num))
  734. n_prefix = skip_leading if self._alternate else 0
  735. to_prefix = 0
  736. if result[0] == "-":
  737. sign_char = "-"
  738. skip_leading += 1
  739. to_prefix += 1
  740. n_digits = len(result) - skip_leading
  741. n_remainder = 0
  742. to_remainder = 0
  743. to_numeric = skip_leading
  744. self._get_locale(tp)
  745. spec = self._calc_num_width(n_prefix, sign_char, to_numeric, n_digits,
  746. n_remainder, False, result)
  747. fill = self._fill_char
  748. upper = self._type == "X"
  749. return self.space.wrap(self._fill_number(spec, result, to_numeric,
  750. to_prefix, fill, to_remainder, upper))
  751. def _long_to_base(self, base, value):
  752. prefix = ""
  753. if base == 2:
  754. prefix = "0b"
  755. elif base == 8:
  756. prefix = "0o"
  757. elif base == 16:
  758. prefix = "0x"
  759. as_str = value.format(LONG_DIGITS[:base], prefix)
  760. if self.is_unicode:
  761. return as_str.decode("latin-1")
  762. return as_str
  763. def _int_to_base(self, base, value):
  764. if base == 10:
  765. s = str(value)
  766. if self.is_unicode:
  767. return s.decode("latin-1")
  768. return s
  769. # This part is slow.
  770. negative = value < 0
  771. base = r_uint(base)
  772. value = r_uint(value)
  773. if negative: # change the sign on the unsigned number: otherwise,
  774. value = -value # we'd risk overflow if value==-sys.maxint-1
  775. #
  776. buf = ["\0"] * (8 * 8 + 6) # Too much on 32 bit, but who cares?
  777. i = len(buf) - 1
  778. while True:
  779. div = value // base # unsigned
  780. mod = value - div * base # unsigned, always in range(0,base)
  781. digit = intmask(mod)
  782. digit += ord("0") if digit < 10 else ord("a") - 10
  783. buf[i] = chr(digit)
  784. value = div # unsigned
  785. i -= 1
  786. if not value:
  787. break
  788. if base == r_uint(2):
  789. buf[i] = "b"
  790. buf[i - 1] = "0"
  791. elif base == r_uint(8):
  792. buf[i] = "o"
  793. buf[i - 1] = "0"
  794. elif base == r_uint(16):
  795. buf[i] = "x"
  796. buf[i - 1] = "0"
  797. else:
  798. buf[i] = "#"
  799. buf[i - 1] = chr(ord("0") + intmask(base % r_uint(10)))
  800. if base > r_uint(10):
  801. buf[i - 2] = chr(ord("0") + intmask(base // r_uint(10)))
  802. i -= 1
  803. i -= 1
  804. if negative:
  805. i -= 1
  806. buf[i] = "-"
  807. assert i >= 0
  808. return self.empty.join(buf[i:])
  809. def format_int_or_long(self, w_num, kind):
  810. space = self.space
  811. if self._parse_spec("d", ">"):
  812. if self.is_unicode:
  813. return space.call_function(space.w_unicode, w_num)
  814. return self.space.str(w_num)
  815. tp = self._type
  816. if (tp == "b" or
  817. tp == "c" or
  818. tp == "d" or
  819. tp == "o" or
  820. tp == "x" or
  821. tp == "X" or
  822. tp == "n"):
  823. return self._format_int_or_long(w_num, kind)
  824. elif (tp == "e" or
  825. tp == "E" or
  826. tp == "f" or
  827. tp == "F" or
  828. tp == "g" or
  829. tp == "G" or
  830. tp == "%"):
  831. w_float = space.float(w_num)
  832. return self._format_float(w_float)
  833. else:
  834. self._unknown_presentation("int" if kind == INT_KIND else "long")
  835. def _parse_number(self, s, i):
  836. """Determine if s has a decimal point, and the index of the first #
  837. after the decimal, or the end of the number."""
  838. length = len(s)
  839. while i < length and "0" <= s[i] <= "9":
  840. i += 1
  841. rest = i
  842. dec_point = i < length and s[i] == "."
  843. if dec_point:
  844. rest += 1
  845. #differs from CPython method - CPython sets n_remainder
  846. return dec_point, rest
  847. def _format_float(self, w_float):
  848. """helper for format_float"""
  849. space = self.space
  850. flags = 0
  851. default_precision = 6
  852. if self._alternate:
  853. raise oefmt(space.w_ValueError,
  854. "Alternate form (#) not allowed in float formats")
  855. tp = self._type
  856. self._get_locale(tp)
  857. if tp == "\0":
  858. tp = "g"
  859. default_precision = 12
  860. flags |= rfloat.DTSF_ADD_DOT_0
  861. elif tp == "n":
  862. tp = "g"
  863. value = space.float_w(w_float)
  864. if tp == "%":
  865. tp = "f"
  866. value *= 100
  867. add_pct = True
  868. else:
  869. add_pct = False
  870. if self._precision == -1:
  871. self._precision = default_precision
  872. result, special = rfloat.double_to_string(value, tp,
  873. self._precision, flags)
  874. if add_pct:
  875. result += "%"
  876. n_digits = len(result)
  877. if result[0] == "-":
  878. sign = "-"
  879. to_number = 1
  880. n_digits -= 1
  881. else:
  882. sign = "\0"
  883. to_number = 0
  884. have_dec_point, to_remainder = self._parse_number(result, to_number)
  885. n_remainder = len(result) - to_remainder
  886. if self.is_unicode:
  887. digits = result.decode("latin-1")
  888. else:
  889. digits = result
  890. spec = self._calc_num_width(0, sign, to_number, n_digits,
  891. n_remainder, have_dec_point, digits)
  892. fill = self._fill_char
  893. return self.space.wrap(self._fill_number(spec, digits, to_number, 0,
  894. fill, to_remainder, False))
  895. def format_float(self, w_float):
  896. space = self.space
  897. if self._parse_spec("\0", ">"):
  898. if self.is_unicode:
  899. return space.call_function(space.w_unicode, w_float)
  900. return space.str(w_float)
  901. tp = self._type
  902. if (tp == "\0" or
  903. tp == "e" or
  904. tp == "E" or
  905. tp == "f" or
  906. tp == "F" or
  907. tp == "g" or
  908. tp == "G" or
  909. tp == "n" or
  910. tp == "%"):
  911. return self._format_float(w_float)
  912. self._unknown_presentation("float")
  913. def _format_complex(self, w_complex):
  914. space = self.space
  915. tp = self._type
  916. self._get_locale(tp)
  917. default_precision = 6
  918. if self._align == "=":
  919. # '=' alignment is invalid
  920. raise oefmt(space.w_ValueError,
  921. "'=' alignment flag is not allowed in complex "
  922. "format specifier")
  923. if self._fill_char == "0":
  924. # zero padding is invalid
  925. raise oefmt(space.w_ValueError,
  926. "Zero padding is not allowed in complex format "
  927. "specifier")
  928. if self._alternate:
  929. # alternate is invalid
  930. raise oefmt(space.w_ValueError,
  931. "Alternate form (#) not allowed in complex format "
  932. "specifier")
  933. skip_re = 0
  934. add_parens = 0
  935. if tp == "\0":
  936. #should mirror str() output
  937. tp = "g"
  938. default_precision = 12
  939. #test if real part is non-zero
  940. if (w_complex.realval == 0 and
  941. copysign(1., w_complex.realval) == 1.):
  942. skip_re = 1
  943. else:
  944. add_parens = 1
  945. if tp == "n":
  946. #same as 'g' except for locale, taken care of later
  947. tp = "g"
  948. #check if precision not set
  949. if self._precision == -1:
  950. self._precision = default_precision
  951. #might want to switch to double_to_string from formatd
  952. #in CPython it's named 're' - clashes with re module
  953. re_num = formatd(w_complex.realval, tp, self._precision)
  954. im_num = formatd(w_complex.imagval, tp, self._precision)
  955. n_re_digits = len(re_num)
  956. n_im_digits = len(im_num)
  957. to_real_number = 0
  958. to_imag_number = 0
  959. re_sign = im_sign = ''
  960. #if a sign character is in the output, remember it and skip
  961. if re_num[0] == "-":
  962. re_sign = "-"
  963. to_real_number = 1
  964. n_re_digits -= 1
  965. if im_num[0] == "-":
  966. im_sign = "-"
  967. to_imag_number = 1
  968. n_im_digits -= 1
  969. #turn off padding - do it after number composition
  970. #calc_num_width uses self._width, so assign to temporary variable,
  971. #calculate width of real and imag parts, then reassign padding, align
  972. tmp_fill_char = self._fill_char
  973. tmp_align = self._align
  974. tmp_width = self._width
  975. self._fill_char = "\0"
  976. self._align = "<"
  977. self._width = -1
  978. #determine if we have remainder, might include dec or exponent or both
  979. re_have_dec, re_remainder_ptr = self._parse_number(re_num,
  980. to_real_number)
  981. im_have_dec, im_remainder_ptr = self._parse_number(im_num,
  982. to_imag_number)
  983. if self.is_unicode:
  984. re_num = re_num.decode("latin-1")
  985. im_num = im_num.decode("latin-1")
  986. #set remainder, in CPython _parse_number sets this
  987. #using n_re_digits causes tests to fail
  988. re_n_remainder = len(re_num) - re_remainder_ptr
  989. im_n_remainder = len(im_num) - im_remainder_ptr
  990. re_spec = self._calc_num_width(0, re_sign, to_real_number, n_re_digits,
  991. re_n_remainder, re_have_dec,
  992. re_num)
  993. #capture grouped digits b/c _fill_number reads from self._grouped_digits
  994. #self._grouped_digits will get overwritten in imaginary calc_num_width
  995. re_grouped_digits = self._grouped_digits
  996. if not skip_re:
  997. self._sign = "+"
  998. im_spec = self._calc_num_width(0, im_sign, to_imag_number, n_im_digits,
  999. im_n_remainder, im_have_dec,
  1000. im_num)
  1001. im_grouped_digits = self._grouped_digits
  1002. if skip_re:
  1003. re_spec.n_total = 0
  1004. #reassign width, alignment, fill character
  1005. self._align = tmp_align
  1006. self._width = tmp_width
  1007. self._fill_char = tmp_fill_char
  1008. #compute L and R padding - stored in self._left_pad and self._right_pad
  1009. self._calc_padding(self.empty, re_spec.n_total + im_spec.n_total + 1 +
  1010. add_parens * 2)
  1011. out = self._builder()
  1012. fill = self._fill_char
  1013. #compose the string
  1014. #add left padding
  1015. out.append_multiple_char(fill, self._left_pad)
  1016. if add_parens:
  1017. out.append(self._lit('(')[0])
  1018. #if the no. has a real component, add it
  1019. if not skip_re:
  1020. out.append(self._fill_number(re_spec, re_num, to_real_number, 0,
  1021. fill, re_remainder_ptr, False,
  1022. re_grouped_digits))
  1023. #add imaginary component
  1024. out.append(self._fill_number(im_spec, im_num, to_imag_number, 0,
  1025. fill, im_remainder_ptr, False,
  1026. im_grouped_digits))
  1027. #add 'j' character
  1028. out.append(self._lit('j')[0])
  1029. if add_parens:
  1030. out.append(self._lit(')')[0])
  1031. #add right padding
  1032. out.append_multiple_char(fill, self._right_pad)
  1033. return self.space.wrap(out.build())
  1034. def format_complex(self, w_complex):
  1035. """return the string representation of a complex number"""
  1036. space = self.space
  1037. #parse format specification, set associated variables
  1038. if self._parse_spec("\0", ">"):
  1039. return space.str(w_complex)
  1040. tp = self._type
  1041. if (tp == "\0" or
  1042. tp == "e" or
  1043. tp == "E" or
  1044. tp == "f" or
  1045. tp == "F" or
  1046. tp == "g" or
  1047. tp == "G" or
  1048. tp == "n"):
  1049. return self._format_complex(w_complex)
  1050. self._unknown_presentation("complex")
  1051. return Formatter
# Two independent class objects built by the same factory, so that format
# specs of different string types are handled by different classes.
# NOTE(review): presumably required so the RPython annotator never sees str
# and unicode mixed in the attributes of one class -- confirm.
StrFormatter = make_formatting_class()
UnicodeFormatter = make_formatting_class()
  1054. def unicode_formatter(space, spec):
  1055. return StrFormatter(space, True, spec)
  1056. def str_formatter(space, spec):
  1057. return UnicodeFormatter(space, False, spec)
  1058. @specialize.arg(2)
  1059. def run_formatter(space, w_format_spec, meth, *args):
  1060. if space.isinstance_w(w_format_spec, space.w_unicode):
  1061. formatter = unicode_formatter(space, space.unicode_w(w_format_spec))
  1062. return getattr(formatter, meth)(*args)
  1063. else:
  1064. formatter = str_formatter(space, space.str_w(w_format_spec))
  1065. return getattr(formatter, meth)(*args)