PageRenderTime 44ms CodeModel.GetById 18ms RepoModel.GetById 1ms app.codeStats 0ms

/rpython/rtyper/lltypesystem/rbuilder.py

https://bitbucket.org/pypy/pypy/
Python | 453 lines | 317 code | 53 blank | 83 comment | 36 complexity | baa488bba315457b3aa0f7cde44cfafc MD5 | raw file
Possible License(s): AGPL-3.0, BSD-3-Clause, Apache-2.0
  1. from rpython.rlib import rgc, jit
  2. from rpython.rlib.objectmodel import enforceargs, dont_inline, always_inline
  3. from rpython.rlib.rarithmetic import ovfcheck, r_uint, intmask
  4. from rpython.rtyper.debug import ll_assert
  5. from rpython.rlib.unroll import unrolling_iterable
  6. from rpython.rtyper.rptr import PtrRepr
  7. from rpython.rtyper.lltypesystem import lltype, rffi, rstr
  8. from rpython.rtyper.lltypesystem.lltype import staticAdtMethod, nullptr
  9. from rpython.rtyper.lltypesystem.rstr import (STR, UNICODE, char_repr,
  10. string_repr, unichar_repr, unicode_repr)
  11. from rpython.rtyper.rbuilder import AbstractStringBuilderRepr
  12. from rpython.tool.sourcetools import func_with_new_name
  13. from rpython.rtyper.annlowlevel import llstr, llunicode
  14. # ------------------------------------------------------------
  15. # Basic idea:
  16. #
  17. # - A StringBuilder has a rstr.STR of the specified initial size
  18. # (100 by default), which is filled gradually.
  19. #
  20. # - When it is full, we allocate extra buffers as an extra rstr.STR,
  21. # and the already-filled one is added to a chained list of STRINGPIECE
  22. # objects.
  23. #
  24. # - At build() time, we consolidate all these pieces into a single
  25. # rstr.STR, which is both returned and re-attached to the StringBuilder,
  26. # replacing the STRINGPIECEs.
  27. #
  28. # - The data is copied at most twice, and only once in case it fits
  29. # into the initial size (and the GC supports shrinking the STR).
  30. #
  31. # XXX in build(), we could try keeping around a global weakref to the
  32. # chain of STRINGPIECEs and reuse them the next time.
  33. #
  34. # ------------------------------------------------------------
  35. STRINGPIECE = lltype.GcStruct('stringpiece',
  36. ('buf', lltype.Ptr(STR)),
  37. ('prev_piece', lltype.Ptr(lltype.GcForwardReference())))
  38. STRINGPIECE.prev_piece.TO.become(STRINGPIECE)
  39. STRINGBUILDER = lltype.GcStruct('stringbuilder',
  40. ('current_buf', lltype.Ptr(STR)),
  41. ('current_pos', lltype.Signed),
  42. ('current_end', lltype.Signed),
  43. ('total_size', lltype.Signed),
  44. ('extra_pieces', lltype.Ptr(STRINGPIECE)),
  45. adtmeths={
  46. 'copy_string_contents': staticAdtMethod(rstr.copy_string_contents),
  47. 'copy_raw_to_string': staticAdtMethod(rstr.copy_raw_to_string),
  48. 'mallocfn': staticAdtMethod(rstr.mallocstr),
  49. }
  50. )
  51. UNICODEPIECE = lltype.GcStruct('unicodepiece',
  52. ('buf', lltype.Ptr(UNICODE)),
  53. ('prev_piece', lltype.Ptr(lltype.GcForwardReference())))
  54. UNICODEPIECE.prev_piece.TO.become(UNICODEPIECE)
  55. UNICODEBUILDER = lltype.GcStruct('unicodebuilder',
  56. ('current_buf', lltype.Ptr(UNICODE)),
  57. ('current_pos', lltype.Signed),
  58. ('current_end', lltype.Signed),
  59. ('total_size', lltype.Signed),
  60. ('extra_pieces', lltype.Ptr(UNICODEPIECE)),
  61. adtmeths={
  62. 'copy_string_contents': staticAdtMethod(rstr.copy_unicode_contents),
  63. 'copy_raw_to_string': staticAdtMethod(rstr.copy_raw_to_unicode),
  64. 'mallocfn': staticAdtMethod(rstr.mallocunicode),
  65. }
  66. )
  67. # ------------------------------------------------------------
  68. # The generic piece of code to append a string (or a slice of it)
  69. # to a builder; it is inlined inside various functions below
  70. @always_inline
  71. def _ll_append(ll_builder, ll_str, start, size):
  72. pos = ll_builder.current_pos
  73. end = ll_builder.current_end
  74. if (end - pos) < size:
  75. ll_grow_and_append(ll_builder, ll_str, start, size)
  76. else:
  77. ll_builder.current_pos = pos + size
  78. ll_builder.copy_string_contents(ll_str, ll_builder.current_buf,
  79. start, pos, size)
  80. # ------------------------------------------------------------
  81. # Logic to grow a builder (by adding a new string to it)
  82. @dont_inline
  83. @enforceargs(None, int)
  84. def ll_grow_by(ll_builder, needed):
  85. try:
  86. needed = ovfcheck(needed + ll_builder.total_size)
  87. needed = ovfcheck(needed + 63) & ~63
  88. total_size = ll_builder.total_size + needed
  89. except OverflowError:
  90. raise MemoryError
  91. #
  92. new_string = ll_builder.mallocfn(needed)
  93. #
  94. PIECE = lltype.typeOf(ll_builder.extra_pieces).TO
  95. old_piece = lltype.malloc(PIECE)
  96. old_piece.buf = ll_builder.current_buf
  97. old_piece.prev_piece = ll_builder.extra_pieces
  98. ll_assert(bool(old_piece.buf), "no buf??")
  99. ll_builder.current_buf = new_string
  100. ll_builder.current_pos = 0
  101. ll_builder.current_end = needed
  102. ll_builder.total_size = total_size
  103. ll_builder.extra_pieces = old_piece
  104. @dont_inline
  105. def ll_grow_and_append(ll_builder, ll_str, start, size):
  106. # First, the part that still fits in the current piece
  107. part1 = ll_builder.current_end - ll_builder.current_pos
  108. ll_assert(part1 < size, "part1 >= size")
  109. ll_builder.copy_string_contents(ll_str, ll_builder.current_buf,
  110. start, ll_builder.current_pos,
  111. part1)
  112. start += part1
  113. size -= part1
  114. # Allocate the new piece
  115. ll_grow_by(ll_builder, size)
  116. ll_assert(ll_builder.current_pos == 0, "current_pos must be 0 after grow()")
  117. # Finally, the second part of the string
  118. ll_builder.current_pos = size
  119. ll_builder.copy_string_contents(ll_str, ll_builder.current_buf,
  120. start, 0, size)
  121. # ------------------------------------------------------------
  122. # builder.append()
  123. @always_inline
  124. def ll_append(ll_builder, ll_str):
  125. if jit.we_are_jitted():
  126. ll_jit_append(ll_builder, ll_str)
  127. else:
  128. # no-jit case: inline the logic of _ll_append() in the caller
  129. _ll_append(ll_builder, ll_str, 0, len(ll_str.chars))
  130. @dont_inline
  131. def ll_jit_append(ll_builder, ll_str):
  132. # jit case: first try special cases for known small lengths
  133. if ll_jit_try_append_slice(ll_builder, ll_str, 0, len(ll_str.chars)):
  134. return
  135. # fall-back to do a residual call to ll_append_res0
  136. ll_append_res0(ll_builder, ll_str)
  137. @jit.dont_look_inside
  138. def ll_append_res0(ll_builder, ll_str):
  139. _ll_append(ll_builder, ll_str, 0, len(ll_str.chars))
  140. # ------------------------------------------------------------
  141. # builder.append_char()
  142. @always_inline
  143. def ll_append_char(ll_builder, char):
  144. jit.conditional_call(ll_builder.current_pos == ll_builder.current_end,
  145. ll_grow_by, ll_builder, 1)
  146. pos = ll_builder.current_pos
  147. ll_builder.current_pos = pos + 1
  148. ll_builder.current_buf.chars[pos] = char
  149. # ------------------------------------------------------------
  150. # builder.append_slice()
  151. @always_inline
  152. def ll_append_slice(ll_builder, ll_str, start, end):
  153. if jit.we_are_jitted():
  154. ll_jit_append_slice(ll_builder, ll_str, start, end)
  155. else:
  156. # no-jit case: inline the logic of _ll_append() in the caller
  157. _ll_append(ll_builder, ll_str, start, end - start)
  158. @dont_inline
  159. def ll_jit_append_slice(ll_builder, ll_str, start, end):
  160. # jit case: first try special cases for known small lengths
  161. if ll_jit_try_append_slice(ll_builder, ll_str, start, end - start):
  162. return
  163. # fall-back to do a residual call to ll_append_res_slice
  164. ll_append_res_slice(ll_builder, ll_str, start, end)
  165. @jit.dont_look_inside
  166. def ll_append_res_slice(ll_builder, ll_str, start, end):
  167. _ll_append(ll_builder, ll_str, start, end - start)
  168. # ------------------------------------------------------------
  169. # Special-casing for the JIT: appending strings (or slices) of
  170. # a known length up to MAX_N. These functions all contain an
  171. # inlined copy of _ll_append(), but with a known small N, gcc
  172. # will compile the copy_string_contents() efficiently.
  173. MAX_N = 10
  174. def make_func_for_size(N):
  175. @jit.dont_look_inside
  176. def ll_append_0(ll_builder, ll_str):
  177. _ll_append(ll_builder, ll_str, 0, N)
  178. ll_append_0 = func_with_new_name(ll_append_0, "ll_append_0_%d" % N)
  179. #
  180. @jit.dont_look_inside
  181. def ll_append_start(ll_builder, ll_str, start):
  182. _ll_append(ll_builder, ll_str, start, N)
  183. ll_append_start = func_with_new_name(ll_append_start,
  184. "ll_append_start_%d" % N)
  185. return ll_append_0, ll_append_start, N
  186. unroll_func_for_size = unrolling_iterable([make_func_for_size(_n)
  187. for _n in range(2, MAX_N + 1)])
  188. @jit.unroll_safe
  189. def ll_jit_try_append_slice(ll_builder, ll_str, start, size):
  190. if jit.isconstant(size):
  191. if size == 0:
  192. return True
  193. # a special case: if the builder's pos and end are still contants
  194. # (typically if the builder is still virtual), and if 'size' fits,
  195. # then we don't need any reallocation and can just set the
  196. # characters in the buffer, in a way that won't force anything.
  197. if (jit.isconstant(ll_builder.current_pos) and
  198. jit.isconstant(ll_builder.current_end) and
  199. size <= (ll_builder.current_end - ll_builder.current_pos) and
  200. size <= 16):
  201. pos = ll_builder.current_pos
  202. buf = ll_builder.current_buf
  203. stop = pos + size
  204. ll_builder.current_pos = stop
  205. while pos < stop:
  206. buf.chars[pos] = ll_str.chars[start]
  207. pos += 1
  208. start += 1
  209. return True
  210. # turn appends of length 1 into ll_append_char().
  211. if size == 1:
  212. ll_append_char(ll_builder, ll_str.chars[start])
  213. return True
  214. # turn appends of length 2 to 10 into residual calls to
  215. # specialized functions, for the lengths 2 to 10, where
  216. # gcc will optimize the known-length copy_string_contents()
  217. # as much as possible.
  218. for func0, funcstart, for_size in unroll_func_for_size:
  219. if size == for_size:
  220. if jit.isconstant(start) and start == 0:
  221. func0(ll_builder, ll_str)
  222. else:
  223. funcstart(ll_builder, ll_str, start)
  224. return True
  225. return False # use the fall-back path
  226. # ------------------------------------------------------------
  227. # builder.append_multiple_char()
  228. @always_inline
  229. def ll_append_multiple_char(ll_builder, char, times):
  230. if jit.we_are_jitted():
  231. if ll_jit_try_append_multiple_char(ll_builder, char, times):
  232. return
  233. _ll_append_multiple_char(ll_builder, char, times)
  234. @jit.dont_look_inside
  235. def _ll_append_multiple_char(ll_builder, char, times):
  236. part1 = ll_builder.current_end - ll_builder.current_pos
  237. if times > part1:
  238. times -= part1
  239. buf = ll_builder.current_buf
  240. for i in xrange(ll_builder.current_pos, ll_builder.current_end):
  241. buf.chars[i] = char
  242. ll_grow_by(ll_builder, times)
  243. #
  244. buf = ll_builder.current_buf
  245. pos = ll_builder.current_pos
  246. end = pos + times
  247. ll_builder.current_pos = end
  248. for i in xrange(pos, end):
  249. buf.chars[i] = char
  250. @jit.unroll_safe
  251. def ll_jit_try_append_multiple_char(ll_builder, char, size):
  252. if jit.isconstant(size):
  253. if size == 0:
  254. return True
  255. # a special case: if the builder's pos and end are still contants
  256. # (typically if the builder is still virtual), and if 'size' fits,
  257. # then we don't need any reallocation and can just set the
  258. # characters in the buffer, in a way that won't force anything.
  259. if (jit.isconstant(ll_builder.current_pos) and
  260. jit.isconstant(ll_builder.current_end) and
  261. size <= (ll_builder.current_end - ll_builder.current_pos) and
  262. size <= 16):
  263. pos = ll_builder.current_pos
  264. buf = ll_builder.current_buf
  265. stop = pos + size
  266. ll_builder.current_pos = stop
  267. while pos < stop:
  268. buf.chars[pos] = char
  269. pos += 1
  270. return True
  271. if size == 1:
  272. ll_append_char(ll_builder, char)
  273. return True
  274. return False # use the fall-back path
  275. # ------------------------------------------------------------
  276. # builder.append_charpsize()
  277. @jit.dont_look_inside
  278. def ll_append_charpsize(ll_builder, charp, size):
  279. part1 = ll_builder.current_end - ll_builder.current_pos
  280. if size > part1:
  281. # First, the part that still fits
  282. ll_builder.copy_raw_to_string(charp, ll_builder.current_buf,
  283. ll_builder.current_pos, part1)
  284. charp = rffi.ptradd(charp, part1)
  285. size -= part1
  286. ll_grow_by(ll_builder, size)
  287. #
  288. pos = ll_builder.current_pos
  289. ll_builder.current_pos = pos + size
  290. ll_builder.copy_raw_to_string(charp, ll_builder.current_buf, pos, size)
  291. # ------------------------------------------------------------
  292. # builder.getlength()
  293. @always_inline
  294. def ll_getlength(ll_builder):
  295. num_chars_missing_from_last_piece = (
  296. ll_builder.current_end - ll_builder.current_pos)
  297. return ll_builder.total_size - num_chars_missing_from_last_piece
  298. # ------------------------------------------------------------
  299. # builder.build()
  300. @jit.look_inside_iff(lambda ll_builder: jit.isvirtual(ll_builder))
  301. def ll_build(ll_builder):
  302. # NB. usually the JIT doesn't look inside this function; it does
  303. # so only in the simplest example where it could virtualize everything
  304. if ll_builder.extra_pieces:
  305. ll_fold_pieces(ll_builder)
  306. elif ll_builder.current_pos != ll_builder.total_size:
  307. ll_shrink_final(ll_builder)
  308. return ll_builder.current_buf
  309. def ll_shrink_final(ll_builder):
  310. final_size = ll_builder.current_pos
  311. ll_assert(final_size <= ll_builder.total_size,
  312. "final_size > ll_builder.total_size?")
  313. buf = rgc.ll_shrink_array(ll_builder.current_buf, final_size)
  314. ll_builder.current_buf = buf
  315. ll_builder.current_end = final_size
  316. ll_builder.total_size = final_size
  317. def ll_fold_pieces(ll_builder):
  318. final_size = BaseStringBuilderRepr.ll_getlength(ll_builder)
  319. ll_assert(final_size >= 0, "negative final_size")
  320. extra = ll_builder.extra_pieces
  321. ll_builder.extra_pieces = lltype.nullptr(lltype.typeOf(extra).TO)
  322. #
  323. result = ll_builder.mallocfn(final_size)
  324. piece = ll_builder.current_buf
  325. piece_lgt = ll_builder.current_pos
  326. ll_assert(ll_builder.current_end == len(piece.chars),
  327. "bogus last piece_lgt")
  328. ll_builder.total_size = final_size
  329. ll_builder.current_buf = result
  330. ll_builder.current_pos = final_size
  331. ll_builder.current_end = final_size
  332. dst = final_size
  333. while True:
  334. dst -= piece_lgt
  335. ll_assert(dst >= 0, "rbuilder build: overflow")
  336. ll_builder.copy_string_contents(piece, result, 0, dst, piece_lgt)
  337. if not extra:
  338. break
  339. piece = extra.buf
  340. piece_lgt = len(piece.chars)
  341. extra = extra.prev_piece
  342. ll_assert(dst == 0, "rbuilder build: underflow")
  343. # ------------------------------------------------------------
  344. # bool(builder)
  345. def ll_bool(ll_builder):
  346. return ll_builder != nullptr(lltype.typeOf(ll_builder).TO)
  347. # ------------------------------------------------------------
  348. class BaseStringBuilderRepr(AbstractStringBuilderRepr):
  349. def empty(self):
  350. return nullptr(self.lowleveltype.TO)
  351. ll_append = staticmethod(ll_append)
  352. ll_append_char = staticmethod(ll_append_char)
  353. ll_append_slice = staticmethod(ll_append_slice)
  354. ll_append_multiple_char = staticmethod(ll_append_multiple_char)
  355. ll_append_charpsize = staticmethod(ll_append_charpsize)
  356. ll_getlength = staticmethod(ll_getlength)
  357. ll_build = staticmethod(ll_build)
  358. ll_bool = staticmethod(ll_bool)
  359. class StringBuilderRepr(BaseStringBuilderRepr):
  360. lowleveltype = lltype.Ptr(STRINGBUILDER)
  361. basetp = STR
  362. convert_to_ll = staticmethod(llstr)
  363. string_repr = string_repr
  364. char_repr = char_repr
  365. raw_ptr_repr = PtrRepr(
  366. lltype.Ptr(lltype.Array(lltype.Char, hints={'nolength': True}))
  367. )
  368. @staticmethod
  369. def ll_new(init_size):
  370. # Clamp 'init_size' to be a value between 0 and 1280.
  371. # Negative values are mapped to 1280.
  372. init_size = intmask(min(r_uint(init_size), r_uint(1280)))
  373. ll_builder = lltype.malloc(STRINGBUILDER)
  374. ll_builder.current_buf = ll_builder.mallocfn(init_size)
  375. ll_builder.current_pos = 0
  376. ll_builder.current_end = init_size
  377. ll_builder.total_size = init_size
  378. return ll_builder
  379. class UnicodeBuilderRepr(BaseStringBuilderRepr):
  380. lowleveltype = lltype.Ptr(UNICODEBUILDER)
  381. basetp = UNICODE
  382. convert_to_ll = staticmethod(llunicode)
  383. string_repr = unicode_repr
  384. char_repr = unichar_repr
  385. raw_ptr_repr = PtrRepr(
  386. lltype.Ptr(lltype.Array(lltype.UniChar, hints={'nolength': True}))
  387. )
  388. @staticmethod
  389. def ll_new(init_size):
  390. # Clamp 'init_size' to be a value between 0 and 1280.
  391. # Negative values are mapped to 1280.
  392. init_size = intmask(min(r_uint(init_size), r_uint(1280)))
  393. ll_builder = lltype.malloc(UNICODEBUILDER)
  394. ll_builder.current_buf = ll_builder.mallocfn(init_size)
  395. ll_builder.current_pos = 0
  396. ll_builder.current_end = init_size
  397. ll_builder.total_size = init_size
  398. return ll_builder
  399. unicodebuilder_repr = UnicodeBuilderRepr()
  400. stringbuilder_repr = StringBuilderRepr()