PageRenderTime 54ms CodeModel.GetById 19ms RepoModel.GetById 0ms app.codeStats 0ms

/pypy/rpython/lltypesystem/rstr.py

https://bitbucket.org/pypy/pypy/
Python | 1091 lines | 934 code | 106 blank | 51 comment | 208 complexity | 56e3e7da9fc72bca43724cd845ad14e3 MD5 | raw file
Possible License(s): AGPL-3.0, BSD-3-Clause, Apache-2.0
  1. from weakref import WeakValueDictionary
  2. from pypy.tool.pairtype import pairtype
  3. from pypy.rpython.error import TyperError
  4. from pypy.rlib.objectmodel import malloc_zero_filled, we_are_translated
  5. from pypy.rlib.objectmodel import _hash_string, enforceargs
  6. from pypy.rlib.objectmodel import keepalive_until_here
  7. from pypy.rlib.debug import ll_assert
  8. from pypy.rlib import jit
  9. from pypy.rlib.rarithmetic import ovfcheck
  10. from pypy.rpython.robject import PyObjRepr, pyobj_repr
  11. from pypy.rpython.rmodel import inputconst, IntegerRepr
  12. from pypy.rpython.rstr import AbstractStringRepr,AbstractCharRepr,\
  13. AbstractUniCharRepr, AbstractStringIteratorRepr,\
  14. AbstractLLHelpers, AbstractUnicodeRepr
  15. from pypy.rpython.lltypesystem import ll_str
  16. from pypy.rpython.lltypesystem.lltype import \
  17. GcStruct, Signed, Array, Char, UniChar, Ptr, malloc, \
  18. Bool, Void, GcArray, nullptr, pyobjectptr, cast_primitive, typeOf,\
  19. staticAdtMethod, GcForwardReference
  20. from pypy.rpython.rmodel import Repr
  21. from pypy.rpython.lltypesystem import llmemory
  22. from pypy.tool.sourcetools import func_with_new_name
  23. from pypy.rpython.lltypesystem.lloperation import llop
# ____________________________________________________________
#
# Concrete implementation of RPython strings:
#
#    struct str {
#        hash: Signed
#        chars: array of Char
#    }
#
# STR and UNICODE are declared as forward references here so that the
# allocation and copy helpers below can refer to them; they are given
# their actual structure by the STR.become()/UNICODE.become() calls
# later in this file.

STR = GcForwardReference()
UNICODE = GcForwardReference()
  34. def new_malloc(TP, name):
  35. def mallocstr(length):
  36. ll_assert(length >= 0, "negative string length")
  37. r = malloc(TP, length)
  38. if not we_are_translated() or not malloc_zero_filled:
  39. r.hash = 0
  40. return r
  41. mallocstr._annspecialcase_ = 'specialize:semierased'
  42. return func_with_new_name(mallocstr, name)
# Allocation functions for the two concrete string types.
mallocstr = new_malloc(STR, 'mallocstr')
mallocunicode = new_malloc(UNICODE, 'mallocunicode')
def emptystrfun():
    """Return the module-level ``emptystr`` constant."""
    return emptystr
def emptyunicodefun():
    """Return the module-level ``emptyunicode`` constant."""
    return emptyunicode
  49. def _new_copy_contents_fun(TP, CHAR_TP, name):
  50. def _str_ofs(item):
  51. return (llmemory.offsetof(TP, 'chars') +
  52. llmemory.itemoffsetof(TP.chars, 0) +
  53. llmemory.sizeof(CHAR_TP) * item)
  54. @jit.oopspec('stroruni.copy_contents(src, dst, srcstart, dststart, length)')
  55. @enforceargs(None, None, int, int, int)
  56. def copy_string_contents(src, dst, srcstart, dststart, length):
  57. assert srcstart >= 0
  58. assert dststart >= 0
  59. assert length >= 0
  60. src = llmemory.cast_ptr_to_adr(src) + _str_ofs(srcstart)
  61. dst = llmemory.cast_ptr_to_adr(dst) + _str_ofs(dststart)
  62. llmemory.raw_memcopy(src, dst, llmemory.sizeof(CHAR_TP) * length)
  63. keepalive_until_here(src)
  64. keepalive_until_here(dst)
  65. copy_string_contents._always_inline_ = True
  66. return func_with_new_name(copy_string_contents, 'copy_%s_contents' % name)
# Copy functions for the two concrete string types.
copy_string_contents = _new_copy_contents_fun(STR, Char, 'string')
copy_unicode_contents = _new_copy_contents_fun(UNICODE, UniChar, 'unicode')

SIGNED_ARRAY = GcArray(Signed)
# Caches used by BaseLLStringRepr.convert_const(): they map a Python
# constant to the low-level string already built for it.
CONST_STR_CACHE = WeakValueDictionary()
CONST_UNICODE_CACHE = WeakValueDictionary()
  72. class BaseLLStringRepr(Repr):
  73. def convert_const(self, value):
  74. if value is None:
  75. return nullptr(self.lowleveltype.TO)
  76. #value = getattr(value, '__self__', value) # for bound string methods
  77. if not isinstance(value, self.basetype):
  78. raise TyperError("not a str: %r" % (value,))
  79. try:
  80. return self.CACHE[value]
  81. except KeyError:
  82. p = self.malloc(len(value))
  83. for i in range(len(value)):
  84. p.chars[i] = cast_primitive(self.base, value[i])
  85. p.hash = 0
  86. self.ll.ll_strhash(p) # precompute the hash
  87. self.CACHE[value] = p
  88. return p
  89. def make_iterator_repr(self):
  90. return self.repr.iterator_repr
  91. def can_ll_be_null(self, s_value):
  92. # XXX unicode
  93. if self is string_repr:
  94. return s_value.can_be_none()
  95. else:
  96. return True # for CharRepr/UniCharRepr subclasses,
  97. # where NULL is always valid: it is chr(0)
  98. def _list_length_items(self, hop, v_lst, LIST):
  99. LIST = LIST.TO
  100. v_length = hop.gendirectcall(LIST.ll_length, v_lst)
  101. v_items = hop.gendirectcall(LIST.ll_items, v_lst)
  102. return v_length, v_items
  103. class StringRepr(BaseLLStringRepr, AbstractStringRepr):
  104. lowleveltype = Ptr(STR)
  105. basetype = str
  106. base = Char
  107. CACHE = CONST_STR_CACHE
  108. def __init__(self, *args):
  109. AbstractStringRepr.__init__(self, *args)
  110. self.ll = LLHelpers
  111. self.malloc = mallocstr
  112. def ll_decode_latin1(self, value):
  113. lgt = len(value.chars)
  114. s = mallocunicode(lgt)
  115. for i in range(lgt):
  116. s.chars[i] = cast_primitive(UniChar, value.chars[i])
  117. return s
  118. class UnicodeRepr(BaseLLStringRepr, AbstractUnicodeRepr):
  119. lowleveltype = Ptr(UNICODE)
  120. basetype = basestring
  121. base = UniChar
  122. CACHE = CONST_UNICODE_CACHE
  123. def __init__(self, *args):
  124. AbstractUnicodeRepr.__init__(self, *args)
  125. self.ll = LLHelpers
  126. self.malloc = mallocunicode
  127. @jit.elidable
  128. def ll_str(self, s):
  129. # XXX crazy that this is here, but I don't want to break
  130. # rmodel logic
  131. if not s:
  132. return self.ll.ll_constant('None')
  133. lgt = len(s.chars)
  134. result = mallocstr(lgt)
  135. for i in range(lgt):
  136. c = s.chars[i]
  137. if ord(c) > 127:
  138. raise UnicodeEncodeError("character not in ascii range")
  139. result.chars[i] = cast_primitive(Char, c)
  140. return result
  141. @jit.elidable
  142. def ll_encode_latin1(self, s):
  143. length = len(s.chars)
  144. result = mallocstr(length)
  145. for i in range(length):
  146. c = s.chars[i]
  147. if ord(c) > 255:
  148. raise UnicodeEncodeError("character not in latin1 range")
  149. result.chars[i] = cast_primitive(Char, c)
  150. return result
class CharRepr(AbstractCharRepr, StringRepr):
    # A single character is represented directly as a Char, not as a
    # pointer to STR.
    lowleveltype = Char
class UniCharRepr(AbstractUniCharRepr, UnicodeRepr):
    # A single unicode character is represented directly as a UniChar,
    # not as a pointer to UNICODE.
    lowleveltype = UniChar
  155. class __extend__(pairtype(PyObjRepr, AbstractStringRepr)):
  156. def convert_from_to((r_from, r_to), v, llops):
  157. v_len = llops.gencapicall('PyString_Size', [v], resulttype=Signed)
  158. cstr = inputconst(Void, STR)
  159. cflags = inputconst(Void, {'flavor': 'gc'})
  160. v_result = llops.genop('malloc_varsize', [cstr, cflags, v_len],
  161. resulttype=Ptr(STR))
  162. llops.gencapicall('PyString_ToRPyString', [v, v_result])
  163. string_repr = llops.rtyper.type_system.rstr.string_repr
  164. v_result = llops.convertvar(v_result, string_repr, r_to)
  165. return v_result
  166. class __extend__(pairtype(AbstractStringRepr, PyObjRepr)):
  167. def convert_from_to((r_from, r_to), v, llops):
  168. string_repr = llops.rtyper.type_system.rstr.string_repr
  169. v = llops.convertvar(v, r_from, string_repr)
  170. cchars = inputconst(Void, "chars")
  171. # xxx put in table
  172. return llops.gencapicall(
  173. 'PyString_FromRPyString',
  174. [v],
  175. resulttype=pyobj_repr,
  176. _callable=lambda v: pyobjectptr(''.join(v.chars)))
  177. class __extend__(pairtype(AbstractUnicodeRepr, PyObjRepr)):
  178. def convert_from_to((r_from, r_to), v, llops):
  179. unicode_repr = llops.rtyper.type_system.rstr.unicode_repr
  180. v = llops.convertvar(v, r_from, unicode_repr)
  181. cchars = inputconst(Void, "chars")
  182. # xxx put in table
  183. return llops.gencapicall(
  184. 'PyUnicode_FromRPyUnicode',
  185. [v],
  186. resulttype=pyobj_repr,
  187. _callable=lambda v: pyobjectptr(u''.join(v.chars)))
  188. # ____________________________________________________________
  189. #
  190. # Low-level methods. These can be run for testing, but are meant to
  191. # be direct_call'ed from rtyped flow graphs, which means that they will
  192. # get flowed and annotated, mostly with SomePtr.
  193. #
  194. def ll_construct_restart_positions(s, l):
  195. # Construct the array of possible restarting positions
  196. # T = Array_of_ints [-1..len2]
  197. # T[-1] = -1 s2.chars[-1] is supposed to be unequal to everything else
  198. T = malloc( SIGNED_ARRAY, l)
  199. T[0] = 0
  200. i = 1
  201. j = 0
  202. while i<l:
  203. if s.chars[i] == s.chars[j]:
  204. j += 1
  205. T[i] = j
  206. i += 1
  207. elif j>0:
  208. j = T[j-1]
  209. else:
  210. T[i] = 0
  211. i += 1
  212. j = 0
  213. return T
# Values of the 'mode' argument of LLHelpers.ll_search().
FAST_COUNT = 0
FAST_FIND = 1
FAST_RFIND = 2


# The bloom mask used by ll_search() has one bit per possible value of
# (ord(c) & (BLOOM_WIDTH - 1)).
from pypy.rlib.rarithmetic import LONG_BIT as BLOOM_WIDTH
  218. def bloom_add(mask, c):
  219. return mask | (1 << (ord(c) & (BLOOM_WIDTH - 1)))
  220. def bloom(mask, c):
  221. return mask & (1 << (ord(c) & (BLOOM_WIDTH - 1)))
  222. class LLHelpers(AbstractLLHelpers):
  223. @jit.elidable
  224. def ll_str_mul(s, times):
  225. if times < 0:
  226. times = 0
  227. try:
  228. size = ovfcheck(len(s.chars) * times)
  229. except OverflowError:
  230. raise MemoryError
  231. newstr = s.malloc(size)
  232. i = 0
  233. if i < size:
  234. s.copy_contents(s, newstr, 0, 0, len(s.chars))
  235. i += len(s.chars)
  236. while i < size:
  237. if i <= size - i:
  238. j = i
  239. else:
  240. j = size - i
  241. s.copy_contents(newstr, newstr, 0, i, j)
  242. i += j
  243. return newstr
  244. @jit.elidable
  245. def ll_char_mul(ch, times):
  246. if typeOf(ch) is Char:
  247. malloc = mallocstr
  248. else:
  249. malloc = mallocunicode
  250. if times < 0:
  251. times = 0
  252. newstr = malloc(times)
  253. j = 0
  254. # XXX we can use memset here, not sure how useful this is
  255. while j < times:
  256. newstr.chars[j] = ch
  257. j += 1
  258. return newstr
  259. def ll_strlen(s):
  260. return len(s.chars)
  261. def ll_stritem_nonneg(s, i):
  262. chars = s.chars
  263. ll_assert(i>=0, "negative str getitem index")
  264. ll_assert(i<len(chars), "str getitem index out of bound")
  265. return chars[i]
  266. ll_stritem_nonneg._annenforceargs_ = [None, int]
  267. def ll_chr2str(ch):
  268. if typeOf(ch) is Char:
  269. malloc = mallocstr
  270. else:
  271. malloc = mallocunicode
  272. s = malloc(1)
  273. s.chars[0] = ch
  274. return s
  275. # @jit.look_inside_iff(lambda str: jit.isconstant(len(str.chars)) and len(str.chars) == 1)
  276. @jit.oopspec("str.str2unicode(str)")
  277. def ll_str2unicode(str):
  278. lgt = len(str.chars)
  279. s = mallocunicode(lgt)
  280. for i in range(lgt):
  281. if ord(str.chars[i]) > 127:
  282. raise UnicodeDecodeError
  283. s.chars[i] = cast_primitive(UniChar, str.chars[i])
  284. return s
  285. @jit.elidable
  286. def ll_strhash(s):
  287. # unlike CPython, there is no reason to avoid to return -1
  288. # but our malloc initializes the memory to zero, so we use zero as the
  289. # special non-computed-yet value.
  290. if not s:
  291. return 0
  292. x = s.hash
  293. if x == 0:
  294. x = _hash_string(s.chars)
  295. if x == 0:
  296. x = 29872897
  297. s.hash = x
  298. return x
  299. def ll_strfasthash(s):
  300. return s.hash # assumes that the hash is already computed
  301. @jit.elidable
  302. def ll_strconcat(s1, s2):
  303. len1 = len(s1.chars)
  304. len2 = len(s2.chars)
  305. # a single '+' like this is allowed to overflow: it gets
  306. # a negative result, and the gc will complain
  307. newstr = s1.malloc(len1 + len2)
  308. s1.copy_contents(s1, newstr, 0, 0, len1)
  309. s1.copy_contents(s2, newstr, 0, len1, len2)
  310. return newstr
  311. ll_strconcat.oopspec = 'stroruni.concat(s1, s2)'
  312. @jit.elidable
  313. def ll_strip(s, ch, left, right):
  314. s_len = len(s.chars)
  315. if s_len == 0:
  316. return s.empty()
  317. lpos = 0
  318. rpos = s_len - 1
  319. if left:
  320. while lpos < rpos and s.chars[lpos] == ch:
  321. lpos += 1
  322. if right:
  323. while lpos < rpos + 1 and s.chars[rpos] == ch:
  324. rpos -= 1
  325. if rpos < lpos:
  326. return s.empty()
  327. r_len = rpos - lpos + 1
  328. result = s.malloc(r_len)
  329. s.copy_contents(s, result, lpos, 0, r_len)
  330. return result
  331. @jit.elidable
  332. def ll_upper(s):
  333. s_chars = s.chars
  334. s_len = len(s_chars)
  335. if s_len == 0:
  336. return s.empty()
  337. i = 0
  338. result = mallocstr(s_len)
  339. # ^^^^^^^^^ specifically to explode on unicode
  340. while i < s_len:
  341. ch = s_chars[i]
  342. if 'a' <= ch <= 'z':
  343. ch = chr(ord(ch) - 32)
  344. result.chars[i] = ch
  345. i += 1
  346. return result
  347. @jit.elidable
  348. def ll_lower(s):
  349. s_chars = s.chars
  350. s_len = len(s_chars)
  351. if s_len == 0:
  352. return s.empty()
  353. i = 0
  354. result = mallocstr(s_len)
  355. # ^^^^^^^^^ specifically to explode on unicode
  356. while i < s_len:
  357. ch = s_chars[i]
  358. if 'A' <= ch <= 'Z':
  359. ch = chr(ord(ch) + 32)
  360. result.chars[i] = ch
  361. i += 1
  362. return result
  363. def ll_join(s, length, items):
  364. s_chars = s.chars
  365. s_len = len(s_chars)
  366. num_items = length
  367. if num_items == 0:
  368. return s.empty()
  369. itemslen = 0
  370. i = 0
  371. while i < num_items:
  372. try:
  373. itemslen = ovfcheck(itemslen + len(items[i].chars))
  374. except OverflowError:
  375. raise MemoryError
  376. i += 1
  377. try:
  378. seplen = ovfcheck(s_len * (num_items - 1))
  379. except OverflowError:
  380. raise MemoryError
  381. # a single '+' at the end is allowed to overflow: it gets
  382. # a negative result, and the gc will complain
  383. result = s.malloc(itemslen + seplen)
  384. res_index = len(items[0].chars)
  385. s.copy_contents(items[0], result, 0, 0, res_index)
  386. i = 1
  387. while i < num_items:
  388. s.copy_contents(s, result, 0, res_index, s_len)
  389. res_index += s_len
  390. lgt = len(items[i].chars)
  391. s.copy_contents(items[i], result, 0, res_index, lgt)
  392. res_index += lgt
  393. i += 1
  394. return result
  395. @jit.elidable
  396. def ll_strcmp(s1, s2):
  397. if not s1 and not s2:
  398. return True
  399. if not s1 or not s2:
  400. return False
  401. chars1 = s1.chars
  402. chars2 = s2.chars
  403. len1 = len(chars1)
  404. len2 = len(chars2)
  405. if len1 < len2:
  406. cmplen = len1
  407. else:
  408. cmplen = len2
  409. i = 0
  410. while i < cmplen:
  411. diff = ord(chars1[i]) - ord(chars2[i])
  412. if diff != 0:
  413. return diff
  414. i += 1
  415. return len1 - len2
  416. @jit.elidable
  417. def ll_streq(s1, s2):
  418. if s1 == s2: # also if both are NULLs
  419. return True
  420. if not s1 or not s2:
  421. return False
  422. len1 = len(s1.chars)
  423. len2 = len(s2.chars)
  424. if len1 != len2:
  425. return False
  426. j = 0
  427. chars1 = s1.chars
  428. chars2 = s2.chars
  429. while j < len1:
  430. if chars1[j] != chars2[j]:
  431. return False
  432. j += 1
  433. return True
  434. ll_streq.oopspec = 'stroruni.equal(s1, s2)'
  435. @jit.elidable
  436. def ll_startswith(s1, s2):
  437. len1 = len(s1.chars)
  438. len2 = len(s2.chars)
  439. if len1 < len2:
  440. return False
  441. j = 0
  442. chars1 = s1.chars
  443. chars2 = s2.chars
  444. while j < len2:
  445. if chars1[j] != chars2[j]:
  446. return False
  447. j += 1
  448. return True
  449. def ll_startswith_char(s, ch):
  450. if not len(s.chars):
  451. return False
  452. return s.chars[0] == ch
  453. @jit.elidable
  454. def ll_endswith(s1, s2):
  455. len1 = len(s1.chars)
  456. len2 = len(s2.chars)
  457. if len1 < len2:
  458. return False
  459. j = 0
  460. chars1 = s1.chars
  461. chars2 = s2.chars
  462. offset = len1 - len2
  463. while j < len2:
  464. if chars1[offset + j] != chars2[j]:
  465. return False
  466. j += 1
  467. return True
  468. def ll_endswith_char(s, ch):
  469. if not len(s.chars):
  470. return False
  471. return s.chars[len(s.chars) - 1] == ch
  472. @jit.elidable
  473. def ll_find_char(s, ch, start, end):
  474. i = start
  475. if end > len(s.chars):
  476. end = len(s.chars)
  477. while i < end:
  478. if s.chars[i] == ch:
  479. return i
  480. i += 1
  481. return -1
  482. ll_find_char._annenforceargs_ = [None, None, int, int]
  483. @jit.elidable
  484. def ll_rfind_char(s, ch, start, end):
  485. if end > len(s.chars):
  486. end = len(s.chars)
  487. i = end
  488. while i > start:
  489. i -= 1
  490. if s.chars[i] == ch:
  491. return i
  492. return -1
  493. @jit.elidable
  494. def ll_count_char(s, ch, start, end):
  495. count = 0
  496. i = start
  497. if end > len(s.chars):
  498. end = len(s.chars)
  499. while i < end:
  500. if s.chars[i] == ch:
  501. count += 1
  502. i += 1
  503. return count
  504. @classmethod
  505. def ll_find(cls, s1, s2, start, end):
  506. if start < 0:
  507. start = 0
  508. if end > len(s1.chars):
  509. end = len(s1.chars)
  510. if end - start < 0:
  511. return -1
  512. m = len(s2.chars)
  513. if m == 0:
  514. return start
  515. elif m == 1:
  516. return cls.ll_find_char(s1, s2.chars[0], start, end)
  517. return cls.ll_search(s1, s2, start, end, FAST_FIND)
  518. @classmethod
  519. def ll_rfind(cls, s1, s2, start, end):
  520. if start < 0:
  521. start = 0
  522. if end > len(s1.chars):
  523. end = len(s1.chars)
  524. if end - start < 0:
  525. return -1
  526. m = len(s2.chars)
  527. if m == 0:
  528. return end
  529. elif m == 1:
  530. return cls.ll_rfind_char(s1, s2.chars[0], start, end)
  531. return cls.ll_search(s1, s2, start, end, FAST_RFIND)
  532. @classmethod
  533. def ll_count(cls, s1, s2, start, end):
  534. if start < 0:
  535. start = 0
  536. if end > len(s1.chars):
  537. end = len(s1.chars)
  538. if end - start < 0:
  539. return 0
  540. m = len(s2.chars)
  541. if m == 0:
  542. return end - start + 1
  543. elif m == 1:
  544. return cls.ll_count_char(s1, s2.chars[0], start, end)
  545. res = cls.ll_search(s1, s2, start, end, FAST_COUNT)
  546. # For a few cases ll_search can return -1 to indicate an "impossible"
  547. # condition for a string match, count just returns 0 in these cases.
  548. if res < 0:
  549. res = 0
  550. return res
  551. @jit.elidable
  552. def ll_search(s1, s2, start, end, mode):
  553. count = 0
  554. n = end - start
  555. m = len(s2.chars)
  556. w = n - m
  557. if w < 0:
  558. return -1
  559. mlast = m - 1
  560. skip = mlast - 1
  561. mask = 0
  562. if mode != FAST_RFIND:
  563. for i in range(mlast):
  564. mask = bloom_add(mask, s2.chars[i])
  565. if s2.chars[i] == s2.chars[mlast]:
  566. skip = mlast - i - 1
  567. mask = bloom_add(mask, s2.chars[mlast])
  568. i = start - 1
  569. while i + 1 <= start + w:
  570. i += 1
  571. if s1.chars[i+m-1] == s2.chars[m-1]:
  572. for j in range(mlast):
  573. if s1.chars[i+j] != s2.chars[j]:
  574. break
  575. else:
  576. if mode != FAST_COUNT:
  577. return i
  578. count += 1
  579. i += mlast
  580. continue
  581. if i + m < len(s1.chars):
  582. c = s1.chars[i + m]
  583. else:
  584. c = '\0'
  585. if not bloom(mask, c):
  586. i += m
  587. else:
  588. i += skip
  589. else:
  590. if i + m < len(s1.chars):
  591. c = s1.chars[i + m]
  592. else:
  593. c = '\0'
  594. if not bloom(mask, c):
  595. i += m
  596. else:
  597. mask = bloom_add(mask, s2.chars[0])
  598. for i in range(mlast, 0, -1):
  599. mask = bloom_add(mask, s2.chars[i])
  600. if s2.chars[i] == s2.chars[0]:
  601. skip = i - 1
  602. i = start + w + 1
  603. while i - 1 >= start:
  604. i -= 1
  605. if s1.chars[i] == s2.chars[0]:
  606. for j in xrange(mlast, 0, -1):
  607. if s1.chars[i+j] != s2.chars[j]:
  608. break
  609. else:
  610. return i
  611. if i-1 >= 0 and not bloom(mask, s1.chars[i-1]):
  612. i -= m
  613. else:
  614. i -= skip
  615. else:
  616. if i-1 >= 0 and not bloom(mask, s1.chars[i-1]):
  617. i -= m
  618. if mode != FAST_COUNT:
  619. return -1
  620. return count
  621. @enforceargs(int, None)
  622. @jit.look_inside_iff(lambda length, items: jit.isconstant(length) and length <= 2)
  623. def ll_join_strs(length, items):
  624. # Special case for length 1 items, helps both the JIT and other code
  625. if length == 1:
  626. return items[0]
  627. num_items = length
  628. itemslen = 0
  629. i = 0
  630. while i < num_items:
  631. try:
  632. itemslen = ovfcheck(itemslen + len(items[i].chars))
  633. except OverflowError:
  634. raise MemoryError
  635. i += 1
  636. if typeOf(items).TO.OF.TO == STR:
  637. malloc = mallocstr
  638. copy_contents = copy_string_contents
  639. else:
  640. malloc = mallocunicode
  641. copy_contents = copy_unicode_contents
  642. result = malloc(itemslen)
  643. res_chars = result.chars
  644. res_index = 0
  645. i = 0
  646. while i < num_items:
  647. item_chars = items[i].chars
  648. item_len = len(item_chars)
  649. copy_contents(items[i], result, 0, res_index, item_len)
  650. res_index += item_len
  651. i += 1
  652. return result
  653. @jit.look_inside_iff(lambda length, chars, RES: jit.isconstant(length) and jit.isvirtual(chars))
  654. def ll_join_chars(length, chars, RES):
  655. # no need to optimize this, will be replaced by string builder
  656. # at some point soon
  657. num_chars = length
  658. if RES is StringRepr.lowleveltype:
  659. target = Char
  660. malloc = mallocstr
  661. else:
  662. target = UniChar
  663. malloc = mallocunicode
  664. result = malloc(num_chars)
  665. res_chars = result.chars
  666. i = 0
  667. while i < num_chars:
  668. res_chars[i] = cast_primitive(target, chars[i])
  669. i += 1
  670. return result
  671. @jit.elidable
  672. def _ll_stringslice(s1, start, stop):
  673. lgt = stop - start
  674. assert start >= 0
  675. assert lgt >= 0
  676. newstr = s1.malloc(lgt)
  677. s1.copy_contents(s1, newstr, start, 0, lgt)
  678. return newstr
  679. _ll_stringslice.oopspec = 'stroruni.slice(s1, start, stop)'
  680. _ll_stringslice._annenforceargs_ = [None, int, int]
  681. def ll_stringslice_startonly(s1, start):
  682. return LLHelpers._ll_stringslice(s1, start, len(s1.chars))
  683. def ll_stringslice_startstop(s1, start, stop):
  684. if jit.we_are_jitted():
  685. if stop > len(s1.chars):
  686. stop = len(s1.chars)
  687. else:
  688. if stop >= len(s1.chars):
  689. if start == 0:
  690. return s1
  691. stop = len(s1.chars)
  692. return LLHelpers._ll_stringslice(s1, start, stop)
  693. def ll_stringslice_minusone(s1):
  694. newlen = len(s1.chars) - 1
  695. return LLHelpers._ll_stringslice(s1, 0, newlen)
  696. def ll_split_chr(LIST, s, c, max):
  697. chars = s.chars
  698. strlen = len(chars)
  699. count = 1
  700. i = 0
  701. if max == 0:
  702. i = strlen
  703. while i < strlen:
  704. if chars[i] == c:
  705. count += 1
  706. if max >= 0 and count > max:
  707. break
  708. i += 1
  709. res = LIST.ll_newlist(count)
  710. items = res.ll_items()
  711. i = 0
  712. j = 0
  713. resindex = 0
  714. if max == 0:
  715. j = strlen
  716. while j < strlen:
  717. if chars[j] == c:
  718. item = items[resindex] = s.malloc(j - i)
  719. item.copy_contents(s, item, i, 0, j - i)
  720. resindex += 1
  721. i = j + 1
  722. if max >= 0 and resindex >= max:
  723. j = strlen
  724. break
  725. j += 1
  726. item = items[resindex] = s.malloc(j - i)
  727. item.copy_contents(s, item, i, 0, j - i)
  728. return res
  729. def ll_rsplit_chr(LIST, s, c, max):
  730. chars = s.chars
  731. strlen = len(chars)
  732. count = 1
  733. i = 0
  734. if max == 0:
  735. i = strlen
  736. while i < strlen:
  737. if chars[i] == c:
  738. count += 1
  739. if max >= 0 and count > max:
  740. break
  741. i += 1
  742. res = LIST.ll_newlist(count)
  743. items = res.ll_items()
  744. i = strlen
  745. j = strlen
  746. resindex = count - 1
  747. assert resindex >= 0
  748. if max == 0:
  749. j = 0
  750. while j > 0:
  751. j -= 1
  752. if chars[j] == c:
  753. item = items[resindex] = s.malloc(i - j - 1)
  754. item.copy_contents(s, item, j + 1, 0, i - j - 1)
  755. resindex -= 1
  756. i = j
  757. if resindex == 0:
  758. j = 0
  759. break
  760. item = items[resindex] = s.malloc(i - j)
  761. item.copy_contents(s, item, j, 0, i - j)
  762. return res
  763. @jit.elidable
  764. def ll_replace_chr_chr(s, c1, c2):
  765. length = len(s.chars)
  766. newstr = s.malloc(length)
  767. src = s.chars
  768. dst = newstr.chars
  769. j = 0
  770. while j < length:
  771. c = src[j]
  772. if c == c1:
  773. c = c2
  774. dst[j] = c
  775. j += 1
  776. return newstr
  777. @jit.elidable
  778. def ll_contains(s, c):
  779. chars = s.chars
  780. strlen = len(chars)
  781. i = 0
  782. while i < strlen:
  783. if chars[i] == c:
  784. return True
  785. i += 1
  786. return False
  787. @jit.elidable
  788. def ll_int(s, base):
  789. if not 2 <= base <= 36:
  790. raise ValueError
  791. chars = s.chars
  792. strlen = len(chars)
  793. i = 0
  794. #XXX: only space is allowed as white space for now
  795. while i < strlen and chars[i] == ' ':
  796. i += 1
  797. if not i < strlen:
  798. raise ValueError
  799. #check sign
  800. sign = 1
  801. if chars[i] == '-':
  802. sign = -1
  803. i += 1
  804. elif chars[i] == '+':
  805. i += 1;
  806. # skip whitespaces between sign and digits
  807. while i < strlen and chars[i] == ' ':
  808. i += 1
  809. #now get digits
  810. val = 0
  811. oldpos = i
  812. while i < strlen:
  813. c = ord(chars[i])
  814. if ord('a') <= c <= ord('z'):
  815. digit = c - ord('a') + 10
  816. elif ord('A') <= c <= ord('Z'):
  817. digit = c - ord('A') + 10
  818. elif ord('0') <= c <= ord('9'):
  819. digit = c - ord('0')
  820. else:
  821. break
  822. if digit >= base:
  823. break
  824. val = val * base + digit
  825. i += 1
  826. if i == oldpos:
  827. raise ValueError # catch strings like '+' and '+ '
  828. #skip trailing whitespace
  829. while i < strlen and chars[i] == ' ':
  830. i += 1
  831. if not i == strlen:
  832. raise ValueError
  833. return sign * val
  834. # interface to build strings:
  835. # x = ll_build_start(n)
  836. # ll_build_push(x, next_string, 0)
  837. # ll_build_push(x, next_string, 1)
  838. # ...
  839. # ll_build_push(x, next_string, n-1)
  840. # s = ll_build_finish(x)
  841. def ll_build_start(parts_count):
  842. return malloc(TEMP, parts_count)
  843. def ll_build_push(builder, next_string, index):
  844. builder[index] = next_string
  845. def ll_build_finish(builder):
  846. return LLHelpers.ll_join_strs(len(builder), builder)
  847. def ll_constant(s):
  848. return string_repr.convert_const(s)
  849. ll_constant._annspecialcase_ = 'specialize:memo'
  850. def do_stringformat(cls, hop, sourcevarsrepr):
  851. s_str = hop.args_s[0]
  852. assert s_str.is_constant()
  853. s = s_str.const
  854. things = cls.parse_fmt_string(s)
  855. size = inputconst(Signed, len(things)) # could be unsigned?
  856. cTEMP = inputconst(Void, TEMP)
  857. cflags = inputconst(Void, {'flavor': 'gc'})
  858. vtemp = hop.genop("malloc_varsize", [cTEMP, cflags, size],
  859. resulttype=Ptr(TEMP))
  860. argsiter = iter(sourcevarsrepr)
  861. InstanceRepr = hop.rtyper.type_system.rclass.InstanceRepr
  862. for i, thing in enumerate(things):
  863. if isinstance(thing, tuple):
  864. code = thing[0]
  865. vitem, r_arg = argsiter.next()
  866. if not hasattr(r_arg, 'll_str'):
  867. raise TyperError("ll_str unsupported for: %r" % r_arg)
  868. if code == 's' or (code == 'r' and isinstance(r_arg, InstanceRepr)):
  869. vchunk = hop.gendirectcall(r_arg.ll_str, vitem)
  870. elif code == 'd':
  871. assert isinstance(r_arg, IntegerRepr)
  872. #vchunk = hop.gendirectcall(r_arg.ll_str, vitem)
  873. vchunk = hop.gendirectcall(ll_str.ll_int2dec, vitem)
  874. elif code == 'f':
  875. #assert isinstance(r_arg, FloatRepr)
  876. vchunk = hop.gendirectcall(r_arg.ll_str, vitem)
  877. elif code == 'x':
  878. assert isinstance(r_arg, IntegerRepr)
  879. vchunk = hop.gendirectcall(ll_str.ll_int2hex, vitem,
  880. inputconst(Bool, False))
  881. elif code == 'o':
  882. assert isinstance(r_arg, IntegerRepr)
  883. vchunk = hop.gendirectcall(ll_str.ll_int2oct, vitem,
  884. inputconst(Bool, False))
  885. else:
  886. raise TyperError, "%%%s is not RPython" % (code, )
  887. else:
  888. from pypy.rpython.lltypesystem.rstr import string_repr
  889. vchunk = inputconst(string_repr, thing)
  890. i = inputconst(Signed, i)
  891. hop.genop('setarrayitem', [vtemp, i, vchunk])
  892. hop.exception_cannot_occur() # to ignore the ZeroDivisionError of '%'
  893. return hop.gendirectcall(cls.ll_join_strs, size, vtemp)
  894. do_stringformat = classmethod(do_stringformat)
# Array of string pointers, used by the ll_build_*() helpers and by
# do_stringformat().
TEMP = GcArray(Ptr(STR))

# ____________________________________________________________

# Give STR and UNICODE their actual structure: a 'hash' field followed
# by the array of characters, plus the ADT methods used by the helpers
# above (s.malloc(), s.empty(), s.copy_contents()).
STR.become(GcStruct('rpy_string', ('hash',  Signed),
                    ('chars', Array(Char, hints={'immutable': True})),
                    adtmeths={'malloc' : staticAdtMethod(mallocstr),
                              'empty'  : staticAdtMethod(emptystrfun),
                              'copy_contents' : staticAdtMethod(copy_string_contents),
                              'gethash': LLHelpers.ll_strhash}))
UNICODE.become(GcStruct('rpy_unicode', ('hash', Signed),
                        ('chars', Array(UniChar, hints={'immutable': True})),
                        adtmeths={'malloc' : staticAdtMethod(mallocunicode),
                                  'empty'  : staticAdtMethod(emptyunicodefun),
                                  'copy_contents' : staticAdtMethod(copy_unicode_contents),
                                  'gethash': LLHelpers.ll_strhash}
                        ))
# TODO: make the public interface of the rstr module cleaner
# Module-level aliases of some LLHelpers functions.
ll_strconcat = LLHelpers.ll_strconcat
ll_join = LLHelpers.ll_join
ll_str2unicode = LLHelpers.ll_str2unicode
do_stringformat = LLHelpers.do_stringformat

# The repr instances.  char_repr and unichar_repr get their 'll'
# attribute assigned explicitly here.
string_repr = StringRepr()
char_repr = CharRepr()
unichar_repr = UniCharRepr()
char_repr.ll = LLHelpers
unichar_repr.ll = LLHelpers
unicode_repr = UnicodeRepr()
# The constants returned by emptystrfun() and emptyunicodefun().
emptystr = string_repr.convert_const("")
emptyunicode = unicode_repr.convert_const(u'')

# Cross-links between the repr classes and the repr instances.
StringRepr.repr = string_repr
UnicodeRepr.repr = unicode_repr
UniCharRepr.repr = unicode_repr
UniCharRepr.char_repr = unichar_repr
UnicodeRepr.char_repr = unichar_repr
CharRepr.char_repr = char_repr
StringRepr.char_repr = char_repr
class BaseStringIteratorRepr(AbstractStringIteratorRepr):
    # Common base of the str and unicode iterator reprs: both use the
    # module-level ll_striter() and ll_strnext() helpers.

    def __init__(self):
        self.ll_striter = ll_striter
        self.ll_strnext = ll_strnext
class StringIteratorRepr(BaseStringIteratorRepr):
    # Iterator state: the string being iterated over and the index of
    # the next character to return.
    lowleveltype = Ptr(GcStruct('stringiter',
                                ('string', string_repr.lowleveltype),
                                ('index', Signed)))
class UnicodeIteratorRepr(BaseStringIteratorRepr):
    # Iterator state: the unicode string being iterated over and the
    # index of the next character to return.
    lowleveltype = Ptr(GcStruct('unicodeiter',
                                ('string', unicode_repr.lowleveltype),
                                ('index', Signed)))
  942. def ll_striter(string):
  943. if typeOf(string) == string_repr.lowleveltype:
  944. TP = string_repr.iterator_repr.lowleveltype.TO
  945. elif typeOf(string) == unicode_repr.lowleveltype:
  946. TP = unicode_repr.iterator_repr.lowleveltype.TO
  947. else:
  948. raise TypeError("Unknown string type %s" % (typeOf(string),))
  949. iter = malloc(TP)
  950. iter.string = string
  951. iter.index = 0
  952. return iter
  953. def ll_strnext(iter):
  954. chars = iter.string.chars
  955. index = iter.index
  956. if index >= len(chars):
  957. raise StopIteration
  958. iter.index = index + 1
  959. return chars[index]
# Attach the iterator reprs; ll_striter() and make_iterator_repr() read
# them back from these attributes.
string_repr.iterator_repr = StringIteratorRepr()
unicode_repr.iterator_repr = UnicodeIteratorRepr()

# these should be in rclass, but circular imports prevent (also it's
# not that insane that a string constant is built in this file).

instance_str_prefix = string_repr.convert_const("<")
instance_str_infix  = string_repr.convert_const(" object at 0x")
instance_str_suffix = string_repr.convert_const(">")

null_str = string_repr.convert_const("NULL")

unboxed_instance_str_prefix = string_repr.convert_const("<unboxed ")
unboxed_instance_str_suffix = string_repr.convert_const(">")