PageRenderTime 48ms CodeModel.GetById 20ms RepoModel.GetById 0ms app.codeStats 0ms

/pypy/rpython/ootypesystem/rstr.py

http://github.com/pypy/pypy
Python | 444 lines | 357 code | 53 blank | 34 comment | 67 complexity | fd5d84080037677da9f1e14cee2bbd36 MD5 | raw file
  1. from pypy.tool.pairtype import pairtype
  2. from pypy.rlib.rarithmetic import ovfcheck
  3. from pypy.rpython.error import TyperError
  4. from pypy.rpython.rstr import AbstractStringRepr,AbstractCharRepr,\
  5. AbstractUniCharRepr, AbstractStringIteratorRepr,\
  6. AbstractLLHelpers, AbstractUnicodeRepr
  7. from pypy.rpython.rmodel import IntegerRepr
  8. from pypy.rpython.lltypesystem.lltype import Ptr, Char, UniChar, typeOf,\
  9. cast_primitive
  10. from pypy.rpython.ootypesystem import ootype
  11. from pypy.rpython.rmodel import Repr
# TODO: investigate whether it is possible, and worthwhile, to concatenate
# a String and a Char directly, without first converting the Char to a
# String.
  15. class BaseOOStringRepr(Repr):
  16. def __init__(self, *args):
  17. AbstractStringRepr.__init__(self, *args)
  18. self.ll = LLHelpers
  19. def convert_const(self, value):
  20. if value is None:
  21. return self.lowleveltype._null
  22. if not isinstance(value, self.basetype):
  23. raise TyperError("not a str: %r" % (value,))
  24. return self.make_string(value)
  25. def make_string(self, value):
  26. raise NotImplementedError
  27. def make_iterator_repr(self):
  28. return self.string_iterator_repr
  29. def _list_length_items(self, hop, v_lst, LIST):
  30. # ootypesystem list has a different interface that
  31. # lltypesystem list, so we don't need to calculate the lenght
  32. # here and to pass the 'items' array. Let's pass the list
  33. # itself and let LLHelpers.join to manipulate it directly.
  34. c_length = hop.inputconst(ootype.Void, None)
  35. return c_length, v_lst
  36. class StringRepr(BaseOOStringRepr, AbstractStringRepr):
  37. lowleveltype = ootype.String
  38. basetype = str
  39. def make_string(self, value):
  40. return ootype.make_string(value)
  41. def ll_decode_latin1(self, value):
  42. sb = ootype.new(ootype.UnicodeBuilder)
  43. length = value.ll_strlen()
  44. sb.ll_allocate(length)
  45. for i in range(length):
  46. c = value.ll_stritem_nonneg(i)
  47. sb.ll_append_char(cast_primitive(UniChar, c))
  48. return sb.ll_build()
  49. class UnicodeRepr(BaseOOStringRepr, AbstractUnicodeRepr):
  50. lowleveltype = ootype.Unicode
  51. basetype = basestring
  52. def make_string(self, value):
  53. return ootype.make_unicode(value)
  54. def ll_str(self, value):
  55. if not value:
  56. return self.ll.ll_constant('None')
  57. sb = ootype.new(ootype.StringBuilder)
  58. lgt = value.ll_strlen()
  59. sb.ll_allocate(lgt)
  60. for i in range(lgt):
  61. c = value.ll_stritem_nonneg(i)
  62. if ord(c) > 127:
  63. raise UnicodeEncodeError("%d > 127, not ascii" % ord(c))
  64. sb.ll_append_char(cast_primitive(Char, c))
  65. return sb.ll_build()
  66. def ll_encode_latin1(self, value):
  67. sb = ootype.new(ootype.StringBuilder)
  68. length = value.ll_strlen()
  69. sb.ll_allocate(length)
  70. for i in range(length):
  71. c = value.ll_stritem_nonneg(i)
  72. if ord(c) > 255:
  73. raise UnicodeEncodeError("%d > 255, not latin-1" % ord(c))
  74. sb.ll_append_char(cast_primitive(Char, c))
  75. return sb.ll_build()
# Repr for single characters: combines AbstractCharRepr with StringRepr
# and overrides the low-level type with the primitive Char.
class CharRepr(AbstractCharRepr, StringRepr):
    lowleveltype = Char
# Repr for single unicode characters: combines AbstractUniCharRepr with
# UnicodeRepr and overrides the low-level type with the primitive UniChar.
class UniCharRepr(AbstractUniCharRepr, UnicodeRepr):
    lowleveltype = UniChar
  80. class __extend__(pairtype(UniCharRepr, UnicodeRepr)):
  81. def convert_from_to((r_from, r_to), v, llops):
  82. rstr = llops.rtyper.type_system.rstr
  83. if r_from == unichar_repr and r_to == unicode_repr:
  84. return llops.gendirectcall(r_from.ll.ll_unichr2unicode, v)
  85. return NotImplemented
  86. class LLHelpers(AbstractLLHelpers):
  87. def ll_chr2str(ch):
  88. return ootype.oostring(ch, -1)
  89. def ll_str2unicode(s):
  90. return ootype.oounicode(s, -1)
  91. def ll_unichr2unicode(ch):
  92. return ootype.oounicode(ch, -1)
  93. def ll_strhash(s):
  94. if not s:
  95. return 0
  96. return s.ll_hash()
  97. def ll_strfasthash(s):
  98. return s.ll_hash()
  99. def ll_char_mul(ch, times):
  100. if times < 0:
  101. times = 0
  102. if typeOf(ch) == Char:
  103. buf = ootype.new(ootype.StringBuilder)
  104. else:
  105. buf = ootype.new(ootype.UnicodeBuilder)
  106. buf.ll_allocate(times)
  107. i = 0
  108. while i<times:
  109. buf.ll_append_char(ch)
  110. i+= 1
  111. return buf.ll_build()
  112. def ll_str_mul(s, times):
  113. if times < 0:
  114. times = 0
  115. try:
  116. size = ovfcheck(s.ll_strlen() * times)
  117. except OverflowError:
  118. raise MemoryError
  119. buf = ootype.new(typeOf(s).builder)
  120. buf.ll_allocate(size)
  121. for i in xrange(times):
  122. buf.ll_append(s)
  123. return buf.ll_build()
  124. def ll_streq(s1, s2):
  125. if s1 is None:
  126. return s2 is None
  127. return s1.ll_streq(s2)
  128. def ll_strcmp(s1, s2):
  129. if not s1 and not s2:
  130. return True
  131. if not s1 or not s2:
  132. return False
  133. return s1.ll_strcmp(s2)
  134. def ll_join(s, length_dummy, lst):
  135. length = lst.ll_length()
  136. buf = ootype.new(typeOf(s).builder)
  137. # TODO: check if it's worth of preallocating the buffer with
  138. # the exact length
  139. ## itemslen = 0
  140. ## i = 0
  141. ## while i < length:
  142. ## itemslen += lst.ll_getitem_fast(i).ll_strlen()
  143. ## i += 1
  144. ## resultlen = itemslen + s.ll_strlen()*(length-1)
  145. ## buf.ll_allocate(resultlen)
  146. i = 0
  147. while i < length-1:
  148. item = lst.ll_getitem_fast(i)
  149. buf.ll_append(item)
  150. buf.ll_append(s)
  151. i += 1
  152. if length > 0:
  153. lastitem = lst.ll_getitem_fast(i)
  154. buf.ll_append(lastitem)
  155. return buf.ll_build()
  156. def ll_join_chars(length_dummy, lst, RES):
  157. if RES is ootype.String:
  158. target = Char
  159. buf = ootype.new(ootype.StringBuilder)
  160. else:
  161. target = UniChar
  162. buf = ootype.new(ootype.UnicodeBuilder)
  163. length = lst.ll_length()
  164. buf.ll_allocate(length)
  165. i = 0
  166. while i < length:
  167. buf.ll_append_char(cast_primitive(target, lst.ll_getitem_fast(i)))
  168. i += 1
  169. return buf.ll_build()
  170. def ll_join_strs(length_dummy, lst):
  171. if typeOf(lst).ITEM == ootype.String:
  172. buf = ootype.new(ootype.StringBuilder)
  173. else:
  174. buf = ootype.new(ootype.UnicodeBuilder)
  175. length = lst.ll_length()
  176. #buf.ll_allocate(length)
  177. i = 0
  178. while i < length:
  179. buf.ll_append(lst.ll_getitem_fast(i))
  180. i += 1
  181. return buf.ll_build()
  182. def ll_stringslice_startonly(s, start):
  183. return s.ll_substring(start, s.ll_strlen() - start)
  184. def ll_stringslice_startstop(s, start, stop):
  185. length = s.ll_strlen()
  186. if stop > length:
  187. stop = length
  188. # If start > stop, return a empty string. This can happen if the start
  189. # is greater than the length of the string.
  190. if start > stop:
  191. start = stop
  192. return s.ll_substring(start, stop-start)
  193. def ll_stringslice_minusone(s):
  194. return s.ll_substring(0, s.ll_strlen()-1)
  195. def ll_split_chr(RESULT, s, c, max):
  196. return RESULT.ll_convert_from_array(s.ll_split_chr(c, max))
  197. def ll_rsplit_chr(RESULT, s, c, max):
  198. return RESULT.ll_convert_from_array(s.ll_rsplit_chr(c, max))
  199. def ll_int(s, base):
  200. if not 2 <= base <= 36:
  201. raise ValueError
  202. strlen = s.ll_strlen()
  203. i = 0
  204. #XXX: only space is allowed as white space for now
  205. while i < strlen and s.ll_stritem_nonneg(i) == ' ':
  206. i += 1
  207. if not i < strlen:
  208. raise ValueError
  209. #check sign
  210. sign = 1
  211. if s.ll_stritem_nonneg(i) == '-':
  212. sign = -1
  213. i += 1
  214. elif s.ll_stritem_nonneg(i) == '+':
  215. i += 1;
  216. # skip whitespaces between sign and digits
  217. while i < strlen and s.ll_stritem_nonneg(i) == ' ':
  218. i += 1
  219. #now get digits
  220. val = 0
  221. oldpos = i
  222. while i < strlen:
  223. c = ord(s.ll_stritem_nonneg(i))
  224. if ord('a') <= c <= ord('z'):
  225. digit = c - ord('a') + 10
  226. elif ord('A') <= c <= ord('Z'):
  227. digit = c - ord('A') + 10
  228. elif ord('0') <= c <= ord('9'):
  229. digit = c - ord('0')
  230. else:
  231. break
  232. if digit >= base:
  233. break
  234. val = val * base + digit
  235. i += 1
  236. if i == oldpos:
  237. raise ValueError # catch strings like '+' and '+ '
  238. #skip trailing whitespace
  239. while i < strlen and s.ll_stritem_nonneg(i) == ' ':
  240. i += 1
  241. if not i == strlen:
  242. raise ValueError
  243. return sign * val
  244. def ll_float(ll_str):
  245. return ootype.ooparse_float(ll_str)
  246. # interface to build strings:
  247. # x = ll_build_start(n)
  248. # ll_build_push(x, next_string, 0)
  249. # ll_build_push(x, next_string, 1)
  250. # ...
  251. # ll_build_push(x, next_string, n-1)
  252. # s = ll_build_finish(x)
  253. def ll_build_start(parts_count):
  254. return ootype.new(ootype.StringBuilder)
  255. def ll_build_push(buf, next_string, index):
  256. buf.ll_append(next_string)
  257. def ll_build_finish(buf):
  258. return buf.ll_build()
  259. def ll_constant(s):
  260. return ootype.make_string(s)
  261. ll_constant._annspecialcase_ = 'specialize:memo'
  262. def do_stringformat(cls, hop, sourcevarsrepr):
  263. InstanceRepr = hop.rtyper.type_system.rclass.InstanceRepr
  264. string_repr = hop.rtyper.type_system.rstr.string_repr
  265. s_str = hop.args_s[0]
  266. assert s_str.is_constant()
  267. s = s_str.const
  268. c_append = hop.inputconst(ootype.Void, 'll_append')
  269. c_build = hop.inputconst(ootype.Void, 'll_build')
  270. cm1 = hop.inputconst(ootype.Signed, -1)
  271. c8 = hop.inputconst(ootype.Signed, 8)
  272. c10 = hop.inputconst(ootype.Signed, 10)
  273. c16 = hop.inputconst(ootype.Signed, 16)
  274. c_StringBuilder = hop.inputconst(ootype.Void, ootype.StringBuilder)
  275. v_buf = hop.genop("new", [c_StringBuilder], resulttype=ootype.StringBuilder)
  276. things = cls.parse_fmt_string(s)
  277. argsiter = iter(sourcevarsrepr)
  278. for thing in things:
  279. if isinstance(thing, tuple):
  280. code = thing[0]
  281. vitem, r_arg = argsiter.next()
  282. if not hasattr(r_arg, 'll_str'):
  283. raise TyperError("ll_str unsupported for: %r" % r_arg)
  284. if code == 's' or (code == 'r' and isinstance(r_arg, InstanceRepr)):
  285. vchunk = hop.gendirectcall(r_arg.ll_str, vitem)
  286. elif code == 'd':
  287. assert isinstance(r_arg, IntegerRepr)
  288. vchunk = hop.genop('oostring', [vitem, c10], resulttype=ootype.String)
  289. elif code == 'f':
  290. #assert isinstance(r_arg, FloatRepr)
  291. vchunk = hop.gendirectcall(r_arg.ll_str, vitem)
  292. elif code == 'x':
  293. assert isinstance(r_arg, IntegerRepr)
  294. vchunk = hop.genop('oostring', [vitem, c16], resulttype=ootype.String)
  295. elif code == 'o':
  296. assert isinstance(r_arg, IntegerRepr)
  297. vchunk = hop.genop('oostring', [vitem, c8], resulttype=ootype.String)
  298. else:
  299. raise TyperError, "%%%s is not RPython" % (code, )
  300. else:
  301. vchunk = hop.inputconst(string_repr, thing)
  302. #i = inputconst(Signed, i)
  303. #hop.genop('setarrayitem', [vtemp, i, vchunk])
  304. hop.genop('oosend', [c_append, v_buf, vchunk], resulttype=ootype.Void)
  305. hop.exception_cannot_occur() # to ignore the ZeroDivisionError of '%'
  306. return hop.genop('oosend', [c_build, v_buf], resulttype=ootype.String)
  307. do_stringformat = classmethod(do_stringformat)
  308. def add_helpers():
  309. dic = {}
  310. for name, meth in ootype.String._GENERIC_METHODS.iteritems():
  311. if name in LLHelpers.__dict__:
  312. continue
  313. n_args = len(meth.ARGS)
  314. args = ', '.join(['arg%d' % i for i in range(n_args)])
  315. code = """
  316. def %s(obj, %s):
  317. return obj.%s(%s)
  318. """ % (name, args, name, args)
  319. exec code in dic
  320. setattr(LLHelpers, name, staticmethod(dic[name]))
# Install the generated pass-through helpers on LLHelpers, then remove the
# one-shot installer from the module namespace.
add_helpers()
del add_helpers

# Module-level alias for the classmethod defined on LLHelpers.
do_stringformat = LLHelpers.do_stringformat

# Singleton reprs for characters; their helpers are set explicitly.
char_repr = CharRepr()
unichar_repr = UniCharRepr()
char_repr.ll = LLHelpers
unichar_repr.ll = LLHelpers

# Singleton repr for strings, published on the class together with the
# matching character repr.
string_repr = StringRepr()
StringRepr.repr = string_repr
StringRepr.char_repr = char_repr
emptystr = string_repr.convert_const("")

# Singleton repr for unicode strings, published the same way.
unicode_repr = UnicodeRepr()
UnicodeRepr.repr = unicode_repr
UnicodeRepr.char_repr = unichar_repr
  335. class StringIteratorRepr(AbstractStringIteratorRepr):
  336. lowleveltype = ootype.Record({'string': string_repr.lowleveltype,
  337. 'index': ootype.Signed})
  338. def __init__(self):
  339. self.ll_striter = ll_striter
  340. self.ll_strnext = ll_strnext
  341. class UnicodeIteratorRepr(AbstractStringIteratorRepr):
  342. lowleveltype = ootype.Record({'string': unicode_repr.lowleveltype,
  343. 'index': ootype.Signed})
  344. def __init__(self):
  345. self.ll_striter = ll_unicodeiter
  346. self.ll_strnext = ll_strnext
  347. def ll_striter(string):
  348. iter = ootype.new(string_repr.string_iterator_repr.lowleveltype)
  349. iter.string = string
  350. iter.index = 0
  351. return iter
  352. def ll_unicodeiter(string):
  353. iter = ootype.new(unicode_repr.string_iterator_repr.lowleveltype)
  354. iter.string = string
  355. iter.index = 0
  356. return iter
  357. def ll_strnext(iter):
  358. string = iter.string
  359. index = iter.index
  360. if index >= string.ll_strlen():
  361. raise StopIteration
  362. iter.index = index + 1
  363. return string.ll_stritem_nonneg(index)
# Attach one shared iterator repr instance to each string repr class.
StringRepr.string_iterator_repr = StringIteratorRepr()
UnicodeRepr.string_iterator_repr = UnicodeIteratorRepr()

# These constants should live in rclass, but circular imports prevent
# that (and it is not unreasonable for a string constant to be built in
# this file).
instance_str_prefix = string_repr.convert_const("<")
instance_str_suffix = string_repr.convert_const(" object>")

unboxed_instance_str_prefix = string_repr.convert_const("<unboxed ")
unboxed_instance_str_suffix = string_repr.convert_const(">")

# Pieces used to build the string form of a list.
list_str_open_bracket = string_repr.convert_const("[")
list_str_close_bracket = string_repr.convert_const("]")
list_str_sep = string_repr.convert_const(", ")