PageRenderTime 69ms CodeModel.GetById 23ms RepoModel.GetById 1ms app.codeStats 0ms

/pypy/objspace/std/ropeunicodeobject.py

https://bitbucket.org/pypy/pypy/
Python | 1036 lines | 1003 code | 27 blank | 6 comment | 38 complexity | 4161debeb60e5d96ff5916fcab954473 MD5 | raw file
Possible License(s): AGPL-3.0, BSD-3-Clause, Apache-2.0
  1. from pypy.objspace.std.model import registerimplementation, W_Object
  2. from pypy.objspace.std.register_all import register_all
  3. from pypy.objspace.std.multimethod import FailedToImplement
  4. from pypy.interpreter.error import OperationError, operationerrfmt
  5. from pypy.interpreter import gateway
  6. from pypy.objspace.std.stringobject import W_StringObject
  7. from pypy.objspace.std.unicodeobject import _normalize_index
  8. from pypy.objspace.std.ropeobject import W_RopeObject
  9. from pypy.objspace.std.noneobject import W_NoneObject
  10. from pypy.rlib import rope
  11. from pypy.objspace.std.sliceobject import W_SliceObject, normalize_simple_slice
  12. from pypy.objspace.std import unicodeobject, slicetype, iterobject
  13. from pypy.objspace.std.tupleobject import W_TupleObject
  14. from pypy.rlib.rarithmetic import intmask, ovfcheck
  15. from pypy.module.unicodedata import unicodedb
  16. from pypy.tool.sourcetools import func_with_new_name
  17. from pypy.objspace.std.formatting import mod_format
  18. from pypy.objspace.std.unicodeobject import (
  19. format__Unicode_ANY as format__RopeUnicode_ANY)
  20. def wrapunicode(space, uni):
  21. return W_RopeUnicodeObject(rope.rope_from_unicode(uni))
  22. def unicode_from_string(space, w_str):
  23. from pypy.objspace.std.unicodetype import getdefaultencoding
  24. assert isinstance(w_str, W_RopeObject)
  25. encoding = getdefaultencoding(space)
  26. w_retval = decode_string(space, w_str, encoding, "strict")
  27. if not space.isinstance_w(w_retval, space.w_unicode):
  28. raise operationerrfmt(
  29. space.w_TypeError,
  30. "decoder did not return an unicode object (type '%s')",
  31. space.type(w_retval).getname(space))
  32. assert isinstance(w_retval, W_RopeUnicodeObject)
  33. return w_retval
  34. def decode_string(space, w_str, encoding, errors):
  35. from pypy.objspace.std.unicodetype import decode_object
  36. if errors is None or errors == "strict":
  37. node = w_str._node
  38. if encoding == 'ascii':
  39. result = rope.str_decode_ascii(node)
  40. if result is not None:
  41. return W_RopeUnicodeObject(result)
  42. elif encoding == 'latin-1':
  43. assert node.is_bytestring()
  44. return W_RopeUnicodeObject(node)
  45. elif encoding == "utf-8":
  46. result = rope.str_decode_utf8(node)
  47. if result is not None:
  48. return W_RopeUnicodeObject(result)
  49. w_result = decode_object(space, w_str, encoding, errors)
  50. return w_result
  51. def encode_unicode(space, w_unistr, encoding, errors):
  52. from pypy.objspace.std.unicodetype import getdefaultencoding, \
  53. _get_encoding_and_errors, encode_object
  54. from pypy.objspace.std.ropeobject import W_RopeObject
  55. if errors is None or errors == "strict":
  56. node = w_unistr._node
  57. if encoding == 'ascii':
  58. result = rope.unicode_encode_ascii(node)
  59. if result is not None:
  60. return W_RopeObject(result)
  61. elif encoding == 'latin-1':
  62. result = rope.unicode_encode_latin1(node)
  63. if result is not None:
  64. return W_RopeObject(result)
  65. elif encoding == "utf-8":
  66. result = rope.unicode_encode_utf8(node)
  67. if result is not None:
  68. return W_RopeObject(result)
  69. return encode_object(space, w_unistr, encoding, errors)
  70. class W_RopeUnicodeObject(unicodeobject.W_AbstractUnicodeObject):
  71. from pypy.objspace.std.unicodetype import unicode_typedef as typedef
  72. _immutable_fields_ = ['_node']
  73. def __init__(w_self, node):
  74. w_self._node = node
  75. def __repr__(w_self):
  76. """ representation for debugging purposes """
  77. return "%s(%r)" % (w_self.__class__.__name__, w_self._node)
  78. def unwrap(w_self, space):
  79. # for testing
  80. return w_self._node.flatten_unicode()
  81. def str_w(w_self, space):
  82. return space.str_w(space.str(w_self))
  83. def create_if_subclassed(w_self):
  84. if type(w_self) is W_RopeUnicodeObject:
  85. return w_self
  86. return W_RopeUnicodeObject(w_self._node)
  87. def unicode_w(self, space):
  88. return self._node.flatten_unicode()
  89. W_RopeUnicodeObject.EMPTY = W_RopeUnicodeObject(rope.LiteralStringNode.EMPTY)
  90. registerimplementation(W_RopeUnicodeObject)
  91. def _isspace(uchar_ord):
  92. return unicodedb.isspace(uchar_ord)
  93. def ropeunicode_w(space, w_str):
  94. if isinstance(w_str, W_RopeUnicodeObject):
  95. return w_str._node
  96. if isinstance(w_str, W_RopeObject):
  97. return unicode_from_string(space, w_str)._node
  98. return rope.LiteralUnicodeNode(space.unicode_w(w_str))
  99. class W_RopeUnicodeIterObject(iterobject.W_AbstractIterObject):
  100. from pypy.objspace.std.itertype import iter_typedef as typedef
  101. def __init__(w_self, w_rope, index=0):
  102. w_self.node = node = w_rope._node
  103. w_self.item_iter = rope.ItemIterator(node)
  104. w_self.index = index
  105. def iter__RopeUnicode(space, w_uni):
  106. return W_RopeUnicodeIterObject(w_uni)
  107. # Helper for converting int/long
  108. def unicode_to_decimal_w(space, w_unistr):
  109. if not isinstance(w_unistr, W_RopeUnicodeObject):
  110. raise OperationError(space.w_TypeError,
  111. space.wrap("expected unicode"))
  112. unistr = w_unistr._node
  113. length = unistr.length()
  114. result = ['\0'] * length
  115. digits = [ '0', '1', '2', '3', '4',
  116. '5', '6', '7', '8', '9']
  117. iter = rope.ItemIterator(unistr)
  118. for i in range(length):
  119. uchr = iter.nextint()
  120. if unicodedb.isspace(uchr):
  121. result[i] = ' '
  122. continue
  123. try:
  124. result[i] = digits[unicodedb.decimal(uchr)]
  125. except KeyError:
  126. if 0 < uchr < 256:
  127. result[i] = chr(uchr)
  128. else:
  129. w_encoding = space.wrap('decimal')
  130. w_start = space.wrap(i)
  131. w_end = space.wrap(i+1)
  132. w_reason = space.wrap('invalid decimal Unicode string')
  133. raise OperationError(space.w_UnicodeEncodeError, space.newtuple([w_encoding, w_unistr, w_start, w_end, w_reason]))
  134. return ''.join(result)
  135. # string-to-unicode delegation
  136. def delegate_Rope2RopeUnicode(space, w_rope):
  137. w_uni = unicode_from_string(space, w_rope)
  138. assert isinstance(w_uni, W_RopeUnicodeObject) # help the annotator!
  139. return w_uni
  140. def str__RopeUnicode(space, w_uni):
  141. return space.call_method(w_uni, 'encode')
  142. def lt__RopeUnicode_RopeUnicode(space, w_str1, w_str2):
  143. n1 = w_str1._node
  144. n2 = w_str2._node
  145. return space.newbool(rope.compare(n1, n2) < 0)
  146. def le__RopeUnicode_RopeUnicode(space, w_str1, w_str2):
  147. n1 = w_str1._node
  148. n2 = w_str2._node
  149. return space.newbool(rope.compare(n1, n2) <= 0)
  150. def _eq(w_str1, w_str2):
  151. result = rope.eq(w_str1._node, w_str2._node)
  152. return result
  153. def eq__RopeUnicode_RopeUnicode(space, w_str1, w_str2):
  154. return space.newbool(_eq(w_str1, w_str2))
  155. def eq__RopeUnicode_Rope(space, w_runi, w_rope):
  156. from pypy.objspace.std.unicodeobject import _unicode_string_comparison
  157. return _unicode_string_comparison(space, w_runi, w_rope,
  158. False, unicode_from_string)
  159. def ne__RopeUnicode_RopeUnicode(space, w_str1, w_str2):
  160. return space.newbool(not _eq(w_str1, w_str2))
  161. def ne__RopeUnicode_Rope(space, w_runi, w_rope):
  162. from pypy.objspace.std.unicodeobject import _unicode_string_comparison
  163. return _unicode_string_comparison(space, w_runi, w_rope,
  164. True, unicode_from_string)
  165. def gt__RopeUnicode_RopeUnicode(space, w_str1, w_str2):
  166. n1 = w_str1._node
  167. n2 = w_str2._node
  168. return space.newbool(rope.compare(n1, n2) > 0)
  169. def ge__RopeUnicode_RopeUnicode(space, w_str1, w_str2):
  170. n1 = w_str1._node
  171. n2 = w_str2._node
  172. return space.newbool(rope.compare(n1, n2) >= 0)
  173. def ord__RopeUnicode(space, w_uni):
  174. if w_uni._node.length() != 1:
  175. raise OperationError(space.w_TypeError, space.wrap('ord() expected a character'))
  176. return space.wrap(w_uni._node.getint(0))
  177. def getnewargs__RopeUnicode(space, w_uni):
  178. return space.newtuple([W_RopeUnicodeObject(w_uni._node)])
  179. def add__RopeUnicode_RopeUnicode(space, w_left, w_right):
  180. right = w_right._node
  181. left = w_left._node
  182. try:
  183. return W_RopeUnicodeObject(rope.concatenate(left, right))
  184. except OverflowError:
  185. raise OperationError(space.w_OverflowError,
  186. space.wrap("string too long"))
  187. def add__Rope_RopeUnicode(space, w_left, w_right):
  188. return space.add(unicode_from_string(space, w_left) , w_right)
  189. def add__RopeUnicode_Rope(space, w_left, w_right):
  190. return space.add(w_left, unicode_from_string(space, w_right))
  191. def contains__RopeUnicode_RopeUnicode(space, w_container, w_item):
  192. item = w_item._node
  193. container = w_container._node
  194. return space.newbool(rope.find(container, item) != -1)
  195. def contains__Rope_RopeUnicode(space, w_container, w_item):
  196. return space.contains(unicode_from_string(space, w_container), w_item )
  197. def unicode_join__RopeUnicode_ANY(space, w_self, w_list):
  198. l_w = space.listview(w_list)
  199. delim = w_self._node
  200. totlen = 0
  201. if len(l_w) == 0:
  202. return W_RopeUnicodeObject.EMPTY
  203. if (len(l_w) == 1 and
  204. space.is_w(space.type(l_w[0]), space.w_unicode)):
  205. return l_w[0]
  206. values_list = []
  207. for i in range(len(l_w)):
  208. w_item = l_w[i]
  209. if isinstance(w_item, W_RopeUnicodeObject):
  210. # shortcut for performane
  211. item = w_item._node
  212. elif space.isinstance_w(w_item, space.w_str):
  213. item = unicode_from_string(space, w_item)._node
  214. else:
  215. msg = 'sequence item %d: expected string or Unicode'
  216. raise operationerrfmt(space.w_TypeError, msg, i)
  217. values_list.append(item)
  218. try:
  219. return W_RopeUnicodeObject(rope.join(w_self._node, values_list))
  220. except OverflowError:
  221. raise OperationError(space.w_OverflowError,
  222. space.wrap("string too long"))
  223. def hash__RopeUnicode(space, w_uni):
  224. return space.wrap(rope.hash_rope(w_uni._node))
  225. def len__RopeUnicode(space, w_uni):
  226. return space.wrap(w_uni._node.length())
  227. def getitem__RopeUnicode_ANY(space, w_uni, w_index):
  228. ival = space.getindex_w(w_index, space.w_IndexError, "string index")
  229. uni = w_uni._node
  230. ulen = uni.length()
  231. if ival < 0:
  232. ival += ulen
  233. if ival < 0 or ival >= ulen:
  234. exc = space.call_function(space.w_IndexError,
  235. space.wrap("unicode index out of range"))
  236. raise OperationError(space.w_IndexError, exc)
  237. return W_RopeUnicodeObject(uni.getrope(ival))
  238. def getitem__RopeUnicode_Slice(space, w_uni, w_slice):
  239. node = w_uni._node
  240. length = node.length()
  241. start, stop, step, sl = w_slice.indices4(space, length)
  242. if sl == 0:
  243. return W_RopeUnicodeObject.EMPTY
  244. return W_RopeUnicodeObject(rope.getslice(node, start, stop, step, sl))
  245. def getslice__RopeUnicode_ANY_ANY(space, w_uni, w_start, w_stop):
  246. node = w_uni._node
  247. length = node.length()
  248. start, stop = normalize_simple_slice(space, length, w_start, w_stop)
  249. sl = stop - start
  250. if sl == 0:
  251. return W_RopeUnicodeObject.EMPTY
  252. return W_RopeUnicodeObject(rope.getslice(node, start, stop, 1, sl))
  253. def mul__RopeUnicode_ANY(space, w_uni, w_times):
  254. try:
  255. times = space.getindex_w(w_times, space.w_OverflowError)
  256. except OperationError, e:
  257. if e.match(space, space.w_TypeError):
  258. raise FailedToImplement
  259. raise
  260. node = w_uni._node
  261. try:
  262. return W_RopeUnicodeObject(rope.multiply(node, times))
  263. except OverflowError:
  264. raise OperationError(space.w_OverflowError,
  265. space.wrap("string too long"))
  266. def mul__ANY_RopeUnicode(space, w_times, w_uni):
  267. return mul__RopeUnicode_ANY(space, w_uni, w_times)
  268. def make_generic(funcname):
  269. def func(space, w_self):
  270. node = w_self._node
  271. if node.length() == 0:
  272. return space.w_False
  273. iter = rope.ItemIterator(node)
  274. for idx in range(node.length()):
  275. if not getattr(unicodedb, funcname)(iter.nextint()):
  276. return space.w_False
  277. return space.w_True
  278. return func_with_new_name(func, "unicode_%s__RopeUnicode" % (funcname, ))
  279. unicode_isspace__RopeUnicode = make_generic("isspace")
  280. unicode_isalpha__RopeUnicode = make_generic("isalpha")
  281. unicode_isalnum__RopeUnicode = make_generic("isalnum")
  282. unicode_isdecimal__RopeUnicode = make_generic("isdecimal")
  283. unicode_isdigit__RopeUnicode = make_generic("isdigit")
  284. unicode_isnumeric__RopeUnicode = make_generic("isnumeric")
  285. def unicode_islower__RopeUnicode(space, w_unicode):
  286. cased = False
  287. iter = rope.ItemIterator(w_unicode._node)
  288. while 1:
  289. try:
  290. ch = iter.nextint()
  291. except StopIteration:
  292. return space.newbool(cased)
  293. if (unicodedb.isupper(ch) or
  294. unicodedb.istitle(ch)):
  295. return space.w_False
  296. if not cased and unicodedb.islower(ch):
  297. cased = True
  298. def unicode_isupper__RopeUnicode(space, w_unicode):
  299. cased = False
  300. iter = rope.ItemIterator(w_unicode._node)
  301. while 1:
  302. try:
  303. ch = iter.nextint()
  304. except StopIteration:
  305. return space.newbool(cased)
  306. if (unicodedb.islower(ch) or
  307. unicodedb.istitle(ch)):
  308. return space.w_False
  309. if not cased and unicodedb.isupper(ch):
  310. cased = True
  311. def unicode_istitle__RopeUnicode(space, w_unicode):
  312. cased = False
  313. previous_is_cased = False
  314. iter = rope.ItemIterator(w_unicode._node)
  315. while 1:
  316. try:
  317. ch = iter.nextint()
  318. except StopIteration:
  319. return space.newbool(cased)
  320. if (unicodedb.isupper(ch) or
  321. unicodedb.istitle(ch)):
  322. if previous_is_cased:
  323. return space.w_False
  324. previous_is_cased = cased = True
  325. elif unicodedb.islower(ch):
  326. if not previous_is_cased:
  327. return space.w_False
  328. previous_is_cased = cased = True
  329. else:
  330. previous_is_cased = False
  331. def _contains(i, uni):
  332. return unichr(i) in uni
  333. def unicode_strip__RopeUnicode_None(space, w_self, w_chars):
  334. return W_RopeUnicodeObject(rope.strip(w_self._node, True, True, _isspace))
  335. def unicode_strip__RopeUnicode_RopeUnicode(space, w_self, w_chars):
  336. return W_RopeUnicodeObject(rope.strip(w_self._node, True, True, _contains,
  337. w_chars._node.flatten_unicode()))
  338. def unicode_strip__RopeUnicode_Rope(space, w_self, w_chars):
  339. return space.call_method(w_self, 'strip',
  340. unicode_from_string(space, w_chars))
  341. def unicode_lstrip__RopeUnicode_None(space, w_self, w_chars):
  342. return W_RopeUnicodeObject(rope.strip(w_self._node, True, False, _isspace))
  343. def unicode_lstrip__RopeUnicode_RopeUnicode(space, w_self, w_chars):
  344. return W_RopeUnicodeObject(rope.strip(w_self._node, True, False, _contains,
  345. w_chars._node.flatten_unicode()))
  346. def unicode_lstrip__RopeUnicode_Rope(space, w_self, w_chars):
  347. return space.call_method(w_self, 'lstrip',
  348. unicode_from_string(space, w_chars))
  349. def unicode_rstrip__RopeUnicode_None(space, w_self, w_chars):
  350. return W_RopeUnicodeObject(rope.strip(w_self._node, False, True, _isspace))
  351. def unicode_rstrip__RopeUnicode_RopeUnicode(space, w_self, w_chars):
  352. return W_RopeUnicodeObject(rope.strip(w_self._node, False, True, _contains,
  353. w_chars._node.flatten_unicode()))
  354. def unicode_rstrip__RopeUnicode_Rope(space, w_self, w_chars):
  355. return space.call_method(w_self, 'rstrip',
  356. unicode_from_string(space, w_chars))
  357. def unicode_capitalize__RopeUnicode(space, w_self):
  358. input = w_self._node
  359. length = input.length()
  360. if length == 0:
  361. return w_self
  362. result = [u'\0'] * length
  363. iter = rope.ItemIterator(input)
  364. result[0] = unichr(unicodedb.toupper(iter.nextint()))
  365. for i in range(1, length):
  366. result[i] = unichr(unicodedb.tolower(iter.nextint()))
  367. return W_RopeUnicodeObject(rope.rope_from_unicharlist(result))
  368. def unicode_title__RopeUnicode(space, w_self):
  369. input = w_self._node
  370. length = input.length()
  371. if length == 0:
  372. return w_self
  373. result = [u'\0'] * length
  374. iter = rope.ItemIterator(input)
  375. previous_is_cased = False
  376. for i in range(input.length()):
  377. unichar = iter.nextint()
  378. if previous_is_cased:
  379. result[i] = unichr(unicodedb.tolower(unichar))
  380. else:
  381. result[i] = unichr(unicodedb.totitle(unichar))
  382. previous_is_cased = unicodedb.iscased(unichar)
  383. return W_RopeUnicodeObject(rope.rope_from_unicharlist(result))
  384. def _local_transform(node, transform):
  385. l = node.length()
  386. res = [u' '] * l
  387. iter = rope.ItemIterator(node)
  388. for i in range(l):
  389. ch = iter.nextint()
  390. res[i] = transform(ch)
  391. return W_RopeUnicodeObject(rope.rope_from_unicharlist(res))
  392. _local_transform._annspecialcase_ = "specialize:arg(1)"
  393. def _tolower(ordch):
  394. return unichr(unicodedb.tolower(ordch))
  395. def unicode_lower__RopeUnicode(space, w_self):
  396. return _local_transform(w_self._node, _tolower)
  397. def _toupper(ordch):
  398. return unichr(unicodedb.toupper(ordch))
  399. def unicode_upper__RopeUnicode(space, w_self):
  400. return _local_transform(w_self._node, _toupper)
  401. def _swapcase(ordch):
  402. if unicodedb.islower(ordch):
  403. return unichr(unicodedb.toupper(ordch))
  404. elif unicodedb.isupper(ordch):
  405. return unichr(unicodedb.tolower(ordch))
  406. else:
  407. return unichr(ordch)
  408. def unicode_swapcase__RopeUnicode(space, w_self):
  409. return _local_transform(w_self._node, _swapcase)
  410. def _convert_idx_params(space, w_self, w_start, w_end):
  411. self = w_self._node
  412. length = w_self._node.length()
  413. if space.is_w(w_start, space.w_None):
  414. w_start = space.wrap(0)
  415. if space.is_w(w_end, space.w_None):
  416. w_end = space.len(w_self)
  417. start = slicetype.adapt_bound(space, length, w_start)
  418. end = slicetype.adapt_bound(space, length, w_end)
  419. assert start >= 0
  420. assert end >= 0
  421. return (self, start, end)
  422. def unicode_endswith__RopeUnicode_RopeUnicode_ANY_ANY(space, w_self, w_substr, w_start, w_end):
  423. self, start, end = _convert_idx_params(space, w_self, w_start, w_end)
  424. return space.newbool(rope.endswith(self, w_substr._node, start, end))
  425. def unicode_startswith__RopeUnicode_RopeUnicode_ANY_ANY(space, w_self, w_substr, w_start, w_end):
  426. self, start, end = _convert_idx_params(space, w_self, w_start, w_end)
  427. # XXX this stuff can be waaay better for ootypebased backends if
  428. # we re-use more of our rpython machinery (ie implement startswith
  429. # with additional parameters as rpython)
  430. self, start, end = _convert_idx_params(space, w_self, w_start, w_end)
  431. return space.newbool(rope.startswith(self, w_substr._node, start, end))
  432. def unicode_startswith__RopeUnicode_Tuple_ANY_ANY(space, w_unistr, w_prefixes,
  433. w_start, w_end):
  434. unistr, start, end = _convert_idx_params(space, w_unistr, w_start, w_end)
  435. for w_prefix in space.fixedview(w_prefixes):
  436. prefix = ropeunicode_w(space, w_prefix)
  437. if rope.startswith(unistr, prefix, start, end):
  438. return space.w_True
  439. return space.w_False
  440. def unicode_endswith__RopeUnicode_Tuple_ANY_ANY(space, w_unistr, w_suffixes,
  441. w_start, w_end):
  442. unistr, start, end = _convert_idx_params(space, w_unistr, w_start, w_end)
  443. for w_suffix in space.fixedview(w_suffixes):
  444. suffix = ropeunicode_w(space, w_suffix)
  445. if rope.endswith(unistr, suffix, start, end):
  446. return space.w_True
  447. return space.w_False
  448. def _to_unichar_w(space, w_char):
  449. try:
  450. unistr = ropeunicode_w(space, w_char)
  451. except OperationError, e:
  452. if e.match(space, space.w_TypeError):
  453. msg = 'The fill character cannot be converted to Unicode'
  454. raise OperationError(space.w_TypeError, space.wrap(msg))
  455. else:
  456. raise
  457. if unistr.length() != 1:
  458. raise OperationError(space.w_TypeError, space.wrap('The fill character must be exactly one character long'))
  459. return unistr
  460. def unicode_center__RopeUnicode_ANY_ANY(space, w_self, w_width, w_fillchar):
  461. self = w_self._node
  462. length = self.length()
  463. width = space.int_w(w_width)
  464. fillchar = _to_unichar_w(space, w_fillchar)
  465. padding = width - length
  466. if padding < 0:
  467. return w_self.create_if_subclassed()
  468. offset = padding // 2
  469. pre = rope.multiply(fillchar, offset)
  470. post = rope.multiply(fillchar, (padding - offset))
  471. centered = rope.rebalance([pre, self, post])
  472. return W_RopeUnicodeObject(centered)
  473. def unicode_ljust__RopeUnicode_ANY_ANY(space, w_self, w_width, w_fillchar):
  474. self = w_self._node
  475. length = self.length()
  476. width = space.int_w(w_width)
  477. fillchar = _to_unichar_w(space, w_fillchar)
  478. padding = width - length
  479. if padding < 0:
  480. return w_self.create_if_subclassed()
  481. resultnode = rope.concatenate(self, rope.multiply(fillchar, padding))
  482. return W_RopeUnicodeObject(resultnode)
  483. def unicode_rjust__RopeUnicode_ANY_ANY(space, w_self, w_width, w_fillchar):
  484. self = w_self._node
  485. length = self.length()
  486. width = space.int_w(w_width)
  487. fillchar = _to_unichar_w(space, w_fillchar)
  488. padding = width - length
  489. if padding < 0:
  490. return w_self.create_if_subclassed()
  491. resultnode = rope.concatenate(rope.multiply(fillchar, padding), self)
  492. return W_RopeUnicodeObject(resultnode)
  493. def unicode_zfill__RopeUnicode_ANY(space, w_self, w_width):
  494. self = w_self._node
  495. length = self.length()
  496. width = space.int_w(w_width)
  497. zero = rope.LiteralStringNode.PREBUILT[ord("0")]
  498. if self.length() == 0:
  499. return W_RopeUnicodeObject(
  500. rope.multiply(zero, width))
  501. padding = width - length
  502. if padding <= 0:
  503. return w_self.create_if_subclassed()
  504. firstchar = self.getunichar(0)
  505. if firstchar in (u'+', u'-'):
  506. return W_RopeUnicodeObject(rope.rebalance(
  507. [rope.LiteralStringNode.PREBUILT[ord(firstchar)],
  508. rope.multiply(zero, padding),
  509. rope.getslice_one(self, 1, length)]))
  510. else:
  511. return W_RopeUnicodeObject(rope.concatenate(
  512. rope.multiply(zero, padding), self))
  513. def unicode_splitlines__RopeUnicode_ANY(space, w_self, w_keepends):
  514. keepends = bool(space.int_w(w_keepends)) # truth value, but type checked
  515. node = w_self._node
  516. return space.newlist(
  517. [W_RopeUnicodeObject(n) for n in rope.splitlines(node, keepends)])
  518. def unicode_find__RopeUnicode_RopeUnicode_ANY_ANY(space, w_self, w_substr, w_start, w_end):
  519. self, start, end = _convert_idx_params(space, w_self, w_start, w_end)
  520. sub = w_substr._node
  521. return space.wrap(rope.find(self, sub, start, end))
  522. def unicode_rfind__RopeUnicode_RopeUnicode_ANY_ANY(space, w_self, w_substr, w_start, w_end):
  523. self, start, end = _convert_idx_params(space, w_self, w_start, w_end)
  524. self = self.flatten_unicode()
  525. sub = w_substr._node.flatten_unicode()
  526. res = self.rfind(sub, start, end)
  527. return space.wrap(res)
  528. def unicode_index__RopeUnicode_RopeUnicode_ANY_ANY(space, w_self, w_substr, w_start, w_end):
  529. self, start, end = _convert_idx_params(space, w_self, w_start, w_end)
  530. sub = w_substr._node
  531. res = rope.find(self, sub, start, end)
  532. if res < 0:
  533. raise OperationError(space.w_ValueError,
  534. space.wrap("substring not found in string.index"))
  535. return space.wrap(res)
  536. def unicode_rindex__RopeUnicode_RopeUnicode_ANY_ANY(space, w_self, w_substr, w_start, w_end):
  537. # XXX works but flattens string
  538. self, start, end = _convert_idx_params(space, w_self, w_start, w_end)
  539. self = self.flatten_unicode()
  540. sub = w_substr._node.flatten_unicode()
  541. res = self.rfind(sub, start, end)
  542. if res < 0:
  543. raise OperationError(space.w_ValueError,
  544. space.wrap("substring not found in string.rindex"))
  545. return space.wrap(res)
  546. def unicode_count__RopeUnicode_RopeUnicode_ANY_ANY(space, w_self, w_substr, w_start, w_end):
  547. self, start, end = _convert_idx_params(space, w_self, w_start, w_end)
  548. assert start >= 0
  549. assert end >= 0
  550. iter = rope.FindIterator(self, w_substr._node, start, end)
  551. i = 0
  552. while 1:
  553. try:
  554. index = iter.next()
  555. except StopIteration:
  556. break
  557. i += 1
  558. return space.wrap(i)
  559. def unicode_split__RopeUnicode_None_ANY(space, w_self, w_none, w_maxsplit):
  560. selfnode = w_self._node
  561. maxsplit = space.int_w(w_maxsplit)
  562. res_w = [W_RopeUnicodeObject(node)
  563. for node in rope.split_chars(selfnode, maxsplit, _isspace)]
  564. return space.newlist(res_w)
  565. def unicode_split__RopeUnicode_RopeUnicode_ANY(space, w_self, w_delim, w_maxsplit):
  566. maxsplit = space.int_w(w_maxsplit)
  567. start = 0
  568. selfnode = w_self._node
  569. delimnode = w_delim._node
  570. delimlen = delimnode.length()
  571. if delimlen == 0:
  572. raise OperationError(space.w_ValueError, space.wrap("empty separator"))
  573. res_w = [W_RopeUnicodeObject(node)
  574. for node in rope.split(selfnode, delimnode, maxsplit)]
  575. return space.newlist(res_w)
  576. def unicode_rsplit__RopeUnicode_None_ANY(space, w_self, w_none, w_maxsplit):
  577. selfnode = w_self._node
  578. maxsplit = space.int_w(w_maxsplit)
  579. res_w = [W_RopeUnicodeObject(node)
  580. for node in rope.rsplit_chars(selfnode, maxsplit, _isspace)]
  581. return space.newlist(res_w)
  582. def unicode_rsplit__RopeUnicode_RopeUnicode_ANY(space, w_self, w_delim, w_maxsplit):
  583. # XXX works but flattens
  584. self = w_self._node.flatten_unicode()
  585. delim = w_delim._node.flatten_unicode()
  586. maxsplit = space.int_w(w_maxsplit)
  587. delim_len = len(delim)
  588. if delim_len == 0:
  589. raise OperationError(space.w_ValueError,
  590. space.wrap('empty separator'))
  591. parts = []
  592. if len(self) == 0:
  593. return space.newlist([])
  594. start = 0
  595. end = len(self)
  596. while maxsplit != 0:
  597. index = self.rfind(delim, 0, end)
  598. if index < 0:
  599. break
  600. parts.append(W_RopeUnicodeObject(
  601. rope.getslice_one(w_self._node, index+delim_len, end)))
  602. end = index
  603. maxsplit -= 1
  604. parts.append(W_RopeUnicodeObject(
  605. rope.getslice_one(w_self._node, 0, end)))
  606. parts.reverse()
  607. return space.newlist(parts)
  608. def _split_into_chars(self, maxsplit):
  609. if maxsplit == 0:
  610. return [self]
  611. index = 0
  612. end = self.length()
  613. parts = [rope.LiteralStringNode.EMPTY]
  614. maxsplit -= 1
  615. while maxsplit != 0:
  616. if index >= end:
  617. break
  618. parts.append(self.getrope(index))
  619. index += 1
  620. maxsplit -= 1
  621. parts.append(rope.getslice_one(self, index, self.length()))
  622. return parts
  623. def unicode_replace__RopeUnicode_RopeUnicode_RopeUnicode_ANY(
  624. space, w_self, w_old, w_new, w_maxsplit):
  625. self = w_self._node
  626. old = w_old._node
  627. maxsplit = space.int_w(w_maxsplit)
  628. oldlength = old.length()
  629. if not oldlength:
  630. parts = _split_into_chars(self, maxsplit)
  631. try:
  632. return W_RopeUnicodeObject(rope.join(w_new._node, parts))
  633. except OverflowError:
  634. raise OperationError(space.w_OverflowError,
  635. space.wrap("string too long"))
  636. substrings = rope.split(self, old, maxsplit)
  637. if not substrings:
  638. return w_self.create_if_subclassed()
  639. try:
  640. return W_RopeUnicodeObject(rope.join(w_new._node, substrings))
  641. except OverflowError:
  642. raise OperationError(space.w_OverflowError,
  643. space.wrap("string too long"))
  644. def unicode_encode__RopeUnicode_ANY_ANY(space, w_unistr,
  645. w_encoding=None,
  646. w_errors=None):
  647. from pypy.objspace.std.unicodetype import getdefaultencoding, \
  648. _get_encoding_and_errors
  649. encoding, errors = _get_encoding_and_errors(space, w_encoding, w_errors)
  650. if encoding is None:
  651. encoding = getdefaultencoding(space)
  652. return encode_unicode(space, w_unistr, encoding, errors)
  653. def unicode_partition__RopeUnicode_RopeUnicode(space, w_unistr, w_unisub):
  654. self = w_unistr._node
  655. sub = w_unisub._node
  656. if not sub.length():
  657. raise OperationError(space.w_ValueError,
  658. space.wrap("empty separator"))
  659. pos = rope.find(self, sub)
  660. if pos == -1:
  661. return space.newtuple([w_unistr, W_RopeUnicodeObject.EMPTY,
  662. W_RopeUnicodeObject.EMPTY])
  663. else:
  664. return space.newtuple(
  665. [W_RopeUnicodeObject(rope.getslice_one(self, 0, pos)),
  666. w_unisub,
  667. W_RopeUnicodeObject(rope.getslice_one(self, pos + sub.length(),
  668. self.length()))])
  669. def unicode_rpartition__RopeUnicode_RopeUnicode(space, w_unistr, w_unisub):
  670. # XXX works but flattens
  671. unistr = w_unistr._node.flatten_unicode()
  672. unisub = w_unisub._node.flatten_unicode()
  673. if not unisub:
  674. raise OperationError(space.w_ValueError,
  675. space.wrap("empty separator"))
  676. pos = unistr.rfind(unisub)
  677. if pos == -1:
  678. return space.newtuple([W_RopeUnicodeObject.EMPTY,
  679. W_RopeUnicodeObject.EMPTY, w_unistr])
  680. else:
  681. assert pos >= 0
  682. return space.newtuple([space.wrap(unistr[:pos]), w_unisub,
  683. space.wrap(unistr[pos+len(unisub):])])
  684. def unicode_expandtabs__RopeUnicode_ANY(space, w_self, w_tabsize):
  685. from pypy.objspace.std.ropeobject import _tabindent
  686. self = w_self._node
  687. tabsize = space.int_w(w_tabsize)
  688. splitted = rope.split(self, rope.LiteralStringNode.PREBUILT[ord('\t')])
  689. last = splitted[0]
  690. expanded = [last]
  691. for i in range(1, len(splitted)):
  692. expanded.append(rope.multiply(rope.LiteralStringNode.PREBUILT[ord(" ")],
  693. _tabindent(last, tabsize)))
  694. last = splitted[i]
  695. expanded.append(last)
  696. try:
  697. return W_RopeUnicodeObject(rope.rebalance(expanded))
  698. except OverflowError:
  699. raise OperationError(space.w_OverflowError,
  700. space.wrap("string too long"))
  701. def unicode_translate__RopeUnicode_ANY(space, w_self, w_table):
  702. self = w_self._node
  703. w_sys = space.getbuiltinmodule('sys')
  704. maxunicode = space.int_w(space.getattr(w_sys, space.wrap("maxunicode")))
  705. result = []
  706. iter = rope.ItemIterator(self)
  707. for i in range(self.length()):
  708. crope = iter.nextrope()
  709. char = crope.getint(0)
  710. try:
  711. w_newval = space.getitem(w_table, space.wrap(char))
  712. except OperationError, e:
  713. if e.match(space, space.w_LookupError):
  714. result.append(crope)
  715. else:
  716. raise
  717. else:
  718. if space.is_w(w_newval, space.w_None):
  719. continue
  720. elif space.isinstance_w(w_newval, space.w_int):
  721. newval = space.int_w(w_newval)
  722. if newval < 0 or newval > maxunicode:
  723. raise OperationError(
  724. space.w_TypeError,
  725. space.wrap("character mapping must be in range(0x%x)" % (maxunicode + 1,)))
  726. result.append(rope.rope_from_unichar(unichr(newval)))
  727. elif space.isinstance_w(w_newval, space.w_unicode):
  728. result.append(ropeunicode_w(space, w_newval))
  729. else:
  730. raise OperationError(
  731. space.w_TypeError,
  732. space.wrap("character mapping must return integer, None or unicode"))
  733. return W_RopeUnicodeObject(rope.join(rope.LiteralStringNode.EMPTY, result))
  734. # Move this into the _codecs module as 'unicodeescape_string (Remember to cater for quotes)'
  735. def repr__RopeUnicode(space, w_unicode):
  736. hexdigits = "0123456789abcdef"
  737. node = w_unicode._node
  738. size = node.length()
  739. singlequote = doublequote = False
  740. iter = rope.ItemIterator(node)
  741. for i in range(size):
  742. c = iter.nextunichar()
  743. if singlequote and doublequote:
  744. break
  745. if c == u'\'':
  746. singlequote = True
  747. elif c == u'"':
  748. doublequote = True
  749. if singlequote and not doublequote:
  750. quote = '"'
  751. else:
  752. quote = '\''
  753. result = ['u', quote]
  754. iter = rope.ItemIterator(node)
  755. j = 0
  756. while j < size:
  757. code = iter.nextint()
  758. if code >= 0x10000:
  759. result.extend(['\\', "U",
  760. hexdigits[(code >> 28) & 0xf],
  761. hexdigits[(code >> 24) & 0xf],
  762. hexdigits[(code >> 20) & 0xf],
  763. hexdigits[(code >> 16) & 0xf],
  764. hexdigits[(code >> 12) & 0xf],
  765. hexdigits[(code >> 8) & 0xf],
  766. hexdigits[(code >> 4) & 0xf],
  767. hexdigits[(code >> 0) & 0xf],
  768. ])
  769. j += 1
  770. continue
  771. if code >= 0xD800 and code < 0xDC00:
  772. if j < size - 1:
  773. code2 = iter.nextint()
  774. # XXX this is wrong: if the next if is false,
  775. # code2 is lost
  776. if code2 >= 0xDC00 and code2 <= 0xDFFF:
  777. code = (((code & 0x03FF) << 10) | (code2 & 0x03FF)) + 0x00010000
  778. result.extend(['\\', "U",
  779. hexdigits[(code >> 28) & 0xf],
  780. hexdigits[(code >> 24) & 0xf],
  781. hexdigits[(code >> 20) & 0xf],
  782. hexdigits[(code >> 16) & 0xf],
  783. hexdigits[(code >> 12) & 0xf],
  784. hexdigits[(code >> 8) & 0xf],
  785. hexdigits[(code >> 4) & 0xf],
  786. hexdigits[(code >> 0) & 0xf],
  787. ])
  788. j += 2
  789. continue
  790. if code >= 0x100:
  791. result.extend(['\\', "u",
  792. hexdigits[(code >> 12) & 0xf],
  793. hexdigits[(code >> 8) & 0xf],
  794. hexdigits[(code >> 4) & 0xf],
  795. hexdigits[(code >> 0) & 0xf],
  796. ])
  797. j += 1
  798. continue
  799. if code == ord('\\') or code == ord(quote):
  800. result.append('\\')
  801. result.append(chr(code))
  802. j += 1
  803. continue
  804. if code == ord('\t'):
  805. result.append('\\')
  806. result.append('t')
  807. j += 1
  808. continue
  809. if code == ord('\r'):
  810. result.append('\\')
  811. result.append('r')
  812. j += 1
  813. continue
  814. if code == ord('\n'):
  815. result.append('\\')
  816. result.append('n')
  817. j += 1
  818. continue
  819. if code < ord(' ') or code >= 0x7f:
  820. result.extend(['\\', "x",
  821. hexdigits[(code >> 4) & 0xf],
  822. hexdigits[(code >> 0) & 0xf],
  823. ])
  824. j += 1
  825. continue
  826. result.append(chr(code))
  827. j += 1
  828. result.append(quote)
  829. return W_RopeObject(rope.rope_from_charlist(result))
  830. def mod__RopeUnicode_ANY(space, w_format, w_values):
  831. return mod_format(space, w_format, w_values, do_unicode=True)
  832. def buffer__RopeUnicode(space, w_unicode):
  833. from pypy.rlib.rstruct.unichar import pack_unichar
  834. charlist = []
  835. node = w_unicode._node
  836. iter = rope.ItemIterator(node)
  837. for idx in range(node.length()):
  838. unich = unichr(iter.nextint())
  839. pack_unichar(unich, charlist)
  840. from pypy.interpreter.buffer import StringBuffer
  841. return space.wrap(StringBuffer(''.join(charlist)))
  842. # methods of the iterator
  843. def iter__RopeUnicodeIter(space, w_ropeiter):
  844. return w_ropeiter
  845. def next__RopeUnicodeIter(space, w_ropeiter):
  846. if w_ropeiter.node is None:
  847. raise OperationError(space.w_StopIteration, space.w_None)
  848. try:
  849. unichar = w_ropeiter.item_iter.nextunichar()
  850. w_item = space.wrap(unichar)
  851. except StopIteration:
  852. w_ropeiter.node = None
  853. w_ropeiter.char_iter = None
  854. raise OperationError(space.w_StopIteration, space.w_None)
  855. w_ropeiter.index += 1
  856. return w_item
  857. # XXX __length_hint__()
  858. ##def len__RopeUnicodeIter(space, w_ropeiter):
  859. ## if w_ropeiter.node is None:
  860. ## return space.wrap(0)
  861. ## index = w_ropeiter.index
  862. ## length = w_ropeiter.node.length()
  863. ## result = length - index
  864. ## if result < 0:
  865. ## return space.wrap(0)
  866. ## return space.wrap(result)
  867. from pypy.objspace.std import unicodetype
  868. register_all(vars(), unicodetype)
  869. # str.strip(unicode) needs to convert self to unicode and call unicode.strip.
  870. # We use the following magic to register strip_string_unicode as a String
  871. # multimethod.
  872. # XXX Couldn't string and unicode _share_ the multimethods that make up their
  873. # methods?
  874. class str_methods:
  875. from pypy.objspace.std import stringtype
  876. W_RopeUnicodeObject = W_RopeUnicodeObject
  877. from pypy.objspace.std.ropeobject import W_RopeObject
  878. def str_strip__Rope_RopeUnicode(space, w_self, w_chars):
  879. return space.call_method(unicode_from_string(space, w_self),
  880. 'strip', w_chars)
  881. def str_lstrip__Rope_RopeUnicode(space, w_self, w_chars):
  882. return space.call_method(unicode_from_string(space, w_self),
  883. 'lstrip', w_chars)
  884. def str_rstrip__Rope_RopeUnicode(space, w_self, w_chars):
  885. return space.call_method(unicode_from_string(space, w_self),
  886. 'rstrip', w_chars)
  887. def str_count__Rope_RopeUnicode_ANY_ANY(space, w_self, w_substr, w_start, w_end):
  888. return space.call_method(unicode_from_string(space, w_self),
  889. 'count', w_substr, w_start, w_end)
  890. def str_find__Rope_RopeUnicode_ANY_ANY(space, w_self, w_substr, w_start, w_end):
  891. return space.call_method(unicode_from_string(space, w_self),
  892. 'find', w_substr, w_start, w_end)
  893. def str_rfind__Rope_RopeUnicode_ANY_ANY(space, w_self, w_substr, w_start, w_end):
  894. return space.call_method(unicode_from_string(space, w_self),
  895. 'rfind', w_substr, w_start, w_end)
  896. def str_index__Rope_RopeUnicode_ANY_ANY(space, w_self, w_substr, w_start, w_end):
  897. return space.call_method(unicode_from_string(space, w_self),
  898. 'index', w_substr, w_start, w_end)
  899. def str_rindex__Rope_RopeUnicode_ANY_ANY(space, w_self, w_substr, w_start, w_end):
  900. return space.call_method(unicode_from_string(space, w_self),
  901. 'rindex', w_substr, w_start, w_end)
  902. def str_replace__Rope_RopeUnicode_RopeUnicode_ANY(space, w_self, w_old, w_new, w_maxsplit):
  903. return space.call_method(unicode_from_string(space, w_self),
  904. 'replace', w_old, w_new, w_maxsplit)
  905. def str_split__Rope_RopeUnicode_ANY(space, w_self, w_delim, w_maxsplit):
  906. return space.call_method(unicode_from_string(space, w_self),
  907. 'split', w_delim, w_maxsplit)
  908. def str_rsplit__Rope_RopeUnicode_ANY(space, w_self, w_delim, w_maxsplit):
  909. return space.call_method(unicode_from_string(space, w_self),
  910. 'rsplit', w_delim, w_maxsplit)
  911. register_all(vars(), stringtype)