PageRenderTime 43ms CodeModel.GetById 12ms RepoModel.GetById 0ms app.codeStats 0ms

/pypy/module/cpyext/bytesobject.py

https://bitbucket.org/pypy/pypy/
Python | 353 lines | 272 code | 17 blank | 64 comment | 20 complexity | f4372dc9b02e362aac80352f34e35567 MD5 | raw file
Possible License(s): AGPL-3.0, BSD-3-Clause, Apache-2.0
  1. from pypy.interpreter.error import oefmt
  2. from rpython.rtyper.lltypesystem import rffi, lltype
  3. from pypy.module.cpyext.api import (
  4. cpython_api, cpython_struct, bootstrap_function, build_type_checkers,
  5. PyVarObjectFields, Py_ssize_t, CONST_STRING, CANNOT_FAIL)
  6. from pypy.module.cpyext.pyerrors import PyErr_BadArgument
  7. from pypy.module.cpyext.pyobject import (
  8. PyObject, PyObjectP, Py_DecRef, make_ref, from_ref, track_reference,
  9. make_typedescr, get_typedescr, as_pyobj, Py_IncRef, get_w_obj_and_decref,
  10. pyobj_has_w_obj)
  11. from pypy.objspace.std.bytesobject import W_BytesObject
  12. ##
  13. ## Implementation of PyBytesObject
  14. ## ================================
  15. ##
  16. ## The problem
  17. ## -----------
  18. ##
  19. ## PyString_AsString() must return a (non-movable) pointer to the underlying
  20. ## ob_sval, whereas pypy strings are movable. C code may temporarily store
  21. ## this address and use it, as long as it owns a reference to the PyObject.
  22. ## There is no "release" function to specify that the pointer is not needed
  23. ## any more.
  24. ##
  25. ## Also, the pointer may be used to fill the initial value of string. This is
  26. ## valid only when the string was just allocated, and is not used elsewhere.
  27. ##
  28. ## Solution
  29. ## --------
  30. ##
  31. ## PyBytesObject extends the PyVarObject header (which carries ob_size) with
  32. ## ob_shash, ob_sstate and ob_sval, a char array stored inline in the struct.
  33. ##
  34. ## - A string allocated by pypy will be converted into a PyBytesObject by
  35. ## bytes_attach(), which copies the content into ob_sval and appends a
  36. ## terminating NUL; that buffer must not be modified.
  37. ##
  38. ## - A string allocated with PyString_FromStringAndSize(NULL, size) will
  39. ## allocate a PyBytesObject structure, and a buffer with the specified
  40. ## size+1, but the reference won't be stored in the global map; there is no
  41. ## corresponding object in pypy. When from_ref() or Py_INCREF() is called,
  42. ## the pypy string is created, and added to the global map of tracked
  43. ## objects. The buffer is then supposed to be immutable.
  44. ##
  45. ## - A buffer obtained from PyString_AS_STRING() could be mutable iff
  46. ## there is no corresponding pypy object for the string
  47. ##
  48. ## - _PyString_Resize() works only on not-yet-pypy'd strings, and returns a
  49. ## similar object.
  50. ##
  51. ## - PyString_Size() doesn't need to force the object.
  52. ##
  53. ## - There could be an (expensive!) check in from_ref() that the buffer still
  54. ## corresponds to the pypy gc-managed string.
  55. ##
  56. PyBytesObjectStruct = lltype.ForwardReference()
  57. PyBytesObject = lltype.Ptr(PyBytesObjectStruct)
  58. PyBytesObjectFields = PyVarObjectFields + \
  59. (("ob_shash", rffi.LONG), ("ob_sstate", rffi.INT), ("ob_sval", rffi.CArray(lltype.Char)))
  60. cpython_struct("PyStringObject", PyBytesObjectFields, PyBytesObjectStruct)
  61. @bootstrap_function
  62. def init_bytesobject(space):
  63. "Type description of PyBytesObject"
  64. make_typedescr(space.w_str.layout.typedef,
  65. basestruct=PyBytesObject.TO,
  66. attach=bytes_attach,
  67. dealloc=bytes_dealloc,
  68. realize=bytes_realize)
# Type-check helpers for the "w_str" type, generated by build_type_checkers().
# NOTE(review): presumably PyString_Check also accepts subclasses while
# PyString_CheckExact does not -- confirm against build_type_checkers().
PyString_Check, PyString_CheckExact = build_type_checkers("String", "w_str")
  70. def new_empty_str(space, length):
  71. """
  72. Allocate a PyBytesObject and its ob_sval, but without a corresponding
  73. interpreter object. The ob_sval may be mutated, until bytes_realize() is
  74. called. Refcount of the result is 1.
  75. """
  76. typedescr = get_typedescr(space.w_str.layout.typedef)
  77. py_obj = typedescr.allocate(space, space.w_str, length)
  78. py_str = rffi.cast(PyBytesObject, py_obj)
  79. py_str.c_ob_shash = -1
  80. py_str.c_ob_sstate = rffi.cast(rffi.INT, 0) # SSTATE_NOT_INTERNED
  81. return py_str
  82. def bytes_attach(space, py_obj, w_obj):
  83. """
  84. Copy RPython string object contents to a PyBytesObject. The
  85. c_ob_sval must not be modified.
  86. """
  87. py_str = rffi.cast(PyBytesObject, py_obj)
  88. s = space.str_w(w_obj)
  89. if py_str.c_ob_size < len(s):
  90. raise oefmt(space.w_ValueError,
  91. "bytes_attach called on object with ob_size %d but trying to store %d",
  92. py_str.c_ob_size, len(s))
  93. with rffi.scoped_nonmovingbuffer(s) as s_ptr:
  94. rffi.c_memcpy(py_str.c_ob_sval, s_ptr, len(s))
  95. py_str.c_ob_sval[len(s)] = '\0'
  96. py_str.c_ob_shash = space.hash_w(w_obj)
  97. py_str.c_ob_sstate = rffi.cast(rffi.INT, 1) # SSTATE_INTERNED_MORTAL
  98. def bytes_realize(space, py_obj):
  99. """
  100. Creates the string in the interpreter. The PyBytesObject ob_sval must not
  101. be modified after this call.
  102. """
  103. py_str = rffi.cast(PyBytesObject, py_obj)
  104. s = rffi.charpsize2str(py_str.c_ob_sval, py_str.c_ob_size)
  105. w_type = from_ref(space, rffi.cast(PyObject, py_obj.c_ob_type))
  106. w_obj = space.allocate_instance(W_BytesObject, w_type)
  107. w_obj.__init__(s)
  108. py_str.c_ob_shash = space.hash_w(w_obj)
  109. py_str.c_ob_sstate = rffi.cast(rffi.INT, 1) # SSTATE_INTERNED_MORTAL
  110. track_reference(space, py_obj, w_obj)
  111. return w_obj
  112. @cpython_api([PyObject], lltype.Void, header=None)
  113. def bytes_dealloc(space, py_obj):
  114. """Frees allocated PyBytesObject resources.
  115. """
  116. from pypy.module.cpyext.object import _dealloc
  117. _dealloc(space, py_obj)
  118. #_______________________________________________________________________
  119. @cpython_api([CONST_STRING, Py_ssize_t], PyObject, result_is_ll=True)
  120. def PyString_FromStringAndSize(space, char_p, length):
  121. if char_p:
  122. s = rffi.charpsize2str(char_p, length)
  123. return make_ref(space, space.wrap(s))
  124. else:
  125. return rffi.cast(PyObject, new_empty_str(space, length))
  126. @cpython_api([CONST_STRING], PyObject)
  127. def PyString_FromString(space, char_p):
  128. s = rffi.charp2str(char_p)
  129. return space.wrap(s)
@cpython_api([PyObject], rffi.CCHARP, error=0)
def PyString_AsString(space, ref):
    """Return the ob_sval character buffer for ref.

    Thin C-API entry point: all the work is done by _PyString_AsString().
    """
    return _PyString_AsString(space, ref)
  133. def _PyString_AsString(space, ref):
  134. if from_ref(space, rffi.cast(PyObject, ref.c_ob_type)) is space.w_str:
  135. pass # typecheck returned "ok" without forcing 'ref' at all
  136. elif not PyString_Check(space, ref): # otherwise, use the alternate way
  137. from pypy.module.cpyext.unicodeobject import (
  138. PyUnicode_Check, _PyUnicode_AsDefaultEncodedString)
  139. if PyUnicode_Check(space, ref):
  140. ref = _PyUnicode_AsDefaultEncodedString(space, ref, lltype.nullptr(rffi.CCHARP.TO))
  141. else:
  142. raise oefmt(space.w_TypeError,
  143. "expected string or Unicode object, %T found",
  144. from_ref(space, ref))
  145. ref_str = rffi.cast(PyBytesObject, ref)
  146. if not pyobj_has_w_obj(ref):
  147. # XXX Force the ref?
  148. bytes_realize(space, ref)
  149. return ref_str.c_ob_sval
  150. @cpython_api([rffi.VOIDP], rffi.CCHARP, error=0)
  151. def PyString_AS_STRING(space, void_ref):
  152. ref = rffi.cast(PyObject, void_ref)
  153. # if no w_str is associated with this ref,
  154. # return the c-level ptr as RW
  155. if not pyobj_has_w_obj(ref):
  156. py_str = rffi.cast(PyBytesObject, ref)
  157. return py_str.c_ob_sval
  158. return _PyString_AsString(space, ref)
  159. @cpython_api([PyObject, rffi.CCHARPP, rffi.CArrayPtr(Py_ssize_t)], rffi.INT_real, error=-1)
  160. def PyString_AsStringAndSize(space, ref, data, length):
  161. if not PyString_Check(space, ref):
  162. from pypy.module.cpyext.unicodeobject import (
  163. PyUnicode_Check, _PyUnicode_AsDefaultEncodedString)
  164. if PyUnicode_Check(space, ref):
  165. ref = _PyUnicode_AsDefaultEncodedString(space, ref, lltype.nullptr(rffi.CCHARP.TO))
  166. else:
  167. raise oefmt(space.w_TypeError,
  168. "expected string or Unicode object, %T found",
  169. from_ref(space, ref))
  170. if not pyobj_has_w_obj(ref):
  171. # force the ref
  172. bytes_realize(space, ref)
  173. ref_str = rffi.cast(PyBytesObject, ref)
  174. data[0] = ref_str.c_ob_sval
  175. if length:
  176. length[0] = ref_str.c_ob_size
  177. else:
  178. i = 0
  179. while ref_str.c_ob_sval[i] != '\0':
  180. i += 1
  181. if i != ref_str.c_ob_size:
  182. raise oefmt(space.w_TypeError,
  183. "expected string without null bytes")
  184. return 0
  185. @cpython_api([PyObject], Py_ssize_t, error=-1)
  186. def PyString_Size(space, ref):
  187. if from_ref(space, rffi.cast(PyObject, ref.c_ob_type)) is space.w_str:
  188. ref = rffi.cast(PyBytesObject, ref)
  189. return ref.c_ob_size
  190. else:
  191. w_obj = from_ref(space, ref)
  192. return space.len_w(w_obj)
  193. @cpython_api([PyObjectP, Py_ssize_t], rffi.INT_real, error=-1)
  194. def _PyString_Resize(space, ref, newsize):
  195. """A way to resize a string object even though it is "immutable". Only use this to
  196. build up a brand new string object; don't use this if the string may already be
  197. known in other parts of the code. It is an error to call this function if the
  198. refcount on the input string object is not one. Pass the address of an existing
  199. string object as an lvalue (it may be written into), and the new size desired.
  200. On success, *string holds the resized string object and 0 is returned;
  201. the address in *string may differ from its input value. If the reallocation
  202. fails, the original string object at *string is deallocated, *string is
  203. set to NULL, a memory exception is set, and -1 is returned.
  204. """
  205. # XXX always create a new string so far
  206. if pyobj_has_w_obj(ref[0]):
  207. raise oefmt(space.w_SystemError,
  208. "_PyString_Resize called on already created string")
  209. py_str = rffi.cast(PyBytesObject, ref[0])
  210. try:
  211. py_newstr = new_empty_str(space, newsize)
  212. except MemoryError:
  213. Py_DecRef(space, ref[0])
  214. ref[0] = lltype.nullptr(PyObject.TO)
  215. raise
  216. to_cp = newsize
  217. oldsize = py_str.c_ob_size
  218. if oldsize < newsize:
  219. to_cp = oldsize
  220. for i in range(to_cp):
  221. py_newstr.c_ob_sval[i] = py_str.c_ob_sval[i]
  222. Py_DecRef(space, ref[0])
  223. ref[0] = rffi.cast(PyObject, py_newstr)
  224. return 0
@cpython_api([PyObject, PyObject], rffi.INT, error=CANNOT_FAIL)
def _PyString_Eq(space, w_str1, w_str2):
    """Return the result of space.eq_w(w_str1, w_str2)."""
    return space.eq_w(w_str1, w_str2)
  228. @cpython_api([PyObjectP, PyObject], lltype.Void, error=None)
  229. def PyString_Concat(space, ref, w_newpart):
  230. """Create a new string object in *string containing the contents of newpart
  231. appended to string; the caller will own the new reference. The reference to
  232. the old value of string will be stolen. If the new string cannot be created,
  233. the old reference to string will still be discarded and the value of
  234. *string will be set to NULL; the appropriate exception will be set."""
  235. old = ref[0]
  236. if not old:
  237. return
  238. ref[0] = lltype.nullptr(PyObject.TO)
  239. w_str = get_w_obj_and_decref(space, old)
  240. if w_newpart is not None and PyString_Check(space, old):
  241. # xxx if w_newpart is not a string or unicode or bytearray,
  242. # this might call __radd__() on it, whereas CPython raises
  243. # a TypeError in this case.
  244. w_newstr = space.add(w_str, w_newpart)
  245. ref[0] = make_ref(space, w_newstr)
@cpython_api([PyObjectP, PyObject], lltype.Void, error=None)
def PyString_ConcatAndDel(space, ref, newpart):
    """Create a new string object in *string containing the contents of newpart
    appended to string.  This version decrements the reference count of newpart.

    The concatenation itself is delegated to PyString_Concat()."""
    try:
        PyString_Concat(space, ref, newpart)
    finally:
        # Drop the reference to newpart whether or not the concat raised.
        Py_DecRef(space, newpart)
@cpython_api([PyObject, PyObject], PyObject)
def PyString_Format(space, w_format, w_args):
    """Return a new string object from format and args.  Analogous to format %
    args.  The args argument must be a tuple.

    Implemented by delegating to space.mod(w_format, w_args)."""
    return space.mod(w_format, w_args)
  259. @cpython_api([CONST_STRING], PyObject)
  260. def PyString_InternFromString(space, string):
  261. """A combination of PyString_FromString() and
  262. PyString_InternInPlace(), returning either a new string object that has
  263. been interned, or a new ("owned") reference to an earlier interned string
  264. object with the same value."""
  265. s = rffi.charp2str(string)
  266. return space.new_interned_str(s)
  267. @cpython_api([PyObjectP], lltype.Void)
  268. def PyString_InternInPlace(space, string):
  269. """Intern the argument *string in place. The argument must be the
  270. address of a pointer variable pointing to a Python string object.
  271. If there is an existing interned string that is the same as
  272. *string, it sets *string to it (decrementing the reference count
  273. of the old string object and incrementing the reference count of
  274. the interned string object), otherwise it leaves *string alone and
  275. interns it (incrementing its reference count). (Clarification:
  276. even though there is a lot of talk about reference counts, think
  277. of this function as reference-count-neutral; you own the object
  278. after the call if and only if you owned it before the call.)
  279. This function is not available in 3.x and does not have a PyBytes
  280. alias."""
  281. w_str = from_ref(space, string[0])
  282. w_str = space.new_interned_w_str(w_str)
  283. Py_DecRef(space, string[0])
  284. string[0] = make_ref(space, w_str)
  285. @cpython_api([PyObject, CONST_STRING, CONST_STRING], PyObject)
  286. def PyString_AsEncodedObject(space, w_str, encoding, errors):
  287. """Encode a string object using the codec registered for encoding and return
  288. the result as Python object. encoding and errors have the same meaning as
  289. the parameters of the same name in the string encode() method. The codec to
  290. be used is looked up using the Python codec registry. Return NULL if an
  291. exception was raised by the codec.
  292. This function is not available in 3.x and does not have a PyBytes alias."""
  293. if not PyString_Check(space, w_str):
  294. PyErr_BadArgument(space)
  295. w_encoding = w_errors = None
  296. if encoding:
  297. w_encoding = space.wrap(rffi.charp2str(encoding))
  298. if errors:
  299. w_errors = space.wrap(rffi.charp2str(errors))
  300. return space.call_method(w_str, 'encode', w_encoding, w_errors)
  301. @cpython_api([PyObject, CONST_STRING, CONST_STRING], PyObject)
  302. def PyString_AsDecodedObject(space, w_str, encoding, errors):
  303. """Decode a string object by passing it to the codec registered
  304. for encoding and return the result as Python object. encoding and
  305. errors have the same meaning as the parameters of the same name in
  306. the string encode() method. The codec to be used is looked up
  307. using the Python codec registry. Return NULL if an exception was
  308. raised by the codec.
  309. This function is not available in 3.x and does not have a PyBytes alias."""
  310. if not PyString_Check(space, w_str):
  311. PyErr_BadArgument(space)
  312. w_encoding = w_errors = None
  313. if encoding:
  314. w_encoding = space.wrap(rffi.charp2str(encoding))
  315. if errors:
  316. w_errors = space.wrap(rffi.charp2str(errors))
  317. return space.call_method(w_str, "decode", w_encoding, w_errors)
@cpython_api([PyObject, PyObject], PyObject)
def _PyString_Join(space, w_sep, w_seq):
    """Return the result of calling the 'join' method of w_sep on w_seq."""
    return space.call_method(w_sep, 'join', w_seq)