/Objects/bytearrayobject.c

http://unladen-swallow.googlecode.com/ · C · 3401 lines · 2820 code · 418 blank · 163 comment · 726 complexity · 2faea443ce28295f6ab8b550d9026b22 MD5 · raw file

Large files are truncated; click here to view the full file.

  1. /* PyBytes (bytearray) implementation */
  2. #define PY_SSIZE_T_CLEAN
  3. #include "Python.h"
  4. #include "structmember.h"
  5. #include "bytes_methods.h"
  6. static PyByteArrayObject *nullbytes = NULL;
  7. void
  8. PyByteArray_Fini(void)
  9. {
  10. Py_CLEAR(nullbytes);
  11. }
  12. int
  13. PyByteArray_Init(void)
  14. {
  15. nullbytes = PyObject_New(PyByteArrayObject, &PyByteArray_Type);
  16. if (nullbytes == NULL)
  17. return 0;
  18. nullbytes->ob_bytes = NULL;
  19. Py_SIZE(nullbytes) = nullbytes->ob_alloc = 0;
  20. nullbytes->ob_exports = 0;
  21. return 1;
  22. }
  23. /* end nullbytes support */
  24. /* Helpers */
  25. static int
  26. _getbytevalue(PyObject* arg, int *value)
  27. {
  28. long face_value;
  29. if (PyBytes_CheckExact(arg)) {
  30. if (Py_SIZE(arg) != 1) {
  31. PyErr_SetString(PyExc_ValueError, "string must be of size 1");
  32. return 0;
  33. }
  34. *value = Py_CHARMASK(((PyBytesObject*)arg)->ob_sval[0]);
  35. return 1;
  36. }
  37. else if (PyInt_Check(arg) || PyLong_Check(arg)) {
  38. face_value = PyLong_AsLong(arg);
  39. }
  40. else {
  41. PyObject *index = PyNumber_Index(arg);
  42. if (index == NULL) {
  43. PyErr_Format(PyExc_TypeError,
  44. "an integer or string of size 1 is required");
  45. return 0;
  46. }
  47. face_value = PyLong_AsLong(index);
  48. Py_DECREF(index);
  49. }
  50. if (face_value < 0 || face_value >= 256) {
  51. /* this includes the OverflowError in case the long is too large */
  52. PyErr_SetString(PyExc_ValueError, "byte must be in range(0, 256)");
  53. return 0;
  54. }
  55. *value = face_value;
  56. return 1;
  57. }
  58. static Py_ssize_t
  59. bytes_buffer_getreadbuf(PyByteArrayObject *self, Py_ssize_t index, const void **ptr)
  60. {
  61. if ( index != 0 ) {
  62. PyErr_SetString(PyExc_SystemError,
  63. "accessing non-existent bytes segment");
  64. return -1;
  65. }
  66. *ptr = (void *)self->ob_bytes;
  67. return Py_SIZE(self);
  68. }
  69. static Py_ssize_t
  70. bytes_buffer_getwritebuf(PyByteArrayObject *self, Py_ssize_t index, const void **ptr)
  71. {
  72. if ( index != 0 ) {
  73. PyErr_SetString(PyExc_SystemError,
  74. "accessing non-existent bytes segment");
  75. return -1;
  76. }
  77. *ptr = (void *)self->ob_bytes;
  78. return Py_SIZE(self);
  79. }
  80. static Py_ssize_t
  81. bytes_buffer_getsegcount(PyByteArrayObject *self, Py_ssize_t *lenp)
  82. {
  83. if ( lenp )
  84. *lenp = Py_SIZE(self);
  85. return 1;
  86. }
  87. static Py_ssize_t
  88. bytes_buffer_getcharbuf(PyByteArrayObject *self, Py_ssize_t index, const char **ptr)
  89. {
  90. if ( index != 0 ) {
  91. PyErr_SetString(PyExc_SystemError,
  92. "accessing non-existent bytes segment");
  93. return -1;
  94. }
  95. *ptr = self->ob_bytes;
  96. return Py_SIZE(self);
  97. }
  98. static int
  99. bytes_getbuffer(PyByteArrayObject *obj, Py_buffer *view, int flags)
  100. {
  101. int ret;
  102. void *ptr;
  103. if (view == NULL) {
  104. obj->ob_exports++;
  105. return 0;
  106. }
  107. if (obj->ob_bytes == NULL)
  108. ptr = "";
  109. else
  110. ptr = obj->ob_bytes;
  111. ret = PyBuffer_FillInfo(view, (PyObject*)obj, ptr, Py_SIZE(obj), 0, flags);
  112. if (ret >= 0) {
  113. obj->ob_exports++;
  114. }
  115. return ret;
  116. }
  117. static void
  118. bytes_releasebuffer(PyByteArrayObject *obj, Py_buffer *view)
  119. {
  120. obj->ob_exports--;
  121. }
  122. static Py_ssize_t
  123. _getbuffer(PyObject *obj, Py_buffer *view)
  124. {
  125. PyBufferProcs *buffer = Py_TYPE(obj)->tp_as_buffer;
  126. if (buffer == NULL || buffer->bf_getbuffer == NULL)
  127. {
  128. PyErr_Format(PyExc_TypeError,
  129. "Type %.100s doesn't support the buffer API",
  130. Py_TYPE(obj)->tp_name);
  131. return -1;
  132. }
  133. if (buffer->bf_getbuffer(obj, view, PyBUF_SIMPLE) < 0)
  134. return -1;
  135. return view->len;
  136. }
  137. static int
  138. _canresize(PyByteArrayObject *self)
  139. {
  140. if (self->ob_exports > 0) {
  141. PyErr_SetString(PyExc_BufferError,
  142. "Existing exports of data: object cannot be re-sized");
  143. return 0;
  144. }
  145. return 1;
  146. }
  147. /* Direct API functions */
  148. PyObject *
  149. PyByteArray_FromObject(PyObject *input)
  150. {
  151. return PyObject_CallFunctionObjArgs((PyObject *)&PyByteArray_Type,
  152. input, NULL);
  153. }
  154. PyObject *
  155. PyByteArray_FromStringAndSize(const char *bytes, Py_ssize_t size)
  156. {
  157. PyByteArrayObject *new;
  158. Py_ssize_t alloc;
  159. if (size < 0) {
  160. PyErr_SetString(PyExc_SystemError,
  161. "Negative size passed to PyByteArray_FromStringAndSize");
  162. return NULL;
  163. }
  164. new = PyObject_New(PyByteArrayObject, &PyByteArray_Type);
  165. if (new == NULL)
  166. return NULL;
  167. if (size == 0) {
  168. new->ob_bytes = NULL;
  169. alloc = 0;
  170. }
  171. else {
  172. alloc = size + 1;
  173. new->ob_bytes = PyMem_Malloc(alloc);
  174. if (new->ob_bytes == NULL) {
  175. Py_DECREF(new);
  176. return PyErr_NoMemory();
  177. }
  178. if (bytes != NULL)
  179. memcpy(new->ob_bytes, bytes, size);
  180. new->ob_bytes[size] = '\0'; /* Trailing null byte */
  181. }
  182. Py_SIZE(new) = size;
  183. new->ob_alloc = alloc;
  184. new->ob_exports = 0;
  185. return (PyObject *)new;
  186. }
  187. Py_ssize_t
  188. PyByteArray_Size(PyObject *self)
  189. {
  190. assert(self != NULL);
  191. assert(PyByteArray_Check(self));
  192. return PyByteArray_GET_SIZE(self);
  193. }
  194. char *
  195. PyByteArray_AsString(PyObject *self)
  196. {
  197. assert(self != NULL);
  198. assert(PyByteArray_Check(self));
  199. return PyByteArray_AS_STRING(self);
  200. }
  201. int
  202. PyByteArray_Resize(PyObject *self, Py_ssize_t size)
  203. {
  204. void *sval;
  205. Py_ssize_t alloc = ((PyByteArrayObject *)self)->ob_alloc;
  206. assert(self != NULL);
  207. assert(PyByteArray_Check(self));
  208. assert(size >= 0);
  209. if (size == Py_SIZE(self)) {
  210. return 0;
  211. }
  212. if (!_canresize((PyByteArrayObject *)self)) {
  213. return -1;
  214. }
  215. if (size < alloc / 2) {
  216. /* Major downsize; resize down to exact size */
  217. alloc = size + 1;
  218. }
  219. else if (size < alloc) {
  220. /* Within allocated size; quick exit */
  221. Py_SIZE(self) = size;
  222. ((PyByteArrayObject *)self)->ob_bytes[size] = '\0'; /* Trailing null */
  223. return 0;
  224. }
  225. else if (size <= alloc * 1.125) {
  226. /* Moderate upsize; overallocate similar to list_resize() */
  227. alloc = size + (size >> 3) + (size < 9 ? 3 : 6);
  228. }
  229. else {
  230. /* Major upsize; resize up to exact size */
  231. alloc = size + 1;
  232. }
  233. sval = PyMem_Realloc(((PyByteArrayObject *)self)->ob_bytes, alloc);
  234. if (sval == NULL) {
  235. PyErr_NoMemory();
  236. return -1;
  237. }
  238. ((PyByteArrayObject *)self)->ob_bytes = sval;
  239. Py_SIZE(self) = size;
  240. ((PyByteArrayObject *)self)->ob_alloc = alloc;
  241. ((PyByteArrayObject *)self)->ob_bytes[size] = '\0'; /* Trailing null byte */
  242. return 0;
  243. }
  244. PyObject *
  245. PyByteArray_Concat(PyObject *a, PyObject *b)
  246. {
  247. Py_ssize_t size;
  248. Py_buffer va, vb;
  249. PyByteArrayObject *result = NULL;
  250. va.len = -1;
  251. vb.len = -1;
  252. if (_getbuffer(a, &va) < 0 ||
  253. _getbuffer(b, &vb) < 0) {
  254. PyErr_Format(PyExc_TypeError, "can't concat %.100s to %.100s",
  255. Py_TYPE(a)->tp_name, Py_TYPE(b)->tp_name);
  256. goto done;
  257. }
  258. size = va.len + vb.len;
  259. if (size < 0) {
  260. return PyErr_NoMemory();
  261. goto done;
  262. }
  263. result = (PyByteArrayObject *) PyByteArray_FromStringAndSize(NULL, size);
  264. if (result != NULL) {
  265. memcpy(result->ob_bytes, va.buf, va.len);
  266. memcpy(result->ob_bytes + va.len, vb.buf, vb.len);
  267. }
  268. done:
  269. if (va.len != -1)
  270. PyBuffer_Release(&va);
  271. if (vb.len != -1)
  272. PyBuffer_Release(&vb);
  273. return (PyObject *)result;
  274. }
  275. /* Functions stuffed into the type object */
  276. static Py_ssize_t
  277. bytes_length(PyByteArrayObject *self)
  278. {
  279. return Py_SIZE(self);
  280. }
  281. static PyObject *
  282. bytes_iconcat(PyByteArrayObject *self, PyObject *other)
  283. {
  284. Py_ssize_t mysize;
  285. Py_ssize_t size;
  286. Py_buffer vo;
  287. if (_getbuffer(other, &vo) < 0) {
  288. PyErr_Format(PyExc_TypeError, "can't concat %.100s to %.100s",
  289. Py_TYPE(other)->tp_name, Py_TYPE(self)->tp_name);
  290. return NULL;
  291. }
  292. mysize = Py_SIZE(self);
  293. size = mysize + vo.len;
  294. if (size < 0) {
  295. PyBuffer_Release(&vo);
  296. return PyErr_NoMemory();
  297. }
  298. if (size < self->ob_alloc) {
  299. Py_SIZE(self) = size;
  300. self->ob_bytes[Py_SIZE(self)] = '\0'; /* Trailing null byte */
  301. }
  302. else if (PyByteArray_Resize((PyObject *)self, size) < 0) {
  303. PyBuffer_Release(&vo);
  304. return NULL;
  305. }
  306. memcpy(self->ob_bytes + mysize, vo.buf, vo.len);
  307. PyBuffer_Release(&vo);
  308. Py_INCREF(self);
  309. return (PyObject *)self;
  310. }
  311. static PyObject *
  312. bytes_repeat(PyByteArrayObject *self, Py_ssize_t count)
  313. {
  314. PyByteArrayObject *result;
  315. Py_ssize_t mysize;
  316. Py_ssize_t size;
  317. if (count < 0)
  318. count = 0;
  319. mysize = Py_SIZE(self);
  320. size = mysize * count;
  321. if (count != 0 && size / count != mysize)
  322. return PyErr_NoMemory();
  323. result = (PyByteArrayObject *)PyByteArray_FromStringAndSize(NULL, size);
  324. if (result != NULL && size != 0) {
  325. if (mysize == 1)
  326. memset(result->ob_bytes, self->ob_bytes[0], size);
  327. else {
  328. Py_ssize_t i;
  329. for (i = 0; i < count; i++)
  330. memcpy(result->ob_bytes + i*mysize, self->ob_bytes, mysize);
  331. }
  332. }
  333. return (PyObject *)result;
  334. }
  335. static PyObject *
  336. bytes_irepeat(PyByteArrayObject *self, Py_ssize_t count)
  337. {
  338. Py_ssize_t mysize;
  339. Py_ssize_t size;
  340. if (count < 0)
  341. count = 0;
  342. mysize = Py_SIZE(self);
  343. size = mysize * count;
  344. if (count != 0 && size / count != mysize)
  345. return PyErr_NoMemory();
  346. if (size < self->ob_alloc) {
  347. Py_SIZE(self) = size;
  348. self->ob_bytes[Py_SIZE(self)] = '\0'; /* Trailing null byte */
  349. }
  350. else if (PyByteArray_Resize((PyObject *)self, size) < 0)
  351. return NULL;
  352. if (mysize == 1)
  353. memset(self->ob_bytes, self->ob_bytes[0], size);
  354. else {
  355. Py_ssize_t i;
  356. for (i = 1; i < count; i++)
  357. memcpy(self->ob_bytes + i*mysize, self->ob_bytes, mysize);
  358. }
  359. Py_INCREF(self);
  360. return (PyObject *)self;
  361. }
  362. static PyObject *
  363. bytes_getitem(PyByteArrayObject *self, Py_ssize_t i)
  364. {
  365. if (i < 0)
  366. i += Py_SIZE(self);
  367. if (i < 0 || i >= Py_SIZE(self)) {
  368. PyErr_SetString(PyExc_IndexError, "bytearray index out of range");
  369. return NULL;
  370. }
  371. return PyInt_FromLong((unsigned char)(self->ob_bytes[i]));
  372. }
  373. static PyObject *
  374. bytes_subscript(PyByteArrayObject *self, PyObject *index)
  375. {
  376. if (PyIndex_Check(index)) {
  377. Py_ssize_t i = PyNumber_AsSsize_t(index, PyExc_IndexError);
  378. if (i == -1 && PyErr_Occurred())
  379. return NULL;
  380. if (i < 0)
  381. i += PyByteArray_GET_SIZE(self);
  382. if (i < 0 || i >= Py_SIZE(self)) {
  383. PyErr_SetString(PyExc_IndexError, "bytearray index out of range");
  384. return NULL;
  385. }
  386. return PyInt_FromLong((unsigned char)(self->ob_bytes[i]));
  387. }
  388. else if (PySlice_Check(index)) {
  389. Py_ssize_t start, stop, step, slicelength, cur, i;
  390. if (PySlice_GetIndicesEx((PySliceObject *)index,
  391. PyByteArray_GET_SIZE(self),
  392. &start, &stop, &step, &slicelength) < 0) {
  393. return NULL;
  394. }
  395. if (slicelength <= 0)
  396. return PyByteArray_FromStringAndSize("", 0);
  397. else if (step == 1) {
  398. return PyByteArray_FromStringAndSize(self->ob_bytes + start,
  399. slicelength);
  400. }
  401. else {
  402. char *source_buf = PyByteArray_AS_STRING(self);
  403. char *result_buf = (char *)PyMem_Malloc(slicelength);
  404. PyObject *result;
  405. if (result_buf == NULL)
  406. return PyErr_NoMemory();
  407. for (cur = start, i = 0; i < slicelength;
  408. cur += step, i++) {
  409. result_buf[i] = source_buf[cur];
  410. }
  411. result = PyByteArray_FromStringAndSize(result_buf, slicelength);
  412. PyMem_Free(result_buf);
  413. return result;
  414. }
  415. }
  416. else {
  417. PyErr_SetString(PyExc_TypeError, "bytearray indices must be integers");
  418. return NULL;
  419. }
  420. }
  421. static int
  422. bytes_setslice(PyByteArrayObject *self, Py_ssize_t lo, Py_ssize_t hi,
  423. PyObject *values)
  424. {
  425. Py_ssize_t avail, needed;
  426. void *bytes;
  427. Py_buffer vbytes;
  428. int res = 0;
  429. vbytes.len = -1;
  430. if (values == (PyObject *)self) {
  431. /* Make a copy and call this function recursively */
  432. int err;
  433. values = PyByteArray_FromObject(values);
  434. if (values == NULL)
  435. return -1;
  436. err = bytes_setslice(self, lo, hi, values);
  437. Py_DECREF(values);
  438. return err;
  439. }
  440. if (values == NULL) {
  441. /* del b[lo:hi] */
  442. bytes = NULL;
  443. needed = 0;
  444. }
  445. else {
  446. if (_getbuffer(values, &vbytes) < 0) {
  447. PyErr_Format(PyExc_TypeError,
  448. "can't set bytearray slice from %.100s",
  449. Py_TYPE(values)->tp_name);
  450. return -1;
  451. }
  452. needed = vbytes.len;
  453. bytes = vbytes.buf;
  454. }
  455. if (lo < 0)
  456. lo = 0;
  457. if (hi < lo)
  458. hi = lo;
  459. if (hi > Py_SIZE(self))
  460. hi = Py_SIZE(self);
  461. avail = hi - lo;
  462. if (avail < 0)
  463. lo = hi = avail = 0;
  464. if (avail != needed) {
  465. if (avail > needed) {
  466. if (!_canresize(self)) {
  467. res = -1;
  468. goto finish;
  469. }
  470. /*
  471. 0 lo hi old_size
  472. | |<----avail----->|<-----tomove------>|
  473. | |<-needed->|<-----tomove------>|
  474. 0 lo new_hi new_size
  475. */
  476. memmove(self->ob_bytes + lo + needed, self->ob_bytes + hi,
  477. Py_SIZE(self) - hi);
  478. }
  479. /* XXX(nnorwitz): need to verify this can't overflow! */
  480. if (PyByteArray_Resize((PyObject *)self,
  481. Py_SIZE(self) + needed - avail) < 0) {
  482. res = -1;
  483. goto finish;
  484. }
  485. if (avail < needed) {
  486. /*
  487. 0 lo hi old_size
  488. | |<-avail->|<-----tomove------>|
  489. | |<----needed---->|<-----tomove------>|
  490. 0 lo new_hi new_size
  491. */
  492. memmove(self->ob_bytes + lo + needed, self->ob_bytes + hi,
  493. Py_SIZE(self) - lo - needed);
  494. }
  495. }
  496. if (needed > 0)
  497. memcpy(self->ob_bytes + lo, bytes, needed);
  498. finish:
  499. if (vbytes.len != -1)
  500. PyBuffer_Release(&vbytes);
  501. return res;
  502. }
  503. static int
  504. bytes_setitem(PyByteArrayObject *self, Py_ssize_t i, PyObject *value)
  505. {
  506. int ival;
  507. if (i < 0)
  508. i += Py_SIZE(self);
  509. if (i < 0 || i >= Py_SIZE(self)) {
  510. PyErr_SetString(PyExc_IndexError, "bytearray index out of range");
  511. return -1;
  512. }
  513. if (value == NULL)
  514. return bytes_setslice(self, i, i+1, NULL);
  515. if (!_getbytevalue(value, &ival))
  516. return -1;
  517. self->ob_bytes[i] = ival;
  518. return 0;
  519. }
  520. static int
  521. bytes_ass_subscript(PyByteArrayObject *self, PyObject *index, PyObject *values)
  522. {
  523. Py_ssize_t start, stop, step, slicelen, needed;
  524. char *bytes;
  525. if (PyIndex_Check(index)) {
  526. Py_ssize_t i = PyNumber_AsSsize_t(index, PyExc_IndexError);
  527. if (i == -1 && PyErr_Occurred())
  528. return -1;
  529. if (i < 0)
  530. i += PyByteArray_GET_SIZE(self);
  531. if (i < 0 || i >= Py_SIZE(self)) {
  532. PyErr_SetString(PyExc_IndexError, "bytearray index out of range");
  533. return -1;
  534. }
  535. if (values == NULL) {
  536. /* Fall through to slice assignment */
  537. start = i;
  538. stop = i + 1;
  539. step = 1;
  540. slicelen = 1;
  541. }
  542. else {
  543. int ival;
  544. if (!_getbytevalue(values, &ival))
  545. return -1;
  546. self->ob_bytes[i] = (char)ival;
  547. return 0;
  548. }
  549. }
  550. else if (PySlice_Check(index)) {
  551. if (PySlice_GetIndicesEx((PySliceObject *)index,
  552. PyByteArray_GET_SIZE(self),
  553. &start, &stop, &step, &slicelen) < 0) {
  554. return -1;
  555. }
  556. }
  557. else {
  558. PyErr_SetString(PyExc_TypeError, "bytearray indices must be integer");
  559. return -1;
  560. }
  561. if (values == NULL) {
  562. bytes = NULL;
  563. needed = 0;
  564. }
  565. else if (values == (PyObject *)self || !PyByteArray_Check(values)) {
  566. /* Make a copy an call this function recursively */
  567. int err;
  568. values = PyByteArray_FromObject(values);
  569. if (values == NULL)
  570. return -1;
  571. err = bytes_ass_subscript(self, index, values);
  572. Py_DECREF(values);
  573. return err;
  574. }
  575. else {
  576. assert(PyByteArray_Check(values));
  577. bytes = ((PyByteArrayObject *)values)->ob_bytes;
  578. needed = Py_SIZE(values);
  579. }
  580. /* Make sure b[5:2] = ... inserts before 5, not before 2. */
  581. if ((step < 0 && start < stop) ||
  582. (step > 0 && start > stop))
  583. stop = start;
  584. if (step == 1) {
  585. if (slicelen != needed) {
  586. if (!_canresize(self))
  587. return -1;
  588. if (slicelen > needed) {
  589. /*
  590. 0 start stop old_size
  591. | |<---slicelen--->|<-----tomove------>|
  592. | |<-needed->|<-----tomove------>|
  593. 0 lo new_hi new_size
  594. */
  595. memmove(self->ob_bytes + start + needed, self->ob_bytes + stop,
  596. Py_SIZE(self) - stop);
  597. }
  598. if (PyByteArray_Resize((PyObject *)self,
  599. Py_SIZE(self) + needed - slicelen) < 0)
  600. return -1;
  601. if (slicelen < needed) {
  602. /*
  603. 0 lo hi old_size
  604. | |<-avail->|<-----tomove------>|
  605. | |<----needed---->|<-----tomove------>|
  606. 0 lo new_hi new_size
  607. */
  608. memmove(self->ob_bytes + start + needed, self->ob_bytes + stop,
  609. Py_SIZE(self) - start - needed);
  610. }
  611. }
  612. if (needed > 0)
  613. memcpy(self->ob_bytes + start, bytes, needed);
  614. return 0;
  615. }
  616. else {
  617. if (needed == 0) {
  618. /* Delete slice */
  619. Py_ssize_t cur, i;
  620. if (!_canresize(self))
  621. return -1;
  622. if (step < 0) {
  623. stop = start + 1;
  624. start = stop + step * (slicelen - 1) - 1;
  625. step = -step;
  626. }
  627. for (cur = start, i = 0;
  628. i < slicelen; cur += step, i++) {
  629. Py_ssize_t lim = step - 1;
  630. if (cur + step >= PyByteArray_GET_SIZE(self))
  631. lim = PyByteArray_GET_SIZE(self) - cur - 1;
  632. memmove(self->ob_bytes + cur - i,
  633. self->ob_bytes + cur + 1, lim);
  634. }
  635. /* Move the tail of the bytes, in one chunk */
  636. cur = start + slicelen*step;
  637. if (cur < PyByteArray_GET_SIZE(self)) {
  638. memmove(self->ob_bytes + cur - slicelen,
  639. self->ob_bytes + cur,
  640. PyByteArray_GET_SIZE(self) - cur);
  641. }
  642. if (PyByteArray_Resize((PyObject *)self,
  643. PyByteArray_GET_SIZE(self) - slicelen) < 0)
  644. return -1;
  645. return 0;
  646. }
  647. else {
  648. /* Assign slice */
  649. Py_ssize_t cur, i;
  650. if (needed != slicelen) {
  651. PyErr_Format(PyExc_ValueError,
  652. "attempt to assign bytes of size %zd "
  653. "to extended slice of size %zd",
  654. needed, slicelen);
  655. return -1;
  656. }
  657. for (cur = start, i = 0; i < slicelen; cur += step, i++)
  658. self->ob_bytes[cur] = bytes[i];
  659. return 0;
  660. }
  661. }
  662. }
  663. static int
  664. bytes_init(PyByteArrayObject *self, PyObject *args, PyObject *kwds)
  665. {
  666. static char *kwlist[] = {"source", "encoding", "errors", 0};
  667. PyObject *arg = NULL;
  668. const char *encoding = NULL;
  669. const char *errors = NULL;
  670. Py_ssize_t count;
  671. PyObject *it;
  672. PyObject *(*iternext)(PyObject *);
  673. if (Py_SIZE(self) != 0) {
  674. /* Empty previous contents (yes, do this first of all!) */
  675. if (PyByteArray_Resize((PyObject *)self, 0) < 0)
  676. return -1;
  677. }
  678. /* Parse arguments */
  679. if (!PyArg_ParseTupleAndKeywords(args, kwds, "|Oss:bytearray", kwlist,
  680. &arg, &encoding, &errors))
  681. return -1;
  682. /* Make a quick exit if no first argument */
  683. if (arg == NULL) {
  684. if (encoding != NULL || errors != NULL) {
  685. PyErr_SetString(PyExc_TypeError,
  686. "encoding or errors without sequence argument");
  687. return -1;
  688. }
  689. return 0;
  690. }
  691. if (PyBytes_Check(arg)) {
  692. PyObject *new, *encoded;
  693. if (encoding != NULL) {
  694. encoded = PyCodec_Encode(arg, encoding, errors);
  695. if (encoded == NULL)
  696. return -1;
  697. assert(PyBytes_Check(encoded));
  698. }
  699. else {
  700. encoded = arg;
  701. Py_INCREF(arg);
  702. }
  703. new = bytes_iconcat(self, arg);
  704. Py_DECREF(encoded);
  705. if (new == NULL)
  706. return -1;
  707. Py_DECREF(new);
  708. return 0;
  709. }
  710. if (PyUnicode_Check(arg)) {
  711. /* Encode via the codec registry */
  712. PyObject *encoded, *new;
  713. if (encoding == NULL) {
  714. PyErr_SetString(PyExc_TypeError,
  715. "unicode argument without an encoding");
  716. return -1;
  717. }
  718. encoded = PyCodec_Encode(arg, encoding, errors);
  719. if (encoded == NULL)
  720. return -1;
  721. assert(PyBytes_Check(encoded));
  722. new = bytes_iconcat(self, encoded);
  723. Py_DECREF(encoded);
  724. if (new == NULL)
  725. return -1;
  726. Py_DECREF(new);
  727. return 0;
  728. }
  729. /* If it's not unicode, there can't be encoding or errors */
  730. if (encoding != NULL || errors != NULL) {
  731. PyErr_SetString(PyExc_TypeError,
  732. "encoding or errors without a string argument");
  733. return -1;
  734. }
  735. /* Is it an int? */
  736. count = PyNumber_AsSsize_t(arg, PyExc_ValueError);
  737. if (count == -1 && PyErr_Occurred())
  738. PyErr_Clear();
  739. else {
  740. if (count < 0) {
  741. PyErr_SetString(PyExc_ValueError, "negative count");
  742. return -1;
  743. }
  744. if (count > 0) {
  745. if (PyByteArray_Resize((PyObject *)self, count))
  746. return -1;
  747. memset(self->ob_bytes, 0, count);
  748. }
  749. return 0;
  750. }
  751. /* Use the buffer API */
  752. if (PyObject_CheckBuffer(arg)) {
  753. Py_ssize_t size;
  754. Py_buffer view;
  755. if (PyObject_GetBuffer(arg, &view, PyBUF_FULL_RO) < 0)
  756. return -1;
  757. size = view.len;
  758. if (PyByteArray_Resize((PyObject *)self, size) < 0) goto fail;
  759. if (PyBuffer_ToContiguous(self->ob_bytes, &view, size, 'C') < 0)
  760. goto fail;
  761. PyBuffer_Release(&view);
  762. return 0;
  763. fail:
  764. PyBuffer_Release(&view);
  765. return -1;
  766. }
  767. /* XXX Optimize this if the arguments is a list, tuple */
  768. /* Get the iterator */
  769. it = PyObject_GetIter(arg);
  770. if (it == NULL)
  771. return -1;
  772. iternext = *Py_TYPE(it)->tp_iternext;
  773. /* Run the iterator to exhaustion */
  774. for (;;) {
  775. PyObject *item;
  776. int rc, value;
  777. /* Get the next item */
  778. item = iternext(it);
  779. if (item == NULL) {
  780. if (PyErr_Occurred()) {
  781. if (!PyErr_ExceptionMatches(PyExc_StopIteration))
  782. goto error;
  783. PyErr_Clear();
  784. }
  785. break;
  786. }
  787. /* Interpret it as an int (__index__) */
  788. rc = _getbytevalue(item, &value);
  789. Py_DECREF(item);
  790. if (!rc)
  791. goto error;
  792. /* Append the byte */
  793. if (Py_SIZE(self) < self->ob_alloc)
  794. Py_SIZE(self)++;
  795. else if (PyByteArray_Resize((PyObject *)self, Py_SIZE(self)+1) < 0)
  796. goto error;
  797. self->ob_bytes[Py_SIZE(self)-1] = value;
  798. }
  799. /* Clean up and return success */
  800. Py_DECREF(it);
  801. return 0;
  802. error:
  803. /* Error handling when it != NULL */
  804. Py_DECREF(it);
  805. return -1;
  806. }
  807. /* Mostly copied from string_repr, but without the
  808. "smart quote" functionality. */
  809. static PyObject *
  810. bytes_repr(PyByteArrayObject *self)
  811. {
  812. static const char *hexdigits = "0123456789abcdef";
  813. const char *quote_prefix = "bytearray(b";
  814. const char *quote_postfix = ")";
  815. Py_ssize_t length = Py_SIZE(self);
  816. /* 14 == strlen(quote_prefix) + 2 + strlen(quote_postfix) */
  817. size_t newsize = 14 + 4 * length;
  818. PyObject *v;
  819. if (newsize > PY_SSIZE_T_MAX || newsize / 4 - 3 != length) {
  820. PyErr_SetString(PyExc_OverflowError,
  821. "bytearray object is too large to make repr");
  822. return NULL;
  823. }
  824. v = PyUnicode_FromUnicode(NULL, newsize);
  825. if (v == NULL) {
  826. return NULL;
  827. }
  828. else {
  829. register Py_ssize_t i;
  830. register Py_UNICODE c;
  831. register Py_UNICODE *p;
  832. int quote;
  833. /* Figure out which quote to use; single is preferred */
  834. quote = '\'';
  835. {
  836. char *test, *start;
  837. start = PyByteArray_AS_STRING(self);
  838. for (test = start; test < start+length; ++test) {
  839. if (*test == '"') {
  840. quote = '\''; /* back to single */
  841. goto decided;
  842. }
  843. else if (*test == '\'')
  844. quote = '"';
  845. }
  846. decided:
  847. ;
  848. }
  849. p = PyUnicode_AS_UNICODE(v);
  850. while (*quote_prefix)
  851. *p++ = *quote_prefix++;
  852. *p++ = quote;
  853. for (i = 0; i < length; i++) {
  854. /* There's at least enough room for a hex escape
  855. and a closing quote. */
  856. assert(newsize - (p - PyUnicode_AS_UNICODE(v)) >= 5);
  857. c = self->ob_bytes[i];
  858. if (c == '\'' || c == '\\')
  859. *p++ = '\\', *p++ = c;
  860. else if (c == '\t')
  861. *p++ = '\\', *p++ = 't';
  862. else if (c == '\n')
  863. *p++ = '\\', *p++ = 'n';
  864. else if (c == '\r')
  865. *p++ = '\\', *p++ = 'r';
  866. else if (c == 0)
  867. *p++ = '\\', *p++ = 'x', *p++ = '0', *p++ = '0';
  868. else if (c < ' ' || c >= 0x7f) {
  869. *p++ = '\\';
  870. *p++ = 'x';
  871. *p++ = hexdigits[(c & 0xf0) >> 4];
  872. *p++ = hexdigits[c & 0xf];
  873. }
  874. else
  875. *p++ = c;
  876. }
  877. assert(newsize - (p - PyUnicode_AS_UNICODE(v)) >= 1);
  878. *p++ = quote;
  879. while (*quote_postfix) {
  880. *p++ = *quote_postfix++;
  881. }
  882. *p = '\0';
  883. if (PyUnicode_Resize(&v, (p - PyUnicode_AS_UNICODE(v)))) {
  884. Py_DECREF(v);
  885. return NULL;
  886. }
  887. return v;
  888. }
  889. }
  890. static PyObject *
  891. bytes_str(PyObject *op)
  892. {
  893. #if 0
  894. if (Py_BytesWarningFlag) {
  895. if (PyErr_WarnEx(PyExc_BytesWarning,
  896. "str() on a bytearray instance", 1))
  897. return NULL;
  898. }
  899. return bytes_repr((PyByteArrayObject*)op);
  900. #endif
  901. return PyBytes_FromStringAndSize(((PyByteArrayObject*)op)->ob_bytes, Py_SIZE(op));
  902. }
  903. static PyObject *
  904. bytes_richcompare(PyObject *self, PyObject *other, int op)
  905. {
  906. Py_ssize_t self_size, other_size;
  907. Py_buffer self_bytes, other_bytes;
  908. PyObject *res;
  909. Py_ssize_t minsize;
  910. int cmp;
  911. /* Bytes can be compared to anything that supports the (binary)
  912. buffer API. Except that a comparison with Unicode is always an
  913. error, even if the comparison is for equality. */
  914. if (PyObject_IsInstance(self, (PyObject*)&PyUnicode_Type) ||
  915. PyObject_IsInstance(other, (PyObject*)&PyUnicode_Type)) {
  916. if (Py_BytesWarningFlag && op == Py_EQ) {
  917. if (PyErr_WarnEx(PyExc_BytesWarning,
  918. "Comparsion between bytearray and string", 1))
  919. return NULL;
  920. }
  921. Py_INCREF(Py_NotImplemented);
  922. return Py_NotImplemented;
  923. }
  924. self_size = _getbuffer(self, &self_bytes);
  925. if (self_size < 0) {
  926. PyErr_Clear();
  927. Py_INCREF(Py_NotImplemented);
  928. return Py_NotImplemented;
  929. }
  930. other_size = _getbuffer(other, &other_bytes);
  931. if (other_size < 0) {
  932. PyErr_Clear();
  933. PyBuffer_Release(&self_bytes);
  934. Py_INCREF(Py_NotImplemented);
  935. return Py_NotImplemented;
  936. }
  937. if (self_size != other_size && (op == Py_EQ || op == Py_NE)) {
  938. /* Shortcut: if the lengths differ, the objects differ */
  939. cmp = (op == Py_NE);
  940. }
  941. else {
  942. minsize = self_size;
  943. if (other_size < minsize)
  944. minsize = other_size;
  945. cmp = memcmp(self_bytes.buf, other_bytes.buf, minsize);
  946. /* In ISO C, memcmp() guarantees to use unsigned bytes! */
  947. if (cmp == 0) {
  948. if (self_size < other_size)
  949. cmp = -1;
  950. else if (self_size > other_size)
  951. cmp = 1;
  952. }
  953. switch (op) {
  954. case Py_LT: cmp = cmp < 0; break;
  955. case Py_LE: cmp = cmp <= 0; break;
  956. case Py_EQ: cmp = cmp == 0; break;
  957. case Py_NE: cmp = cmp != 0; break;
  958. case Py_GT: cmp = cmp > 0; break;
  959. case Py_GE: cmp = cmp >= 0; break;
  960. }
  961. }
  962. res = cmp ? Py_True : Py_False;
  963. PyBuffer_Release(&self_bytes);
  964. PyBuffer_Release(&other_bytes);
  965. Py_INCREF(res);
  966. return res;
  967. }
  968. static void
  969. bytes_dealloc(PyByteArrayObject *self)
  970. {
  971. if (self->ob_exports > 0) {
  972. PyErr_SetString(PyExc_SystemError,
  973. "deallocated bytearray object has exported buffers");
  974. PyErr_Print();
  975. }
  976. if (self->ob_bytes != 0) {
  977. PyMem_Free(self->ob_bytes);
  978. }
  979. Py_TYPE(self)->tp_free((PyObject *)self);
  980. }
  981. /* -------------------------------------------------------------------- */
  982. /* Methods */
  983. #define STRINGLIB_CHAR char
  984. #define STRINGLIB_CMP memcmp
  985. #define STRINGLIB_LEN PyByteArray_GET_SIZE
  986. #define STRINGLIB_STR PyByteArray_AS_STRING
  987. #define STRINGLIB_NEW PyByteArray_FromStringAndSize
  988. #define STRINGLIB_EMPTY nullbytes
  989. #define STRINGLIB_CHECK_EXACT PyByteArray_CheckExact
  990. #define STRINGLIB_MUTABLE 1
  991. #define FROM_BYTEARRAY 1
  992. #include "stringlib/fastsearch.h"
  993. #include "stringlib/count.h"
  994. #include "stringlib/find.h"
  995. #include "stringlib/partition.h"
  996. #include "stringlib/ctype.h"
  997. #include "stringlib/transmogrify.h"
  998. /* The following Py_LOCAL_INLINE and Py_LOCAL functions
  999. were copied from the old char* style string object. */
  1000. Py_LOCAL_INLINE(void)
  1001. _adjust_indices(Py_ssize_t *start, Py_ssize_t *end, Py_ssize_t len)
  1002. {
  1003. if (*end > len)
  1004. *end = len;
  1005. else if (*end < 0)
  1006. *end += len;
  1007. if (*end < 0)
  1008. *end = 0;
  1009. if (*start < 0)
  1010. *start += len;
  1011. if (*start < 0)
  1012. *start = 0;
  1013. }
  1014. Py_LOCAL_INLINE(Py_ssize_t)
  1015. bytes_find_internal(PyByteArrayObject *self, PyObject *args, int dir)
  1016. {
  1017. PyObject *subobj;
  1018. Py_buffer subbuf;
  1019. Py_ssize_t start=0, end=PY_SSIZE_T_MAX;
  1020. Py_ssize_t res;
  1021. if (!PyArg_ParseTuple(args, "O|O&O&:find/rfind/index/rindex", &subobj,
  1022. _PyEval_SliceIndex, &start, _PyEval_SliceIndex, &end))
  1023. return -2;
  1024. if (_getbuffer(subobj, &subbuf) < 0)
  1025. return -2;
  1026. if (dir > 0)
  1027. res = stringlib_find_slice(
  1028. PyByteArray_AS_STRING(self), PyByteArray_GET_SIZE(self),
  1029. subbuf.buf, subbuf.len, start, end);
  1030. else
  1031. res = stringlib_rfind_slice(
  1032. PyByteArray_AS_STRING(self), PyByteArray_GET_SIZE(self),
  1033. subbuf.buf, subbuf.len, start, end);
  1034. PyBuffer_Release(&subbuf);
  1035. return res;
  1036. }
  1037. PyDoc_STRVAR(find__doc__,
  1038. "B.find(sub [,start [,end]]) -> int\n\
  1039. \n\
  1040. Return the lowest index in B where subsection sub is found,\n\
  1041. such that sub is contained within s[start,end]. Optional\n\
  1042. arguments start and end are interpreted as in slice notation.\n\
  1043. \n\
  1044. Return -1 on failure.");
  1045. static PyObject *
  1046. bytes_find(PyByteArrayObject *self, PyObject *args)
  1047. {
  1048. Py_ssize_t result = bytes_find_internal(self, args, +1);
  1049. if (result == -2)
  1050. return NULL;
  1051. return PyInt_FromSsize_t(result);
  1052. }
  1053. PyDoc_STRVAR(count__doc__,
  1054. "B.count(sub [,start [,end]]) -> int\n\
  1055. \n\
  1056. Return the number of non-overlapping occurrences of subsection sub in\n\
  1057. bytes B[start:end]. Optional arguments start and end are interpreted\n\
  1058. as in slice notation.");
  1059. static PyObject *
  1060. bytes_count(PyByteArrayObject *self, PyObject *args)
  1061. {
  1062. PyObject *sub_obj;
  1063. const char *str = PyByteArray_AS_STRING(self);
  1064. Py_ssize_t start = 0, end = PY_SSIZE_T_MAX;
  1065. Py_buffer vsub;
  1066. PyObject *count_obj;
  1067. if (!PyArg_ParseTuple(args, "O|O&O&:count", &sub_obj,
  1068. _PyEval_SliceIndex, &start, _PyEval_SliceIndex, &end))
  1069. return NULL;
  1070. if (_getbuffer(sub_obj, &vsub) < 0)
  1071. return NULL;
  1072. _adjust_indices(&start, &end, PyByteArray_GET_SIZE(self));
  1073. count_obj = PyInt_FromSsize_t(
  1074. stringlib_count(str + start, end - start, vsub.buf, vsub.len)
  1075. );
  1076. PyBuffer_Release(&vsub);
  1077. return count_obj;
  1078. }
  1079. PyDoc_STRVAR(index__doc__,
  1080. "B.index(sub [,start [,end]]) -> int\n\
  1081. \n\
  1082. Like B.find() but raise ValueError when the subsection is not found.");
  1083. static PyObject *
  1084. bytes_index(PyByteArrayObject *self, PyObject *args)
  1085. {
  1086. Py_ssize_t result = bytes_find_internal(self, args, +1);
  1087. if (result == -2)
  1088. return NULL;
  1089. if (result == -1) {
  1090. PyErr_SetString(PyExc_ValueError,
  1091. "subsection not found");
  1092. return NULL;
  1093. }
  1094. return PyInt_FromSsize_t(result);
  1095. }
  1096. PyDoc_STRVAR(rfind__doc__,
  1097. "B.rfind(sub [,start [,end]]) -> int\n\
  1098. \n\
  1099. Return the highest index in B where subsection sub is found,\n\
  1100. such that sub is contained within s[start,end]. Optional\n\
  1101. arguments start and end are interpreted as in slice notation.\n\
  1102. \n\
  1103. Return -1 on failure.");
  1104. static PyObject *
  1105. bytes_rfind(PyByteArrayObject *self, PyObject *args)
  1106. {
  1107. Py_ssize_t result = bytes_find_internal(self, args, -1);
  1108. if (result == -2)
  1109. return NULL;
  1110. return PyInt_FromSsize_t(result);
  1111. }
  1112. PyDoc_STRVAR(rindex__doc__,
  1113. "B.rindex(sub [,start [,end]]) -> int\n\
  1114. \n\
  1115. Like B.rfind() but raise ValueError when the subsection is not found.");
  1116. static PyObject *
  1117. bytes_rindex(PyByteArrayObject *self, PyObject *args)
  1118. {
  1119. Py_ssize_t result = bytes_find_internal(self, args, -1);
  1120. if (result == -2)
  1121. return NULL;
  1122. if (result == -1) {
  1123. PyErr_SetString(PyExc_ValueError,
  1124. "subsection not found");
  1125. return NULL;
  1126. }
  1127. return PyInt_FromSsize_t(result);
  1128. }
  1129. static int
  1130. bytes_contains(PyObject *self, PyObject *arg)
  1131. {
  1132. Py_ssize_t ival = PyNumber_AsSsize_t(arg, PyExc_ValueError);
  1133. if (ival == -1 && PyErr_Occurred()) {
  1134. Py_buffer varg;
  1135. int pos;
  1136. PyErr_Clear();
  1137. if (_getbuffer(arg, &varg) < 0)
  1138. return -1;
  1139. pos = stringlib_find(PyByteArray_AS_STRING(self), Py_SIZE(self),
  1140. varg.buf, varg.len, 0);
  1141. PyBuffer_Release(&varg);
  1142. return pos >= 0;
  1143. }
  1144. if (ival < 0 || ival >= 256) {
  1145. PyErr_SetString(PyExc_ValueError, "byte must be in range(0, 256)");
  1146. return -1;
  1147. }
  1148. return memchr(PyByteArray_AS_STRING(self), ival, Py_SIZE(self)) != NULL;
  1149. }
  1150. /* Matches the end (direction >= 0) or start (direction < 0) of self
  1151. * against substr, using the start and end arguments. Returns
  1152. * -1 on error, 0 if not found and 1 if found.
  1153. */
  1154. Py_LOCAL(int)
  1155. _bytes_tailmatch(PyByteArrayObject *self, PyObject *substr, Py_ssize_t start,
  1156. Py_ssize_t end, int direction)
  1157. {
  1158. Py_ssize_t len = PyByteArray_GET_SIZE(self);
  1159. const char* str;
  1160. Py_buffer vsubstr;
  1161. int rv = 0;
  1162. str = PyByteArray_AS_STRING(self);
  1163. if (_getbuffer(substr, &vsubstr) < 0)
  1164. return -1;
  1165. _adjust_indices(&start, &end, len);
  1166. if (direction < 0) {
  1167. /* startswith */
  1168. if (start+vsubstr.len > len) {
  1169. goto done;
  1170. }
  1171. } else {
  1172. /* endswith */
  1173. if (end-start < vsubstr.len || start > len) {
  1174. goto done;
  1175. }
  1176. if (end-vsubstr.len > start)
  1177. start = end - vsubstr.len;
  1178. }
  1179. if (end-start >= vsubstr.len)
  1180. rv = ! memcmp(str+start, vsubstr.buf, vsubstr.len);
  1181. done:
  1182. PyBuffer_Release(&vsubstr);
  1183. return rv;
  1184. }
  1185. PyDoc_STRVAR(startswith__doc__,
  1186. "B.startswith(prefix [,start [,end]]) -> bool\n\
  1187. \n\
  1188. Return True if B starts with the specified prefix, False otherwise.\n\
  1189. With optional start, test B beginning at that position.\n\
  1190. With optional end, stop comparing B at that position.\n\
  1191. prefix can also be a tuple of strings to try.");
  1192. static PyObject *
  1193. bytes_startswith(PyByteArrayObject *self, PyObject *args)
  1194. {
  1195. Py_ssize_t start = 0;
  1196. Py_ssize_t end = PY_SSIZE_T_MAX;
  1197. PyObject *subobj;
  1198. int result;
  1199. if (!PyArg_ParseTuple(args, "O|O&O&:startswith", &subobj,
  1200. _PyEval_SliceIndex, &start, _PyEval_SliceIndex, &end))
  1201. return NULL;
  1202. if (PyTuple_Check(subobj)) {
  1203. Py_ssize_t i;
  1204. for (i = 0; i < PyTuple_GET_SIZE(subobj); i++) {
  1205. result = _bytes_tailmatch(self,
  1206. PyTuple_GET_ITEM(subobj, i),
  1207. start, end, -1);
  1208. if (result == -1)
  1209. return NULL;
  1210. else if (result) {
  1211. Py_RETURN_TRUE;
  1212. }
  1213. }
  1214. Py_RETURN_FALSE;
  1215. }
  1216. result = _bytes_tailmatch(self, subobj, start, end, -1);
  1217. if (result == -1)
  1218. return NULL;
  1219. else
  1220. return PyBool_FromLong(result);
  1221. }
  1222. PyDoc_STRVAR(endswith__doc__,
  1223. "B.endswith(suffix [,start [,end]]) -> bool\n\
  1224. \n\
  1225. Return True if B ends with the specified suffix, False otherwise.\n\
  1226. With optional start, test B beginning at that position.\n\
  1227. With optional end, stop comparing B at that position.\n\
  1228. suffix can also be a tuple of strings to try.");
  1229. static PyObject *
  1230. bytes_endswith(PyByteArrayObject *self, PyObject *args)
  1231. {
  1232. Py_ssize_t start = 0;
  1233. Py_ssize_t end = PY_SSIZE_T_MAX;
  1234. PyObject *subobj;
  1235. int result;
  1236. if (!PyArg_ParseTuple(args, "O|O&O&:endswith", &subobj,
  1237. _PyEval_SliceIndex, &start, _PyEval_SliceIndex, &end))
  1238. return NULL;
  1239. if (PyTuple_Check(subobj)) {
  1240. Py_ssize_t i;
  1241. for (i = 0; i < PyTuple_GET_SIZE(subobj); i++) {
  1242. result = _bytes_tailmatch(self,
  1243. PyTuple_GET_ITEM(subobj, i),
  1244. start, end, +1);
  1245. if (result == -1)
  1246. return NULL;
  1247. else if (result) {
  1248. Py_RETURN_TRUE;
  1249. }
  1250. }
  1251. Py_RETURN_FALSE;
  1252. }
  1253. result = _bytes_tailmatch(self, subobj, start, end, +1);
  1254. if (result == -1)
  1255. return NULL;
  1256. else
  1257. return PyBool_FromLong(result);
  1258. }
  1259. PyDoc_STRVAR(translate__doc__,
  1260. "B.translate(table[, deletechars]) -> bytearray\n\
  1261. \n\
  1262. Return a copy of B, where all characters occurring in the\n\
  1263. optional argument deletechars are removed, and the remaining\n\
  1264. characters have been mapped through the given translation\n\
  1265. table, which must be a bytes object of length 256.");
  1266. static PyObject *
  1267. bytes_translate(PyByteArrayObject *self, PyObject *args)
  1268. {
  1269. register char *input, *output;
  1270. register const char *table;
  1271. register Py_ssize_t i, c;
  1272. PyObject *input_obj = (PyObject*)self;
  1273. const char *output_start;
  1274. Py_ssize_t inlen;
  1275. PyObject *result;
  1276. int trans_table[256];
  1277. PyObject *tableobj, *delobj = NULL;
  1278. Py_buffer vtable, vdel;
  1279. if (!PyArg_UnpackTuple(args, "translate", 1, 2,
  1280. &tableobj, &delobj))
  1281. return NULL;
  1282. if (_getbuffer(tableobj, &vtable) < 0)
  1283. return NULL;
  1284. if (vtable.len != 256) {
  1285. PyErr_SetString(PyExc_ValueError,
  1286. "translation table must be 256 characters long");
  1287. PyBuffer_Release(&vtable);
  1288. return NULL;
  1289. }
  1290. if (delobj != NULL) {
  1291. if (_getbuffer(delobj, &vdel) < 0) {
  1292. PyBuffer_Release(&vtable);
  1293. return NULL;
  1294. }
  1295. }
  1296. else {
  1297. vdel.buf = NULL;
  1298. vdel.len = 0;
  1299. }
  1300. table = (const char *)vtable.buf;
  1301. inlen = PyByteArray_GET_SIZE(input_obj);
  1302. result = PyByteArray_FromStringAndSize((char *)NULL, inlen);
  1303. if (result == NULL)
  1304. goto done;
  1305. output_start = output = PyByteArray_AsString(result);
  1306. input = PyByteArray_AS_STRING(input_obj);
  1307. if (vdel.len == 0) {
  1308. /* If no deletions are required, use faster code */
  1309. for (i = inlen; --i >= 0; ) {
  1310. c = Py_CHARMASK(*input++);
  1311. *output++ = table[c];
  1312. }
  1313. goto done;
  1314. }
  1315. for (i = 0; i < 256; i++)
  1316. trans_table[i] = Py_CHARMASK(table[i]);
  1317. for (i = 0; i < vdel.len; i++)
  1318. trans_table[(int) Py_CHARMASK( ((unsigned char*)vdel.buf)[i] )] = -1;
  1319. for (i = inlen; --i >= 0; ) {
  1320. c = Py_CHARMASK(*input++);
  1321. if (trans_table[c] != -1)
  1322. if (Py_CHARMASK(*output++ = (char)trans_table[c]) == c)
  1323. continue;
  1324. }
  1325. /* Fix the size of the resulting string */
  1326. if (inlen > 0)
  1327. PyByteArray_Resize(result, output - output_start);
  1328. done:
  1329. PyBuffer_Release(&vtable);
  1330. if (delobj != NULL)
  1331. PyBuffer_Release(&vdel);
  1332. return result;
  1333. }
  1334. #define FORWARD 1
  1335. #define REVERSE -1
  1336. /* find and count characters and substrings */
  1337. #define findchar(target, target_len, c) \
  1338. ((char *)memchr((const void *)(target), c, target_len))
  1339. /* Don't call if length < 2 */
  1340. #define Py_STRING_MATCH(target, offset, pattern, length) \
  1341. (target[offset] == pattern[0] && \
  1342. target[offset+length-1] == pattern[length-1] && \
  1343. !memcmp(target+offset+1, pattern+1, length-2) )
  1344. /* Bytes ops must return a string, create a copy */
  1345. Py_LOCAL(PyByteArrayObject *)
  1346. return_self(PyByteArrayObject *self)
  1347. {
  1348. return (PyByteArrayObject *)PyByteArray_FromStringAndSize(
  1349. PyByteArray_AS_STRING(self),
  1350. PyByteArray_GET_SIZE(self));
  1351. }
  1352. Py_LOCAL_INLINE(Py_ssize_t)
  1353. countchar(const char *target, Py_ssize_t target_len, char c, Py_ssize_t maxcount)
  1354. {
  1355. Py_ssize_t count=0;
  1356. const char *start=target;
  1357. const char *end=target+target_len;
  1358. while ( (start=findchar(start, end-start, c)) != NULL ) {
  1359. count++;
  1360. if (count >= maxcount)
  1361. break;
  1362. start += 1;
  1363. }
  1364. return count;
  1365. }
  1366. Py_LOCAL(Py_ssize_t)
  1367. findstring(const char *target, Py_ssize_t target_len,
  1368. const char *pattern, Py_ssize_t pattern_len,
  1369. Py_ssize_t start,
  1370. Py_ssize_t end,
  1371. int direction)
  1372. {
  1373. if (start < 0) {
  1374. start += target_len;
  1375. if (start < 0)
  1376. start = 0;
  1377. }
  1378. if (end > target_len) {
  1379. end = target_len;
  1380. } else if (end < 0) {
  1381. end += target_len;
  1382. if (end < 0)
  1383. end = 0;
  1384. }
  1385. /* zero-length substrings always match at the first attempt */
  1386. if (pattern_len == 0)
  1387. return (direction > 0) ? start : end;
  1388. end -= pattern_len;
  1389. if (direction < 0) {
  1390. for (; end >= start; end--)
  1391. if (Py_STRING_MATCH(target, end, pattern, pattern_len))
  1392. return end;
  1393. } else {
  1394. for (; start <= end; start++)
  1395. if (Py_STRING_MATCH(target, start, pattern, pattern_len))
  1396. return start;
  1397. }
  1398. return -1;
  1399. }
  1400. Py_LOCAL_INLINE(Py_ssize_t)
  1401. countstring(const char *target, Py_ssize_t target_len,
  1402. const char *pattern, Py_ssize_t pattern_len,
  1403. Py_ssize_t start,
  1404. Py_ssize_t end,
  1405. int direction, Py_ssize_t maxcount)
  1406. {
  1407. Py_ssize_t count=0;
  1408. if (start < 0) {
  1409. start += target_len;
  1410. if (start < 0)
  1411. start = 0;
  1412. }
  1413. if (end > target_len) {
  1414. end = target_len;
  1415. } else if (end < 0) {
  1416. end += target_len;
  1417. if (end < 0)
  1418. end = 0;
  1419. }
  1420. /* zero-length substrings match everywhere */
  1421. if (pattern_len == 0 || maxcount == 0) {
  1422. if (target_len+1 < maxcount)
  1423. return target_len+1;
  1424. return maxcount;
  1425. }
  1426. end -= pattern_len;
  1427. if (direction < 0) {
  1428. for (; (end >= start); end--)
  1429. if (Py_STRING_MATCH(target, end, pattern, pattern_len)) {
  1430. count++;
  1431. if (--maxcount <= 0) break;
  1432. end -= pattern_len-1;
  1433. }
  1434. } else {
  1435. for (; (start <= end); start++)
  1436. if (Py_STRING_MATCH(target, start, pattern, pattern_len)) {
  1437. count++;
  1438. if (--maxcount <= 0)
  1439. break;
  1440. start += pattern_len-1;
  1441. }
  1442. }
  1443. return count;
  1444. }
  1445. /* Algorithms for different cases of string replacement */
  1446. /* len(self)>=1, from="", len(to)>=1, maxcount>=1 */
  1447. Py_LOCAL(PyByteArrayObject *)
  1448. replace_interleave(PyByteArrayObject *self,
  1449. const char *to_s, Py_ssize_t to_len,
  1450. Py_ssize_t maxcount)
  1451. {
  1452. char *self_s, *result_s;
  1453. Py_ssize_t self_len, result_len;
  1454. Py_ssize_t count, i, product;
  1455. PyByteArrayObject *result;
  1456. self_len = PyByteArray_GET_SIZE(self);
  1457. /* 1 at the end plus 1 after every character */
  1458. count = self_len+1;
  1459. if (maxcount < count)
  1460. count = maxcount;
  1461. /* Check for overflow */
  1462. /* result_len = count * to_len + self_len; */
  1463. product = count * to_len;
  1464. if (product / to_len != count) {
  1465. PyErr_SetString(PyExc_OverflowError,
  1466. "replace string is too long");
  1467. return NULL;
  1468. }
  1469. result_len = product + self_len;
  1470. if (result_len < 0) {
  1471. PyErr_SetString(PyExc_OverflowError,
  1472. "replace string is too long");
  1473. return NULL;
  1474. }
  1475. if (! (result = (PyByteArrayObject *)
  1476. PyByteArray_FromStringAndSize(NULL, result_len)) )
  1477. return NULL;
  1478. self_s = PyByteArray_AS_STRING(self);
  1479. result_s = PyByteArray_AS_STRING(result);
  1480. /* TODO: special case single character, which doesn't need memcpy */
  1481. /* Lay the first one down (guaranteed this will occur) */
  1482. Py_MEMCPY(result_s, to_s, to_len);
  1483. result_s += to_len;
  1484. count -= 1;
  1485. for (i=0; i<count; i++) {
  1486. *result_s++ = *self_s++;
  1487. Py_MEMCPY(result_s, to_s, to_len);
  1488. result_s += to_len;
  1489. }
  1490. /* Copy the rest of the original string */
  1491. Py_MEMCPY(result_s, self_s, self_len-i);
  1492. return result;
  1493. }
  1494. /* Special case for deleting a single character */
  1495. /* len(self)>=1, len(from)==1, to="", maxcount>=1 */
  1496. Py_LOCAL(PyByteArrayObject *)
  1497. replace_delete_single_character(PyByteArrayObject *self,
  1498. char from_c, Py_ssize_t maxcount)
  1499. {
  1500. char *self_s, *result_s;
  1501. char *start, *next, *end;
  1502. Py_ssize_t self_len, result_len;
  1503. Py_ssize_t count;
  1504. PyByteArrayObject *result;
  1505. self_len = PyByteArray_GET_SIZE(self);
  1506. self_s = PyByteArray_AS_STRING(self);
  1507. count = countchar(self_s, self_len, from_c, maxcount);
  1508. if (count == 0) {
  1509. return return_self(self);
  1510. }
  1511. result_len = self_len - count; /* from_len == 1 */
  1512. assert(result_len>=0);
  1513. if ( (result = (PyByteArrayObject *)
  1514. PyByteArray_FromStringAndSize(NULL, result_len)) == NULL)
  1515. return NULL;
  1516. result_s = PyByteArray_AS_STRING(result);
  1517. start = self_s;
  1518. end = self_s + self_len;
  1519. while (count-- > 0) {
  1520. next = findchar(start, end-start, from_c);
  1521. if (next == NULL)
  1522. break;
  1523. Py_MEMCPY(result_s, start, next-start);
  1524. result_s += (next-start);
  1525. start = next+1;
  1526. }
  1527. Py_MEMCPY(result_s, start, end-start);
  1528. return result;
  1529. }
  1530. /* len(self)>=1, len(from)>=2, to="", maxcount>=1 */
  1531. Py_LOCAL(PyByteArrayObject *)
  1532. replace_delete_substring(PyByteArrayObject *self,
  1533. const char *from_s, Py_ssize_t from_len,
  1534. Py_ssize_t maxcount)
  1535. {
  1536. char *self_s, *result_s;
  1537. char *start, *next, *end;
  1538. Py_ssize_t self_len, result_len;
  1539. Py_ssize_t count, offset;
  1540. PyByteArrayObject *result;
  1541. self_len = PyByteArray_GET_SIZE(self);
  1542. self_s = PyByteArray_AS_STRING(self);
  1543. count = countstring(self_s, self_len,
  1544. from_s, from_len,
  1545. 0, self_len, 1,
  1546. maxcount);
  1547. if (count == 0) {
  1548. /* no matches */
  1549. return return_self(self);
  1550. }
  1551. result_len = self_len - (count * from_len);
  1552. assert (result_len>=0);
  1553. if ( (result = (PyByteArrayObject *)
  1554. PyByteArray_FromStringAndSize(NULL, result_len)) == NULL )
  1555. return NULL;
  1556. result_s = PyByteArray_AS_STRING(result);
  1557. start = self_s;
  1558. end = self_s + self_len;
  1559. while (count-- > 0) {
  1560. offset = findstring(start, end-start,
  1561. from_s, from_len,
  1562. 0, end-start, FORWARD);
  1563. if (offset == -1)
  1564. break;
  1565. next = start + offset;
  1566. Py_MEMCPY(result_s, start, next-start);
  1567. r