PageRenderTime 49ms CodeModel.GetById 16ms RepoModel.GetById 0ms app.codeStats 0ms

/tags/release-0.1-rc2/hive/external/service/lib/py/thrift/protocol/fastbinary.c

#
C | 1171 lines | 856 code | 223 blank | 92 comment | 178 complexity | 9301a1fc32150646407bd114fb0c6776 MD5 | raw file
Possible License(s): Apache-2.0, BSD-3-Clause, JSON, CPL-1.0
  1. // Copyright (c) 2006- Facebook
  2. // Distributed under the Thrift Software License
  3. //
  4. // See accompanying file LICENSE or visit the Thrift site at:
  5. // http://developers.facebook.com/thrift/
  6. //
  7. // NOTE: This code was contributed by an external developer.
  8. // The internal Thrift team has reviewed and tested it,
  9. // but we cannot guarantee that it is production-ready.
  10. // Please feel free to report bugs and/or success stories
  11. // to the public mailing list.
  12. #include <Python.h>
  13. #include "cStringIO.h"
  14. #include <stdbool.h>
  15. #include <stdint.h>
  16. #include <netinet/in.h>
  17. // TODO(dreiss): defval appears to be unused. Look into removing it.
  18. // TODO(dreiss): Make parse_spec_args recursive, and cache the output
  19. // permanently in the object. (Malloc and orphan.)
  20. // TODO(dreiss): Why do we need cStringIO for reading, why not just char*?
  21. // Can cStringIO let us work with a BufferedTransport?
  22. // TODO(dreiss): Don't ignore the rv from cwrite (maybe).
  23. /* ====== BEGIN UTILITIES ====== */
  24. #define INIT_OUTBUF_SIZE 128
  25. // Stolen out of TProtocol.h.
  26. // It would be a huge pain to have both get this from one place.
/*
 * Thrift wire type codes. The encoder writes these values into the output
 * as a single byte (see the writeByte calls on type tags), so changing a
 * number changes the bytes produced.
 */
typedef enum TType {
  T_STOP = 0,    // written as the terminator after a struct's fields
  T_VOID = 1,
  T_BOOL = 2,
  T_BYTE = 3,
  T_I08 = 3,     // same value as T_BYTE
  T_I16 = 6,
  T_I32 = 8,
  T_U64 = 9,
  T_I64 = 10,
  T_DOUBLE = 4,
  T_STRING = 11,
  T_UTF7 = 11,   // same value as T_STRING
  T_STRUCT = 12,
  T_MAP = 13,
  T_SET = 14,
  T_LIST = 15,
  T_UTF8 = 16,
  T_UTF16 = 17
} TType;
  47. #ifndef __BYTE_ORDER
  48. # if defined(BYTE_ORDER) && defined(LITTLE_ENDIAN) && defined(BIG_ENDIAN)
  49. # define __BYTE_ORDER BYTE_ORDER
  50. # define __LITTLE_ENDIAN LITTLE_ENDIAN
  51. # define __BIG_ENDIAN BIG_ENDIAN
  52. # else
  53. # error "Cannot determine endianness"
  54. # endif
  55. #endif
  56. // Same comment as the enum. Sorry.
  57. #if __BYTE_ORDER == __BIG_ENDIAN
  58. # define ntohll(n) (n)
  59. # define htonll(n) (n)
  60. #elif __BYTE_ORDER == __LITTLE_ENDIAN
  61. # if defined(__GNUC__) && defined(__GLIBC__)
  62. # include <byteswap.h>
  63. # define ntohll(n) bswap_64(n)
  64. # define htonll(n) bswap_64(n)
  65. # else /* GNUC & GLIBC */
  66. # define ntohll(n) ( (((unsigned long long)ntohl(n)) << 32) + ntohl(n >> 32) )
  67. # define htonll(n) ( (((unsigned long long)htonl(n)) << 32) + htonl(n >> 32) )
  68. # endif /* GNUC & GLIBC */
  69. #else /* __BYTE_ORDER */
  70. # error "Can't define htonll or ntohll!"
  71. #endif
// Doing a benchmark shows that interning actually makes a difference, amazingly.
// INTERN_STRING(x) expands to the identifier _intern_x, the file-scope
// variable that holds the interned Python string for attribute name x.
#define INTERN_STRING(value) _intern_ ## value
// True only when v is the -1 sentinel AND a Python exception is pending;
// a -1 with no pending exception is treated as an ordinary value.
#define INT_CONV_ERROR_OCCURRED(v) ( ((v) == -1) && PyErr_Occurred() )
// True when min <= v <= max. Note that v is evaluated twice.
#define CHECK_RANGE(v, min, max) ( ((v) <= (max)) && ((v) >= (min)) )
// Py_ssize_t was not defined before Python 2.5
#if (PY_VERSION_HEX < 0x02050000)
typedef int Py_ssize_t;
#endif
/**
 * A cache of the spec_args for a set or list,
 * so we don't have to keep calling PyTuple_GET_ITEM.
 *
 * Filled in by parse_set_list_args() from a 2-tuple. The PyObject pointer is
 * copied straight out of the tuple without taking a new reference.
 */
typedef struct {
  TType element_type;  // tuple item 0: wire type of every element
  PyObject* typeargs;  // tuple item 1: spec args passed along when encoding/decoding each element
} SetListTypeArgs;
/**
 * A cache of the spec_args for a map,
 * so we don't have to keep calling PyTuple_GET_ITEM.
 *
 * Filled in by parse_map_args() from a 4-tuple laid out as
 * (ktag, ktypeargs, vtag, vtypeargs). The PyObject pointers are copied
 * straight out of the tuple without taking new references.
 */
typedef struct {
  TType ktag;           // tuple item 0: wire type of the keys
  TType vtag;           // tuple item 2: wire type of the values
  PyObject* ktypeargs;  // tuple item 1: spec args for the key type
  PyObject* vtypeargs;  // tuple item 3: spec args for the value type
} MapTypeArgs;
/**
 * A cache of the spec_args for a struct,
 * so we don't have to keep calling PyTuple_GET_ITEM.
 *
 * Filled in by parse_struct_args() from a 2-tuple. The PyObject pointers are
 * copied straight out of the tuple without taking new references.
 */
typedef struct {
  PyObject* klass;  // tuple item 0: called with no arguments by decode_val to create the struct instance
  PyObject* spec;   // tuple item 1: tuple of per-field spec tuples (entries may be None), indexed by field tag when decoding
} StructTypeArgs;
/**
 * A cache of the item spec from a struct specification,
 * so we don't have to keep calling PyTuple_GET_ITEM.
 *
 * Filled in by parse_struct_item_spec() from a 5-tuple laid out as
 * (tag, type, attrname, typeargs, defval). The PyObject pointers are copied
 * straight out of the tuple without taking new references.
 */
typedef struct {
  int tag;             // tuple item 0: field id, written as an i16 before the field value
  TType type;          // tuple item 1: wire type of the field
  PyObject* attrname;  // tuple item 2: attribute name used with PyObject_GetAttr / PyObject_SetAttr
  PyObject* typeargs;  // tuple item 3: spec args for the field's type
  PyObject* defval;    // tuple item 4: stored but not read anywhere in this file (see the TODO near the top)
} StructItemSpec;
/**
 * A cache of the two key attributes of a CReadableTransport,
 * so we don't have to keep calling PyObject_GetAttr.
 *
 * Populated by decode_buffer_from_obj() and released by free_decodebuf().
 */
typedef struct {
  PyObject* stringiobuf;      // value of the transport's cstringio_buf attribute; replaced by readBytes after a refill
  PyObject* refill_callable;  // value of the transport's cstringio_refill attribute; called by readBytes on a short read
} DecodeBuffer;
/**
 * Pointer to interned string to speed up attribute lookup.
 * Holds the name "cstringio_buf"; assigned in initfastbinary().
 */
static PyObject* INTERN_STRING(cstringio_buf);
/**
 * Pointer to interned string to speed up attribute lookup.
 * Holds the name "cstringio_refill"; assigned in initfastbinary().
 */
static PyObject* INTERN_STRING(cstringio_refill);
  129. static inline bool
  130. check_ssize_t_32(Py_ssize_t len) {
  131. // error from getting the int
  132. if (INT_CONV_ERROR_OCCURRED(len)) {
  133. return false;
  134. }
  135. if (!CHECK_RANGE(len, 0, INT32_MAX)) {
  136. PyErr_SetString(PyExc_OverflowError, "string size out of range");
  137. return false;
  138. }
  139. return true;
  140. }
  141. static inline bool
  142. parse_pyint(PyObject* o, int32_t* ret, int32_t min, int32_t max) {
  143. long val = PyInt_AsLong(o);
  144. if (INT_CONV_ERROR_OCCURRED(val)) {
  145. return false;
  146. }
  147. if (!CHECK_RANGE(val, min, max)) {
  148. PyErr_SetString(PyExc_OverflowError, "int out of range");
  149. return false;
  150. }
  151. *ret = (int32_t) val;
  152. return true;
  153. }
/* --- FUNCTIONS TO PARSE STRUCT SPECIFICATIONS --- */
  155. static bool
  156. parse_set_list_args(SetListTypeArgs* dest, PyObject* typeargs) {
  157. if (PyTuple_Size(typeargs) != 2) {
  158. PyErr_SetString(PyExc_TypeError, "expecting tuple of size 2 for list/set type args");
  159. return false;
  160. }
  161. dest->element_type = PyInt_AsLong(PyTuple_GET_ITEM(typeargs, 0));
  162. if (INT_CONV_ERROR_OCCURRED(dest->element_type)) {
  163. return false;
  164. }
  165. dest->typeargs = PyTuple_GET_ITEM(typeargs, 1);
  166. return true;
  167. }
  168. static bool
  169. parse_map_args(MapTypeArgs* dest, PyObject* typeargs) {
  170. if (PyTuple_Size(typeargs) != 4) {
  171. PyErr_SetString(PyExc_TypeError, "expecting 4 arguments for typeargs to map");
  172. return false;
  173. }
  174. dest->ktag = PyInt_AsLong(PyTuple_GET_ITEM(typeargs, 0));
  175. if (INT_CONV_ERROR_OCCURRED(dest->ktag)) {
  176. return false;
  177. }
  178. dest->vtag = PyInt_AsLong(PyTuple_GET_ITEM(typeargs, 2));
  179. if (INT_CONV_ERROR_OCCURRED(dest->vtag)) {
  180. return false;
  181. }
  182. dest->ktypeargs = PyTuple_GET_ITEM(typeargs, 1);
  183. dest->vtypeargs = PyTuple_GET_ITEM(typeargs, 3);
  184. return true;
  185. }
  186. static bool
  187. parse_struct_args(StructTypeArgs* dest, PyObject* typeargs) {
  188. if (PyTuple_Size(typeargs) != 2) {
  189. PyErr_SetString(PyExc_TypeError, "expecting tuple of size 2 for struct args");
  190. return false;
  191. }
  192. dest->klass = PyTuple_GET_ITEM(typeargs, 0);
  193. dest->spec = PyTuple_GET_ITEM(typeargs, 1);
  194. return true;
  195. }
  196. static int
  197. parse_struct_item_spec(StructItemSpec* dest, PyObject* spec_tuple) {
  198. // i'd like to use ParseArgs here, but it seems to be a bottleneck.
  199. if (PyTuple_Size(spec_tuple) != 5) {
  200. PyErr_SetString(PyExc_TypeError, "expecting 5 arguments for spec tuple");
  201. return false;
  202. }
  203. dest->tag = PyInt_AsLong(PyTuple_GET_ITEM(spec_tuple, 0));
  204. if (INT_CONV_ERROR_OCCURRED(dest->tag)) {
  205. return false;
  206. }
  207. dest->type = PyInt_AsLong(PyTuple_GET_ITEM(spec_tuple, 1));
  208. if (INT_CONV_ERROR_OCCURRED(dest->type)) {
  209. return false;
  210. }
  211. dest->attrname = PyTuple_GET_ITEM(spec_tuple, 2);
  212. dest->typeargs = PyTuple_GET_ITEM(spec_tuple, 3);
  213. dest->defval = PyTuple_GET_ITEM(spec_tuple, 4);
  214. return true;
  215. }
  216. /* ====== END UTILITIES ====== */
  217. /* ====== BEGIN WRITING FUNCTIONS ====== */
  218. /* --- LOW-LEVEL WRITING FUNCTIONS --- */
  219. static void writeByte(PyObject* outbuf, int8_t val) {
  220. int8_t net = val;
  221. PycStringIO->cwrite(outbuf, (char*)&net, sizeof(int8_t));
  222. }
  223. static void writeI16(PyObject* outbuf, int16_t val) {
  224. int16_t net = (int16_t)htons(val);
  225. PycStringIO->cwrite(outbuf, (char*)&net, sizeof(int16_t));
  226. }
  227. static void writeI32(PyObject* outbuf, int32_t val) {
  228. int32_t net = (int32_t)htonl(val);
  229. PycStringIO->cwrite(outbuf, (char*)&net, sizeof(int32_t));
  230. }
  231. static void writeI64(PyObject* outbuf, int64_t val) {
  232. int64_t net = (int64_t)htonll(val);
  233. PycStringIO->cwrite(outbuf, (char*)&net, sizeof(int64_t));
  234. }
  235. static void writeDouble(PyObject* outbuf, double dub) {
  236. // Unfortunately, bitwise_cast doesn't work in C. Bad C!
  237. union {
  238. double f;
  239. int64_t t;
  240. } transfer;
  241. transfer.f = dub;
  242. writeI64(outbuf, transfer.t);
  243. }
/* --- MAIN RECURSIVE OUTPUT FUNCTION -- */
  245. static int
  246. output_val(PyObject* output, PyObject* value, TType type, PyObject* typeargs) {
  247. /*
  248. * Refcounting Strategy:
  249. *
  250. * We assume that elements of the thrift_spec tuple are not going to be
  251. * mutated, so we don't ref count those at all. Other than that, we try to
  252. * keep a reference to all the user-created objects while we work with them.
  253. * output_val assumes that a reference is already held. The *caller* is
  254. * responsible for handling references
  255. */
  256. switch (type) {
  257. case T_BOOL: {
  258. int v = PyObject_IsTrue(value);
  259. if (v == -1) {
  260. return false;
  261. }
  262. writeByte(output, (int8_t) v);
  263. break;
  264. }
  265. case T_I08: {
  266. int32_t val;
  267. if (!parse_pyint(value, &val, INT8_MIN, INT8_MAX)) {
  268. return false;
  269. }
  270. writeByte(output, (int8_t) val);
  271. break;
  272. }
  273. case T_I16: {
  274. int32_t val;
  275. if (!parse_pyint(value, &val, INT16_MIN, INT16_MAX)) {
  276. return false;
  277. }
  278. writeI16(output, (int16_t) val);
  279. break;
  280. }
  281. case T_I32: {
  282. int32_t val;
  283. if (!parse_pyint(value, &val, INT32_MIN, INT32_MAX)) {
  284. return false;
  285. }
  286. writeI32(output, val);
  287. break;
  288. }
  289. case T_I64: {
  290. int64_t nval = PyLong_AsLongLong(value);
  291. if (INT_CONV_ERROR_OCCURRED(nval)) {
  292. return false;
  293. }
  294. if (!CHECK_RANGE(nval, INT64_MIN, INT64_MAX)) {
  295. PyErr_SetString(PyExc_OverflowError, "int out of range");
  296. return false;
  297. }
  298. writeI64(output, nval);
  299. break;
  300. }
  301. case T_DOUBLE: {
  302. double nval = PyFloat_AsDouble(value);
  303. if (nval == -1.0 && PyErr_Occurred()) {
  304. return false;
  305. }
  306. writeDouble(output, nval);
  307. break;
  308. }
  309. case T_STRING: {
  310. Py_ssize_t len = PyString_Size(value);
  311. if (!check_ssize_t_32(len)) {
  312. return false;
  313. }
  314. writeI32(output, (int32_t) len);
  315. PycStringIO->cwrite(output, PyString_AsString(value), (int32_t) len);
  316. break;
  317. }
  318. case T_LIST:
  319. case T_SET: {
  320. Py_ssize_t len;
  321. SetListTypeArgs parsedargs;
  322. PyObject *item;
  323. PyObject *iterator;
  324. if (!parse_set_list_args(&parsedargs, typeargs)) {
  325. return false;
  326. }
  327. len = PyObject_Length(value);
  328. if (!check_ssize_t_32(len)) {
  329. return false;
  330. }
  331. writeByte(output, parsedargs.element_type);
  332. writeI32(output, (int32_t) len);
  333. iterator = PyObject_GetIter(value);
  334. if (iterator == NULL) {
  335. return false;
  336. }
  337. while ((item = PyIter_Next(iterator))) {
  338. if (!output_val(output, item, parsedargs.element_type, parsedargs.typeargs)) {
  339. Py_DECREF(item);
  340. Py_DECREF(iterator);
  341. return false;
  342. }
  343. Py_DECREF(item);
  344. }
  345. Py_DECREF(iterator);
  346. if (PyErr_Occurred()) {
  347. return false;
  348. }
  349. break;
  350. }
  351. case T_MAP: {
  352. PyObject *k, *v;
  353. Py_ssize_t pos = 0;
  354. Py_ssize_t len;
  355. MapTypeArgs parsedargs;
  356. len = PyDict_Size(value);
  357. if (!check_ssize_t_32(len)) {
  358. return false;
  359. }
  360. if (!parse_map_args(&parsedargs, typeargs)) {
  361. return false;
  362. }
  363. writeByte(output, parsedargs.ktag);
  364. writeByte(output, parsedargs.vtag);
  365. writeI32(output, len);
  366. // TODO(bmaurer): should support any mapping, not just dicts
  367. while (PyDict_Next(value, &pos, &k, &v)) {
  368. // TODO(dreiss): Think hard about whether these INCREFs actually
  369. // turn any unsafe scenarios into safe scenarios.
  370. Py_INCREF(k);
  371. Py_INCREF(v);
  372. if (!output_val(output, k, parsedargs.ktag, parsedargs.ktypeargs)
  373. || !output_val(output, v, parsedargs.vtag, parsedargs.vtypeargs)) {
  374. Py_DECREF(k);
  375. Py_DECREF(v);
  376. return false;
  377. }
  378. }
  379. break;
  380. }
  381. // TODO(dreiss): Consider breaking this out as a function
  382. // the way we did for decode_struct.
  383. case T_STRUCT: {
  384. StructTypeArgs parsedargs;
  385. Py_ssize_t nspec;
  386. Py_ssize_t i;
  387. if (!parse_struct_args(&parsedargs, typeargs)) {
  388. return false;
  389. }
  390. nspec = PyTuple_Size(parsedargs.spec);
  391. if (nspec == -1) {
  392. return false;
  393. }
  394. for (i = 0; i < nspec; i++) {
  395. StructItemSpec parsedspec;
  396. PyObject* spec_tuple;
  397. PyObject* instval = NULL;
  398. spec_tuple = PyTuple_GET_ITEM(parsedargs.spec, i);
  399. if (spec_tuple == Py_None) {
  400. continue;
  401. }
  402. if (!parse_struct_item_spec (&parsedspec, spec_tuple)) {
  403. return false;
  404. }
  405. instval = PyObject_GetAttr(value, parsedspec.attrname);
  406. if (!instval) {
  407. return false;
  408. }
  409. if (instval == Py_None) {
  410. Py_DECREF(instval);
  411. continue;
  412. }
  413. writeByte(output, (int8_t) parsedspec.type);
  414. writeI16(output, parsedspec.tag);
  415. if (!output_val(output, instval, parsedspec.type, parsedspec.typeargs)) {
  416. Py_DECREF(instval);
  417. return false;
  418. }
  419. Py_DECREF(instval);
  420. }
  421. writeByte(output, (int8_t)T_STOP);
  422. break;
  423. }
  424. case T_STOP:
  425. case T_VOID:
  426. case T_UTF16:
  427. case T_UTF8:
  428. case T_U64:
  429. default:
  430. PyErr_SetString(PyExc_TypeError, "Unexpected TType");
  431. return false;
  432. }
  433. return true;
  434. }
  435. /* --- TOP-LEVEL WRAPPER FOR OUTPUT -- */
  436. static PyObject *
  437. encode_binary(PyObject *self, PyObject *args) {
  438. PyObject* enc_obj;
  439. PyObject* type_args;
  440. PyObject* buf;
  441. PyObject* ret = NULL;
  442. if (!PyArg_ParseTuple(args, "OO", &enc_obj, &type_args)) {
  443. return NULL;
  444. }
  445. buf = PycStringIO->NewOutput(INIT_OUTBUF_SIZE);
  446. if (output_val(buf, enc_obj, T_STRUCT, type_args)) {
  447. ret = PycStringIO->cgetvalue(buf);
  448. }
  449. Py_DECREF(buf);
  450. return ret;
  451. }
  452. /* ====== END WRITING FUNCTIONS ====== */
  453. /* ====== BEGIN READING FUNCTIONS ====== */
  454. /* --- LOW-LEVEL READING FUNCTIONS --- */
  455. static void
  456. free_decodebuf(DecodeBuffer* d) {
  457. Py_XDECREF(d->stringiobuf);
  458. Py_XDECREF(d->refill_callable);
  459. }
  460. static bool
  461. decode_buffer_from_obj(DecodeBuffer* dest, PyObject* obj) {
  462. dest->stringiobuf = PyObject_GetAttr(obj, INTERN_STRING(cstringio_buf));
  463. if (!dest->stringiobuf) {
  464. return false;
  465. }
  466. if (!PycStringIO_InputCheck(dest->stringiobuf)) {
  467. free_decodebuf(dest);
  468. PyErr_SetString(PyExc_TypeError, "expecting stringio input");
  469. return false;
  470. }
  471. dest->refill_callable = PyObject_GetAttr(obj, INTERN_STRING(cstringio_refill));
  472. if(!dest->refill_callable) {
  473. free_decodebuf(dest);
  474. return false;
  475. }
  476. if (!PyCallable_Check(dest->refill_callable)) {
  477. free_decodebuf(dest);
  478. PyErr_SetString(PyExc_TypeError, "expecting callable");
  479. return false;
  480. }
  481. return true;
  482. }
  483. static bool readBytes(DecodeBuffer* input, char** output, int len) {
  484. int read;
  485. // TODO(dreiss): Don't fear the malloc. Think about taking a copy of
  486. // the partial read instead of forcing the transport
  487. // to prepend it to its buffer.
  488. read = PycStringIO->cread(input->stringiobuf, output, len);
  489. if (read == len) {
  490. return true;
  491. } else if (read == -1) {
  492. return false;
  493. } else {
  494. PyObject* newiobuf;
  495. // using building functions as this is a rare codepath
  496. newiobuf = PyObject_CallFunction(
  497. input->refill_callable, "s#i", *output, read, len, NULL);
  498. if (newiobuf == NULL) {
  499. return false;
  500. }
  501. // must do this *AFTER* the call so that we don't deref the io buffer
  502. Py_CLEAR(input->stringiobuf);
  503. input->stringiobuf = newiobuf;
  504. read = PycStringIO->cread(input->stringiobuf, output, len);
  505. if (read == len) {
  506. return true;
  507. } else if (read == -1) {
  508. return false;
  509. } else {
  510. // TODO(dreiss): This could be a valid code path for big binary blobs.
  511. PyErr_SetString(PyExc_TypeError,
  512. "refill claimed to have refilled the buffer, but didn't!!");
  513. return false;
  514. }
  515. }
  516. }
  517. static int8_t readByte(DecodeBuffer* input) {
  518. char* buf;
  519. if (!readBytes(input, &buf, sizeof(int8_t))) {
  520. return -1;
  521. }
  522. return *(int8_t*) buf;
  523. }
  524. static int16_t readI16(DecodeBuffer* input) {
  525. char* buf;
  526. if (!readBytes(input, &buf, sizeof(int16_t))) {
  527. return -1;
  528. }
  529. return (int16_t) ntohs(*(int16_t*) buf);
  530. }
  531. static int32_t readI32(DecodeBuffer* input) {
  532. char* buf;
  533. if (!readBytes(input, &buf, sizeof(int32_t))) {
  534. return -1;
  535. }
  536. return (int32_t) ntohl(*(int32_t*) buf);
  537. }
  538. static int64_t readI64(DecodeBuffer* input) {
  539. char* buf;
  540. if (!readBytes(input, &buf, sizeof(int64_t))) {
  541. return -1;
  542. }
  543. return (int64_t) ntohll(*(int64_t*) buf);
  544. }
  545. static double readDouble(DecodeBuffer* input) {
  546. union {
  547. int64_t f;
  548. double t;
  549. } transfer;
  550. transfer.f = readI64(input);
  551. if (transfer.f == -1) {
  552. return -1;
  553. }
  554. return transfer.t;
  555. }
  556. static bool
  557. checkTypeByte(DecodeBuffer* input, TType expected) {
  558. TType got = readByte(input);
  559. if (INT_CONV_ERROR_OCCURRED(got)) {
  560. return false;
  561. }
  562. if (expected != got) {
  563. PyErr_SetString(PyExc_TypeError, "got wrong ttype while reading field");
  564. return false;
  565. }
  566. return true;
  567. }
  568. static bool
  569. skip(DecodeBuffer* input, TType type) {
  570. #define SKIPBYTES(n) \
  571. do { \
  572. if (!readBytes(input, &dummy_buf, (n))) { \
  573. return false; \
  574. } \
  575. } while(0)
  576. char* dummy_buf;
  577. switch (type) {
  578. case T_BOOL:
  579. case T_I08: SKIPBYTES(1); break;
  580. case T_I16: SKIPBYTES(2); break;
  581. case T_I32: SKIPBYTES(4); break;
  582. case T_I64:
  583. case T_DOUBLE: SKIPBYTES(8); break;
  584. case T_STRING: {
  585. // TODO(dreiss): Find out if these check_ssize_t32s are really necessary.
  586. int len = readI32(input);
  587. if (!check_ssize_t_32(len)) {
  588. return false;
  589. }
  590. SKIPBYTES(len);
  591. break;
  592. }
  593. case T_LIST:
  594. case T_SET: {
  595. TType etype;
  596. int len, i;
  597. etype = readByte(input);
  598. if (etype == -1) {
  599. return false;
  600. }
  601. len = readI32(input);
  602. if (!check_ssize_t_32(len)) {
  603. return false;
  604. }
  605. for (i = 0; i < len; i++) {
  606. if (!skip(input, etype)) {
  607. return false;
  608. }
  609. }
  610. break;
  611. }
  612. case T_MAP: {
  613. TType ktype, vtype;
  614. int len, i;
  615. ktype = readByte(input);
  616. if (ktype == -1) {
  617. return false;
  618. }
  619. vtype = readByte(input);
  620. if (vtype == -1) {
  621. return false;
  622. }
  623. len = readI32(input);
  624. if (!check_ssize_t_32(len)) {
  625. return false;
  626. }
  627. for (i = 0; i < len; i++) {
  628. if (!(skip(input, ktype) && skip(input, vtype))) {
  629. return false;
  630. }
  631. }
  632. break;
  633. }
  634. case T_STRUCT: {
  635. while (true) {
  636. TType type;
  637. type = readByte(input);
  638. if (type == -1) {
  639. return false;
  640. }
  641. if (type == T_STOP)
  642. break;
  643. SKIPBYTES(2); // tag
  644. if (!skip(input, type)) {
  645. return false;
  646. }
  647. }
  648. break;
  649. }
  650. case T_STOP:
  651. case T_VOID:
  652. case T_UTF16:
  653. case T_UTF8:
  654. case T_U64:
  655. default:
  656. PyErr_SetString(PyExc_TypeError, "Unexpected TType");
  657. return false;
  658. }
  659. return true;
  660. #undef SKIPBYTES
  661. }
  662. /* --- HELPER FUNCTION FOR DECODE_VAL --- */
  663. static PyObject*
  664. decode_val(DecodeBuffer* input, TType type, PyObject* typeargs);
  665. static bool
  666. decode_struct(DecodeBuffer* input, PyObject* output, PyObject* spec_seq) {
  667. int spec_seq_len = PyTuple_Size(spec_seq);
  668. if (spec_seq_len == -1) {
  669. return false;
  670. }
  671. while (true) {
  672. TType type;
  673. int16_t tag;
  674. PyObject* item_spec;
  675. PyObject* fieldval = NULL;
  676. StructItemSpec parsedspec;
  677. type = readByte(input);
  678. if (type == -1) {
  679. return false;
  680. }
  681. if (type == T_STOP) {
  682. break;
  683. }
  684. tag = readI16(input);
  685. if (INT_CONV_ERROR_OCCURRED(tag)) {
  686. return false;
  687. }
  688. if (tag >= 0 && tag < spec_seq_len) {
  689. item_spec = PyTuple_GET_ITEM(spec_seq, tag);
  690. } else {
  691. item_spec = Py_None;
  692. }
  693. if (item_spec == Py_None) {
  694. if (!skip(input, type)) {
  695. return false;
  696. } else {
  697. continue;
  698. }
  699. }
  700. if (!parse_struct_item_spec(&parsedspec, item_spec)) {
  701. return false;
  702. }
  703. if (parsedspec.type != type) {
  704. PyErr_SetString(PyExc_TypeError, "struct field had wrong type while reading");
  705. return false;
  706. }
  707. fieldval = decode_val(input, parsedspec.type, parsedspec.typeargs);
  708. if (fieldval == NULL) {
  709. return false;
  710. }
  711. if (PyObject_SetAttr(output, parsedspec.attrname, fieldval) == -1) {
  712. Py_DECREF(fieldval);
  713. return false;
  714. }
  715. Py_DECREF(fieldval);
  716. }
  717. return true;
  718. }
/* --- MAIN RECURSIVE INPUT FUNCTION --- */
  720. // Returns a new reference.
  721. static PyObject*
  722. decode_val(DecodeBuffer* input, TType type, PyObject* typeargs) {
  723. switch (type) {
  724. case T_BOOL: {
  725. int8_t v = readByte(input);
  726. if (INT_CONV_ERROR_OCCURRED(v)) {
  727. return NULL;
  728. }
  729. switch (v) {
  730. case 0: Py_RETURN_FALSE;
  731. case 1: Py_RETURN_TRUE;
  732. // Don't laugh. This is a potentially serious issue.
  733. default: PyErr_SetString(PyExc_TypeError, "boolean out of range"); return NULL;
  734. }
  735. break;
  736. }
  737. case T_I08: {
  738. int8_t v = readByte(input);
  739. if (INT_CONV_ERROR_OCCURRED(v)) {
  740. return NULL;
  741. }
  742. return PyInt_FromLong(v);
  743. }
  744. case T_I16: {
  745. int16_t v = readI16(input);
  746. if (INT_CONV_ERROR_OCCURRED(v)) {
  747. return NULL;
  748. }
  749. return PyInt_FromLong(v);
  750. }
  751. case T_I32: {
  752. int32_t v = readI32(input);
  753. if (INT_CONV_ERROR_OCCURRED(v)) {
  754. return NULL;
  755. }
  756. return PyInt_FromLong(v);
  757. }
  758. case T_I64: {
  759. int64_t v = readI64(input);
  760. if (INT_CONV_ERROR_OCCURRED(v)) {
  761. return NULL;
  762. }
  763. // TODO(dreiss): Find out if we can take this fastpath always when
  764. // sizeof(long) == sizeof(long long).
  765. if (CHECK_RANGE(v, LONG_MIN, LONG_MAX)) {
  766. return PyInt_FromLong((long) v);
  767. }
  768. return PyLong_FromLongLong(v);
  769. }
  770. case T_DOUBLE: {
  771. double v = readDouble(input);
  772. if (v == -1.0 && PyErr_Occurred()) {
  773. return false;
  774. }
  775. return PyFloat_FromDouble(v);
  776. }
  777. case T_STRING: {
  778. Py_ssize_t len = readI32(input);
  779. char* buf;
  780. if (!readBytes(input, &buf, len)) {
  781. return NULL;
  782. }
  783. return PyString_FromStringAndSize(buf, len);
  784. }
  785. case T_LIST:
  786. case T_SET: {
  787. SetListTypeArgs parsedargs;
  788. int32_t len;
  789. PyObject* ret = NULL;
  790. int i;
  791. if (!parse_set_list_args(&parsedargs, typeargs)) {
  792. return NULL;
  793. }
  794. if (!checkTypeByte(input, parsedargs.element_type)) {
  795. return NULL;
  796. }
  797. len = readI32(input);
  798. if (!check_ssize_t_32(len)) {
  799. return NULL;
  800. }
  801. ret = PyList_New(len);
  802. if (!ret) {
  803. return NULL;
  804. }
  805. for (i = 0; i < len; i++) {
  806. PyObject* item = decode_val(input, parsedargs.element_type, parsedargs.typeargs);
  807. if (!item) {
  808. Py_DECREF(ret);
  809. return NULL;
  810. }
  811. PyList_SET_ITEM(ret, i, item);
  812. }
  813. // TODO(dreiss): Consider biting the bullet and making two separate cases
  814. // for list and set, avoiding this post facto conversion.
  815. if (type == T_SET) {
  816. PyObject* setret;
  817. #if (PY_VERSION_HEX < 0x02050000)
  818. // hack needed for older versions
  819. setret = PyObject_CallFunctionObjArgs((PyObject*)&PySet_Type, ret, NULL);
  820. #else
  821. // official version
  822. setret = PySet_New(ret);
  823. #endif
  824. Py_DECREF(ret);
  825. return setret;
  826. }
  827. return ret;
  828. }
  829. case T_MAP: {
  830. int32_t len;
  831. int i;
  832. MapTypeArgs parsedargs;
  833. PyObject* ret = NULL;
  834. if (!parse_map_args(&parsedargs, typeargs)) {
  835. return NULL;
  836. }
  837. if (!checkTypeByte(input, parsedargs.ktag)) {
  838. return NULL;
  839. }
  840. if (!checkTypeByte(input, parsedargs.vtag)) {
  841. return NULL;
  842. }
  843. len = readI32(input);
  844. if (!check_ssize_t_32(len)) {
  845. return false;
  846. }
  847. ret = PyDict_New();
  848. if (!ret) {
  849. goto error;
  850. }
  851. for (i = 0; i < len; i++) {
  852. PyObject* k = NULL;
  853. PyObject* v = NULL;
  854. k = decode_val(input, parsedargs.ktag, parsedargs.ktypeargs);
  855. if (k == NULL) {
  856. goto loop_error;
  857. }
  858. v = decode_val(input, parsedargs.vtag, parsedargs.vtypeargs);
  859. if (v == NULL) {
  860. goto loop_error;
  861. }
  862. if (PyDict_SetItem(ret, k, v) == -1) {
  863. goto loop_error;
  864. }
  865. Py_DECREF(k);
  866. Py_DECREF(v);
  867. continue;
  868. // Yuck! Destructors, anyone?
  869. loop_error:
  870. Py_XDECREF(k);
  871. Py_XDECREF(v);
  872. goto error;
  873. }
  874. return ret;
  875. error:
  876. Py_XDECREF(ret);
  877. return NULL;
  878. }
  879. case T_STRUCT: {
  880. StructTypeArgs parsedargs;
  881. if (!parse_struct_args(&parsedargs, typeargs)) {
  882. return NULL;
  883. }
  884. PyObject* ret = PyObject_CallObject(parsedargs.klass, NULL);
  885. if (!ret) {
  886. return NULL;
  887. }
  888. if (!decode_struct(input, ret, parsedargs.spec)) {
  889. Py_DECREF(ret);
  890. return NULL;
  891. }
  892. return ret;
  893. }
  894. case T_STOP:
  895. case T_VOID:
  896. case T_UTF16:
  897. case T_UTF8:
  898. case T_U64:
  899. default:
  900. PyErr_SetString(PyExc_TypeError, "Unexpected TType");
  901. return NULL;
  902. }
  903. }
  904. /* --- TOP-LEVEL WRAPPER FOR INPUT -- */
  905. static PyObject*
  906. decode_binary(PyObject *self, PyObject *args) {
  907. PyObject* output_obj = NULL;
  908. PyObject* transport = NULL;
  909. PyObject* typeargs = NULL;
  910. StructTypeArgs parsedargs;
  911. DecodeBuffer input = {};
  912. if (!PyArg_ParseTuple(args, "OOO", &output_obj, &transport, &typeargs)) {
  913. return NULL;
  914. }
  915. if (!parse_struct_args(&parsedargs, typeargs)) {
  916. return NULL;
  917. }
  918. if (!decode_buffer_from_obj(&input, transport)) {
  919. return NULL;
  920. }
  921. if (!decode_struct(&input, output_obj, parsedargs.spec)) {
  922. free_decodebuf(&input);
  923. return NULL;
  924. }
  925. free_decodebuf(&input);
  926. Py_RETURN_NONE;
  927. }
  928. /* ====== END READING FUNCTIONS ====== */
  929. /* -- PYTHON MODULE SETUP STUFF --- */
/* Method table handed to Py_InitModule in initfastbinary(). Both functions
 * take positional arguments only (METH_VARARGS) and have empty docstrings. */
static PyMethodDef ThriftFastBinaryMethods[] = {
  {"encode_binary", encode_binary, METH_VARARGS, ""},
  {"decode_binary", decode_binary, METH_VARARGS, ""},
  {NULL, NULL, 0, NULL} /* Sentinel */
};
  935. PyMODINIT_FUNC
  936. initfastbinary(void) {
  937. #define INIT_INTERN_STRING(value) \
  938. do { \
  939. INTERN_STRING(value) = PyString_InternFromString(#value); \
  940. if(!INTERN_STRING(value)) return; \
  941. } while(0)
  942. INIT_INTERN_STRING(cstringio_buf);
  943. INIT_INTERN_STRING(cstringio_refill);
  944. #undef INIT_INTERN_STRING
  945. PycString_IMPORT;
  946. if (PycStringIO == NULL) return;
  947. (void) Py_InitModule("thrift.protocol.fastbinary", ThriftFastBinaryMethods);
  948. }