PageRenderTime 35ms CodeModel.GetById 13ms RepoModel.GetById 0ms app.codeStats 1ms

/mercurial/parsers.c

https://bitbucket.org/mirror/mercurial/
C | 2173 lines | 1731 code | 283 blank | 159 comment | 464 complexity | a099d4aa4889684238c366ed7a6cc6eb MD5 | raw file
Possible License(s): GPL-2.0
  1. /*
  2. parsers.c - efficient content parsing
  3. Copyright 2008 Matt Mackall <mpm@selenic.com> and others
  4. This software may be used and distributed according to the terms of
  5. the GNU General Public License, incorporated herein by reference.
  6. */
  7. #include <Python.h>
  8. #include <ctype.h>
  9. #include <stddef.h>
  10. #include <string.h>
  11. #include "util.h"
  12. static char *versionerrortext = "Python minor version mismatch";
  13. static int8_t hextable[256] = {
  14. -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
  15. -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
  16. -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
  17. 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, -1, -1, -1, -1, -1, -1, /* 0-9 */
  18. -1, 10, 11, 12, 13, 14, 15, -1, -1, -1, -1, -1, -1, -1, -1, -1, /* A-F */
  19. -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
  20. -1, 10, 11, 12, 13, 14, 15, -1, -1, -1, -1, -1, -1, -1, -1, -1, /* a-f */
  21. -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
  22. -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
  23. -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
  24. -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
  25. -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
  26. -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
  27. -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
  28. -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
  29. -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1
  30. };
  31. static inline int hexdigit(const char *p, Py_ssize_t off)
  32. {
  33. int8_t val = hextable[(unsigned char)p[off]];
  34. if (val >= 0) {
  35. return val;
  36. }
  37. PyErr_SetString(PyExc_ValueError, "input contains non-hex character");
  38. return 0;
  39. }
  40. /*
  41. * Turn a hex-encoded string into binary.
  42. */
  43. static PyObject *unhexlify(const char *str, int len)
  44. {
  45. PyObject *ret;
  46. char *d;
  47. int i;
  48. ret = PyBytes_FromStringAndSize(NULL, len / 2);
  49. if (!ret)
  50. return NULL;
  51. d = PyBytes_AsString(ret);
  52. for (i = 0; i < len;) {
  53. int hi = hexdigit(str, i++);
  54. int lo = hexdigit(str, i++);
  55. *d++ = (hi << 4) | lo;
  56. }
  57. return ret;
  58. }
  59. /*
  60. * This code assumes that a manifest is stitched together with newline
  61. * ('\n') characters.
  62. */
  63. static PyObject *parse_manifest(PyObject *self, PyObject *args)
  64. {
  65. PyObject *mfdict, *fdict;
  66. char *str, *start, *end;
  67. int len;
  68. if (!PyArg_ParseTuple(args, "O!O!s#:parse_manifest",
  69. &PyDict_Type, &mfdict,
  70. &PyDict_Type, &fdict,
  71. &str, &len))
  72. goto quit;
  73. start = str;
  74. end = str + len;
  75. while (start < end) {
  76. PyObject *file = NULL, *node = NULL;
  77. PyObject *flags = NULL;
  78. char *zero = NULL, *newline = NULL;
  79. ptrdiff_t nlen;
  80. zero = memchr(start, '\0', end - start);
  81. if (!zero) {
  82. PyErr_SetString(PyExc_ValueError,
  83. "manifest entry has no separator");
  84. goto quit;
  85. }
  86. newline = memchr(zero + 1, '\n', end - (zero + 1));
  87. if (!newline) {
  88. PyErr_SetString(PyExc_ValueError,
  89. "manifest contains trailing garbage");
  90. goto quit;
  91. }
  92. file = PyBytes_FromStringAndSize(start, zero - start);
  93. if (!file)
  94. goto bail;
  95. nlen = newline - zero - 1;
  96. node = unhexlify(zero + 1, nlen > 40 ? 40 : (int)nlen);
  97. if (!node)
  98. goto bail;
  99. if (nlen > 40) {
  100. flags = PyBytes_FromStringAndSize(zero + 41,
  101. nlen - 40);
  102. if (!flags)
  103. goto bail;
  104. if (PyDict_SetItem(fdict, file, flags) == -1)
  105. goto bail;
  106. }
  107. if (PyDict_SetItem(mfdict, file, node) == -1)
  108. goto bail;
  109. start = newline + 1;
  110. Py_XDECREF(flags);
  111. Py_XDECREF(node);
  112. Py_XDECREF(file);
  113. continue;
  114. bail:
  115. Py_XDECREF(flags);
  116. Py_XDECREF(node);
  117. Py_XDECREF(file);
  118. goto quit;
  119. }
  120. Py_INCREF(Py_None);
  121. return Py_None;
  122. quit:
  123. return NULL;
  124. }
  125. static inline dirstateTupleObject *make_dirstate_tuple(char state, int mode,
  126. int size, int mtime)
  127. {
  128. dirstateTupleObject *t = PyObject_New(dirstateTupleObject,
  129. &dirstateTupleType);
  130. if (!t)
  131. return NULL;
  132. t->state = state;
  133. t->mode = mode;
  134. t->size = size;
  135. t->mtime = mtime;
  136. return t;
  137. }
  138. static PyObject *dirstate_tuple_new(PyTypeObject *subtype, PyObject *args,
  139. PyObject *kwds)
  140. {
  141. /* We do all the initialization here and not a tp_init function because
  142. * dirstate_tuple is immutable. */
  143. dirstateTupleObject *t;
  144. char state;
  145. int size, mode, mtime;
  146. if (!PyArg_ParseTuple(args, "ciii", &state, &mode, &size, &mtime))
  147. return NULL;
  148. t = (dirstateTupleObject *)subtype->tp_alloc(subtype, 1);
  149. if (!t)
  150. return NULL;
  151. t->state = state;
  152. t->mode = mode;
  153. t->size = size;
  154. t->mtime = mtime;
  155. return (PyObject *)t;
  156. }
  157. static void dirstate_tuple_dealloc(PyObject *o)
  158. {
  159. PyObject_Del(o);
  160. }
  161. static Py_ssize_t dirstate_tuple_length(PyObject *o)
  162. {
  163. return 4;
  164. }
  165. static PyObject *dirstate_tuple_item(PyObject *o, Py_ssize_t i)
  166. {
  167. dirstateTupleObject *t = (dirstateTupleObject *)o;
  168. switch (i) {
  169. case 0:
  170. return PyBytes_FromStringAndSize(&t->state, 1);
  171. case 1:
  172. return PyInt_FromLong(t->mode);
  173. case 2:
  174. return PyInt_FromLong(t->size);
  175. case 3:
  176. return PyInt_FromLong(t->mtime);
  177. default:
  178. PyErr_SetString(PyExc_IndexError, "index out of range");
  179. return NULL;
  180. }
  181. }
  182. static PySequenceMethods dirstate_tuple_sq = {
  183. dirstate_tuple_length, /* sq_length */
  184. 0, /* sq_concat */
  185. 0, /* sq_repeat */
  186. dirstate_tuple_item, /* sq_item */
  187. 0, /* sq_ass_item */
  188. 0, /* sq_contains */
  189. 0, /* sq_inplace_concat */
  190. 0 /* sq_inplace_repeat */
  191. };
  192. PyTypeObject dirstateTupleType = {
  193. PyVarObject_HEAD_INIT(NULL, 0)
  194. "dirstate_tuple", /* tp_name */
  195. sizeof(dirstateTupleObject),/* tp_basicsize */
  196. 0, /* tp_itemsize */
  197. (destructor)dirstate_tuple_dealloc, /* tp_dealloc */
  198. 0, /* tp_print */
  199. 0, /* tp_getattr */
  200. 0, /* tp_setattr */
  201. 0, /* tp_compare */
  202. 0, /* tp_repr */
  203. 0, /* tp_as_number */
  204. &dirstate_tuple_sq, /* tp_as_sequence */
  205. 0, /* tp_as_mapping */
  206. 0, /* tp_hash */
  207. 0, /* tp_call */
  208. 0, /* tp_str */
  209. 0, /* tp_getattro */
  210. 0, /* tp_setattro */
  211. 0, /* tp_as_buffer */
  212. Py_TPFLAGS_DEFAULT, /* tp_flags */
  213. "dirstate tuple", /* tp_doc */
  214. 0, /* tp_traverse */
  215. 0, /* tp_clear */
  216. 0, /* tp_richcompare */
  217. 0, /* tp_weaklistoffset */
  218. 0, /* tp_iter */
  219. 0, /* tp_iternext */
  220. 0, /* tp_methods */
  221. 0, /* tp_members */
  222. 0, /* tp_getset */
  223. 0, /* tp_base */
  224. 0, /* tp_dict */
  225. 0, /* tp_descr_get */
  226. 0, /* tp_descr_set */
  227. 0, /* tp_dictoffset */
  228. 0, /* tp_init */
  229. 0, /* tp_alloc */
  230. dirstate_tuple_new, /* tp_new */
  231. };
  232. static PyObject *parse_dirstate(PyObject *self, PyObject *args)
  233. {
  234. PyObject *dmap, *cmap, *parents = NULL, *ret = NULL;
  235. PyObject *fname = NULL, *cname = NULL, *entry = NULL;
  236. char state, *cur, *str, *cpos;
  237. int mode, size, mtime;
  238. unsigned int flen;
  239. int len, pos = 40;
  240. if (!PyArg_ParseTuple(args, "O!O!s#:parse_dirstate",
  241. &PyDict_Type, &dmap,
  242. &PyDict_Type, &cmap,
  243. &str, &len))
  244. goto quit;
  245. /* read parents */
  246. if (len < 40)
  247. goto quit;
  248. parents = Py_BuildValue("s#s#", str, 20, str + 20, 20);
  249. if (!parents)
  250. goto quit;
  251. /* read filenames */
  252. while (pos >= 40 && pos < len) {
  253. cur = str + pos;
  254. /* unpack header */
  255. state = *cur;
  256. mode = getbe32(cur + 1);
  257. size = getbe32(cur + 5);
  258. mtime = getbe32(cur + 9);
  259. flen = getbe32(cur + 13);
  260. pos += 17;
  261. cur += 17;
  262. if (flen > len - pos) {
  263. PyErr_SetString(PyExc_ValueError, "overflow in dirstate");
  264. goto quit;
  265. }
  266. entry = (PyObject *)make_dirstate_tuple(state, mode, size,
  267. mtime);
  268. cpos = memchr(cur, 0, flen);
  269. if (cpos) {
  270. fname = PyBytes_FromStringAndSize(cur, cpos - cur);
  271. cname = PyBytes_FromStringAndSize(cpos + 1,
  272. flen - (cpos - cur) - 1);
  273. if (!fname || !cname ||
  274. PyDict_SetItem(cmap, fname, cname) == -1 ||
  275. PyDict_SetItem(dmap, fname, entry) == -1)
  276. goto quit;
  277. Py_DECREF(cname);
  278. } else {
  279. fname = PyBytes_FromStringAndSize(cur, flen);
  280. if (!fname ||
  281. PyDict_SetItem(dmap, fname, entry) == -1)
  282. goto quit;
  283. }
  284. Py_DECREF(fname);
  285. Py_DECREF(entry);
  286. fname = cname = entry = NULL;
  287. pos += flen;
  288. }
  289. ret = parents;
  290. Py_INCREF(ret);
  291. quit:
  292. Py_XDECREF(fname);
  293. Py_XDECREF(cname);
  294. Py_XDECREF(entry);
  295. Py_XDECREF(parents);
  296. return ret;
  297. }
  298. /*
  299. * Efficiently pack a dirstate object into its on-disk format.
  300. */
  301. static PyObject *pack_dirstate(PyObject *self, PyObject *args)
  302. {
  303. PyObject *packobj = NULL;
  304. PyObject *map, *copymap, *pl, *mtime_unset = NULL;
  305. Py_ssize_t nbytes, pos, l;
  306. PyObject *k, *v, *pn;
  307. char *p, *s;
  308. double now;
  309. if (!PyArg_ParseTuple(args, "O!O!Od:pack_dirstate",
  310. &PyDict_Type, &map, &PyDict_Type, &copymap,
  311. &pl, &now))
  312. return NULL;
  313. if (!PySequence_Check(pl) || PySequence_Size(pl) != 2) {
  314. PyErr_SetString(PyExc_TypeError, "expected 2-element sequence");
  315. return NULL;
  316. }
  317. /* Figure out how much we need to allocate. */
  318. for (nbytes = 40, pos = 0; PyDict_Next(map, &pos, &k, &v);) {
  319. PyObject *c;
  320. if (!PyString_Check(k)) {
  321. PyErr_SetString(PyExc_TypeError, "expected string key");
  322. goto bail;
  323. }
  324. nbytes += PyString_GET_SIZE(k) + 17;
  325. c = PyDict_GetItem(copymap, k);
  326. if (c) {
  327. if (!PyString_Check(c)) {
  328. PyErr_SetString(PyExc_TypeError,
  329. "expected string key");
  330. goto bail;
  331. }
  332. nbytes += PyString_GET_SIZE(c) + 1;
  333. }
  334. }
  335. packobj = PyString_FromStringAndSize(NULL, nbytes);
  336. if (packobj == NULL)
  337. goto bail;
  338. p = PyString_AS_STRING(packobj);
  339. pn = PySequence_ITEM(pl, 0);
  340. if (PyString_AsStringAndSize(pn, &s, &l) == -1 || l != 20) {
  341. PyErr_SetString(PyExc_TypeError, "expected a 20-byte hash");
  342. goto bail;
  343. }
  344. memcpy(p, s, l);
  345. p += 20;
  346. pn = PySequence_ITEM(pl, 1);
  347. if (PyString_AsStringAndSize(pn, &s, &l) == -1 || l != 20) {
  348. PyErr_SetString(PyExc_TypeError, "expected a 20-byte hash");
  349. goto bail;
  350. }
  351. memcpy(p, s, l);
  352. p += 20;
  353. for (pos = 0; PyDict_Next(map, &pos, &k, &v); ) {
  354. dirstateTupleObject *tuple;
  355. char state;
  356. uint32_t mode, size, mtime;
  357. Py_ssize_t len, l;
  358. PyObject *o;
  359. char *t;
  360. if (!dirstate_tuple_check(v)) {
  361. PyErr_SetString(PyExc_TypeError,
  362. "expected a dirstate tuple");
  363. goto bail;
  364. }
  365. tuple = (dirstateTupleObject *)v;
  366. state = tuple->state;
  367. mode = tuple->mode;
  368. size = tuple->size;
  369. mtime = tuple->mtime;
  370. if (state == 'n' && mtime == (uint32_t)now) {
  371. /* See pure/parsers.py:pack_dirstate for why we do
  372. * this. */
  373. mtime = -1;
  374. mtime_unset = (PyObject *)make_dirstate_tuple(
  375. state, mode, size, mtime);
  376. if (!mtime_unset)
  377. goto bail;
  378. if (PyDict_SetItem(map, k, mtime_unset) == -1)
  379. goto bail;
  380. Py_DECREF(mtime_unset);
  381. mtime_unset = NULL;
  382. }
  383. *p++ = state;
  384. putbe32(mode, p);
  385. putbe32(size, p + 4);
  386. putbe32(mtime, p + 8);
  387. t = p + 12;
  388. p += 16;
  389. len = PyString_GET_SIZE(k);
  390. memcpy(p, PyString_AS_STRING(k), len);
  391. p += len;
  392. o = PyDict_GetItem(copymap, k);
  393. if (o) {
  394. *p++ = '\0';
  395. l = PyString_GET_SIZE(o);
  396. memcpy(p, PyString_AS_STRING(o), l);
  397. p += l;
  398. len += l + 1;
  399. }
  400. putbe32((uint32_t)len, t);
  401. }
  402. pos = p - PyString_AS_STRING(packobj);
  403. if (pos != nbytes) {
  404. PyErr_Format(PyExc_SystemError, "bad dirstate size: %ld != %ld",
  405. (long)pos, (long)nbytes);
  406. goto bail;
  407. }
  408. return packobj;
  409. bail:
  410. Py_XDECREF(mtime_unset);
  411. Py_XDECREF(packobj);
  412. return NULL;
  413. }
  414. /*
  415. * A base-16 trie for fast node->rev mapping.
  416. *
  417. * Positive value is index of the next node in the trie
  418. * Negative value is a leaf: -(rev + 1)
  419. * Zero is empty
  420. */
  421. typedef struct {
  422. int children[16];
  423. } nodetree;
  424. /*
  425. * This class has two behaviours.
  426. *
  427. * When used in a list-like way (with integer keys), we decode an
  428. * entry in a RevlogNG index file on demand. Our last entry is a
  429. * sentinel, always a nullid. We have limited support for
  430. * integer-keyed insert and delete, only at elements right before the
  431. * sentinel.
  432. *
  433. * With string keys, we lazily perform a reverse mapping from node to
  434. * rev, using a base-16 trie.
  435. */
  436. typedef struct {
  437. PyObject_HEAD
  438. /* Type-specific fields go here. */
  439. PyObject *data; /* raw bytes of index */
  440. PyObject **cache; /* cached tuples */
  441. const char **offsets; /* populated on demand */
  442. Py_ssize_t raw_length; /* original number of elements */
  443. Py_ssize_t length; /* current number of elements */
  444. PyObject *added; /* populated on demand */
  445. PyObject *headrevs; /* cache, invalidated on changes */
  446. nodetree *nt; /* base-16 trie */
  447. int ntlength; /* # nodes in use */
  448. int ntcapacity; /* # nodes allocated */
  449. int ntdepth; /* maximum depth of tree */
  450. int ntsplits; /* # splits performed */
  451. int ntrev; /* last rev scanned */
  452. int ntlookups; /* # lookups */
  453. int ntmisses; /* # lookups that miss the cache */
  454. int inlined;
  455. } indexObject;
  456. static Py_ssize_t index_length(const indexObject *self)
  457. {
  458. if (self->added == NULL)
  459. return self->length;
  460. return self->length + PyList_GET_SIZE(self->added);
  461. }
  462. static PyObject *nullentry;
  463. static const char nullid[20];
  464. static long inline_scan(indexObject *self, const char **offsets);
  465. #if LONG_MAX == 0x7fffffffL
  466. static char *tuple_format = "Kiiiiiis#";
  467. #else
  468. static char *tuple_format = "kiiiiiis#";
  469. #endif
  470. /* A RevlogNG v1 index entry is 64 bytes long. */
  471. static const long v1_hdrsize = 64;
  472. /*
  473. * Return a pointer to the beginning of a RevlogNG record.
  474. */
  475. static const char *index_deref(indexObject *self, Py_ssize_t pos)
  476. {
  477. if (self->inlined && pos > 0) {
  478. if (self->offsets == NULL) {
  479. self->offsets = malloc(self->raw_length *
  480. sizeof(*self->offsets));
  481. if (self->offsets == NULL)
  482. return (const char *)PyErr_NoMemory();
  483. inline_scan(self, self->offsets);
  484. }
  485. return self->offsets[pos];
  486. }
  487. return PyString_AS_STRING(self->data) + pos * v1_hdrsize;
  488. }
  489. /*
  490. * RevlogNG format (all in big endian, data may be inlined):
  491. * 6 bytes: offset
  492. * 2 bytes: flags
  493. * 4 bytes: compressed length
  494. * 4 bytes: uncompressed length
  495. * 4 bytes: base revision
  496. * 4 bytes: link revision
  497. * 4 bytes: parent 1 revision
  498. * 4 bytes: parent 2 revision
  499. * 32 bytes: nodeid (only 20 bytes used)
  500. */
  501. static PyObject *index_get(indexObject *self, Py_ssize_t pos)
  502. {
  503. uint64_t offset_flags;
  504. int comp_len, uncomp_len, base_rev, link_rev, parent_1, parent_2;
  505. const char *c_node_id;
  506. const char *data;
  507. Py_ssize_t length = index_length(self);
  508. PyObject *entry;
  509. if (pos < 0)
  510. pos += length;
  511. if (pos < 0 || pos >= length) {
  512. PyErr_SetString(PyExc_IndexError, "revlog index out of range");
  513. return NULL;
  514. }
  515. if (pos == length - 1) {
  516. Py_INCREF(nullentry);
  517. return nullentry;
  518. }
  519. if (pos >= self->length - 1) {
  520. PyObject *obj;
  521. obj = PyList_GET_ITEM(self->added, pos - self->length + 1);
  522. Py_INCREF(obj);
  523. return obj;
  524. }
  525. if (self->cache) {
  526. if (self->cache[pos]) {
  527. Py_INCREF(self->cache[pos]);
  528. return self->cache[pos];
  529. }
  530. } else {
  531. self->cache = calloc(self->raw_length, sizeof(PyObject *));
  532. if (self->cache == NULL)
  533. return PyErr_NoMemory();
  534. }
  535. data = index_deref(self, pos);
  536. if (data == NULL)
  537. return NULL;
  538. offset_flags = getbe32(data + 4);
  539. if (pos == 0) /* mask out version number for the first entry */
  540. offset_flags &= 0xFFFF;
  541. else {
  542. uint32_t offset_high = getbe32(data);
  543. offset_flags |= ((uint64_t)offset_high) << 32;
  544. }
  545. comp_len = getbe32(data + 8);
  546. uncomp_len = getbe32(data + 12);
  547. base_rev = getbe32(data + 16);
  548. link_rev = getbe32(data + 20);
  549. parent_1 = getbe32(data + 24);
  550. parent_2 = getbe32(data + 28);
  551. c_node_id = data + 32;
  552. entry = Py_BuildValue(tuple_format, offset_flags, comp_len,
  553. uncomp_len, base_rev, link_rev,
  554. parent_1, parent_2, c_node_id, 20);
  555. if (entry) {
  556. PyObject_GC_UnTrack(entry);
  557. Py_INCREF(entry);
  558. }
  559. self->cache[pos] = entry;
  560. return entry;
  561. }
  562. /*
  563. * Return the 20-byte SHA of the node corresponding to the given rev.
  564. */
  565. static const char *index_node(indexObject *self, Py_ssize_t pos)
  566. {
  567. Py_ssize_t length = index_length(self);
  568. const char *data;
  569. if (pos == length - 1 || pos == INT_MAX)
  570. return nullid;
  571. if (pos >= length)
  572. return NULL;
  573. if (pos >= self->length - 1) {
  574. PyObject *tuple, *str;
  575. tuple = PyList_GET_ITEM(self->added, pos - self->length + 1);
  576. str = PyTuple_GetItem(tuple, 7);
  577. return str ? PyString_AS_STRING(str) : NULL;
  578. }
  579. data = index_deref(self, pos);
  580. return data ? data + 32 : NULL;
  581. }
  582. static int nt_insert(indexObject *self, const char *node, int rev);
  583. static int node_check(PyObject *obj, char **node, Py_ssize_t *nodelen)
  584. {
  585. if (PyString_AsStringAndSize(obj, node, nodelen) == -1)
  586. return -1;
  587. if (*nodelen == 20)
  588. return 0;
  589. PyErr_SetString(PyExc_ValueError, "20-byte hash required");
  590. return -1;
  591. }
  592. static PyObject *index_insert(indexObject *self, PyObject *args)
  593. {
  594. PyObject *obj;
  595. char *node;
  596. long offset;
  597. Py_ssize_t len, nodelen;
  598. if (!PyArg_ParseTuple(args, "lO", &offset, &obj))
  599. return NULL;
  600. if (!PyTuple_Check(obj) || PyTuple_GET_SIZE(obj) != 8) {
  601. PyErr_SetString(PyExc_TypeError, "8-tuple required");
  602. return NULL;
  603. }
  604. if (node_check(PyTuple_GET_ITEM(obj, 7), &node, &nodelen) == -1)
  605. return NULL;
  606. len = index_length(self);
  607. if (offset < 0)
  608. offset += len;
  609. if (offset != len - 1) {
  610. PyErr_SetString(PyExc_IndexError,
  611. "insert only supported at index -1");
  612. return NULL;
  613. }
  614. if (offset > INT_MAX) {
  615. PyErr_SetString(PyExc_ValueError,
  616. "currently only 2**31 revs supported");
  617. return NULL;
  618. }
  619. if (self->added == NULL) {
  620. self->added = PyList_New(0);
  621. if (self->added == NULL)
  622. return NULL;
  623. }
  624. if (PyList_Append(self->added, obj) == -1)
  625. return NULL;
  626. if (self->nt)
  627. nt_insert(self, node, (int)offset);
  628. Py_CLEAR(self->headrevs);
  629. Py_RETURN_NONE;
  630. }
  631. static void _index_clearcaches(indexObject *self)
  632. {
  633. if (self->cache) {
  634. Py_ssize_t i;
  635. for (i = 0; i < self->raw_length; i++)
  636. Py_CLEAR(self->cache[i]);
  637. free(self->cache);
  638. self->cache = NULL;
  639. }
  640. if (self->offsets) {
  641. free(self->offsets);
  642. self->offsets = NULL;
  643. }
  644. if (self->nt) {
  645. free(self->nt);
  646. self->nt = NULL;
  647. }
  648. Py_CLEAR(self->headrevs);
  649. }
  650. static PyObject *index_clearcaches(indexObject *self)
  651. {
  652. _index_clearcaches(self);
  653. self->ntlength = self->ntcapacity = 0;
  654. self->ntdepth = self->ntsplits = 0;
  655. self->ntrev = -1;
  656. self->ntlookups = self->ntmisses = 0;
  657. Py_RETURN_NONE;
  658. }
  659. static PyObject *index_stats(indexObject *self)
  660. {
  661. PyObject *obj = PyDict_New();
  662. if (obj == NULL)
  663. return NULL;
  664. #define istat(__n, __d) \
  665. if (PyDict_SetItemString(obj, __d, PyInt_FromSsize_t(self->__n)) == -1) \
  666. goto bail;
  667. if (self->added) {
  668. Py_ssize_t len = PyList_GET_SIZE(self->added);
  669. if (PyDict_SetItemString(obj, "index entries added",
  670. PyInt_FromSsize_t(len)) == -1)
  671. goto bail;
  672. }
  673. if (self->raw_length != self->length - 1)
  674. istat(raw_length, "revs on disk");
  675. istat(length, "revs in memory");
  676. istat(ntcapacity, "node trie capacity");
  677. istat(ntdepth, "node trie depth");
  678. istat(ntlength, "node trie count");
  679. istat(ntlookups, "node trie lookups");
  680. istat(ntmisses, "node trie misses");
  681. istat(ntrev, "node trie last rev scanned");
  682. istat(ntsplits, "node trie splits");
  683. #undef istat
  684. return obj;
  685. bail:
  686. Py_XDECREF(obj);
  687. return NULL;
  688. }
  689. /*
  690. * When we cache a list, we want to be sure the caller can't mutate
  691. * the cached copy.
  692. */
  693. static PyObject *list_copy(PyObject *list)
  694. {
  695. Py_ssize_t len = PyList_GET_SIZE(list);
  696. PyObject *newlist = PyList_New(len);
  697. Py_ssize_t i;
  698. if (newlist == NULL)
  699. return NULL;
  700. for (i = 0; i < len; i++) {
  701. PyObject *obj = PyList_GET_ITEM(list, i);
  702. Py_INCREF(obj);
  703. PyList_SET_ITEM(newlist, i, obj);
  704. }
  705. return newlist;
  706. }
  707. static PyObject *index_headrevs(indexObject *self)
  708. {
  709. Py_ssize_t i, len, addlen;
  710. char *nothead = NULL;
  711. PyObject *heads;
  712. if (self->headrevs)
  713. return list_copy(self->headrevs);
  714. len = index_length(self) - 1;
  715. heads = PyList_New(0);
  716. if (heads == NULL)
  717. goto bail;
  718. if (len == 0) {
  719. PyObject *nullid = PyInt_FromLong(-1);
  720. if (nullid == NULL || PyList_Append(heads, nullid) == -1) {
  721. Py_XDECREF(nullid);
  722. goto bail;
  723. }
  724. goto done;
  725. }
  726. nothead = calloc(len, 1);
  727. if (nothead == NULL)
  728. goto bail;
  729. for (i = 0; i < self->raw_length; i++) {
  730. const char *data = index_deref(self, i);
  731. int parent_1 = getbe32(data + 24);
  732. int parent_2 = getbe32(data + 28);
  733. if (parent_1 >= 0)
  734. nothead[parent_1] = 1;
  735. if (parent_2 >= 0)
  736. nothead[parent_2] = 1;
  737. }
  738. addlen = self->added ? PyList_GET_SIZE(self->added) : 0;
  739. for (i = 0; i < addlen; i++) {
  740. PyObject *rev = PyList_GET_ITEM(self->added, i);
  741. PyObject *p1 = PyTuple_GET_ITEM(rev, 5);
  742. PyObject *p2 = PyTuple_GET_ITEM(rev, 6);
  743. long parent_1, parent_2;
  744. if (!PyInt_Check(p1) || !PyInt_Check(p2)) {
  745. PyErr_SetString(PyExc_TypeError,
  746. "revlog parents are invalid");
  747. goto bail;
  748. }
  749. parent_1 = PyInt_AS_LONG(p1);
  750. parent_2 = PyInt_AS_LONG(p2);
  751. if (parent_1 >= 0)
  752. nothead[parent_1] = 1;
  753. if (parent_2 >= 0)
  754. nothead[parent_2] = 1;
  755. }
  756. for (i = 0; i < len; i++) {
  757. PyObject *head;
  758. if (nothead[i])
  759. continue;
  760. head = PyInt_FromLong(i);
  761. if (head == NULL || PyList_Append(heads, head) == -1) {
  762. Py_XDECREF(head);
  763. goto bail;
  764. }
  765. }
  766. done:
  767. self->headrevs = heads;
  768. free(nothead);
  769. return list_copy(self->headrevs);
  770. bail:
  771. Py_XDECREF(heads);
  772. free(nothead);
  773. return NULL;
  774. }
  775. static inline int nt_level(const char *node, Py_ssize_t level)
  776. {
  777. int v = node[level>>1];
  778. if (!(level & 1))
  779. v >>= 4;
  780. return v & 0xf;
  781. }
  782. /*
  783. * Return values:
  784. *
  785. * -4: match is ambiguous (multiple candidates)
  786. * -2: not found
  787. * rest: valid rev
  788. */
  789. static int nt_find(indexObject *self, const char *node, Py_ssize_t nodelen,
  790. int hex)
  791. {
  792. int (*getnybble)(const char *, Py_ssize_t) = hex ? hexdigit : nt_level;
  793. int level, maxlevel, off;
  794. if (nodelen == 20 && node[0] == '\0' && memcmp(node, nullid, 20) == 0)
  795. return -1;
  796. if (self->nt == NULL)
  797. return -2;
  798. if (hex)
  799. maxlevel = nodelen > 40 ? 40 : (int)nodelen;
  800. else
  801. maxlevel = nodelen > 20 ? 40 : ((int)nodelen * 2);
  802. for (level = off = 0; level < maxlevel; level++) {
  803. int k = getnybble(node, level);
  804. nodetree *n = &self->nt[off];
  805. int v = n->children[k];
  806. if (v < 0) {
  807. const char *n;
  808. Py_ssize_t i;
  809. v = -v - 1;
  810. n = index_node(self, v);
  811. if (n == NULL)
  812. return -2;
  813. for (i = level; i < maxlevel; i++)
  814. if (getnybble(node, i) != nt_level(n, i))
  815. return -2;
  816. return v;
  817. }
  818. if (v == 0)
  819. return -2;
  820. off = v;
  821. }
  822. /* multiple matches against an ambiguous prefix */
  823. return -4;
  824. }
  825. static int nt_new(indexObject *self)
  826. {
  827. if (self->ntlength == self->ntcapacity) {
  828. self->ntcapacity *= 2;
  829. self->nt = realloc(self->nt,
  830. self->ntcapacity * sizeof(nodetree));
  831. if (self->nt == NULL) {
  832. PyErr_SetString(PyExc_MemoryError, "out of memory");
  833. return -1;
  834. }
  835. memset(&self->nt[self->ntlength], 0,
  836. sizeof(nodetree) * (self->ntcapacity - self->ntlength));
  837. }
  838. return self->ntlength++;
  839. }
  840. static int nt_insert(indexObject *self, const char *node, int rev)
  841. {
  842. int level = 0;
  843. int off = 0;
  844. while (level < 40) {
  845. int k = nt_level(node, level);
  846. nodetree *n;
  847. int v;
  848. n = &self->nt[off];
  849. v = n->children[k];
  850. if (v == 0) {
  851. n->children[k] = -rev - 1;
  852. return 0;
  853. }
  854. if (v < 0) {
  855. const char *oldnode = index_node(self, -v - 1);
  856. int noff;
  857. if (!oldnode || !memcmp(oldnode, node, 20)) {
  858. n->children[k] = -rev - 1;
  859. return 0;
  860. }
  861. noff = nt_new(self);
  862. if (noff == -1)
  863. return -1;
  864. /* self->nt may have been changed by realloc */
  865. self->nt[off].children[k] = noff;
  866. off = noff;
  867. n = &self->nt[off];
  868. n->children[nt_level(oldnode, ++level)] = v;
  869. if (level > self->ntdepth)
  870. self->ntdepth = level;
  871. self->ntsplits += 1;
  872. } else {
  873. level += 1;
  874. off = v;
  875. }
  876. }
  877. return -1;
  878. }
  879. static int nt_init(indexObject *self)
  880. {
  881. if (self->nt == NULL) {
  882. if (self->raw_length > INT_MAX) {
  883. PyErr_SetString(PyExc_ValueError, "overflow in nt_init");
  884. return -1;
  885. }
  886. self->ntcapacity = self->raw_length < 4
  887. ? 4 : (int)self->raw_length / 2;
  888. self->nt = calloc(self->ntcapacity, sizeof(nodetree));
  889. if (self->nt == NULL) {
  890. PyErr_NoMemory();
  891. return -1;
  892. }
  893. self->ntlength = 1;
  894. self->ntrev = (int)index_length(self) - 1;
  895. self->ntlookups = 1;
  896. self->ntmisses = 0;
  897. if (nt_insert(self, nullid, INT_MAX) == -1)
  898. return -1;
  899. }
  900. return 0;
  901. }
  902. /*
  903. * Return values:
  904. *
  905. * -3: error (exception set)
  906. * -2: not found (no exception set)
  907. * rest: valid rev
  908. */
  909. static int index_find_node(indexObject *self,
  910. const char *node, Py_ssize_t nodelen)
  911. {
  912. int rev;
  913. self->ntlookups++;
  914. rev = nt_find(self, node, nodelen, 0);
  915. if (rev >= -1)
  916. return rev;
  917. if (nt_init(self) == -1)
  918. return -3;
  919. /*
  920. * For the first handful of lookups, we scan the entire index,
  921. * and cache only the matching nodes. This optimizes for cases
  922. * like "hg tip", where only a few nodes are accessed.
  923. *
  924. * After that, we cache every node we visit, using a single
  925. * scan amortized over multiple lookups. This gives the best
  926. * bulk performance, e.g. for "hg log".
  927. */
  928. if (self->ntmisses++ < 4) {
  929. for (rev = self->ntrev - 1; rev >= 0; rev--) {
  930. const char *n = index_node(self, rev);
  931. if (n == NULL)
  932. return -2;
  933. if (memcmp(node, n, nodelen > 20 ? 20 : nodelen) == 0) {
  934. if (nt_insert(self, n, rev) == -1)
  935. return -3;
  936. break;
  937. }
  938. }
  939. } else {
  940. for (rev = self->ntrev - 1; rev >= 0; rev--) {
  941. const char *n = index_node(self, rev);
  942. if (n == NULL) {
  943. self->ntrev = rev + 1;
  944. return -2;
  945. }
  946. if (nt_insert(self, n, rev) == -1) {
  947. self->ntrev = rev + 1;
  948. return -3;
  949. }
  950. if (memcmp(node, n, nodelen > 20 ? 20 : nodelen) == 0) {
  951. break;
  952. }
  953. }
  954. self->ntrev = rev;
  955. }
  956. if (rev >= 0)
  957. return rev;
  958. return -2;
  959. }
  960. static PyObject *raise_revlog_error(void)
  961. {
  962. static PyObject *errclass;
  963. PyObject *mod = NULL, *errobj;
  964. if (errclass == NULL) {
  965. PyObject *dict;
  966. mod = PyImport_ImportModule("mercurial.error");
  967. if (mod == NULL)
  968. goto classfail;
  969. dict = PyModule_GetDict(mod);
  970. if (dict == NULL)
  971. goto classfail;
  972. errclass = PyDict_GetItemString(dict, "RevlogError");
  973. if (errclass == NULL) {
  974. PyErr_SetString(PyExc_SystemError,
  975. "could not find RevlogError");
  976. goto classfail;
  977. }
  978. Py_INCREF(errclass);
  979. }
  980. errobj = PyObject_CallFunction(errclass, NULL);
  981. if (errobj == NULL)
  982. return NULL;
  983. PyErr_SetObject(errclass, errobj);
  984. return errobj;
  985. classfail:
  986. Py_XDECREF(mod);
  987. return NULL;
  988. }
  989. static PyObject *index_getitem(indexObject *self, PyObject *value)
  990. {
  991. char *node;
  992. Py_ssize_t nodelen;
  993. int rev;
  994. if (PyInt_Check(value))
  995. return index_get(self, PyInt_AS_LONG(value));
  996. if (node_check(value, &node, &nodelen) == -1)
  997. return NULL;
  998. rev = index_find_node(self, node, nodelen);
  999. if (rev >= -1)
  1000. return PyInt_FromLong(rev);
  1001. if (rev == -2)
  1002. raise_revlog_error();
  1003. return NULL;
  1004. }
  1005. static int nt_partialmatch(indexObject *self, const char *node,
  1006. Py_ssize_t nodelen)
  1007. {
  1008. int rev;
  1009. if (nt_init(self) == -1)
  1010. return -3;
  1011. if (self->ntrev > 0) {
  1012. /* ensure that the radix tree is fully populated */
  1013. for (rev = self->ntrev - 1; rev >= 0; rev--) {
  1014. const char *n = index_node(self, rev);
  1015. if (n == NULL)
  1016. return -2;
  1017. if (nt_insert(self, n, rev) == -1)
  1018. return -3;
  1019. }
  1020. self->ntrev = rev;
  1021. }
  1022. return nt_find(self, node, nodelen, 1);
  1023. }
  1024. static PyObject *index_partialmatch(indexObject *self, PyObject *args)
  1025. {
  1026. const char *fullnode;
  1027. int nodelen;
  1028. char *node;
  1029. int rev, i;
  1030. if (!PyArg_ParseTuple(args, "s#", &node, &nodelen))
  1031. return NULL;
  1032. if (nodelen < 4) {
  1033. PyErr_SetString(PyExc_ValueError, "key too short");
  1034. return NULL;
  1035. }
  1036. if (nodelen > 40) {
  1037. PyErr_SetString(PyExc_ValueError, "key too long");
  1038. return NULL;
  1039. }
  1040. for (i = 0; i < nodelen; i++)
  1041. hexdigit(node, i);
  1042. if (PyErr_Occurred()) {
  1043. /* input contains non-hex characters */
  1044. PyErr_Clear();
  1045. Py_RETURN_NONE;
  1046. }
  1047. rev = nt_partialmatch(self, node, nodelen);
  1048. switch (rev) {
  1049. case -4:
  1050. raise_revlog_error();
  1051. case -3:
  1052. return NULL;
  1053. case -2:
  1054. Py_RETURN_NONE;
  1055. case -1:
  1056. return PyString_FromStringAndSize(nullid, 20);
  1057. }
  1058. fullnode = index_node(self, rev);
  1059. if (fullnode == NULL) {
  1060. PyErr_Format(PyExc_IndexError,
  1061. "could not access rev %d", rev);
  1062. return NULL;
  1063. }
  1064. return PyString_FromStringAndSize(fullnode, 20);
  1065. }
  1066. static PyObject *index_m_get(indexObject *self, PyObject *args)
  1067. {
  1068. Py_ssize_t nodelen;
  1069. PyObject *val;
  1070. char *node;
  1071. int rev;
  1072. if (!PyArg_ParseTuple(args, "O", &val))
  1073. return NULL;
  1074. if (node_check(val, &node, &nodelen) == -1)
  1075. return NULL;
  1076. rev = index_find_node(self, node, nodelen);
  1077. if (rev == -3)
  1078. return NULL;
  1079. if (rev == -2)
  1080. Py_RETURN_NONE;
  1081. return PyInt_FromLong(rev);
  1082. }
  1083. static int index_contains(indexObject *self, PyObject *value)
  1084. {
  1085. char *node;
  1086. Py_ssize_t nodelen;
  1087. if (PyInt_Check(value)) {
  1088. long rev = PyInt_AS_LONG(value);
  1089. return rev >= -1 && rev < index_length(self);
  1090. }
  1091. if (node_check(value, &node, &nodelen) == -1)
  1092. return -1;
  1093. switch (index_find_node(self, node, nodelen)) {
  1094. case -3:
  1095. return -1;
  1096. case -2:
  1097. return 0;
  1098. default:
  1099. return 1;
  1100. }
  1101. }
  1102. static inline void index_get_parents(indexObject *self, int rev, int *ps)
  1103. {
  1104. if (rev >= self->length - 1) {
  1105. PyObject *tuple = PyList_GET_ITEM(self->added,
  1106. rev - self->length + 1);
  1107. ps[0] = (int)PyInt_AS_LONG(PyTuple_GET_ITEM(tuple, 5));
  1108. ps[1] = (int)PyInt_AS_LONG(PyTuple_GET_ITEM(tuple, 6));
  1109. } else {
  1110. const char *data = index_deref(self, rev);
  1111. ps[0] = getbe32(data + 24);
  1112. ps[1] = getbe32(data + 28);
  1113. }
  1114. }
  1115. typedef uint64_t bitmask;
  1116. /*
  1117. * Given a disjoint set of revs, return all candidates for the
  1118. * greatest common ancestor. In revset notation, this is the set
  1119. * "heads(::a and ::b and ...)"
  1120. */
  1121. static PyObject *find_gca_candidates(indexObject *self, const int *revs,
  1122. int revcount)
  1123. {
  1124. const bitmask allseen = (1ull << revcount) - 1;
  1125. const bitmask poison = 1ull << revcount;
  1126. PyObject *gca = PyList_New(0);
  1127. int i, v, interesting;
  1128. int maxrev = -1;
  1129. long sp;
  1130. bitmask *seen;
  1131. if (gca == NULL)
  1132. return PyErr_NoMemory();
  1133. for (i = 0; i < revcount; i++) {
  1134. if (revs[i] > maxrev)
  1135. maxrev = revs[i];
  1136. }
  1137. seen = calloc(sizeof(*seen), maxrev + 1);
  1138. if (seen == NULL) {
  1139. Py_DECREF(gca);
  1140. return PyErr_NoMemory();
  1141. }
  1142. for (i = 0; i < revcount; i++)
  1143. seen[revs[i]] = 1ull << i;
  1144. interesting = revcount;
  1145. for (v = maxrev; v >= 0 && interesting; v--) {
  1146. long sv = seen[v];
  1147. int parents[2];
  1148. if (!sv)
  1149. continue;
  1150. if (sv < poison) {
  1151. interesting -= 1;
  1152. if (sv == allseen) {
  1153. PyObject *obj = PyInt_FromLong(v);
  1154. if (obj == NULL)
  1155. goto bail;
  1156. if (PyList_Append(gca, obj) == -1) {
  1157. Py_DECREF(obj);
  1158. goto bail;
  1159. }
  1160. sv |= poison;
  1161. for (i = 0; i < revcount; i++) {
  1162. if (revs[i] == v)
  1163. goto done;
  1164. }
  1165. }
  1166. }
  1167. index_get_parents(self, v, parents);
  1168. for (i = 0; i < 2; i++) {
  1169. int p = parents[i];
  1170. if (p == -1)
  1171. continue;
  1172. sp = seen[p];
  1173. if (sv < poison) {
  1174. if (sp == 0) {
  1175. seen[p] = sv;
  1176. interesting++;
  1177. }
  1178. else if (sp != sv)
  1179. seen[p] |= sv;
  1180. } else {
  1181. if (sp && sp < poison)
  1182. interesting--;
  1183. seen[p] = sv;
  1184. }
  1185. }
  1186. }
  1187. done:
  1188. free(seen);
  1189. return gca;
  1190. bail:
  1191. free(seen);
  1192. Py_XDECREF(gca);
  1193. return NULL;
  1194. }
  1195. /*
  1196. * Given a disjoint set of revs, return the subset with the longest
  1197. * path to the root.
  1198. */
  1199. static PyObject *find_deepest(indexObject *self, PyObject *revs)
  1200. {
  1201. const Py_ssize_t revcount = PyList_GET_SIZE(revs);
  1202. static const Py_ssize_t capacity = 24;
  1203. int *depth, *interesting = NULL;
  1204. int i, j, v, ninteresting;
  1205. PyObject *dict = NULL, *keys = NULL;
  1206. long *seen = NULL;
  1207. int maxrev = -1;
  1208. long final;
  1209. if (revcount > capacity) {
  1210. PyErr_Format(PyExc_OverflowError,
  1211. "bitset size (%ld) > capacity (%ld)",
  1212. (long)revcount, (long)capacity);
  1213. return NULL;
  1214. }
  1215. for (i = 0; i < revcount; i++) {
  1216. int n = (int)PyInt_AsLong(PyList_GET_ITEM(revs, i));
  1217. if (n > maxrev)
  1218. maxrev = n;
  1219. }
  1220. depth = calloc(sizeof(*depth), maxrev + 1);
  1221. if (depth == NULL)
  1222. return PyErr_NoMemory();
  1223. seen = calloc(sizeof(*seen), maxrev + 1);
  1224. if (seen == NULL) {
  1225. PyErr_NoMemory();
  1226. goto bail;
  1227. }
  1228. interesting = calloc(sizeof(*interesting), 2 << revcount);
  1229. if (interesting == NULL) {
  1230. PyErr_NoMemory();
  1231. goto bail;
  1232. }
  1233. if (PyList_Sort(revs) == -1)
  1234. goto bail;
  1235. for (i = 0; i < revcount; i++) {
  1236. int n = (int)PyInt_AsLong(PyList_GET_ITEM(revs, i));
  1237. long b = 1l << i;
  1238. depth[n] = 1;
  1239. seen[n] = b;
  1240. interesting[b] = 1;
  1241. }
  1242. ninteresting = (int)revcount;
  1243. for (v = maxrev; v >= 0 && ninteresting > 1; v--) {
  1244. int dv = depth[v];
  1245. int parents[2];
  1246. long sv;
  1247. if (dv == 0)
  1248. continue;
  1249. sv = seen[v];
  1250. index_get_parents(self, v, parents);
  1251. for (i = 0; i < 2; i++) {
  1252. int p = parents[i];
  1253. long nsp, sp;
  1254. int dp;
  1255. if (p == -1)
  1256. continue;
  1257. dp = depth[p];
  1258. nsp = sp = seen[p];
  1259. if (dp <= dv) {
  1260. depth[p] = dv + 1;
  1261. if (sp != sv) {
  1262. interesting[sv] += 1;
  1263. nsp = seen[p] = sv;
  1264. if (sp) {
  1265. interesting[sp] -= 1;
  1266. if (interesting[sp] == 0)
  1267. ninteresting -= 1;
  1268. }
  1269. }
  1270. }
  1271. else if (dv == dp - 1) {
  1272. nsp = sp | sv;
  1273. if (nsp == sp)
  1274. continue;
  1275. seen[p] = nsp;
  1276. interesting[sp] -= 1;
  1277. if (interesting[sp] == 0 && interesting[nsp] > 0)
  1278. ninteresting -= 1;
  1279. interesting[nsp] += 1;
  1280. }
  1281. }
  1282. interesting[sv] -= 1;
  1283. if (interesting[sv] == 0)
  1284. ninteresting -= 1;
  1285. }
  1286. final = 0;
  1287. j = ninteresting;
  1288. for (i = 0; i < (int)(2 << revcount) && j > 0; i++) {
  1289. if (interesting[i] == 0)
  1290. continue;
  1291. final |= i;
  1292. j -= 1;
  1293. }
  1294. if (final == 0) {
  1295. keys = PyList_New(0);
  1296. goto bail;
  1297. }
  1298. dict = PyDict_New();
  1299. if (dict == NULL)
  1300. goto bail;
  1301. for (i = 0; i < revcount; i++) {
  1302. PyObject *key;
  1303. if ((final & (1 << i)) == 0)
  1304. continue;
  1305. key = PyList_GET_ITEM(revs, i);
  1306. Py_INCREF(key);
  1307. Py_INCREF(Py_None);
  1308. if (PyDict_SetItem(dict, key, Py_None) == -1) {
  1309. Py_DECREF(key);
  1310. Py_DECREF(Py_None);
  1311. goto bail;
  1312. }
  1313. }
  1314. keys = PyDict_Keys(dict);
  1315. bail:
  1316. free(depth);
  1317. free(seen);
  1318. free(interesting);
  1319. Py_XDECREF(dict);
  1320. return keys;
  1321. }
  1322. /*
  1323. * Given a (possibly overlapping) set of revs, return the greatest
  1324. * common ancestors: those with the longest path to the root.
  1325. */
  1326. static PyObject *index_ancestors(indexObject *self, PyObject *args)
  1327. {
  1328. PyObject *ret = NULL, *gca = NULL;
  1329. Py_ssize_t argcount, i, len;
  1330. bitmask repeat = 0;
  1331. int revcount = 0;
  1332. int *revs;
  1333. argcount = PySequence_Length(args);
  1334. revs = malloc(argcount * sizeof(*revs));
  1335. if (argcount > 0 && revs == NULL)
  1336. return PyErr_NoMemory();
  1337. len = index_length(self) - 1;
  1338. for (i = 0; i < argcount; i++) {
  1339. static const int capacity = 24;
  1340. PyObject *obj = PySequence_GetItem(args, i);
  1341. bitmask x;
  1342. long val;
  1343. if (!PyInt_Check(obj)) {
  1344. PyErr_SetString(PyExc_TypeError,
  1345. "arguments must all be ints");
  1346. goto bail;
  1347. }
  1348. val = PyInt_AsLong(obj);
  1349. if (val == -1) {
  1350. ret = PyList_New(0);
  1351. goto done;
  1352. }
  1353. if (val < 0 || val >= len) {
  1354. PyErr_SetString(PyExc_IndexError,
  1355. "index out of range");
  1356. goto bail;
  1357. }
  1358. /* this cheesy bloom filter lets us avoid some more
  1359. * expensive duplicate checks in the common set-is-disjoint
  1360. * case */
  1361. x = 1ull << (val & 0x3f);
  1362. if (repeat & x) {
  1363. int k;
  1364. for (k = 0; k < revcount; k++) {
  1365. if (val == revs[k])
  1366. goto duplicate;
  1367. }
  1368. }
  1369. else repeat |= x;
  1370. if (revcount >= capacity) {
  1371. PyErr_Format(PyExc_OverflowError,
  1372. "bitset size (%d) > capacity (%d)",
  1373. revcount, capacity);
  1374. goto bail;
  1375. }
  1376. revs[revcount++] = (int)val;
  1377. duplicate:;
  1378. }
  1379. if (revcount == 0) {
  1380. ret = PyList_New(0);
  1381. goto done;
  1382. }
  1383. if (revcount == 1) {
  1384. PyObject *obj;
  1385. ret = PyList_New(1);
  1386. if (ret == NULL)
  1387. goto bail;
  1388. obj = PyInt_FromLong(revs[0]);
  1389. if (obj == NULL)
  1390. goto bail;
  1391. PyList_SET_ITEM(ret, 0, obj);
  1392. goto done;
  1393. }
  1394. gca = find_gca_candidates(self, revs, revcount);
  1395. if (gca == NULL)
  1396. goto bail;
  1397. if (PyList_GET_SIZE(gca) <= 1) {
  1398. ret = gca;
  1399. Py_INCREF(gca);
  1400. }
  1401. else ret = find_deepest(self, gca);
  1402. done:
  1403. free(revs);
  1404. Py_XDECREF(gca);
  1405. return ret;
  1406. bail:
  1407. free(revs);
  1408. Py_XDECREF(gca);
  1409. Py_XDECREF(ret);
  1410. return NULL;
  1411. }
  1412. /*
  1413. * Given a (possibly overlapping) set of revs, return all the
  1414. * common ancestors heads: heads(::args[0] and ::a[1] and ...)
  1415. */
  1416. static PyObject *index_commonancestorsheads(indexObject *self, PyObject *args)
  1417. {
  1418. PyObject *ret = NULL;
  1419. Py_ssize_t argcount, i, len;
  1420. bitmask repeat = 0;
  1421. int revcount = 0;
  1422. int *revs;
  1423. argcount = PySequence_Length(args);
  1424. revs = malloc(argcount * sizeof(*revs));
  1425. if (argcount > 0 && revs == NULL)
  1426. return PyErr_NoMemory();
  1427. len = index_length(self) - 1;
  1428. for (i = 0; i < argcount; i++) {
  1429. static const int capacity = 24;
  1430. PyObject *obj = PySequence_GetItem(args, i);
  1431. bitmask x;
  1432. long val;
  1433. if (!PyInt_Check(obj)) {
  1434. PyErr_SetString(PyExc_TypeError,
  1435. "arguments must all be ints");
  1436. goto bail;
  1437. }
  1438. val = PyInt_AsLong(obj);
  1439. if (val == -1) {
  1440. ret = PyList_New(0);
  1441. goto done;
  1442. }
  1443. if (val < 0 || val >= len) {
  1444. PyErr_SetString(PyExc_IndexError,
  1445. "index out of range");
  1446. goto bail;
  1447. }
  1448. /* this cheesy bloom filter lets us avoid some more
  1449. * expensive duplicate checks in the common set-is-disjoint
  1450. * case */
  1451. x = 1ull << (val & 0x3f);
  1452. if (repeat & x) {
  1453. int k;
  1454. for (k = 0; k < revcount; k++) {
  1455. if (val == revs[k])
  1456. goto duplicate;
  1457. }
  1458. }
  1459. else repeat |= x;
  1460. if (revcount >= capacity) {
  1461. PyErr_Format(PyExc_OverflowError,
  1462. "bitset size (%d) > capacity (%d)",
  1463. revcount, capacity);
  1464. goto bail;
  1465. }
  1466. revs[revcount++] = (int)val;
  1467. duplicate:;
  1468. }
  1469. if (revcount == 0) {
  1470. ret = PyList_New(0);
  1471. goto done;
  1472. }
  1473. if (revcount == 1) {
  1474. PyObject *obj;
  1475. ret = PyList_New(1);
  1476. if (ret == NULL)
  1477. goto bail;
  1478. obj = PyInt_FromLong(revs[0]);
  1479. if (obj == NULL)
  1480. goto bail;
  1481. PyList_SET_ITEM(ret, 0, obj);
  1482. goto done;
  1483. }
  1484. ret = find_gca_candidates(self, revs, revcount);
  1485. if (ret == NULL)
  1486. goto bail;
  1487. done:
  1488. free(revs);
  1489. return ret;
  1490. bail:
  1491. free(revs);
  1492. Py_XDECREF(ret);
  1493. return NULL;
  1494. }
  1495. /*
  1496. * Invalidate any trie entries introduced by added revs.
  1497. */
  1498. static void nt_invalidate_added(indexObject *self, Py_ssize_t start)
  1499. {
  1500. Py_ssize_t i, len = PyList_GET_SIZE(self->added);
  1501. for (i = start; i < len; i++) {
  1502. PyObject *tuple = PyList_GET_ITEM(self->added, i);
  1503. PyObject *node = PyTuple_GET_ITEM(tuple, 7);
  1504. nt_insert(self, PyString_AS_STRING(node), -1);
  1505. }
  1506. if (start == 0)
  1507. Py_CLEAR(self->added);
  1508. }
  1509. /*
  1510. * Delete a numeric range of revs, which must be at the end of the
  1511. * range, but exclude the sentinel nullid entry.
  1512. */
  1513. static int index_slice_del(indexObject *self, PyObject *item)
  1514. {
  1515. Py_ssize_t start, stop, step, slicelength;
  1516. Py_ssize_t length = index_length(self);
  1517. int ret = 0;
  1518. if (PySlice_GetIndicesEx((PySliceObject*)item, length,
  1519. &start, &stop, &step, &slicelength) < 0)
  1520. return -1;
  1521. if (slicelength <= 0)
  1522. return 0;
  1523. if ((step < 0 && start < stop) || (step > 0 && start > stop))
  1524. stop = start;
  1525. if (step < 0) {
  1526. stop = start + 1;
  1527. start = stop + step*(slicelength - 1) - 1;
  1528. step = -step;
  1529. }
  1530. if (step != 1) {
  1531. PyErr_SetString(PyExc_ValueError,
  1532. "revlog index delete requires step size of 1");
  1533. return -1;
  1534. }
  1535. if (stop != length - 1) {
  1536. PyErr_SetString(PyExc_IndexError,
  1537. "revlog index deletion indices are invalid");
  1538. return -1;
  1539. }
  1540. if (start < self->length - 1) {
  1541. if (self->nt) {
  1542. Py_ssize_t i;
  1543. for (i = start + 1; i < self->length - 1; i++) {
  1544. const char *node = index_node(self, i);
  1545. if (node)
  1546. nt_insert(self, node, -1);
  1547. }
  1548. if (self->added)
  1549. nt_invalidate_added(self, 0);
  1550. if (self->ntrev > start)
  1551. self->ntrev = (int)start;
  1552. }
  1553. self->length = start + 1;
  1554. if (start < self->raw_length) {
  1555. if (self->cache) {
  1556. Py_ssize_t i;
  1557. for (i = start; i < self->raw_length; i++)
  1558. Py_CLEAR(self->cache[i]);
  1559. }
  1560. self->raw_length = start;
  1561. }
  1562. goto done;
  1563. }
  1564. if (self->nt) {
  1565. nt_invalidate_added(self, start - self->length + 1);
  1566. if (self->ntrev > start)
  1567. self->ntrev = (int)start;
  1568. }
  1569. if (self->added)
  1570. ret = PyList_SetSlice(self->added, start - self->length + 1,
  1571. PyList_GET_SIZE(self->added), NULL);
  1572. done:
  1573. Py_CLEAR(self->headrevs);
  1574. return ret;
  1575. }
  1576. /*
  1577. * Supported ops:
  1578. *
  1579. * slice deletion
  1580. * string assignment (extend node->rev mapping)
  1581. * string deletion (shrink node->rev mapping)
  1582. */
  1583. static int index_assign_subscript(indexObject *self, PyObject *item,
  1584. PyObject *value)
  1585. {
  1586. char *node;
  1587. Py_ssize_t nodelen;
  1588. long rev;
  1589. if (PySlice_Check(item) && value == NULL)
  1590. return index_slice_del(self, item);
  1591. if (node_check(item, &node, &nodelen) == -1)
  1592. return -1;
  1593. if (value == NULL)
  1594. return self->nt ? nt_insert(self, node, -1) : 0;
  1595. rev = PyInt_AsLong(value);
  1596. if (rev > INT_MAX || rev < 0) {
  1597. if (!PyErr_Occurred())
  1598. PyErr_SetString(PyExc_ValueError, "rev out of range");
  1599. return -1;
  1600. }
  1601. return nt_insert(self, node, (int)rev);
  1602. }
  1603. /*
  1604. * Find all RevlogNG entries in an index that has inline data. Update
  1605. * the optional "offsets" table with those entries.
  1606. */
  1607. static long inline_scan(indexObject *self, const char **offsets)
  1608. {
  1609. const char *data = PyString_AS_STRING(self->data);
  1610. Py_ssize_t pos = 0;
  1611. Py_ssize_t end = PyString_GET_SIZE(self->data);
  1612. long incr = v1_hdrsize;
  1613. Py_ssize_t len = 0;
  1614. while (pos + v1_hdrsize <= end && pos >= 0) {
  1615. uint32_t comp_len;
  1616. /* 3rd element of header is length of compressed inline data */
  1617. comp_len = getbe32(data + pos + 8);
  1618. incr = v1_hdrsize + comp_len;
  1619. if (offsets)
  1620. offsets[len] = data + pos;
  1621. len++;
  1622. pos += incr;
  1623. }
  1624. if (pos != end) {
  1625. if (!PyErr_Occurred())
  1626. PyErr_SetString(PyExc_ValueError, "corrupt index file");
  1627. return -1;
  1628. }
  1629. return len;
  1630. }
  1631. static int index_init(indexObject *self, PyObject *args)
  1632. {
  1633. PyObject *data_obj, *inlined_obj;
  1634. Py_ssize_t size;
  1635. /* Initialize before argument-checking to avoid index_dealloc() crash. */
  1636. self->raw_length = 0;
  1637. self->added = NULL;
  1638. self->cache = NULL;
  1639. self->data = NULL;
  1640. self->headrevs = NULL;
  1641. self->nt = NULL;
  1642. self->offsets = NULL;
  1643. if (!PyArg_ParseTuple(args, "OO", &data_obj, &inlined_obj))
  1644. return -1;
  1645. if (!PyString_Check(data_obj)) {
  1646. PyErr_SetString(PyExc_TypeError, "data is not a string");
  1647. return -1;
  1648. }
  1649. size = PyString_GET_SIZE(data_obj);
  1650. self->inlined = inlined_obj && PyObject_IsTrue(inlined_obj);
  1651. self->data = data_obj;
  1652. self->ntlength = self->ntcapacity = 0;
  1653. self->ntdepth = self->ntsplits = 0;
  1654. self->ntlookups = self->ntmisses = 0;
  1655. self->ntrev = -1;
  1656. Py_INCREF(self->data);
  1657. if (self->inlined) {
  1658. long len = inline_scan(self, NULL);
  1659. if (len == -1)
  1660. goto bail;
  1661. self->raw_length = len;
  1662. self->length = len + 1;
  1663. } else {
  1664. if (size % v1_hdrsize) {
  1665. PyErr_SetString(PyExc_ValueError, "corrupt index file");
  1666. goto bail;
  1667. }
  1668. self->raw_length = size / v1_hdrsize;
  1669. self->length = self->raw_length + 1;
  1670. }
  1671. return 0;
  1672. bail:
  1673. return -1;
  1674. }
  1675. static PyObject *index_nodemap(indexObject *self)
  1676. {
  1677. Py_INCREF(self);
  1678. return (PyObject *)self;
  1679. }
  1680. static void index_dealloc(indexObject *self)
  1681. {
  1682. _index_clearcaches(self);
  1683. Py_XDECREF(self->data);
  1684. Py_XDECREF(self->added);
  1685. PyObject_Del(self);
  1686. }
  1687. static PySequenceMethods index_sequence_methods = {
  1688. (lenfunc)index_length, /* sq_length */
  1689. 0, /* sq_concat */
  1690. 0, /* sq_repeat */
  1691. (ssizeargfunc)index_get, /* sq_item */
  1692. 0, /* sq_slice */
  1693. 0, /* sq_ass_item */
  1694. 0, /* sq_ass_slice */
  1695. (objobjproc)index_contains, /* sq_contains */
  1696. };
  1697. static PyMappingMethods index_mapping_methods = {
  1698. (lenfunc)index_length, /* mp_length */
  1699. (binaryfunc)index_getitem, /* mp_subscript */
  1700. (objobjargproc)index_assign_subscript, /* mp_ass_subscript */
  1701. };
  1702. static PyMethodDef index_methods[] = {
  1703. {"ancestors", (PyCFunction)index_ancestors, METH_VARARGS,
  1704. "return the gca set of the given revs"},
  1705. {"commonancestorsheads", (PyCFunction)index_commonancestorsheads,
  1706. METH_VARARGS,
  1707. "return the heads of the common ancestors of the given revs"},
  1708. {"clearcaches", (PyCFunction)index_clearcaches, METH_NOARGS,
  1709. "clear the index caches"},
  1710. {"get", (PyCFunction)index_m_get, METH_VARARGS,
  1711. "get an index entry"},
  1712. {"headrevs", (PyCFunction)index_headrevs, METH_NOARGS,
  1713. "get head revisions"},
  1714. {"insert", (PyCFunction)index_insert, METH_VARARGS,
  1715. "insert an index entry"},
  1716. {"partialmatch", (PyCFunction)index_partialmatch, METH_VARARGS,
  1717. "match a potentially ambiguous node ID"},
  1718. {"stats", (PyCFunction)index_stats, METH_NOARGS,
  1719. "stats for the index"},
  1720. {NULL} /* Sentinel */
  1721. };
  1722. static PyGetSetDef index_getset[] = {
  1723. {"nodemap", (getter)index_nodemap, NULL, "nodemap", NULL},
  1724. {NULL} /* Sentinel */
  1725. };
  1726. static PyTypeObject indexType = {
  1727. PyObject_HEAD_INIT(NULL)
  1728. 0, /* ob_size */
  1729. "parsers.index", /* tp_name */
  1730. sizeof(indexObject), /* tp_basicsize */
  1731. 0, /* tp_itemsize */
  1732. (destructor)index_dealloc, /* tp_dealloc */
  1733. 0, /* tp_print */
  1734. 0, /* tp_getattr */
  1735. 0, /* tp_setattr */
  1736. 0, /* tp_compare */
  1737. 0, /* tp_repr */
  1738. 0, /* tp_as_number */
  1739. &index_sequence_methods, /* tp_as_sequence */
  1740. &index_mapping_methods, /* tp_as_mapping */
  1741. 0, /* tp_hash */
  1742. 0, /* tp_call */
  1743. 0, /* tp_str */
  1744. 0, /* tp_getattro */
  1745. 0, /* tp_setattro */
  1746. 0, /* tp_as_buffer */
  1747. Py_TPFLAGS_DEFAULT, /* tp_flags */
  1748. "revlog index", /* tp_doc */
  1749. 0, /* tp_traverse */
  1750. 0, /* tp_clear */
  1751. 0, /* tp_richcompare */
  1752. 0, /* tp_weaklistoffset */
  1753. 0, /* tp_iter */
  1754. 0, /* tp_iternext */
  1755. index_methods, /* tp_methods */
  1756. 0, /* tp_members */
  1757. index_getset, /* tp_getset */
  1758. 0, /* tp_base */
  1759. 0, /* tp_dict */
  1760. 0, /* tp_descr_get */
  1761. 0, /* tp_descr_set */
  1762. 0, /* tp_dictoffset */
  1763. (initproc)index_init, /* tp_init */
  1764. 0, /* tp_alloc */
  1765. };
  1766. /*
  1767. * returns a tuple of the form (index, index, cache) with elements as
  1768. * follows:
  1769. *
  1770. * index: an index object that lazily parses RevlogNG records
  1771. * cache: if data is inlined, a tuple (index_file_content, 0), else None
  1772. *
  1773. * added complications are for backwards compatibility
  1774. */
  1775. static PyObject *parse_index2(PyObject *self, PyObject *args)
  1776. {
  1777. PyObject *tuple = NULL, *cache = NULL;
  1778. indexObject *idx;
  1779. int ret;
  1780. idx = PyObject_New(indexObject, &indexType);
  1781. if (idx == NULL)
  1782. goto bail;
  1783. ret = index_init(idx, args);
  1784. if (ret == -1)
  1785. goto bail;
  1786. if (idx->inlined) {
  1787. cache = Py_BuildValue("iO", 0, idx->data);
  1788. if (cache == NULL)
  1789. goto bail;
  1790. } else {
  1791. cache = Py_None;
  1792. Py_INCREF(cache);
  1793. }
  1794. tuple = Py_BuildValue("NN", idx, cache);
  1795. if (!tuple)
  1796. goto bail;
  1797. return tuple;
  1798. bail:
  1799. Py_XDECREF(idx);
  1800. Py_XDECREF(cache);
  1801. Py_XDECREF(tuple);
  1802. return NULL;
  1803. }
  1804. static char parsers_doc[] = "Efficient content parsing.";
  1805. PyObject *encodedir(PyObject *self, PyObject *args);
  1806. PyObject *pathencode(PyObject *self, PyObject *args);
  1807. PyObject *lowerencode(PyObject *self, PyObject *args);
  1808. static PyMethodDef methods[] = {
  1809. {"pack_dirstate", pack_dirstate, METH_VARARGS, "pack a dirstate\n"},
  1810. {"parse_manifest", parse_manifest, METH_VARARGS, "parse a manifest\n"},
  1811. {"parse_dirstate", parse_dirstate, METH_VARARGS, "parse a dirstate\n"},
  1812. {"parse_index2", parse_index2, METH_VARARGS, "parse a revlog index\n"},
  1813. {"encodedir", encodedir, METH_VARARGS, "encodedir a path\n"},
  1814. {"pathencode", pathencode, METH_VARARGS, "fncache-encode a path\n"},
  1815. {"lowerencode", lowerencode, METH_VARARGS, "lower-encode a path\n"},
  1816. {NULL, NULL}
  1817. };
  1818. void dirs_module_init(PyObject *mod);
  1819. static void module_init(PyObject *mod)
  1820. {
  1821. /* This module constant has two purposes. First, it lets us unit test
  1822. * the ImportError raised without hard-coding any error text. This
  1823. * means we can change the text in the future without breaking tests,
  1824. * even across changesets without a recompile. Second, its presence
  1825. * can be used to determine whether the version-checking logic is
  1826. * present, which also helps in testing across changesets without a
  1827. * recompile. Note that this means the pure-Python version of parsers
  1828. * should not have this module constant. */
  1829. PyModule_AddStringConstant(mod, "versionerrortext", versionerrortext);
  1830. dirs_module_init(mod);
  1831. indexType.tp_new = PyType_GenericNew;
  1832. if (PyType_Ready(&indexType) < 0 ||
  1833. PyType_Ready(&dirstateTupleType) < 0)
  1834. return;
  1835. Py_INCREF(&indexType);
  1836. PyModule_AddObject(mod, "index", (PyObject *)&indexType);
  1837. Py_INCREF(&dirstateTupleType);
  1838. PyModule_AddObject(mod, "dirstatetuple",
  1839. (PyObject *)&dirstateTupleType);
  1840. nullentry = Py_BuildValue("iiiiiiis#", 0, 0, 0,
  1841. -1, -1, -1, -1, nullid, 20);
  1842. if (nullentry)
  1843. PyObject_GC_UnTrack(nullentry);
  1844. }
  1845. static int check_python_version(void)
  1846. {
  1847. PyObject *sys = PyImport_ImportModule("sys");
  1848. long hexversion = PyInt_AsLong(PyObject_GetAttrString(sys, "hexversion"));
  1849. /* sys.hexversion is a 32-bit number by default, so the -1 case
  1850. * should only occur in unusual circumstances (e.g. if sys.hexversion
  1851. * is manually set to an invalid value). */
  1852. if ((hexversion == -1) || (hexversion >> 16 != PY_VERSION_HEX >> 16)) {
  1853. PyErr_Format(PyExc_ImportError, "%s: The Mercurial extension "
  1854. "modules were compiled with Python " PY_VERSION ", but "
  1855. "Mercurial is currently using Python with sys.hexversion=%ld: "
  1856. "Python %s\n at: %s", versionerrortext, hexversion,
  1857. Py_GetVersion(), Py_GetProgramFullPath());
  1858. return -1;
  1859. }
  1860. return 0;
  1861. }
  1862. #ifdef IS_PY3K
  1863. static struct PyModuleDef parsers_module = {
  1864. PyModuleDef_HEAD_INIT,
  1865. "parsers",
  1866. parsers_doc,
  1867. -1,
  1868. methods
  1869. };
  1870. PyMODINIT_FUNC PyInit_parsers(void)
  1871. {
  1872. PyObject *mod;
  1873. if (check_python_version() == -1)
  1874. return;
  1875. mod = PyModule_Create(&parsers_module);
  1876. module_init(mod);
  1877. return mod;
  1878. }
  1879. #else
  1880. PyMODINIT_FUNC initparsers(void)
  1881. {
  1882. PyObject *mod;
  1883. if (check_python_version() == -1)
  1884. return;
  1885. mod = Py_InitModule3("parsers", methods, parsers_doc);
  1886. module_init(mod);
  1887. }
  1888. #endif