PageRenderTime 55ms CodeModel.GetById 19ms RepoModel.GetById 0ms app.codeStats 0ms

/rpython/rtyper/lltypesystem/rdict.py

https://bitbucket.org/pypy/pypy/
Python | 923 lines | 690 code | 104 blank | 129 comment | 139 complexity | 2f68aefd5ab8fb3d863c950286da3430 MD5 | raw file
Possible License(s): AGPL-3.0, BSD-3-Clause, Apache-2.0
  1. from rpython.tool.pairtype import pairtype
  2. from rpython.flowspace.model import Constant
  3. from rpython.rtyper.rdict import AbstractDictRepr, AbstractDictIteratorRepr
  4. from rpython.rtyper.lltypesystem import lltype
  5. from rpython.rtyper.lltypesystem.lloperation import llop
  6. from rpython.rlib import objectmodel, jit
  7. from rpython.rtyper.debug import ll_assert
  8. from rpython.rlib.rarithmetic import r_uint, intmask, LONG_BIT
  9. from rpython.rtyper import rmodel
  10. from rpython.rtyper.error import TyperError
# Top bit of a machine word, as an unsigned value.  The lookup functions
# below OR it into their result to signal "lookup failed".
HIGHEST_BIT = r_uint(intmask(1 << (LONG_BIT - 1)))
# Every bit except the top one: strips the "lookup failed" flag from a
# lookup result, leaving only the entry index.
MASK = r_uint(intmask(HIGHEST_BIT - 1))
  13. # ____________________________________________________________
  14. #
  15. # generic implementation of RPython dictionary, with parametric DICTKEY and
  16. # DICTVALUE types.
  17. #
  18. # XXX for immutable dicts, the array should be inlined and
  19. # resize_counter and everused are not needed.
  20. #
  21. # struct dictentry {
  22. # DICTKEY key;
  23. # bool f_valid; # (optional) the entry is filled
  24. # bool f_everused; # (optional) the entry is or has ever been filled
  25. # DICTVALUE value;
  26. # int f_hash; # (optional) key hash, if hard to recompute
  27. # }
  28. #
  29. # struct dicttable {
  30. # int num_items;
  31. # int resize_counter;
  32. # Array *entries;
  33. # (Function DICTKEY, DICTKEY -> bool) *fnkeyeq;
  34. # (Function DICTKEY -> int) *fnkeyhash;
  35. # }
  36. #
  37. #
  38. class DictRepr(AbstractDictRepr):
  39. def __init__(self, rtyper, key_repr, value_repr, dictkey, dictvalue,
  40. custom_eq_hash=None, force_non_null=False):
  41. self.rtyper = rtyper
  42. self.DICT = lltype.GcForwardReference()
  43. self.lowleveltype = lltype.Ptr(self.DICT)
  44. self.custom_eq_hash = custom_eq_hash is not None
  45. if not isinstance(key_repr, rmodel.Repr): # not computed yet, done by setup()
  46. assert callable(key_repr)
  47. self._key_repr_computer = key_repr
  48. else:
  49. self.external_key_repr, self.key_repr = self.pickkeyrepr(key_repr)
  50. if not isinstance(value_repr, rmodel.Repr): # not computed yet, done by setup()
  51. assert callable(value_repr)
  52. self._value_repr_computer = value_repr
  53. else:
  54. self.external_value_repr, self.value_repr = self.pickrepr(value_repr)
  55. self.dictkey = dictkey
  56. self.dictvalue = dictvalue
  57. self.dict_cache = {}
  58. self._custom_eq_hash_repr = custom_eq_hash
  59. self.force_non_null = force_non_null
  60. # setup() needs to be called to finish this initialization
  61. def _externalvsinternal(self, rtyper, item_repr):
  62. return rmodel.externalvsinternal(self.rtyper, item_repr)
  63. def _setup_repr(self):
  64. if 'key_repr' not in self.__dict__:
  65. key_repr = self._key_repr_computer()
  66. self.external_key_repr, self.key_repr = self.pickkeyrepr(key_repr)
  67. if 'value_repr' not in self.__dict__:
  68. self.external_value_repr, self.value_repr = self.pickrepr(self._value_repr_computer())
  69. if isinstance(self.DICT, lltype.GcForwardReference):
  70. self.DICTKEY = self.key_repr.lowleveltype
  71. self.DICTVALUE = self.value_repr.lowleveltype
  72. # compute the shape of the DICTENTRY structure
  73. entryfields = []
  74. entrymeths = {
  75. 'allocate': lltype.typeMethod(_ll_malloc_entries),
  76. 'delete': _ll_free_entries,
  77. 'must_clear_key': (isinstance(self.DICTKEY, lltype.Ptr)
  78. and self.DICTKEY._needsgc()),
  79. 'must_clear_value': (isinstance(self.DICTVALUE, lltype.Ptr)
  80. and self.DICTVALUE._needsgc()),
  81. }
  82. # * the key
  83. entryfields.append(("key", self.DICTKEY))
  84. # * if NULL is not a valid ll value for the key or the value
  85. # field of the entry, it can be used as a marker for
  86. # never-used entries. Otherwise, we need an explicit flag.
  87. s_key = self.dictkey.s_value
  88. s_value = self.dictvalue.s_value
  89. nullkeymarker = not self.key_repr.can_ll_be_null(s_key)
  90. nullvaluemarker = not self.value_repr.can_ll_be_null(s_value)
  91. if self.force_non_null:
  92. if not nullkeymarker:
  93. rmodel.warning("%s can be null, but forcing non-null in dict key" % s_key)
  94. nullkeymarker = True
  95. if not nullvaluemarker:
  96. rmodel.warning("%s can be null, but forcing non-null in dict value" % s_value)
  97. nullvaluemarker = True
  98. dummykeyobj = self.key_repr.get_ll_dummyval_obj(self.rtyper,
  99. s_key)
  100. dummyvalueobj = self.value_repr.get_ll_dummyval_obj(self.rtyper,
  101. s_value)
  102. # * the state of the entry - trying to encode it as dummy objects
  103. if nullkeymarker and dummykeyobj:
  104. # all the state can be encoded in the key
  105. entrymeths['everused'] = ll_everused_from_key
  106. entrymeths['dummy_obj'] = dummykeyobj
  107. entrymeths['valid'] = ll_valid_from_key
  108. entrymeths['mark_deleted'] = ll_mark_deleted_in_key
  109. # the key is overwritten by 'dummy' when the entry is deleted
  110. entrymeths['must_clear_key'] = False
  111. elif nullvaluemarker and dummyvalueobj:
  112. # all the state can be encoded in the value
  113. entrymeths['everused'] = ll_everused_from_value
  114. entrymeths['dummy_obj'] = dummyvalueobj
  115. entrymeths['valid'] = ll_valid_from_value
  116. entrymeths['mark_deleted'] = ll_mark_deleted_in_value
  117. # value is overwritten by 'dummy' when entry is deleted
  118. entrymeths['must_clear_value'] = False
  119. else:
  120. # we need a flag to know if the entry was ever used
  121. # (we cannot use a NULL as a marker for this, because
  122. # the key and value will be reset to NULL to clear their
  123. # reference)
  124. entryfields.append(("f_everused", lltype.Bool))
  125. entrymeths['everused'] = ll_everused_from_flag
  126. # can we still rely on a dummy obj to mark deleted entries?
  127. if dummykeyobj:
  128. entrymeths['dummy_obj'] = dummykeyobj
  129. entrymeths['valid'] = ll_valid_from_key
  130. entrymeths['mark_deleted'] = ll_mark_deleted_in_key
  131. # key is overwritten by 'dummy' when entry is deleted
  132. entrymeths['must_clear_key'] = False
  133. elif dummyvalueobj:
  134. entrymeths['dummy_obj'] = dummyvalueobj
  135. entrymeths['valid'] = ll_valid_from_value
  136. entrymeths['mark_deleted'] = ll_mark_deleted_in_value
  137. # value is overwritten by 'dummy' when entry is deleted
  138. entrymeths['must_clear_value'] = False
  139. else:
  140. entryfields.append(("f_valid", lltype.Bool))
  141. entrymeths['valid'] = ll_valid_from_flag
  142. entrymeths['mark_deleted'] = ll_mark_deleted_in_flag
  143. # * the value
  144. entryfields.append(("value", self.DICTVALUE))
  145. # * the hash, if needed
  146. if self.custom_eq_hash:
  147. fasthashfn = None
  148. else:
  149. fasthashfn = self.key_repr.get_ll_fasthash_function()
  150. if getattr(self.key_repr.get_ll_eq_function(),
  151. 'no_direct_compare', False):
  152. entrymeths['no_direct_compare'] = True
  153. if fasthashfn is None:
  154. entryfields.append(("f_hash", lltype.Signed))
  155. entrymeths['hash'] = ll_hash_from_cache
  156. else:
  157. entrymeths['hash'] = ll_hash_recomputed
  158. entrymeths['fasthashfn'] = fasthashfn
  159. # Build the lltype data structures
  160. self.DICTENTRY = lltype.Struct("dictentry", *entryfields)
  161. self.DICTENTRYARRAY = lltype.GcArray(self.DICTENTRY,
  162. adtmeths=entrymeths)
  163. fields = [ ("num_items", lltype.Signed),
  164. ("resize_counter", lltype.Signed),
  165. ("entries", lltype.Ptr(self.DICTENTRYARRAY)) ]
  166. if self.custom_eq_hash:
  167. self.r_rdict_eqfn, self.r_rdict_hashfn = self._custom_eq_hash_repr()
  168. fields.extend([ ("fnkeyeq", self.r_rdict_eqfn.lowleveltype),
  169. ("fnkeyhash", self.r_rdict_hashfn.lowleveltype) ])
  170. adtmeths = {
  171. 'keyhash': ll_keyhash_custom,
  172. 'keyeq': ll_keyeq_custom,
  173. 'r_rdict_eqfn': self.r_rdict_eqfn,
  174. 'r_rdict_hashfn': self.r_rdict_hashfn,
  175. 'paranoia': True,
  176. }
  177. else:
  178. # figure out which functions must be used to hash and compare
  179. ll_keyhash = self.key_repr.get_ll_hash_function()
  180. ll_keyeq = self.key_repr.get_ll_eq_function() # can be None
  181. ll_keyhash = lltype.staticAdtMethod(ll_keyhash)
  182. if ll_keyeq is not None:
  183. ll_keyeq = lltype.staticAdtMethod(ll_keyeq)
  184. adtmeths = {
  185. 'keyhash': ll_keyhash,
  186. 'keyeq': ll_keyeq,
  187. 'paranoia': False,
  188. }
  189. adtmeths['KEY'] = self.DICTKEY
  190. adtmeths['VALUE'] = self.DICTVALUE
  191. adtmeths['allocate'] = lltype.typeMethod(_ll_malloc_dict)
  192. self.DICT.become(lltype.GcStruct("dicttable", adtmeths=adtmeths,
  193. *fields))
  194. def convert_const(self, dictobj):
  195. from rpython.rtyper.lltypesystem import llmemory
  196. # get object from bound dict methods
  197. #dictobj = getattr(dictobj, '__self__', dictobj)
  198. if dictobj is None:
  199. return lltype.nullptr(self.DICT)
  200. if not isinstance(dictobj, (dict, objectmodel.r_dict)):
  201. raise TypeError("expected a dict: %r" % (dictobj,))
  202. try:
  203. key = Constant(dictobj)
  204. return self.dict_cache[key]
  205. except KeyError:
  206. self.setup()
  207. l_dict = ll_newdict_size(self.DICT, len(dictobj))
  208. self.dict_cache[key] = l_dict
  209. r_key = self.key_repr
  210. if r_key.lowleveltype == llmemory.Address:
  211. raise TypeError("No prebuilt dicts of address keys")
  212. r_value = self.value_repr
  213. if isinstance(dictobj, objectmodel.r_dict):
  214. if self.r_rdict_eqfn.lowleveltype != lltype.Void:
  215. l_fn = self.r_rdict_eqfn.convert_const(dictobj.key_eq)
  216. l_dict.fnkeyeq = l_fn
  217. if self.r_rdict_hashfn.lowleveltype != lltype.Void:
  218. l_fn = self.r_rdict_hashfn.convert_const(dictobj.key_hash)
  219. l_dict.fnkeyhash = l_fn
  220. for dictkeycontainer, dictvalue in dictobj._dict.items():
  221. llkey = r_key.convert_const(dictkeycontainer.key)
  222. llvalue = r_value.convert_const(dictvalue)
  223. ll_dict_insertclean(l_dict, llkey, llvalue,
  224. dictkeycontainer.hash)
  225. return l_dict
  226. else:
  227. for dictkey, dictvalue in dictobj.items():
  228. llkey = r_key.convert_const(dictkey)
  229. llvalue = r_value.convert_const(dictvalue)
  230. ll_dict_insertclean(l_dict, llkey, llvalue,
  231. l_dict.keyhash(llkey))
  232. return l_dict
  233. def rtype_len(self, hop):
  234. v_dict, = hop.inputargs(self)
  235. return hop.gendirectcall(ll_dict_len, v_dict)
  236. def rtype_bool(self, hop):
  237. v_dict, = hop.inputargs(self)
  238. return hop.gendirectcall(ll_dict_bool, v_dict)
  239. def make_iterator_repr(self, *variant):
  240. return DictIteratorRepr(self, *variant)
  241. def rtype_method_get(self, hop):
  242. v_dict, v_key, v_default = hop.inputargs(self, self.key_repr,
  243. self.value_repr)
  244. hop.exception_cannot_occur()
  245. v_res = hop.gendirectcall(ll_get, v_dict, v_key, v_default)
  246. return self.recast_value(hop.llops, v_res)
  247. def rtype_method_setdefault(self, hop):
  248. v_dict, v_key, v_default = hop.inputargs(self, self.key_repr,
  249. self.value_repr)
  250. hop.exception_cannot_occur()
  251. v_res = hop.gendirectcall(ll_setdefault, v_dict, v_key, v_default)
  252. return self.recast_value(hop.llops, v_res)
  253. def rtype_method_copy(self, hop):
  254. v_dict, = hop.inputargs(self)
  255. hop.exception_cannot_occur()
  256. return hop.gendirectcall(ll_copy, v_dict)
  257. def rtype_method_update(self, hop):
  258. v_dic1, v_dic2 = hop.inputargs(self, self)
  259. hop.exception_cannot_occur()
  260. return hop.gendirectcall(ll_update, v_dic1, v_dic2)
  261. def rtype_method__prepare_dict_update(self, hop):
  262. v_dict, v_num = hop.inputargs(self, lltype.Signed)
  263. hop.exception_cannot_occur()
  264. hop.gendirectcall(ll_prepare_dict_update, v_dict, v_num)
  265. def _rtype_method_kvi(self, hop, ll_func):
  266. v_dic, = hop.inputargs(self)
  267. r_list = hop.r_result
  268. cLIST = hop.inputconst(lltype.Void, r_list.lowleveltype.TO)
  269. hop.exception_cannot_occur()
  270. return hop.gendirectcall(ll_func, cLIST, v_dic)
  271. def rtype_method_keys(self, hop):
  272. return self._rtype_method_kvi(hop, ll_dict_keys)
  273. def rtype_method_values(self, hop):
  274. return self._rtype_method_kvi(hop, ll_dict_values)
  275. def rtype_method_items(self, hop):
  276. return self._rtype_method_kvi(hop, ll_dict_items)
  277. def rtype_bltn_list(self, hop):
  278. return self._rtype_method_kvi(hop, ll_dict_keys)
  279. def rtype_method_iterkeys(self, hop):
  280. hop.exception_cannot_occur()
  281. return DictIteratorRepr(self, "keys").newiter(hop)
  282. def rtype_method_itervalues(self, hop):
  283. hop.exception_cannot_occur()
  284. return DictIteratorRepr(self, "values").newiter(hop)
  285. def rtype_method_iteritems(self, hop):
  286. hop.exception_cannot_occur()
  287. return DictIteratorRepr(self, "items").newiter(hop)
  288. def rtype_method_clear(self, hop):
  289. v_dict, = hop.inputargs(self)
  290. hop.exception_cannot_occur()
  291. return hop.gendirectcall(ll_clear, v_dict)
  292. def rtype_method_popitem(self, hop):
  293. v_dict, = hop.inputargs(self)
  294. r_tuple = hop.r_result
  295. cTUPLE = hop.inputconst(lltype.Void, r_tuple.lowleveltype)
  296. hop.exception_is_here()
  297. return hop.gendirectcall(ll_popitem, cTUPLE, v_dict)
  298. def rtype_method_pop(self, hop):
  299. if hop.nb_args == 2:
  300. v_args = hop.inputargs(self, self.key_repr)
  301. target = ll_pop
  302. elif hop.nb_args == 3:
  303. v_args = hop.inputargs(self, self.key_repr, self.value_repr)
  304. target = ll_pop_default
  305. hop.exception_is_here()
  306. v_res = hop.gendirectcall(target, *v_args)
  307. return self.recast_value(hop.llops, v_res)
class __extend__(pairtype(DictRepr, rmodel.Repr)):
    # Operations on a (dict, key) pair.  The key is always converted to
    # the dict's own key_repr; each operation is a direct call to the
    # matching ll_* helper.

    def rtype_getitem((r_dict, r_key), hop):
        v_dict, v_key = hop.inputargs(r_dict, r_dict.key_repr)
        if not r_dict.custom_eq_hash:
            hop.has_implicit_exception(KeyError)   # record that we know about it
        hop.exception_is_here()
        v_res = hop.gendirectcall(ll_dict_getitem, v_dict, v_key)
        return r_dict.recast_value(hop.llops, v_res)

    def rtype_delitem((r_dict, r_key), hop):
        v_dict, v_key = hop.inputargs(r_dict, r_dict.key_repr)
        if not r_dict.custom_eq_hash:
            hop.has_implicit_exception(KeyError)   # record that we know about it
        hop.exception_is_here()
        return hop.gendirectcall(ll_dict_delitem, v_dict, v_key)

    def rtype_setitem((r_dict, r_key), hop):
        v_dict, v_key, v_value = hop.inputargs(r_dict, r_dict.key_repr, r_dict.value_repr)
        # NOTE(review): presumably only custom eq/hash functions can raise
        # during a setitem -- confirm against the rtyper's exception rules.
        if r_dict.custom_eq_hash:
            hop.exception_is_here()
        else:
            hop.exception_cannot_occur()
        hop.gendirectcall(ll_dict_setitem, v_dict, v_key, v_value)

    def rtype_contains((r_dict, r_key), hop):
        v_dict, v_key = hop.inputargs(r_dict, r_dict.key_repr)
        hop.exception_is_here()
        return hop.gendirectcall(ll_contains, v_dict, v_key)
  333. class __extend__(pairtype(DictRepr, DictRepr)):
  334. def convert_from_to((r_dict1, r_dict2), v, llops):
  335. # check that we don't convert from Dicts with
  336. # different key/value types
  337. if r_dict1.dictkey is None or r_dict2.dictkey is None:
  338. return NotImplemented
  339. if r_dict1.dictkey is not r_dict2.dictkey:
  340. return NotImplemented
  341. if r_dict1.dictvalue is None or r_dict2.dictvalue is None:
  342. return NotImplemented
  343. if r_dict1.dictvalue is not r_dict2.dictvalue:
  344. return NotImplemented
  345. return v
  346. # ____________________________________________________________
  347. #
  348. # Low-level methods. These can be run for testing, but are meant to
  349. # be direct_call'ed from rtyped flow graphs, which means that they will
  350. # get flowed and annotated, mostly with SomePtr.
  351. def ll_everused_from_flag(entries, i):
  352. return entries[i].f_everused
  353. def ll_everused_from_key(entries, i):
  354. return bool(entries[i].key)
  355. def ll_everused_from_value(entries, i):
  356. return bool(entries[i].value)
  357. def ll_valid_from_flag(entries, i):
  358. return entries[i].f_valid
  359. def ll_mark_deleted_in_flag(entries, i):
  360. entries[i].f_valid = False
  361. def ll_valid_from_key(entries, i):
  362. ENTRIES = lltype.typeOf(entries).TO
  363. dummy = ENTRIES.dummy_obj.ll_dummy_value
  364. return entries.everused(i) and entries[i].key != dummy
  365. def ll_mark_deleted_in_key(entries, i):
  366. ENTRIES = lltype.typeOf(entries).TO
  367. dummy = ENTRIES.dummy_obj.ll_dummy_value
  368. entries[i].key = dummy
  369. def ll_valid_from_value(entries, i):
  370. ENTRIES = lltype.typeOf(entries).TO
  371. dummy = ENTRIES.dummy_obj.ll_dummy_value
  372. return entries.everused(i) and entries[i].value != dummy
  373. def ll_mark_deleted_in_value(entries, i):
  374. ENTRIES = lltype.typeOf(entries).TO
  375. dummy = ENTRIES.dummy_obj.ll_dummy_value
  376. entries[i].value = dummy
  377. def ll_hash_from_cache(entries, i):
  378. return entries[i].f_hash
  379. def ll_hash_recomputed(entries, i):
  380. ENTRIES = lltype.typeOf(entries).TO
  381. return ENTRIES.fasthashfn(entries[i].key)
  382. def ll_get_value(d, i):
  383. return d.entries[i].value
  384. def ll_keyhash_custom(d, key):
  385. DICT = lltype.typeOf(d).TO
  386. return objectmodel.hlinvoke(DICT.r_rdict_hashfn, d.fnkeyhash, key)
  387. def ll_keyeq_custom(d, key1, key2):
  388. DICT = lltype.typeOf(d).TO
  389. return objectmodel.hlinvoke(DICT.r_rdict_eqfn, d.fnkeyeq, key1, key2)
  390. def ll_dict_len(d):
  391. return d.num_items
  392. def ll_dict_bool(d):
  393. # check if a dict is True, allowing for None
  394. return bool(d) and d.num_items != 0
  395. def ll_dict_getitem(d, key):
  396. i = ll_dict_lookup(d, key, d.keyhash(key))
  397. if not i & HIGHEST_BIT:
  398. return ll_get_value(d, i)
  399. else:
  400. raise KeyError
  401. def ll_dict_setitem(d, key, value):
  402. hash = d.keyhash(key)
  403. i = ll_dict_lookup(d, key, hash)
  404. return _ll_dict_setitem_lookup_done(d, key, value, hash, i)
# It may be safe to look inside always; it has a few branches though, and
# their frequencies need to be investigated.
@jit.look_inside_iff(lambda d, key, value, hash, i: jit.isvirtual(d) and jit.isconstant(key))
def _ll_dict_setitem_lookup_done(d, key, value, hash, i):
    """Store (key, value) given 'i', the result of ll_dict_lookup().

    'i' has HIGHEST_BIT set if the key was not found; the remaining bits
    are the index of the entry to write to.
    """
    valid = (i & HIGHEST_BIT) == 0
    i = i & MASK
    ENTRY = lltype.typeOf(d.entries).TO.OF
    entry = d.entries[i]
    if not d.entries.everused(i):
        # a new entry that was never used before
        ll_assert(not valid, "valid but not everused")
        # each never-used entry that gets filled costs 3 units
        rc = d.resize_counter - 3
        if rc <= 0:       # if needed, resize the dict -- before the insertion
            ll_dict_resize(d)
            i = ll_dict_lookup_clean(d, hash)  # then redo the lookup for 'key'
            entry = d.entries[i]
            rc = d.resize_counter - 3
            ll_assert(rc > 0, "ll_dict_resize failed?")
        d.resize_counter = rc
        if hasattr(ENTRY, 'f_everused'): entry.f_everused = True
        entry.value       = value
    else:
        # override an existing or deleted entry
        entry.value       = value
        if valid:
            # the key was already present: only the value changes
            return
    entry.key = key
    if hasattr(ENTRY, 'f_hash'):  entry.f_hash = hash
    if hasattr(ENTRY, 'f_valid'): entry.f_valid = True
    d.num_items += 1
  435. def ll_dict_insertclean(d, key, value, hash):
  436. # Internal routine used by ll_dict_resize() to insert an item which is
  437. # known to be absent from the dict. This routine also assumes that
  438. # the dict contains no deleted entries. This routine has the advantage
  439. # of never calling d.keyhash() and d.keyeq(), so it cannot call back
  440. # to user code. ll_dict_insertclean() doesn't resize the dict, either.
  441. i = ll_dict_lookup_clean(d, hash)
  442. ENTRY = lltype.typeOf(d.entries).TO.OF
  443. entry = d.entries[i]
  444. entry.value = value
  445. entry.key = key
  446. if hasattr(ENTRY, 'f_hash'): entry.f_hash = hash
  447. if hasattr(ENTRY, 'f_valid'): entry.f_valid = True
  448. if hasattr(ENTRY, 'f_everused'): entry.f_everused = True
  449. d.num_items += 1
  450. d.resize_counter -= 3
  451. def ll_dict_delitem(d, key):
  452. i = ll_dict_lookup(d, key, d.keyhash(key))
  453. if i & HIGHEST_BIT:
  454. raise KeyError
  455. _ll_dict_del(d, i)
@jit.look_inside_iff(lambda d, i: jit.isvirtual(d) and jit.isconstant(i))
def _ll_dict_del(d, i):
    """Delete the valid entry at index 'i' of 'd'.  The table never shrinks here."""
    d.entries.mark_deleted(i)
    d.num_items -= 1
    # clear the key and the value if they are GC pointers
    ENTRIES = lltype.typeOf(d.entries).TO
    ENTRY = ENTRIES.OF
    entry = d.entries[i]
    # must_clear_key/must_clear_value are False when that field holds the
    # 'dummy' deletion marker written by mark_deleted() above
    if ENTRIES.must_clear_key:
        entry.key = lltype.nullptr(ENTRY.key.TO)
    if ENTRIES.must_clear_value:
        entry.value = lltype.nullptr(ENTRY.value.TO)
    #
    # The rest is commented out: like CPython we no longer shrink the
    # dictionary here.  It may shrink later if we try to append a number
    # of new items to it.  Unsure if this behavior was designed in
    # CPython or is accidental.  A design reason would be that if you
    # delete all items in a dictionary (e.g. with a series of
    # popitem()), then CPython avoids shrinking the table several times.
    #num_entries = len(d.entries)
    #if num_entries > DICT_INITSIZE and d.num_items <= num_entries / 4:
    #    ll_dict_resize(d)
  478. # A previous xxx: move the size checking and resize into a single
  479. # call which is opaque to the JIT when the dict isn't virtual, to
  480. # avoid extra branches.
  481. def ll_dict_resize(d):
  482. # make a 'new_size' estimate and shrink it if there are many
  483. # deleted entry markers. See CPython for why it is a good idea to
  484. # quadruple the dictionary size as long as it's not too big.
  485. # (Quadrupling comes from '(d.num_items + d.num_items + 1) * 2'
  486. # as long as num_items is not too large.)
  487. num_extra = min(d.num_items + 1, 30000)
  488. _ll_dict_resize_to(d, num_extra)
  489. ll_dict_resize.oopspec = 'dict.resize(d)'
  490. def _ll_dict_resize_to(d, num_extra):
  491. new_estimate = (d.num_items + num_extra) * 2
  492. new_size = DICT_INITSIZE
  493. while new_size <= new_estimate:
  494. new_size *= 2
  495. old_entries = d.entries
  496. old_size = len(d.entries)
  497. d.entries = lltype.typeOf(old_entries).TO.allocate(new_size)
  498. d.num_items = 0
  499. d.resize_counter = new_size * 2
  500. i = 0
  501. while i < old_size:
  502. if old_entries.valid(i):
  503. hash = old_entries.hash(i)
  504. entry = old_entries[i]
  505. ll_dict_insertclean(d, entry.key, entry.value, hash)
  506. i += 1
  507. old_entries.delete()
# ------- a port of CPython's dictobject.c's lookdict implementation -------
# Number of bits 'perturb' is shifted right by after each probe.
PERTURB_SHIFT = 5
@jit.look_inside_iff(lambda d, key, hash: jit.isvirtual(d) and jit.isconstant(key))
@jit.oopspec('dict.lookup(d, key, hash)')
def ll_dict_lookup(d, key, hash):
    """Find the entry for 'key' (whose hash is 'hash') in 'd'.

    Returns the index of the entry if the key is present.  Otherwise
    returns the index of a slot where the key can be inserted, with
    HIGHEST_BIT set.
    """
    entries = d.entries
    ENTRIES = lltype.typeOf(entries).TO
    direct_compare = not hasattr(ENTRIES, 'no_direct_compare')
    mask = len(entries) - 1
    i = r_uint(hash & mask)
    # do the first try before any looping
    if entries.valid(i):
        checkingkey = entries[i].key
        if direct_compare and checkingkey == key:
            return i   # found the entry
        if d.keyeq is not None and entries.hash(i) == hash:
            # correct hash, maybe the key is e.g. a different pointer to
            # an equal object
            found = d.keyeq(checkingkey, key)
            if d.paranoia:
                if (entries != d.entries or
                    not entries.valid(i) or entries[i].key != checkingkey):
                    # the compare did major nasty stuff to the dict: start over
                    return ll_dict_lookup(d, key, hash)
            if found:
                return i   # found the entry
        freeslot = -1
    elif entries.everused(i):
        # a deleted entry: remember it as a candidate insertion slot
        freeslot = intmask(i)
    else:
        return i | HIGHEST_BIT # pristine entry -- lookup failed

    # In the loop, a deleted entry (everused and not valid) is by far
    # (factor of 100s) the least likely outcome, so test for that last.
    perturb = r_uint(hash)
    while 1:
        # compute the next index using unsigned arithmetic
        i = (i << 2) + i + perturb + 1
        i = i & mask
        # keep 'i' as a signed number here, to consistently pass signed
        # arguments to the small helper methods.
        # NOTE(review): 'i' is an r_uint at this point, so the comment
        # above looks stale -- confirm.
        if not entries.everused(i):
            if freeslot == -1:
                freeslot = intmask(i)
            return r_uint(freeslot) | HIGHEST_BIT
        elif entries.valid(i):
            checkingkey = entries[i].key
            if direct_compare and checkingkey == key:
                return i
            if d.keyeq is not None and entries.hash(i) == hash:
                # correct hash, maybe the key is e.g. a different pointer to
                # an equal object
                found = d.keyeq(checkingkey, key)
                if d.paranoia:
                    if (entries != d.entries or
                        not entries.valid(i) or entries[i].key != checkingkey):
                        # the compare did major nasty stuff to the dict:
                        # start over
                        return ll_dict_lookup(d, key, hash)
                if found:
                    return i   # found the entry
        elif freeslot == -1:
            # first deleted entry seen on the probe sequence
            freeslot = intmask(i)
        perturb >>= PERTURB_SHIFT
  571. def ll_dict_lookup_clean(d, hash):
  572. # a simplified version of ll_dict_lookup() which assumes that the
  573. # key is new, and the dictionary doesn't contain deleted entries.
  574. # It only finds the next free slot for the given hash.
  575. entries = d.entries
  576. mask = len(entries) - 1
  577. i = r_uint(hash & mask)
  578. perturb = r_uint(hash)
  579. while entries.everused(i):
  580. i = (i << 2) + i + perturb + 1
  581. i = i & mask
  582. perturb >>= PERTURB_SHIFT
  583. return i
  584. # ____________________________________________________________
  585. #
  586. # Irregular operations.
# Number of entries allocated for a new or cleared dict, and the starting
# point when computing a new table size.
DICT_INITSIZE = 8
  588. def ll_newdict(DICT):
  589. d = DICT.allocate()
  590. d.entries = DICT.entries.TO.allocate(DICT_INITSIZE)
  591. d.num_items = 0
  592. d.resize_counter = DICT_INITSIZE * 2
  593. return d
  594. DictRepr.ll_newdict = staticmethod(ll_newdict)
  595. def ll_newdict_size(DICT, length_estimate):
  596. length_estimate = (length_estimate // 2) * 3
  597. n = DICT_INITSIZE
  598. while n < length_estimate:
  599. n *= 2
  600. d = DICT.allocate()
  601. d.entries = DICT.entries.TO.allocate(n)
  602. d.num_items = 0
  603. d.resize_counter = n * 2
  604. return d
  605. # rpython.memory.lldict uses a dict based on Struct and Array
  606. # instead of GcStruct and GcArray, which is done by using different
  607. # 'allocate' and 'delete' adtmethod implementations than the ones below
  608. def _ll_malloc_dict(DICT):
  609. return lltype.malloc(DICT)
  610. def _ll_malloc_entries(ENTRIES, n):
  611. return lltype.malloc(ENTRIES, n, zero=True)
  612. def _ll_free_entries(entries):
  613. pass
  614. # ____________________________________________________________
  615. #
  616. # Iteration.
  617. class DictIteratorRepr(AbstractDictIteratorRepr):
  618. def __init__(self, r_dict, variant="keys"):
  619. self.r_dict = r_dict
  620. self.variant = variant
  621. self.lowleveltype = lltype.Ptr(lltype.GcStruct('dictiter',
  622. ('dict', r_dict.lowleveltype),
  623. ('index', lltype.Signed)))
  624. self.ll_dictiter = ll_dictiter
  625. self._ll_dictnext = _ll_dictnext
  626. def ll_dictiter(ITERPTR, d):
  627. iter = lltype.malloc(ITERPTR.TO)
  628. iter.dict = d
  629. iter.index = 0
  630. return iter
@jit.look_inside_iff(lambda iter: jit.isvirtual(iter)
                     and (iter.dict is None or
                          jit.isvirtual(iter.dict)))
@jit.oopspec("dictiter.next(iter)")
def _ll_dictnext(iter):
    """Return the index of the next valid entry; raise StopIteration at the end."""
    dict = iter.dict
    if dict:
        entries = dict.entries
        index = iter.index
        assert index >= 0
        entries_len = len(entries)
        # scan forward from the saved position for the next valid entry
        while index < entries_len:
            nextindex = index + 1
            if entries.valid(index):
                iter.index = nextindex
                return index
            index = nextindex
        # clear the reference to the dict and prevent restarts
        iter.dict = lltype.nullptr(lltype.typeOf(iter).TO.dict.TO)
    raise StopIteration
  651. # _____________________________________________________________
  652. # methods
  653. def ll_get(dict, key, default):
  654. i = ll_dict_lookup(dict, key, dict.keyhash(key))
  655. if not i & HIGHEST_BIT:
  656. return ll_get_value(dict, i)
  657. else:
  658. return default
  659. def ll_setdefault(dict, key, default):
  660. hash = dict.keyhash(key)
  661. i = ll_dict_lookup(dict, key, hash)
  662. if not i & HIGHEST_BIT:
  663. return ll_get_value(dict, i)
  664. else:
  665. _ll_dict_setitem_lookup_done(dict, key, default, hash, i)
  666. return default
  667. def ll_copy(dict):
  668. DICT = lltype.typeOf(dict).TO
  669. dictsize = len(dict.entries)
  670. d = DICT.allocate()
  671. d.entries = DICT.entries.TO.allocate(dictsize)
  672. d.num_items = dict.num_items
  673. d.resize_counter = dict.resize_counter
  674. if hasattr(DICT, 'fnkeyeq'): d.fnkeyeq = dict.fnkeyeq
  675. if hasattr(DICT, 'fnkeyhash'): d.fnkeyhash = dict.fnkeyhash
  676. i = 0
  677. while i < dictsize:
  678. d_entry = d.entries[i]
  679. entry = dict.entries[i]
  680. ENTRY = lltype.typeOf(d.entries).TO.OF
  681. d_entry.key = entry.key
  682. if hasattr(ENTRY, 'f_valid'): d_entry.f_valid = entry.f_valid
  683. if hasattr(ENTRY, 'f_everused'): d_entry.f_everused = entry.f_everused
  684. d_entry.value = entry.value
  685. if hasattr(ENTRY, 'f_hash'): d_entry.f_hash = entry.f_hash
  686. i += 1
  687. return d
  688. ll_copy.oopspec = 'dict.copy(dict)'
  689. def ll_clear(d):
  690. if (len(d.entries) == DICT_INITSIZE and
  691. d.resize_counter == DICT_INITSIZE * 2):
  692. return
  693. old_entries = d.entries
  694. d.entries = lltype.typeOf(old_entries).TO.allocate(DICT_INITSIZE)
  695. d.num_items = 0
  696. d.resize_counter = DICT_INITSIZE * 2
  697. old_entries.delete()
  698. ll_clear.oopspec = 'dict.clear(d)'
  699. def ll_update(dic1, dic2):
  700. if dic1 == dic2:
  701. return
  702. ll_prepare_dict_update(dic1, dic2.num_items)
  703. entries = dic2.entries
  704. d2len = len(entries)
  705. i = 0
  706. while i < d2len:
  707. if entries.valid(i):
  708. entry = entries[i]
  709. hash = entries.hash(i)
  710. key = entry.key
  711. value = entry.value
  712. j = ll_dict_lookup(dic1, key, hash)
  713. _ll_dict_setitem_lookup_done(dic1, key, value, hash, j)
  714. i += 1
  715. ll_update.oopspec = 'dict.update(dic1, dic2)'
  716. def ll_prepare_dict_update(d, num_extra):
  717. # Prescale 'd' for 'num_extra' items, assuming that most items don't
  718. # collide. If this assumption is false, 'd' becomes too large by at
  719. # most 'num_extra'. The logic is based on:
  720. # (d.resize_counter - 1) // 3 = room left in d
  721. # so, if num_extra == 1, we need d.resize_counter > 3
  722. # if num_extra == 2, we need d.resize_counter > 6 etc.
  723. # Note however a further hack: if num_extra <= d.num_items,
  724. # we avoid calling _ll_dict_resize_to here. This is to handle
  725. # the case where dict.update() actually has a lot of collisions.
  726. # If num_extra is much greater than d.num_items the conditional_call
  727. # will trigger anyway, which is really the goal.
  728. x = num_extra - d.num_items
  729. jit.conditional_call(d.resize_counter <= x * 3,
  730. _ll_dict_resize_to, d, num_extra)
  731. # this is an implementation of keys(), values() and items()
  732. # in a single function.
  733. # note that by specialization on func, three different
  734. # and very efficient functions are created.
  735. def recast(P, v):
  736. if isinstance(P, lltype.Ptr):
  737. return lltype.cast_pointer(P, v)
  738. else:
  739. return v
def _make_ll_keys_values_items(kind):
    """Build the low-level implementation of keys(), values() or items().

    'kind' is one of 'keys', 'values' or 'items'.  The returned function
    takes the result LIST type and the dict, and returns a new list with
    one item per valid entry.
    """
    def ll_kvi(LIST, dic):
        res = LIST.ll_newlist(dic.num_items)
        entries = dic.entries
        dlen = len(entries)
        items = res.ll_items()
        i = 0    # index into the dict entries
        p = 0    # index into the result list
        while i < dlen:
            if entries.valid(i):
                ELEM = lltype.typeOf(items).TO.OF
                # nothing to store if the list items are of type Void
                if ELEM is not lltype.Void:
                    entry = entries[i]
                    if kind == 'items':
                        # build a new (key, value) tuple structure
                        r = lltype.malloc(ELEM.TO)
                        r.item0 = recast(ELEM.TO.item0, entry.key)
                        r.item1 = recast(ELEM.TO.item1, entry.value)
                        items[p] = r
                    elif kind == 'keys':
                        items[p] = recast(ELEM, entry.key)
                    elif kind == 'values':
                        items[p] = recast(ELEM, entry.value)
                p += 1
            i += 1
        assert p == res.ll_length()
        return res
    ll_kvi.oopspec = 'dict.%s(dic)' % kind
    return ll_kvi
# The three concrete helpers, one per value of 'kind'.
ll_dict_keys   = _make_ll_keys_values_items('keys')
ll_dict_values = _make_ll_keys_values_items('values')
ll_dict_items  = _make_ll_keys_values_items('items')
  771. def ll_contains(d, key):
  772. i = ll_dict_lookup(d, key, d.keyhash(key))
  773. return not i & HIGHEST_BIT
# Global, immortal cursor used by _ll_getnextitem() as the starting point of
# its scan for dicts whose entries have no 'f_hash' field.
POPITEMINDEX = lltype.Struct('PopItemIndex', ('nextindex', lltype.Signed))
global_popitem_index = lltype.malloc(POPITEMINDEX, zero=True, immortal=True)
def _ll_getnextitem(dic):
    """Return the index of some valid entry of 'dic'; KeyError if there is none."""
    entries = dic.entries
    ENTRY = lltype.typeOf(entries).TO.OF
    dmask = len(entries) - 1
    if hasattr(ENTRY, 'f_hash'):
        if entries.valid(0):
            return 0
        # entry 0 is not valid: its f_hash field is reused to store the
        # position where the scan should start
        base = entries[0].f_hash
    else:
        base = global_popitem_index.nextindex
    counter = 0
    while counter <= dmask:
        i = (base + counter) & dmask
        counter += 1
        if entries.valid(i):
            break
    else:
        # scanned every slot without finding a valid entry
        raise KeyError
    # remember where to continue the scan next time
    if hasattr(ENTRY, 'f_hash'):
        entries[0].f_hash = base + counter
    else:
        global_popitem_index.nextindex = base + counter
    return i
  799. def ll_popitem(ELEM, dic):
  800. i = _ll_getnextitem(dic)
  801. entry = dic.entries[i]
  802. r = lltype.malloc(ELEM.TO)
  803. r.item0 = recast(ELEM.TO.item0, entry.key)
  804. r.item1 = recast(ELEM.TO.item1, entry.value)
  805. _ll_dict_del(dic, r_uint(i))
  806. return r
  807. def ll_pop(dic, key):
  808. i = ll_dict_lookup(dic, key, dic.keyhash(key))
  809. if not i & HIGHEST_BIT:
  810. value = ll_get_value(dic, r_uint(i))
  811. _ll_dict_del(dic, r_uint(i))
  812. return value
  813. else:
  814. raise KeyError
  815. def ll_pop_default(dic, key, dfl):
  816. try:
  817. return ll_pop(dic, key)
  818. except KeyError:
  819. return dfl