PageRenderTime 1876ms CodeModel.GetById 183ms app.highlight 1046ms RepoModel.GetById 535ms app.codeStats 1ms

/Objects/dictobject.c

http://unladen-swallow.googlecode.com/
C | 2725 lines | 2071 code | 242 blank | 412 comment | 555 complexity | 815e6c417015d2f2f8ec89b710a95967 MD5 | raw file

Large files are truncated, but you can click here to view the full file

   1
   2/* Dictionary object implementation using a hash table */
   3
   4/* The distribution includes a separate file, Objects/dictnotes.txt,
   5   describing explorations into dictionary design and optimization.
   6   It covers typical dictionary use patterns, the parameters for
   7   tuning dictionaries, and several ideas for possible optimizations.
   8*/
   9
  10#include "Python.h"
  11
  12#include "Util/PySmallPtrSet.h"
  13
  14
  15/* Set a key error with the specified argument, wrapping it in a
  16 * tuple automatically so that tuple keys are not unpacked as the
  17 * exception arguments. */
  18static void
  19set_key_error(PyObject *arg)
  20{
  21	PyObject *tup;
  22	tup = PyTuple_Pack(1, arg);
  23	if (!tup)
  24		return; /* caller will expect error to be set anyway */
  25	PyErr_SetObject(PyExc_KeyError, tup);
  26	Py_DECREF(tup);
  27}
  28
  29/* Define this out if you don't want conversion statistics on exit. */
  30#undef SHOW_CONVERSION_COUNTS
  31
  32/* See large comment block below.  This must be >= 1. */
  33#define PERTURB_SHIFT 5
  34
  35/*
  36Major subtleties ahead:  Most hash schemes depend on having a "good" hash
  37function, in the sense of simulating randomness.  Python doesn't:  its most
  38important hash functions (for strings and ints) are very regular in common
  39cases:
  40
  41>>> map(hash, (0, 1, 2, 3))
  42[0, 1, 2, 3]
  43>>> map(hash, ("namea", "nameb", "namec", "named"))
  44[-1658398457, -1658398460, -1658398459, -1658398462]
  45>>>
  46
  47This isn't necessarily bad!  To the contrary, in a table of size 2**i, taking
  48the low-order i bits as the initial table index is extremely fast, and there
  49are no collisions at all for dicts indexed by a contiguous range of ints.
  50The same is approximately true when keys are "consecutive" strings.  So this
  51gives better-than-random behavior in common cases, and that's very desirable.
  52
  53OTOH, when collisions occur, the tendency to fill contiguous slices of the
  54hash table makes a good collision resolution strategy crucial.  Taking only
  55the last i bits of the hash code is also vulnerable:  for example, consider
  56[i << 16 for i in range(20000)] as a set of keys.  Since ints are their own
  57hash codes, and this fits in a dict of size 2**15, the last 15 bits of every
  58hash code are all 0:  they *all* map to the same table index.
  59
  60But catering to unusual cases should not slow the usual ones, so we just take
  61the last i bits anyway.  It's up to collision resolution to do the rest.  If
  62we *usually* find the key we're looking for on the first try (and, it turns
  63out, we usually do -- the table load factor is kept under 2/3, so the odds
  64are solidly in our favor), then it makes best sense to keep the initial index
  65computation dirt cheap.
  66
  67The first half of collision resolution is to visit table indices via this
  68recurrence:
  69
  70    j = ((5*j) + 1) mod 2**i
  71
  72For any initial j in range(2**i), repeating that 2**i times generates each
  73int in range(2**i) exactly once (see any text on random-number generation for
  74proof).  By itself, this doesn't help much:  like linear probing (setting
  75j += 1, or j -= 1, on each loop trip), it scans the table entries in a fixed
  76order.  This would be bad, except that's not the only thing we do, and it's
  77actually *good* in the common cases where hash keys are consecutive.  In an
  78example that's really too small to make this entirely clear, for a table of
  79size 2**3 the order of indices is:
  80
  81    0 -> 1 -> 6 -> 7 -> 4 -> 5 -> 2 -> 3 -> 0 [and here it's repeating]
  82
  83If two things come in at index 5, the first place we look after is index 2,
  84not 6, so if another comes in at index 6 the collision at 5 didn't hurt it.
  85Linear probing is deadly in this case because there the fixed probe order
  86is the *same* as the order consecutive keys are likely to arrive.  But it's
  87extremely unlikely hash codes will follow a 5*j+1 recurrence by accident,
  88and certain that consecutive hash codes do not.
  89
  90The other half of the strategy is to get the other bits of the hash code
  91into play.  This is done by initializing a (unsigned) vrbl "perturb" to the
  92full hash code, and changing the recurrence to:
  93
  94    j = (5*j) + 1 + perturb;
  95    perturb >>= PERTURB_SHIFT;
  96    use j % 2**i as the next table index;
  97
  98Now the probe sequence depends (eventually) on every bit in the hash code,
  99and the pseudo-scrambling property of recurring on 5*j+1 is more valuable,
 100because it quickly magnifies small differences in the bits that didn't affect
 101the initial index.  Note that because perturb is unsigned, if the recurrence
 102is executed often enough perturb eventually becomes and remains 0.  At that
 103point (very rarely reached) the recurrence is on (just) 5*j+1 again, and
 104that's certain to find an empty slot eventually (since it generates every int
 105in range(2**i), and we make sure there's always at least one empty slot).
 106
 107Selecting a good value for PERTURB_SHIFT is a balancing act.  You want it
 108small so that the high bits of the hash code continue to affect the probe
 109sequence across iterations; but you want it large so that in really bad cases
 110the high-order hash bits have an effect on early iterations.  5 was "the
 111best" in minimizing total collisions across experiments Tim Peters ran (on
 112both normal and pathological cases), but 4 and 6 weren't significantly worse.
 113
 114Historical:  Reimer Behrends contributed the idea of using a polynomial-based
 115approach, using repeated multiplication by x in GF(2**n) where an irreducible
 116polynomial for each table size was chosen such that x was a primitive root.
 117Christian Tismer later extended that to use division by x instead, as an
 118efficient way to get the high bits of the hash code into play.  This scheme
 119also gave excellent collision statistics, but was more expensive:  two
 120if-tests were required inside the loop; computing "the next" index took about
 121the same number of operations but without as much potential parallelism
 122(e.g., computing 5*j can go on at the same time as computing 1+perturb in the
 123above, and then shifting perturb can be done while the table index is being
 124masked); and the PyDictObject struct required a member to hold the table's
 125polynomial.  In Tim's experiments the current scheme ran faster, produced
 126equally good collision statistics, needed less code & used less memory.
 127
 128Theoretical Python 2.5 headache:  hash codes are only C "long", but
 129sizeof(Py_ssize_t) > sizeof(long) may be possible.  In that case, and if a
 130dict is genuinely huge, then only the slots directly reachable via indexing
 131by a C long can be the first slot in a probe sequence.  The probe sequence
 132will still eventually reach every slot in the table, but the collision rate
 133on initial probes may be much higher than this scheme was designed for.
 134Getting a hash code as fat as Py_ssize_t is the only real cure.  But in
 135practice, this probably won't make a lick of difference for many years (at
 136which point everyone will have terabytes of RAM on 64-bit boxes).
 137*/
 138
/* Sentinel object stored as the key of a slot whose entry was deleted.
   It starts out NULL and is created by the first call to PyDict_New(). */
static PyObject *dummy = NULL;

#ifdef Py_REF_DEBUG
/* Return the file-private dummy sentinel.  Only compiled when
   Py_REF_DEBUG is defined.  The result is NULL if PyDict_New() has not
   yet created the sentinel. */
PyObject *
_PyDict_Dummy(void)
{
	return dummy;
}
#endif
 149
 150/* forward declarations */
 151static PyDictEntry *lookdict_string(PyDictObject *mp, PyObject *key, long hash);
 152static void notify_watchers(PyDictObject *self);
 153static void del_watchers_array(PyDictObject *self);
 154
 155#ifdef SHOW_CONVERSION_COUNTS
 156static long created = 0L;
 157static long converted = 0L;
 158
 159static void
 160show_counts(void)
 161{
 162	fprintf(stderr, "created %ld string dicts\n", created);
 163	fprintf(stderr, "converted %ld to normal dicts\n", converted);
 164	fprintf(stderr, "%.2f%% conversion rate\n", (100.0*converted)/created);
 165}
 166#endif
 167
 168/* Debug statistic to compare allocations with reuse through the free list */
 169#undef SHOW_ALLOC_COUNT
 170#ifdef SHOW_ALLOC_COUNT
 171static size_t count_alloc = 0;
 172static size_t count_reuse = 0;
 173
 174static void
 175show_alloc(void)
 176{
 177	fprintf(stderr, "Dict allocations: %" PY_FORMAT_SIZE_T "d\n",
 178		count_alloc);
 179	fprintf(stderr, "Dict reuse through freelist: %" PY_FORMAT_SIZE_T
 180		"d\n", count_reuse);
 181	fprintf(stderr, "%.2f%% reuse rate\n\n",
 182		(100.0*count_reuse/(count_alloc+count_reuse)));
 183}
 184#endif
 185
 186/* Initialization macros.
 187   There are two ways to create a dict:  PyDict_New() is the main C API
 188   function, and the tp_new slot maps to dict_new().  In the latter case we
 189   can save a little time over what PyDict_New does because it's guaranteed
 190   that the PyDictObject struct is already zeroed out.
 191   Everyone except dict_new() should use EMPTY_TO_MINSIZE (unless they have
 192   an excellent reason not to).
 193*/
 194
 195#define INIT_NONZERO_DICT_SLOTS(mp) do {				\
 196	(mp)->ma_table = (mp)->ma_smalltable;				\
 197	(mp)->ma_mask = PyDict_MINSIZE - 1;				\
 198    } while(0)
 199
 200#define EMPTY_TO_MINSIZE(mp) do {					\
 201	memset((mp)->ma_smalltable, 0, sizeof((mp)->ma_smalltable));	\
 202	(mp)->ma_used = (mp)->ma_fill = 0;				\
 203	INIT_NONZERO_DICT_SLOTS(mp);					\
 204    } while(0)
 205
 206/* Dictionary reuse scheme to save calls to malloc, free, and memset */
 207#ifndef PyDict_MAXFREELIST
 208#define PyDict_MAXFREELIST 80
 209#endif
 210static PyDictObject *free_list[PyDict_MAXFREELIST];
 211static int numfree = 0;
 212
 213void
 214PyDict_Fini(void)
 215{
 216	PyDictObject *op;
 217
 218	while (numfree) {
 219		op = free_list[--numfree];
 220		assert(PyDict_CheckExact(op));
 221		PyObject_GC_Del(op);
 222	}
 223}
 224
 225PyObject *
 226PyDict_New(void)
 227{
 228	register PyDictObject *mp;
 229	if (dummy == NULL) { /* Auto-initialize dummy */
 230		dummy = PyString_FromString("<dummy key>");
 231		if (dummy == NULL)
 232			return NULL;
 233#ifdef SHOW_CONVERSION_COUNTS
 234		Py_AtExit(show_counts);
 235#endif
 236#ifdef SHOW_ALLOC_COUNT
 237		Py_AtExit(show_alloc);
 238#endif
 239	}
 240	if (numfree) {
 241		mp = free_list[--numfree];
 242		assert (mp != NULL);
 243		assert (Py_TYPE(mp) == &PyDict_Type);
 244		_Py_NewReference((PyObject *)mp);
 245		if (mp->ma_fill) {
 246			EMPTY_TO_MINSIZE(mp);
 247		} else {
 248			/* At least set ma_table and ma_mask; these are wrong
 249			   if an empty but presized dict is added to freelist */
 250			INIT_NONZERO_DICT_SLOTS(mp);
 251		}
 252		assert (mp->ma_used == 0);
 253		assert (mp->ma_table == mp->ma_smalltable);
 254		assert (mp->ma_mask == PyDict_MINSIZE - 1);
 255#ifdef SHOW_ALLOC_COUNT
 256		count_reuse++;
 257#endif
 258	} else {
 259		mp = PyObject_GC_New(PyDictObject, &PyDict_Type);
 260		if (mp == NULL)
 261			return NULL;
 262		EMPTY_TO_MINSIZE(mp);
 263#ifdef SHOW_ALLOC_COUNT
 264		count_alloc++;
 265#endif
 266	}
 267	mp->ma_lookup = lookdict_string;
 268#ifdef WITH_LLVM
 269	mp->ma_watchers = NULL;
 270#endif
 271#ifdef SHOW_CONVERSION_COUNTS
 272	++created;
 273#endif
 274	_PyObject_GC_TRACK(mp);
 275	return (PyObject *)mp;
 276}
 277
 278/*
 279The basic lookup function used by all operations.
 280This is based on Algorithm D from Knuth Vol. 3, Sec. 6.4.
 281Open addressing is preferred over chaining since the link overhead for
 282chaining would be substantial (100% with typical malloc overhead).
 283
 284The initial probe index is computed as hash mod the table size. Subsequent
 285probe indices are computed as explained earlier.
 286
 287All arithmetic on hash should ignore overflow.
 288
 289(The details in this version are due to Tim Peters, building on many past
 290contributions by Reimer Behrends, Jyrki Alakuijala, Vladimir Marangozov and
 291Christian Tismer).
 292
 293lookdict() is general-purpose, and may return NULL if (and only if) a
 294comparison raises an exception (this was new in Python 2.5).
 295lookdict_string() below is specialized to string keys, comparison of which can
 296never raise an exception; that function can never return NULL.  For both, when
 297the key isn't found a PyDictEntry* is returned for which the me_value field is
 298NULL; this is the slot in the dict at which the key would have been found, and
 299the caller can (if it wishes) add the <key, value> pair to the returned
 300PyDictEntry*.
 301*/
 302static PyDictEntry *
 303lookdict(PyDictObject *mp, PyObject *key, register long hash)
 304{
 305	register size_t i;
 306	register size_t perturb;
 307	register PyDictEntry *freeslot;
 308	register size_t mask = (size_t)mp->ma_mask;
 309	PyDictEntry *ep0 = mp->ma_table;
 310	register PyDictEntry *ep;
 311	register int cmp;
 312	PyObject *startkey;
 313
 314	i = (size_t)hash & mask;
 315	ep = &ep0[i];
 316	if (ep->me_key == NULL || ep->me_key == key)
 317		return ep;
 318
 319	if (ep->me_key == dummy)
 320		freeslot = ep;
 321	else {
 322		if (ep->me_hash == hash) {
 323			startkey = ep->me_key;
 324			Py_INCREF(startkey);
 325			cmp = PyObject_RichCompareBool(startkey, key, Py_EQ);
 326			Py_DECREF(startkey);
 327			if (cmp < 0)
 328				return NULL;
 329			if (ep0 == mp->ma_table && ep->me_key == startkey) {
 330				if (cmp > 0)
 331					return ep;
 332			}
 333			else {
 334				/* The compare did major nasty stuff to the
 335				 * dict:  start over.
 336				 * XXX A clever adversary could prevent this
 337				 * XXX from terminating.
 338 				 */
 339 				return lookdict(mp, key, hash);
 340 			}
 341		}
 342		freeslot = NULL;
 343	}
 344
 345	/* In the loop, me_key == dummy is by far (factor of 100s) the
 346	   least likely outcome, so test for that last. */
 347	for (perturb = hash; ; perturb >>= PERTURB_SHIFT) {
 348		i = (i << 2) + i + perturb + 1;
 349		ep = &ep0[i & mask];
 350		if (ep->me_key == NULL)
 351			return freeslot == NULL ? ep : freeslot;
 352		if (ep->me_key == key)
 353			return ep;
 354		if (ep->me_hash == hash && ep->me_key != dummy) {
 355			startkey = ep->me_key;
 356			Py_INCREF(startkey);
 357			cmp = PyObject_RichCompareBool(startkey, key, Py_EQ);
 358			Py_DECREF(startkey);
 359			if (cmp < 0)
 360				return NULL;
 361			if (ep0 == mp->ma_table && ep->me_key == startkey) {
 362				if (cmp > 0)
 363					return ep;
 364			}
 365			else {
 366				/* The compare did major nasty stuff to the
 367				 * dict:  start over.
 368				 * XXX A clever adversary could prevent this
 369				 * XXX from terminating.
 370 				 */
 371 				return lookdict(mp, key, hash);
 372 			}
 373		}
 374		else if (ep->me_key == dummy && freeslot == NULL)
 375			freeslot = ep;
 376	}
 377	assert(0);	/* NOT REACHED */
 378	return 0;
 379}
 380
 381/*
 382 * Hacked up version of lookdict which can assume keys are always strings;
 383 * this assumption allows testing for errors during PyObject_RichCompareBool()
 384 * to be dropped; string-string comparisons never raise exceptions.  This also
 385 * means we don't need to go through PyObject_RichCompareBool(); we can always
 386 * use _PyString_Eq() directly.
 387 *
 388 * This is valuable because dicts with only string keys are very common.
 389 */
 390static PyDictEntry *
 391lookdict_string(PyDictObject *mp, PyObject *key, register long hash)
 392{
 393	register size_t i;
 394	register size_t perturb;
 395	register PyDictEntry *freeslot;
 396	register size_t mask = (size_t)mp->ma_mask;
 397	PyDictEntry *ep0 = mp->ma_table;
 398	register PyDictEntry *ep;
 399
 400	/* Make sure this function doesn't have to handle non-string keys,
 401	   including subclasses of str; e.g., one reason to subclass
 402	   strings is to override __eq__, and for speed we don't cater to
 403	   that here. */
 404	if (!PyString_CheckExact(key)) {
 405#ifdef SHOW_CONVERSION_COUNTS
 406		++converted;
 407#endif
 408		mp->ma_lookup = lookdict;
 409		return lookdict(mp, key, hash);
 410	}
 411	i = hash & mask;
 412	ep = &ep0[i];
 413	if (ep->me_key == NULL || ep->me_key == key)
 414		return ep;
 415	if (ep->me_key == dummy)
 416		freeslot = ep;
 417	else {
 418		if (ep->me_hash == hash && _PyString_Eq(ep->me_key, key))
 419			return ep;
 420		freeslot = NULL;
 421	}
 422
 423	/* In the loop, me_key == dummy is by far (factor of 100s) the
 424	   least likely outcome, so test for that last. */
 425	for (perturb = hash; ; perturb >>= PERTURB_SHIFT) {
 426		i = (i << 2) + i + perturb + 1;
 427		ep = &ep0[i & mask];
 428		if (ep->me_key == NULL)
 429			return freeslot == NULL ? ep : freeslot;
 430		if (ep->me_key == key
 431		    || (ep->me_hash == hash
 432		        && ep->me_key != dummy
 433			&& _PyString_Eq(ep->me_key, key)))
 434			return ep;
 435		if (ep->me_key == dummy && freeslot == NULL)
 436			freeslot = ep;
 437	}
 438	assert(0);	/* NOT REACHED */
 439	return 0;
 440}
 441
 442/*
 443Internal routine to insert a new item into the table.
 444Used both by the internal resize routine and by the public insert routine.
 445Eats a reference to key and one to value.
 446Returns -1 if an error occurred; return 0 on success; return 1 on success if
 447the insert didn't actually change the dict.
 448*/
 449static int
 450insertdict(register PyDictObject *mp, PyObject *key, long hash, PyObject *value)
 451{
 452	PyObject *old_value;
 453	register PyDictEntry *ep;
 454	typedef PyDictEntry *(*lookupfunc)(PyDictObject *, PyObject *, long);
 455
 456	assert(mp->ma_lookup != NULL);
 457	ep = mp->ma_lookup(mp, key, hash);
 458	if (ep == NULL) {
 459		Py_DECREF(key);
 460		Py_DECREF(value);
 461		return -1;
 462	}
 463	if (ep->me_value != NULL) {
 464		old_value = ep->me_value;
 465		ep->me_value = value;
 466		Py_DECREF(old_value); /* which **CAN** re-enter */
 467		Py_DECREF(key);
 468		return old_value == value;
 469	}
 470	else {
 471		if (ep->me_key == NULL)
 472			mp->ma_fill++;
 473		else {
 474			assert(ep->me_key == dummy);
 475			Py_DECREF(dummy);
 476		}
 477		ep->me_key = key;
 478		ep->me_hash = (Py_ssize_t)hash;
 479		ep->me_value = value;
 480		mp->ma_used++;
 481	}
 482	return 0;
 483}
 484
 485/*
 486Internal routine used by dictresize() to insert an item which is
 487known to be absent from the dict.  This routine also assumes that
 488the dict contains no deleted entries.  Besides the performance benefit,
 489using insertdict() in dictresize() is dangerous (SF bug #1456209).
 490Note that no refcounts are changed by this routine; if needed, the caller
 491is responsible for incref'ing `key` and `value`.
 492*/
 493static void
 494insertdict_clean(register PyDictObject *mp, PyObject *key, long hash,
 495		 PyObject *value)
 496{
 497	register size_t i;
 498	register size_t perturb;
 499	register size_t mask = (size_t)mp->ma_mask;
 500	PyDictEntry *ep0 = mp->ma_table;
 501	register PyDictEntry *ep;
 502
 503	i = hash & mask;
 504	ep = &ep0[i];
 505	for (perturb = hash; ep->me_key != NULL; perturb >>= PERTURB_SHIFT) {
 506		i = (i << 2) + i + perturb + 1;
 507		ep = &ep0[i & mask];
 508	}
 509	assert(ep->me_value == NULL);
 510	mp->ma_fill++;
 511	ep->me_key = key;
 512	ep->me_hash = (Py_ssize_t)hash;
 513	ep->me_value = value;
 514	mp->ma_used++;
 515}
 516
 517/*
 518Restructure the table by allocating a new table and reinserting all
 519items again.  When entries have been deleted, the new table may
 520actually be smaller than the old one.
 521*/
 522static int
 523dictresize(PyDictObject *mp, Py_ssize_t minused)
 524{
 525	Py_ssize_t newsize;
 526	PyDictEntry *oldtable, *newtable, *ep;
 527	Py_ssize_t i;
 528	int is_oldtable_malloced;
 529	PyDictEntry small_copy[PyDict_MINSIZE];
 530
 531	assert(minused >= 0);
 532
 533	/* Find the smallest table size > minused. */
 534	for (newsize = PyDict_MINSIZE;
 535	     newsize <= minused && newsize > 0;
 536	     newsize <<= 1)
 537		;
 538	if (newsize <= 0) {
 539		PyErr_NoMemory();
 540		return -1;
 541	}
 542
 543	/* Get space for a new table. */
 544	oldtable = mp->ma_table;
 545	assert(oldtable != NULL);
 546	is_oldtable_malloced = oldtable != mp->ma_smalltable;
 547
 548	if (newsize == PyDict_MINSIZE) {
 549		/* A large table is shrinking, or we can't get any smaller. */
 550		newtable = mp->ma_smalltable;
 551		if (newtable == oldtable) {
 552			if (mp->ma_fill == mp->ma_used) {
 553				/* No dummies, so no point doing anything. */
 554				return 0;
 555			}
 556			/* We're not going to resize it, but rebuild the
 557			   table anyway to purge old dummy entries.
 558			   Subtle:  This is *necessary* if fill==size,
 559			   as lookdict needs at least one virgin slot to
 560			   terminate failing searches.  If fill < size, it's
 561			   merely desirable, as dummies slow searches. */
 562			assert(mp->ma_fill > mp->ma_used);
 563			memcpy(small_copy, oldtable, sizeof(small_copy));
 564			oldtable = small_copy;
 565		}
 566	}
 567	else {
 568		newtable = PyMem_NEW(PyDictEntry, newsize);
 569		if (newtable == NULL) {
 570			PyErr_NoMemory();
 571			return -1;
 572		}
 573	}
 574
 575	/* Make the dict empty, using the new table. */
 576	assert(newtable != oldtable);
 577	mp->ma_table = newtable;
 578	mp->ma_mask = newsize - 1;
 579	memset(newtable, 0, sizeof(PyDictEntry) * newsize);
 580	mp->ma_used = 0;
 581	i = mp->ma_fill;
 582	mp->ma_fill = 0;
 583
 584	/* Copy the data over; this is refcount-neutral for active entries;
 585	   dummy entries aren't copied over, of course */
 586	for (ep = oldtable; i > 0; ep++) {
 587		if (ep->me_value != NULL) {	/* active entry */
 588			--i;
 589			insertdict_clean(mp, ep->me_key, (long)ep->me_hash,
 590					 ep->me_value);
 591		}
 592		else if (ep->me_key != NULL) {	/* dummy entry */
 593			--i;
 594			assert(ep->me_key == dummy);
 595			Py_DECREF(ep->me_key);
 596		}
 597		/* else key == value == NULL:  nothing to do */
 598	}
 599
 600	if (is_oldtable_malloced)
 601		PyMem_DEL(oldtable);
 602	return 0;
 603}
 604
 605/* Create a new dictionary pre-sized to hold an estimated number of elements.
 606   Underestimates are okay because the dictionary will resize as necessary.
 607   Overestimates just mean the dictionary will be more sparse than usual.
 608*/
 609
 610PyObject *
 611_PyDict_NewPresized(Py_ssize_t minused)
 612{
 613	PyObject *op = PyDict_New();
 614
 615	if (minused>5 && op != NULL && dictresize((PyDictObject *)op, minused) == -1) {
 616		Py_DECREF(op);
 617		return NULL;
 618	}
 619	return op;
 620}
 621
 622/* Note that, for historical reasons, PyDict_GetItem() suppresses all errors
 623 * that may occur (originally dicts supported only string keys, and exceptions
 624 * weren't possible).  So, while the original intent was that a NULL return
 625 * meant the key wasn't present, in reality it can mean that, or that an error
 626 * (suppressed) occurred while computing the key's hash, or that some error
 627 * (suppressed) occurred when comparing keys in the dict's internal probe
 628 * sequence.  A nasty example of the latter is when a Python-coded comparison
 629 * function hits a stack-depth error, which can cause this to return NULL
 630 * even if the key is present.
 631 */
 632PyObject *
 633PyDict_GetItem(PyObject *op, PyObject *key)
 634{
 635	long hash;
 636	PyDictObject *mp = (PyDictObject *)op;
 637	PyDictEntry *ep;
 638	PyThreadState *tstate;
 639	if (!PyDict_Check(op))
 640		return NULL;
 641	if (!PyString_CheckExact(key) ||
 642	    (hash = ((PyStringObject *) key)->ob_shash) == -1)
 643	{
 644		hash = PyObject_Hash(key);
 645		if (hash == -1) {
 646			PyErr_Clear();
 647			return NULL;
 648		}
 649	}
 650
 651	/* We can arrive here with a NULL tstate during initialization:
 652	   try running "python -Wi" for an example related to string
 653	   interning.  Let's just hope that no exception occurs then... */
 654	tstate = _PyThreadState_Current;
 655	if (tstate != NULL && tstate->curexc_type != NULL) {
 656		/* preserve the existing exception */
 657		PyObject *err_type, *err_value, *err_tb;
 658		PyErr_Fetch(&err_type, &err_value, &err_tb);
 659		ep = (mp->ma_lookup)(mp, key, hash);
 660		/* ignore errors */
 661		PyErr_Restore(err_type, err_value, err_tb);
 662		if (ep == NULL)
 663			return NULL;
 664	}
 665	else {
 666		ep = (mp->ma_lookup)(mp, key, hash);
 667		if (ep == NULL) {
 668			PyErr_Clear();
 669			return NULL;
 670		}
 671	}
 672	return ep->me_value;
 673}
 674
 675/* CAUTION: PyDict_SetItem() must guarantee that it won't resize the
 676 * dictionary if it's merely replacing the value for an existing key.
 677 * This means that it's safe to loop over a dictionary with PyDict_Next()
 678 * and occasionally replace a value -- but you can't insert new keys or
 679 * remove them.
 680 */
 681int
 682PyDict_SetItem(register PyObject *op, PyObject *key, PyObject *value)
 683{
 684	register PyDictObject *mp;
 685	register long hash;
 686	register Py_ssize_t n_used;
 687	int status;
 688
 689	if (!PyDict_Check(op)) {
 690		PyErr_BadInternalCall();
 691		return -1;
 692	}
 693	assert(key);
 694	assert(value);
 695	mp = (PyDictObject *)op;
 696	if (PyString_CheckExact(key)) {
 697		hash = ((PyStringObject *)key)->ob_shash;
 698		if (hash == -1)
 699			hash = PyObject_Hash(key);
 700	}
 701	else {
 702		hash = PyObject_Hash(key);
 703		if (hash == -1)
 704			return -1;
 705	}
 706	assert(mp->ma_fill <= mp->ma_mask);  /* at least one empty slot */
 707	n_used = mp->ma_used;
 708	Py_INCREF(value);
 709	Py_INCREF(key);
 710	status = insertdict(mp, key, hash, value);
 711	if (status < 0)
 712		return -1;
 713	else if (status == 0)
 714		notify_watchers(mp);
 715	/* If we added a key, we can safely resize.  Otherwise just return!
 716	 * If fill >= 2/3 size, adjust size.  Normally, this doubles or
 717	 * quaduples the size, but it's also possible for the dict to shrink
 718	 * (if ma_fill is much larger than ma_used, meaning a lot of dict
 719	 * keys have been * deleted).
 720	 *
 721	 * Quadrupling the size improves average dictionary sparseness
 722	 * (reducing collisions) at the cost of some memory and iteration
 723	 * speed (which loops over every possible entry).  It also halves
 724	 * the number of expensive resize operations in a growing dictionary.
 725	 *
 726	 * Very large dictionaries (over 50K items) use doubling instead.
 727	 * This may help applications with severe memory constraints.
 728	 */
 729	if (!(mp->ma_used > n_used && mp->ma_fill*3 >= (mp->ma_mask+1)*2))
 730		return 0;
 731	return dictresize(mp, (mp->ma_used > 50000 ? 2 : 4) * mp->ma_used);
 732}
 733
 734int
 735PyDict_DelItem(PyObject *op, PyObject *key)
 736{
 737	register PyDictObject *mp;
 738	register long hash;
 739	register PyDictEntry *ep;
 740	PyObject *old_value, *old_key;
 741
 742	if (!PyDict_Check(op)) {
 743		PyErr_BadInternalCall();
 744		return -1;
 745	}
 746	assert(key);
 747	if (!PyString_CheckExact(key) ||
 748	    (hash = ((PyStringObject *) key)->ob_shash) == -1) {
 749		hash = PyObject_Hash(key);
 750		if (hash == -1)
 751			return -1;
 752	}
 753	mp = (PyDictObject *)op;
 754	ep = (mp->ma_lookup)(mp, key, hash);
 755	if (ep == NULL)
 756		return -1;
 757	if (ep->me_value == NULL) {
 758		set_key_error(key);
 759		return -1;
 760	}
 761	old_key = ep->me_key;
 762	Py_INCREF(dummy);
 763	ep->me_key = dummy;
 764	old_value = ep->me_value;
 765	ep->me_value = NULL;
 766	mp->ma_used--;
 767	Py_DECREF(old_value);
 768	Py_DECREF(old_key);
 769	notify_watchers(mp);
 770	return 0;
 771}
 772
 773void
 774PyDict_Clear(PyObject *op)
 775{
 776	PyDictObject *mp;
 777	PyDictEntry *ep, *table;
 778	int table_is_malloced;
 779	Py_ssize_t fill;
 780	PyDictEntry small_copy[PyDict_MINSIZE];
 781#ifdef Py_DEBUG
 782	Py_ssize_t i, n;
 783#endif
 784
 785	if (!PyDict_Check(op))
 786		return;
 787	mp = (PyDictObject *)op;
 788#ifdef Py_DEBUG
 789	n = mp->ma_mask + 1;
 790	i = 0;
 791#endif
 792
 793	/* Clear the list of watching code objects. */
 794	notify_watchers(mp);
 795	del_watchers_array(mp);
 796
 797	table = mp->ma_table;
 798	assert(table != NULL);
 799	table_is_malloced = table != mp->ma_smalltable;
 800
 801	/* This is delicate.  During the process of clearing the dict,
 802	 * decrefs can cause the dict to mutate.  To avoid fatal confusion
 803	 * (voice of experience), we have to make the dict empty before
 804	 * clearing the slots, and never refer to anything via mp->xxx while
 805	 * clearing.
 806	 */
 807	fill = mp->ma_fill;
 808	if (table_is_malloced)
 809		EMPTY_TO_MINSIZE(mp);
 810
 811	else if (fill > 0) {
 812		/* It's a small table with something that needs to be cleared.
 813		 * Afraid the only safe way is to copy the dict entries into
 814		 * another small table first.
 815		 */
 816		memcpy(small_copy, table, sizeof(small_copy));
 817		table = small_copy;
 818		EMPTY_TO_MINSIZE(mp);
 819	}
 820	/* else it's a small table that's already empty */
 821
 822	/* Now we can finally clear things.  If C had refcounts, we could
 823	 * assert that the refcount on table is 1 now, i.e. that this function
 824	 * has unique access to it, so decref side-effects can't alter it.
 825	 */
 826	for (ep = table; fill > 0; ++ep) {
 827#ifdef Py_DEBUG
 828		assert(i < n);
 829		++i;
 830#endif
 831		if (ep->me_key) {
 832			--fill;
 833			Py_DECREF(ep->me_key);
 834			Py_XDECREF(ep->me_value);
 835		}
 836#ifdef Py_DEBUG
 837		else
 838			assert(ep->me_value == NULL);
 839#endif
 840	}
 841
 842	if (table_is_malloced)
 843		PyMem_DEL(table);
 844}
 845
 846/*
 847 * Iterate over a dict.  Use like so:
 848 *
 849 *     Py_ssize_t i;
 850 *     PyObject *key, *value;
 851 *     i = 0;   # important!  i should not otherwise be changed by you
 852 *     while (PyDict_Next(yourdict, &i, &key, &value)) {
 853 *              Refer to borrowed references in key and value.
 854 *     }
 855 *
 856 * CAUTION:  In general, it isn't safe to use PyDict_Next in a loop that
 857 * mutates the dict.  One exception:  it is safe if the loop merely changes
 858 * the values associated with the keys (but doesn't insert new keys or
 859 * delete keys), via PyDict_SetItem().
 860 */
 861int
 862PyDict_Next(PyObject *op, Py_ssize_t *ppos, PyObject **pkey, PyObject **pvalue)
 863{
 864	register Py_ssize_t i;
 865	register Py_ssize_t mask;
 866	register PyDictEntry *ep;
 867
 868	if (!PyDict_Check(op))
 869		return 0;
 870	i = *ppos;
 871	if (i < 0)
 872		return 0;
 873	ep = ((PyDictObject *)op)->ma_table;
 874	mask = ((PyDictObject *)op)->ma_mask;
 875	while (i <= mask && ep[i].me_value == NULL)
 876		i++;
 877	*ppos = i+1;
 878	if (i > mask)
 879		return 0;
 880	if (pkey)
 881		*pkey = ep[i].me_key;
 882	if (pvalue)
 883		*pvalue = ep[i].me_value;
 884	return 1;
 885}
 886
 887/* Internal version of PyDict_Next that returns a hash value in addition to the key and value.*/
 888int
 889_PyDict_Next(PyObject *op, Py_ssize_t *ppos, PyObject **pkey, PyObject **pvalue, long *phash)
 890{
 891	register Py_ssize_t i;
 892	register Py_ssize_t mask;
 893	register PyDictEntry *ep;
 894
 895	if (!PyDict_Check(op))
 896		return 0;
 897	i = *ppos;
 898	if (i < 0)
 899		return 0;
 900	ep = ((PyDictObject *)op)->ma_table;
 901	mask = ((PyDictObject *)op)->ma_mask;
 902	while (i <= mask && ep[i].me_value == NULL)
 903		i++;
 904	*ppos = i+1;
 905	if (i > mask)
 906		return 0;
 907        *phash = (long)(ep[i].me_hash);
 908	if (pkey)
 909		*pkey = ep[i].me_key;
 910	if (pvalue)
 911		*pvalue = ep[i].me_value;
 912	return 1;
 913}
 914
 915/* Methods */
 916
 917static void
 918dict_dealloc(register PyDictObject *mp)
 919{
 920	register PyDictEntry *ep;
 921	Py_ssize_t fill = mp->ma_fill;
 922
 923	/* De-optimize any optimized code objects. */
 924	notify_watchers(mp);
 925	del_watchers_array(mp);
 926
 927 	PyObject_GC_UnTrack(mp);
 928	Py_TRASHCAN_SAFE_BEGIN(mp)
 929	for (ep = mp->ma_table; fill > 0; ep++) {
 930		if (ep->me_key) {
 931			--fill;
 932			Py_DECREF(ep->me_key);
 933			Py_XDECREF(ep->me_value);
 934		}
 935	}
 936	if (mp->ma_table != mp->ma_smalltable)
 937		PyMem_DEL(mp->ma_table);
 938	if (numfree < PyDict_MAXFREELIST && Py_TYPE(mp) == &PyDict_Type)
 939		free_list[numfree++] = mp;
 940	else
 941		Py_TYPE(mp)->tp_free((PyObject *)mp);
 942	Py_TRASHCAN_SAFE_END(mp)
 943}
 944
 945static int
 946dict_print(register PyDictObject *mp, register FILE *fp, register int flags)
 947{
 948	register Py_ssize_t i;
 949	register Py_ssize_t any;
 950	int status;
 951
 952	status = Py_ReprEnter((PyObject*)mp);
 953	if (status != 0) {
 954		if (status < 0)
 955			return status;
 956		Py_BEGIN_ALLOW_THREADS
 957		fprintf(fp, "{...}");
 958		Py_END_ALLOW_THREADS
 959		return 0;
 960	}
 961
 962	Py_BEGIN_ALLOW_THREADS
 963	fprintf(fp, "{");
 964	Py_END_ALLOW_THREADS
 965	any = 0;
 966	for (i = 0; i <= mp->ma_mask; i++) {
 967		PyDictEntry *ep = mp->ma_table + i;
 968		PyObject *pvalue = ep->me_value;
 969		if (pvalue != NULL) {
 970			/* Prevent PyObject_Repr from deleting value during
 971			   key format */
 972			Py_INCREF(pvalue);
 973			if (any++ > 0) {
 974				Py_BEGIN_ALLOW_THREADS
 975				fprintf(fp, ", ");
 976				Py_END_ALLOW_THREADS
 977			}
 978			if (PyObject_Print((PyObject *)ep->me_key, fp, 0)!=0) {
 979				Py_DECREF(pvalue);
 980				Py_ReprLeave((PyObject*)mp);
 981				return -1;
 982			}
 983			Py_BEGIN_ALLOW_THREADS
 984			fprintf(fp, ": ");
 985			Py_END_ALLOW_THREADS
 986			if (PyObject_Print(pvalue, fp, 0) != 0) {
 987				Py_DECREF(pvalue);
 988				Py_ReprLeave((PyObject*)mp);
 989				return -1;
 990			}
 991			Py_DECREF(pvalue);
 992		}
 993	}
 994	Py_BEGIN_ALLOW_THREADS
 995	fprintf(fp, "}");
 996	Py_END_ALLOW_THREADS
 997	Py_ReprLeave((PyObject*)mp);
 998	return 0;
 999}
1000
1001static PyObject *
1002dict_repr(PyDictObject *mp)
1003{
1004	Py_ssize_t i;
1005	PyObject *s, *temp, *colon = NULL;
1006	PyObject *pieces = NULL, *result = NULL;
1007	PyObject *key, *value;
1008
1009	i = Py_ReprEnter((PyObject *)mp);
1010	if (i != 0) {
1011		return i > 0 ? PyString_FromString("{...}") : NULL;
1012	}
1013
1014	if (mp->ma_used == 0) {
1015		result = PyString_FromString("{}");
1016		goto Done;
1017	}
1018
1019	pieces = PyList_New(0);
1020	if (pieces == NULL)
1021		goto Done;
1022
1023	colon = PyString_FromString(": ");
1024	if (colon == NULL)
1025		goto Done;
1026
1027	/* Do repr() on each key+value pair, and insert ": " between them.
1028	   Note that repr may mutate the dict. */
1029	i = 0;
1030	while (PyDict_Next((PyObject *)mp, &i, &key, &value)) {
1031		int status;
1032		/* Prevent repr from deleting value during key format. */
1033		Py_INCREF(value);
1034		s = PyObject_Repr(key);
1035		PyString_Concat(&s, colon);
1036		PyString_ConcatAndDel(&s, PyObject_Repr(value));
1037		Py_DECREF(value);
1038		if (s == NULL)
1039			goto Done;
1040		status = PyList_Append(pieces, s);
1041		Py_DECREF(s);  /* append created a new ref */
1042		if (status < 0)
1043			goto Done;
1044	}
1045
1046	/* Add "{}" decorations to the first and last items. */
1047	assert(PyList_GET_SIZE(pieces) > 0);
1048	s = PyString_FromString("{");
1049	if (s == NULL)
1050		goto Done;
1051	temp = PyList_GET_ITEM(pieces, 0);
1052	PyString_ConcatAndDel(&s, temp);
1053	PyList_SET_ITEM(pieces, 0, s);
1054	if (s == NULL)
1055		goto Done;
1056
1057	s = PyString_FromString("}");
1058	if (s == NULL)
1059		goto Done;
1060	temp = PyList_GET_ITEM(pieces, PyList_GET_SIZE(pieces) - 1);
1061	PyString_ConcatAndDel(&temp, s);
1062	PyList_SET_ITEM(pieces, PyList_GET_SIZE(pieces) - 1, temp);
1063	if (temp == NULL)
1064		goto Done;
1065
1066	/* Paste them all together with ", " between. */
1067	s = PyString_FromString(", ");
1068	if (s == NULL)
1069		goto Done;
1070	result = _PyString_Join(s, pieces);
1071	Py_DECREF(s);
1072
1073Done:
1074	Py_XDECREF(pieces);
1075	Py_XDECREF(colon);
1076	Py_ReprLeave((PyObject *)mp);
1077	return result;
1078}
1079
1080static Py_ssize_t
1081dict_length(PyDictObject *mp)
1082{
1083	return mp->ma_used;
1084}
1085
1086static PyObject *
1087dict_subscript(PyDictObject *mp, register PyObject *key)
1088{
1089	PyObject *v;
1090	long hash;
1091	PyDictEntry *ep;
1092	assert(mp->ma_table != NULL);
1093	if (!PyString_CheckExact(key) ||
1094	    (hash = ((PyStringObject *) key)->ob_shash) == -1) {
1095		hash = PyObject_Hash(key);
1096		if (hash == -1)
1097			return NULL;
1098	}
1099	ep = (mp->ma_lookup)(mp, key, hash);
1100	if (ep == NULL)
1101		return NULL;
1102	v = ep->me_value;
1103	if (v == NULL) {
1104		if (!PyDict_CheckExact(mp)) {
1105			/* Look up __missing__ method if we're a subclass. */
1106		    	PyObject *missing;
1107			static PyObject *missing_str = NULL;
1108			if (missing_str == NULL)
1109				missing_str =
1110				  PyString_InternFromString("__missing__");
1111			missing = _PyType_Lookup(Py_TYPE(mp), missing_str);
1112			if (missing != NULL)
1113				return PyObject_CallFunctionObjArgs(missing,
1114					(PyObject *)mp, key, NULL);
1115		}
1116		set_key_error(key);
1117		return NULL;
1118	}
1119	else
1120		Py_INCREF(v);
1121	return v;
1122}
1123
1124static int
1125dict_ass_sub(PyDictObject *mp, PyObject *v, PyObject *w)
1126{
1127	if (w == NULL)
1128		return PyDict_DelItem((PyObject *)mp, v);
1129	else
1130		return PyDict_SetItem((PyObject *)mp, v, w);
1131}
1132
1133static PyMappingMethods dict_as_mapping = {
1134	(lenfunc)dict_length, /*mp_length*/
1135	(binaryfunc)dict_subscript, /*mp_subscript*/
1136	(objobjargproc)dict_ass_sub, /*mp_ass_subscript*/
1137};
1138
1139static PyObject *
1140dict_keys(register PyDictObject *mp)
1141{
1142	register PyObject *v;
1143	register Py_ssize_t i, j;
1144	PyDictEntry *ep;
1145	Py_ssize_t mask, n;
1146
1147  again:
1148	n = mp->ma_used;
1149	v = PyList_New(n);
1150	if (v == NULL)
1151		return NULL;
1152	if (n != mp->ma_used) {
1153		/* Durnit.  The allocations caused the dict to resize.
1154		 * Just start over, this shouldn't normally happen.
1155		 */
1156		Py_DECREF(v);
1157		goto again;
1158	}
1159	ep = mp->ma_table;
1160	mask = mp->ma_mask;
1161	for (i = 0, j = 0; i <= mask; i++) {
1162		if (ep[i].me_value != NULL) {
1163			PyObject *key = ep[i].me_key;
1164			Py_INCREF(key);
1165			PyList_SET_ITEM(v, j, key);
1166			j++;
1167		}
1168	}
1169	assert(j == n);
1170	return v;
1171}
1172
1173static PyObject *
1174dict_values(register PyDictObject *mp)
1175{
1176	register PyObject *v;
1177	register Py_ssize_t i, j;
1178	PyDictEntry *ep;
1179	Py_ssize_t mask, n;
1180
1181  again:
1182	n = mp->ma_used;
1183	v = PyList_New(n);
1184	if (v == NULL)
1185		return NULL;
1186	if (n != mp->ma_used) {
1187		/* Durnit.  The allocations caused the dict to resize.
1188		 * Just start over, this shouldn't normally happen.
1189		 */
1190		Py_DECREF(v);
1191		goto again;
1192	}
1193	ep = mp->ma_table;
1194	mask = mp->ma_mask;
1195	for (i = 0, j = 0; i <= mask; i++) {
1196		if (ep[i].me_value != NULL) {
1197			PyObject *value = ep[i].me_value;
1198			Py_INCREF(value);
1199			PyList_SET_ITEM(v, j, value);
1200			j++;
1201		}
1202	}
1203	assert(j == n);
1204	return v;
1205}
1206
1207static PyObject *
1208dict_items(register PyDictObject *mp)
1209{
1210	register PyObject *v;
1211	register Py_ssize_t i, j, n;
1212	Py_ssize_t mask;
1213	PyObject *item, *key, *value;
1214	PyDictEntry *ep;
1215
1216	/* Preallocate the list of tuples, to avoid allocations during
1217	 * the loop over the items, which could trigger GC, which
1218	 * could resize the dict. :-(
1219	 */
1220  again:
1221	n = mp->ma_used;
1222	v = PyList_New(n);
1223	if (v == NULL)
1224		return NULL;
1225	for (i = 0; i < n; i++) {
1226		item = PyTuple_New(2);
1227		if (item == NULL) {
1228			Py_DECREF(v);
1229			return NULL;
1230		}
1231		PyList_SET_ITEM(v, i, item);
1232	}
1233	if (n != mp->ma_used) {
1234		/* Durnit.  The allocations caused the dict to resize.
1235		 * Just start over, this shouldn't normally happen.
1236		 */
1237		Py_DECREF(v);
1238		goto again;
1239	}
1240	/* Nothing we do below makes any function calls. */
1241	ep = mp->ma_table;
1242	mask = mp->ma_mask;
1243	for (i = 0, j = 0; i <= mask; i++) {
1244		if ((value=ep[i].me_value) != NULL) {
1245			key = ep[i].me_key;
1246			item = PyList_GET_ITEM(v, j);
1247			Py_INCREF(key);
1248			PyTuple_SET_ITEM(item, 0, key);
1249			Py_INCREF(value);
1250			PyTuple_SET_ITEM(item, 1, value);
1251			j++;
1252		}
1253	}
1254	assert(j == n);
1255	return v;
1256}
1257
1258static PyObject *
1259dict_fromkeys(PyObject *cls, PyObject *args)
1260{
1261	PyObject *seq;
1262	PyObject *value = Py_None;
1263	PyObject *it;	/* iter(seq) */
1264	PyObject *key;
1265	PyObject *d;
1266	int status;
1267
1268	if (!PyArg_UnpackTuple(args, "fromkeys", 1, 2, &seq, &value))
1269		return NULL;
1270
1271	d = PyObject_CallObject(cls, NULL);
1272	if (d == NULL)
1273		return NULL;
1274
1275	if (PyDict_CheckExact(d) && PyDict_CheckExact(seq)) {
1276		PyDictObject *mp = (PyDictObject *)d;
1277		PyObject *oldvalue;
1278		Py_ssize_t pos = 0;
1279		PyObject *key;
1280		long hash;
1281
1282		if (dictresize(mp, Py_SIZE(seq)))
1283			return NULL;
1284
1285		while (_PyDict_Next(seq, &pos, &key, &oldvalue, &hash)) {
1286			Py_INCREF(key);
1287			Py_INCREF(value);
1288			if (insertdict(mp, key, hash, value) < 0)
1289				return NULL;
1290		}
1291		return d;
1292	}
1293
1294	if (PyDict_CheckExact(d) && PyAnySet_CheckExact(seq)) {
1295		PyDictObject *mp = (PyDictObject *)d;
1296		Py_ssize_t pos = 0;
1297		PyObject *key;
1298		long hash;
1299
1300		if (dictresize(mp, PySet_GET_SIZE(seq)))
1301			return NULL;
1302
1303		while (_PySet_NextEntry(seq, &pos, &key, &hash)) {
1304			Py_INCREF(key);
1305			Py_INCREF(value);
1306			if (insertdict(mp, key, hash, value) < 0)
1307				return NULL;
1308		}
1309		return d;
1310	}
1311
1312	it = PyObject_GetIter(seq);
1313	if (it == NULL){
1314		Py_DECREF(d);
1315		return NULL;
1316	}
1317
1318	if (PyDict_CheckExact(d)) {
1319		while ((key = PyIter_Next(it)) != NULL) {
1320			status = PyDict_SetItem(d, key, value);
1321			Py_DECREF(key);
1322			if (status < 0)
1323				goto Fail;
1324		}
1325	} else {
1326		while ((key = PyIter_Next(it)) != NULL) {
1327			status = PyObject_SetItem(d, key, value);
1328			Py_DECREF(key);
1329			if (status < 0)
1330				goto Fail;
1331		}
1332	}
1333
1334	if (PyErr_Occurred())
1335		goto Fail;
1336	Py_DECREF(it);
1337	return d;
1338
1339Fail:
1340	Py_DECREF(it);
1341	Py_DECREF(d);
1342	return NULL;
1343}
1344
1345static int
1346dict_update_common(PyObject *self, PyObject *args, PyObject *kwds, char *methname)
1347{
1348	PyObject *arg = NULL;
1349	int result = 0;
1350
1351	if (!PyArg_UnpackTuple(args, methname, 0, 1, &arg))
1352		result = -1;
1353
1354	else if (arg != NULL) {
1355		if (PyObject_HasAttrString(arg, "keys"))
1356			result = PyDict_Merge(self, arg, 1);
1357		else
1358			result = PyDict_MergeFromSeq2(self, arg, 1);
1359	}
1360	if (result == 0 && kwds != NULL)
1361		result = PyDict_Merge(self, kwds, 1);
1362	return result;
1363}
1364
1365static PyObject *
1366dict_update(PyObject *self, PyObject *args, PyObject *kwds)
1367{
1368	if (dict_update_common(self, args, kwds, "update") != -1)
1369		Py_RETURN_NONE;
1370	return NULL;
1371}
1372
1373/* Update unconditionally replaces existing items.
1374   Merge has a 3rd argument 'override'; if set, it acts like Update,
1375   otherwise it leaves existing items unchanged.
1376
1377   PyDict_{Update,Merge} update/merge from a mapping object.
1378
1379   PyDict_MergeFromSeq2 updates/merges from any iterable object
1380   producing iterable objects of length 2.
1381*/
1382
1383int
1384PyDict_MergeFromSeq2(PyObject *d, PyObject *seq2, int override)
1385{
1386	PyObject *it;	/* iter(seq2) */
1387	Py_ssize_t i;	/* index into seq2 of current element */
1388	PyObject *item;	/* seq2[i] */
1389	PyObject *fast;	/* item as a 2-tuple or 2-list */
1390
1391	assert(d != NULL);
1392	assert(PyDict_Check(d));
1393	assert(seq2 != NULL);
1394
1395	it = PyObject_GetIter(seq2);
1396	if (it == NULL)
1397		return -1;
1398
1399	for (i = 0; ; ++i) {
1400		PyObject *key, *value;
1401		Py_ssize_t n;
1402
1403		fast = NULL;
1404		item = PyIter_Next(it);
1405		if (item == NULL) {
1406			if (PyErr_Occurred())
1407				goto Fail;
1408			break;
1409		}
1410
1411		/* Convert item to sequence, and verify length 2. */
1412		fast = PySequence_Fast(item, "");
1413		if (fast == NULL) {
1414			if (PyErr_ExceptionMatches(PyExc_TypeError))
1415				PyErr_Format(PyExc_TypeError,
1416					"cannot convert dictionary update "
1417					"sequence element #%zd to a sequence",
1418					i);
1419			goto Fail;
1420		}
1421		n = PySequence_Fast_GET_SIZE(fast);
1422		if (n != 2) {
1423			PyErr_Format(PyExc_ValueError,
1424				     "dictionary update sequence element #%zd "
1425				     "has length %zd; 2 is required",
1426				     i, n);
1427			goto Fail;
1428		}
1429
1430		/* Update/merge with this (key, value) pair. */
1431		key = PySequence_Fast_GET_ITEM(fast, 0);
1432		value = PySequence_Fast_GET_ITEM(fast, 1);
1433		if (override || PyDict_GetItem(d, key) == NULL) {
1434			int status = PyDict_SetItem(d, key, value);
1435			if (status < 0)
1436				goto Fail;
1437		}
1438		Py_DECREF(fast);
1439		Py_DECREF(item);
1440	}
1441
1442	i = 0;
1443	goto Return;
1444Fail:
1445	Py_XDECREF(item);
1446	Py_XDECREF(fast);
1447	i = -1;
1448Return:
1449	Py_DECREF(it);
1450	return Py_SAFE_DOWNCAST(i, Py_ssize_t, int);
1451}
1452
1453int
1454PyDict_Update(PyObject *a, PyObject *b)
1455{
1456	return PyDict_Merge(a, b, 1);
1457}
1458
/* Merge the contents of mapping `b` into dict `a`.
 *
 * If `override` is non-zero, entries already in `a` are replaced;
 * otherwise a key is only added when PyDict_GetItem() finds no entry
 * for it in `a`.
 *
 * Returns 0 on success.  Returns -1 on failure; a bad-internal-call
 * error is raised when `a` is NULL or not a dict, or `b` is NULL.
 *
 * NOTE(review): in the dict fast path an insertdict() failure returns
 * before notify_watchers() is reached, although earlier iterations may
 * already have inserted entries -- confirm that watchers do not need to
 * be notified on that path.
 */
int
PyDict_Merge(PyObject *a, PyObject *b, int override)
{
	register PyDictObject *mp, *other;
	register Py_ssize_t i;
	PyDictEntry *entry;

	/* We accept for the argument either a concrete dictionary object,
	 * or an abstract "mapping" object.  For the former, we can do
	 * things quite efficiently.  For the latter, we only require that
	 * PyMapping_Keys() and PyObject_GetItem() be supported.
	 */
	if (a == NULL || !PyDict_Check(a) || b == NULL) {
		PyErr_BadInternalCall();
		return -1;
	}
	mp = (PyDictObject*)a;
	if (PyDict_Check(b)) {
		/* Fast path: walk b's hash table directly. */
		other = (PyDictObject*)b;
		if (other == mp || other->ma_used == 0)
			/* a.update(a) or a.update({}); nothing to do */
			return 0;
		if (mp->ma_used == 0)
			/* Since the target dict is empty, PyDict_GetItem()
			 * always returns NULL.  Setting override to 1
			 * skips the unnecessary test.
			 */
			override = 1;
		/* Do one big resize at the start, rather than
		 * incrementally resizing as we insert new items.  Expect
		 * that there will be no (or few) overlapping keys.
		 */
		if ((mp->ma_fill + other->ma_used)*3 >= (mp->ma_mask+1)*2) {
		   if (dictresize(mp, (mp->ma_used + other->ma_used)*2) != 0)
			   return -1;
		}
		for (i = 0; i <= other->ma_mask; i++) {
			entry = &other->ma_table[i];
			/* Only slots holding a value are copied. */
			if (entry->me_value != NULL &&
			    (override ||
			     PyDict_GetItem(a, entry->me_key) == NULL)) {
				/* New references are taken for the call to
				 * insertdict(), which reuses the hash
				 * already stored in the source entry. */
				Py_INCREF(entry->me_key);
				Py_INCREF(entry->me_value);
				if (insertdict(mp, entry->me_key,
					       (long)entry->me_hash,
					       entry->me_value) < 0)
					return -1;
			}
		}
		notify_watchers(mp);
	}
	else {
		/* Do it the generic, slower way */
		PyObject *keys = PyMapping_Keys(b);
		PyObject *iter;
		PyObject *key, *value;
		int status;

		if (keys == NULL)
			/* Docstring says this is equivalent to E.keys() so
			 * if E doesn't have a .keys() method we want
			 * AttributeError to percolate up.  Might as well
			 * do the same for any other error.
			 */
			return -1;

		/* Only the iterator is needed from here on, so the keys
		 * object is released right away. */
		iter = PyObject_GetIter(keys);
		Py_DECREF(keys);
		if (iter == NULL)
			return -1;

		for (key = PyIter_Next(iter); key; key = PyIter_Next(iter)) {
			if (!override && PyDict_GetItem(a, key) != NULL) {
				/* Key already present and we must not
				 * replace it. */
				Py_DECREF(key);
				continue;
			}
			value = PyObject_GetItem(b, key);
			if (value == NULL) {
				Py_DECREF(iter);
				Py_DECREF(key);
				return -1;
			}
			status = PyDict_SetItem(a, key, value);
			Py_DECREF(key);
			Py_DECREF(value);
			if (status < 0) {
				Py_DECREF(iter);
				return -1;
			}
		}
		Py_DECREF(iter);
		if (PyErr_Occurred())
			/* Iterator completed, via error */
			return -1;
	}
	return 0;
}
1556
1557static PyObject *
1558dict_copy(register PyDictObject *mp)
1559{
1560	return PyDict_Copy((PyObject*)mp);
1561}
1562
1563PyObject *
1564PyDict_Copy(PyObject *o)
1565{
1566	PyObject *copy;
1567
1568	if (o == NULL || !PyDict_Check(o)) {
1569		PyErr_BadInternalCall();
1570		return NULL;
1571	}
1572	copy = PyDict_New();
1573	if (copy == NULL)
1574		return NULL;
1575	if (PyDict_Merge(copy, o, 1) == 0)
1576		return copy;
1577	Py_DECREF(copy);
1578	return NULL;
1579}
1580
1581Py_ssize_t
1582PyDict_Size(PyObject *mp)
1583{
1584	if (mp == NULL || !PyDict_Check(mp)) {
1585		PyErr_BadInternalCall();
1586		return -1;
1587	}
1588	return ((PyDictObject *)mp)->ma_used;
1589}
1590
1591PyObject *
1592PyDict_Keys(PyObject *mp)
1593{
1594	if (mp == NULL || !PyDict_Check(mp)) {
1595		PyErr_BadInternalCall();
1596		return NULL;
1597	}
1598	return dict_keys((PyDictObject *)mp);
1599}
1600
1601PyObject *
1602PyDict_Values(PyObject *mp)
1603{
1604	if (mp == NULL || !PyDict_Check(mp)) {
1605		PyErr_BadInternalCall();
1606		return NULL;
1607	}
1608	return dict_values((PyDictObject *)mp);
1609}
1610
1611PyObject *
1612PyDict_Items(PyObject *mp)
1613{
1614	if (mp == NULL || !PyDict_Check(mp)) {
1615		PyErr_BadInternalCall();
1616		return NULL;
1617	}
1618	return dict_items((PyDictObject *)mp);
1619}
1620
/* Subroutine which returns the smallest key in a for which b's value
   is different or absent.  The value is returned too, through the
   pval argument.  Both are NULL if no key in a is found for which b's status
   differs.  The refcounts on (and only on) non-NULL *pval and function return
   values must be decremented by the caller (characterize() increments them
   to ensure that mutating comparison and PyDict_GetItem calls can't delete
   them before the caller is done looking at them).

   Both are also NULL when one of the comparisons fails; the caller can
   tell the two cases apart with PyErr_Occurred(), as dict_compare()
   does. */

static PyObject *
characterize(PyDictObject *a, PyDictObject *b, PyObject **pval)
{
	PyObject *akey = NULL; /* smallest key in a s.t. a[akey] != b[akey] */
	PyObject *aval = NULL; /* a[akey] */
	Py_ssize_t i;
	int cmp;

	/* a->ma_mask and a->ma_table are re-read on every access because
	 * the comparisons below can run code that mutates a. */
	for (i = 0; i <= a->ma_mask; i++) {
		PyObject *thiskey, *thisaval, *thisbval;
		if (a->ma_table[i].me_value == NULL)
			continue;
		thiskey = a->ma_table[i].me_key;
		Py_INCREF(thiskey);  /* keep alive across compares */
		if (akey != NULL) {
			/* A candidate already exists: thiskey is only of
			 * interest if the candidate is not smaller. */
			cmp = PyObject_RichCompareBool(akey, thiskey, Py_LT);
			if (cmp < 0) {
				Py_DECREF(thiskey);
				goto Fail;
			}
			if (cmp > 0 ||
			    i > a->ma_mask ||
			    a->ma_table[i].me_value == NULL)
			{
				/* Not the *smallest* a key; or maybe it is
				 * but the compare shrunk the dict so we can't
				 * find its associated value anymore; or
				 * maybe it is but the compare deleted the
				 * a[thiskey] entry.
				 */
				Py_DECREF(thiskey);
				continue;
			}
		}

		/* Compare a[thiskey] to b[thiskey]; cmp <- true iff equal. */
		thisaval = a->ma_table[i].me_value;
		assert(thisaval);
		Py_INCREF(thisaval);   /* keep alive */
		thisbval = PyDict_GetItem((PyObject *)b, thiskey);
		if (thisbval == NULL)
			/* Key absent from b: counts as a difference. */
			cmp = 0;
		else {
			/* both dicts have thiskey:  same values? */
			cmp = PyObject_RichCompareBool(
						thisaval, thisbval, Py_EQ);
			if (cmp < 0) {
		    		Py_DECREF(thiskey);
		    		Py_DECREF(thisaval);
		    		goto Fail;
			}
		}
		if (cmp == 0) {
			/* New winner. */
			/* The references taken above are handed over to
			 * akey/aval; the previous winner's are released. */
			Py_XDECREF(akey);
			Py_XDECREF(aval);
			akey = thiskey;
			aval = thisaval;
		}
		else {
			Py_DECREF(thiskey);
			Py_DECREF(thisaval);
		}
	}
	*pval = aval;
	return akey;

Fail:
	Py_XDECREF(akey);
	Py_XDECREF(aval);
	*pval = NULL;
	return NULL;
}
1702
1703static int
1704dict_compare(PyDictObject *a, PyDictObject *b)
1705{
1706	PyObject *adiff, *bdiff, *aval, *bval;
1707	int res;
1708
1709	/* Compare lengths first */
1710	if (a->ma_used < b->ma_used)
1711		return -1;	/* a is shorter */
1712	else if (a->ma_used > b->ma_used)
1713		return 1;	/* b is shorter */
1714
1715	/* Same length -- check all keys */
1716	bdiff = bval = NULL;
1717	adiff = characterize(a, b, &aval);
1718	if (adiff == NULL) {
1719		assert(!aval);
1720		/* Either an error, or a is a subset with the same length so
1721		 * must be equal.
1722		 */
1723		res = PyErr_Occurred() ? -1 : 0;
1724		goto Finished;
1725	}
1726	bdiff = characterize(b, a, &bval);
1727	if (bdiff == NULL && PyErr_Occurred()) {
1728		assert(!bval);
1729		res = -1;
1730		goto Finished;
1731	}
1732	res = 0;
1733	if (bdiff) {
1734		/* bdiff == NULL "should be" impossible now, but perhaps
1735		 * the last comparison done by the characterize() on a had
1736		 * the side effect of making the dicts equal!
1737		 */
1738		res = PyObject_Compare(adiff, bdiff);
1739	}
1740	if (res == 0 && bval != NULL)
1741		res = PyObject_Compare(aval, bval);
1742
1743Finished:
1744	Py_XDECREF(adiff);
1745	Py_XDECREF(bdiff);
1746	Py_XDECREF(aval);
1747	Py_XDECREF(bval);
1748	return res;
1749}
1750
1751/* Return 1 if dicts equal, 0 if not, -1 if error.
1752 * Gets out as soon as any difference is detected.
1753 * Uses only Py_EQ comparison.
1754 */
1755static int
1756dict_equal(PyDictObject *a, PyDictObject *b)
1757{
1758	Py_ssize_t i;
1759
1760	if (a->ma_used != b->ma_used)
1761		/* can't be equal if # of entries differ */
1762		return 0;
1763
1764	/* Same # of entries -- check all of 'em.  Exit early on any diff. */
1765	for (i = 0; i <= a->ma_mask; i++) {
1766		PyObject *aval = a->ma_table[i].me_value;
1767		if (aval != NULL) {
1768			int cmp;
1769			PyObject *bval;
1770			PyObject *key = a->ma_table[i].me_key;
1771			/* temporarily bump aval's refcount to ensure it stays
1772			   alive until we're done with it */
1773			Py_INCREF(aval);
1774			/* ditto for key */
1775			Py_INCREF(key);
1776			bval = PyDict_GetItem((PyObject *)b, key);
1777			Py_DECREF(key);
1778			if (bval == NULL) {
1779				Py_DECREF(aval);
1780				return 0;
1781			}
1782			cmp = PyObject_RichCompareBool(aval, bval, Py_EQ);
1783			Py_DECREF(aval);
1784			if (cmp <= 0)  /* error or not equal */
1785				return cmp;
1786 		}
1787	}
1788	return 1;
1789 }
1790
1791static PyObject *
1792dict_richcompare(PyObject *v, PyObject *w, int op)
1793{
1794	int cmp;
1795	PyObject *res;
1796
1797	if (!PyDict_Check(v) || !PyDict_Check(w)) {
1798		res = Py_NotImplemented;
1799	}
1800	else if (op == Py_EQ || op == Py_NE) {
1801		cmp = dict_equal((PyDictObject *)v, (PyDictObject *)w);
1802		if (cmp < 0)
1803			return NULL;
1804		res = (cmp == (op == Py_EQ)) ? Py_True : Py_False;
1805	}
1806	else {
1807		/* Py3K warning if comparison isn't == or !=  */
1808		if (PyErr_WarnPy3k("dict inequality comparisons not supported "
1809				   "in 3.x", 1) < 0) {
1810			return NULL;
1811		}
1812		res = Py_NotImplemented;
1813	}
1814	Py_INCREF(res);
1815	return res;
1816 }
1817
1818static PyObject *
1819dict_contains(register PyDictObject *mp, PyObject *key)
1820{
1821	long hash;
1822	PyDictEntry *ep;
1823
1824	if (!PyString_CheckExact(key) ||
1825	    (hash = ((PyStringObject *) key)->ob_shash) == -1) {
1826		hash = PyObject_Hash(key);
1827		if (hash == -1)
1828			return NULL;
1829	}
1830	ep = (mp->ma_lookup)(mp, key, hash);
1831	if (ep == NULL)
1832		return NULL;
1833	return PyBool_FromLong(ep->me_value != NULL);
1834}
1835
1836static PyObject *
1837dict_has_key(register PyDictObject *mp, PyObject *key)
1838{
1839	if (PyErr_WarnPy3k("dict.has_key() not supported in 3.x; "
1840			   "use the in operator", 1) < 0)
1841		return NULL;
1842	return dict_contains(mp, key);
1843}
1844
1845static PyObject *
1846dict_get(register PyDictObject *mp, PyObject *key, PyObject *failobj)
1847{
1848	PyObject *val = NULL;
1849	long hash;
1850	PyDictEntry *ep;
1851
1852	if (failobj == NULL)
1853		failobj = Py_None;
1854
1855	if (!PyString_CheckExact(key) ||
1856	    (hash = ((PyStringObject *) key)->ob_shash) == -1) {
1857		hash = PyObject_Hash(key);
1858		if (hash == -1)
1859			return NULL;
1860	}
1861	ep = (mp->ma_lookup)(mp, key, hash);
1862	if (ep == NULL)
1863		return NULL;
1864	val = ep->me_value;
1865	if (val == NULL)
1866		val = failobj;
1867	Py_INCREF(val);
1868	return val;
1869}
1870
1871
1872static PyObject *
1873dict_setdefault(register PyDictObject *mp, PyObject *key, PyObject *failobj)
1874{
1875	PyObject *val = NULL;
1876	long hash;
1877	PyDictEntry *ep;
1878
1879	if (failobj == NULL)
1880		failobj = Py_None;
1881
1882	if (!PyString_CheckExact(key) ||
1883	    (hash = ((PyStringObject *) key)->ob_shash) == -1) {
1884		hash = PyObject_Hash(key);
1885		if (hash == -1)
1886			return NULL;
1887	}
1888	ep = (mp->ma_lookup)(mp, key, hash);
1889	if (ep == NULL)
1890		return NULL;
1891	val = ep->me_value;
1892	if (val == NULL) {
1893		val = failobj;
1894		if (PyDict_SetItem((PyObject*)mp, key, failobj))
1895			val = NULL;
1896	}
1897	Py_XINCREF(val);
1898	return val;
1899}
1900
1901
1902static PyObject *
1903dict_clear(register PyDictObject *mp)
1904{
1905	PyDict_Clear((PyObject *)mp);
1906	Py_RETURN_NONE;
1907}
1908
1909static PyObject *
1910dict_pop(PyDictObject *mp

Large files are truncated, but you can click here to view the full file