PageRenderTime 43ms CodeModel.GetById 10ms RepoModel.GetById 0ms app.codeStats 1ms

/rpython/memory/gc/incminimark.py

https://bitbucket.org/pypy/pypy/
Python | 3111 lines | 2202 code | 146 blank | 763 comment | 207 complexity | e029fbba75bdd25239b5ec74432415f0 MD5 | raw file
Possible License(s): AGPL-3.0, BSD-3-Clause, Apache-2.0
  1. """Incremental version of the MiniMark GC.
  2. Environment variables can be used to fine-tune the following parameters:
  3. PYPY_GC_NURSERY The nursery size. Defaults to 1/2 of your cache or
  4. '4M'. Small values
  5. (like 1 or 1KB) are useful for debugging.
  6. PYPY_GC_NURSERY_DEBUG If set to non-zero, will fill nursery with garbage,
  7. to help debugging.
  8. PYPY_GC_INCREMENT_STEP The size of memory marked during the marking step.
  9. Default is size of nursery * 2. If you mark it too high
  10. your GC is not incremental at all. The minimum is set
  11. to size that survives minor collection * 1.5 so we
  12. reclaim anything all the time.
  13. PYPY_GC_MAJOR_COLLECT Major collection memory factor. Default is '1.82',
  14. which means trigger a major collection when the
  15. memory consumed equals 1.82 times the memory
  16. really used at the end of the previous major
  17. collection.
  18. PYPY_GC_GROWTH Major collection threshold's max growth rate.
  19. Default is '1.4'. Useful to collect more often
  20. than normally on sudden memory growth, e.g. when
  21. there is a temporary peak in memory usage.
  22. PYPY_GC_MAX The max heap size. If coming near this limit, it
  23. will first collect more often, then raise an
  24. RPython MemoryError, and if that is not enough,
  25. crash the program with a fatal error. Try values
  26. like '1.6GB'.
  27. PYPY_GC_MAX_DELTA The major collection threshold will never be set
  28. to more than PYPY_GC_MAX_DELTA the amount really
  29. used after a collection. Defaults to 1/8th of the
  30. total RAM size (which is constrained to be at most
  31. 2/3/4GB on 32-bit systems). Try values like '200MB'.
  32. PYPY_GC_MIN Don't collect while the memory size is below this
  33. limit. Useful to avoid spending all the time in
  34. the GC in very small programs. Defaults to 8
  35. times the nursery.
  36. PYPY_GC_DEBUG Enable extra checks around collections that are
  37. too slow for normal use. Values are 0 (off),
  38. 1 (on major collections) or 2 (also on minor
  39. collections).
  40. PYPY_GC_MAX_PINNED The maximal number of pinned objects at any point
  41. in time. Defaults to a conservative value depending
  42. on nursery size and maximum object size inside the
  43. nursery. Useful for debugging by setting it to 0.
  44. """
  45. # XXX Should find a way to bound the major collection threshold by the
  46. # XXX total addressable size. Maybe by keeping some minimarkpage arenas
  47. # XXX pre-reserved, enough for a few nursery collections? What about
  48. # XXX raw-malloced memory?
  49. # XXX try merging old_objects_pointing_to_pinned into
  50. # XXX old_objects_pointing_to_young (IRC 2014-10-22, fijal and gregor_w)
  51. import sys
  52. import os
  53. from rpython.rtyper.lltypesystem import lltype, llmemory, llarena, llgroup
  54. from rpython.rtyper.lltypesystem.lloperation import llop
  55. from rpython.rtyper.lltypesystem.llmemory import raw_malloc_usage
  56. from rpython.memory.gc.base import GCBase, MovingGCBase
  57. from rpython.memory.gc import env
  58. from rpython.memory.support import mangle_hash
  59. from rpython.rlib.rarithmetic import ovfcheck, LONG_BIT, intmask, r_uint
  60. from rpython.rlib.rarithmetic import LONG_BIT_SHIFT
  61. from rpython.rlib.debug import ll_assert, debug_print, debug_start, debug_stop
  62. from rpython.rlib.objectmodel import specialize
  63. from rpython.memory.gc.minimarkpage import out_of_memory
#
# Handles the objects in 2 generations:
#
#  * young objects: allocated in the nursery if they are not too large, or
#    raw-malloced otherwise.  The nursery is a fixed-size memory buffer of
#    4MB by default.  When full, we do a minor collection;
#    - surviving objects from the nursery are moved outside and become old,
#    - non-surviving raw-malloced objects are freed,
#    - and pinned objects are kept at their place inside the nursery and stay
#      young.
#
#  * old objects: never move again.  These objects are either allocated by
#    minimarkpage.py (if they are small), or raw-malloced (if they are not
#    small).  Collected by regular mark-n-sweep during major collections.
#

WORD = LONG_BIT // 8

# GC flags live in the upper half of the 'tid' header word; the lower
# half stores the type id.
first_gcflag = 1 << (LONG_BIT//2)

# The following flag is set on objects if we need to do something to
# track the young pointers that it might contain.  The flag is not set
# on young objects (unless they are large arrays, see below), and we
# simply assume that any young object can point to any other young object.
# For old and prebuilt objects, the flag is usually set, and is cleared
# when we write any pointer to it.  For large arrays with
# GCFLAG_HAS_CARDS, we rely on card marking to track where the
# young pointers are; the flag GCFLAG_TRACK_YOUNG_PTRS is set in this
# case too, to speed up the write barrier.
GCFLAG_TRACK_YOUNG_PTRS = first_gcflag << 0

# The following flag is set on some prebuilt objects.  The flag is set
# unless the object is already listed in 'prebuilt_root_objects'.
# When a pointer is written inside an object with GCFLAG_NO_HEAP_PTRS
# set, the write_barrier clears the flag and adds the object to
# 'prebuilt_root_objects'.
GCFLAG_NO_HEAP_PTRS = first_gcflag << 1

# The following flag is set on surviving objects during a major collection.
GCFLAG_VISITED = first_gcflag << 2

# The following flag is set on nursery objects of which we asked the id
# or the identityhash.  It means that a space of the size of the object
# has already been allocated in the nonmovable part.  The same flag is
# abused to mark prebuilt objects whose hash has been taken during
# translation and is statically recorded.
GCFLAG_HAS_SHADOW = first_gcflag << 3

# The following flag is set temporarily on some objects during a major
# collection.  See pypy/doc/discussion/finalizer-order.txt
GCFLAG_FINALIZATION_ORDERING = first_gcflag << 4

# This flag is reserved for RPython.
GCFLAG_EXTRA = first_gcflag << 5

# The following flag is set on externally raw_malloc'ed arrays of pointers.
# They are allocated with some extra space in front of them for a bitfield,
# one bit per 'card_page_indices' indices.
GCFLAG_HAS_CARDS = first_gcflag << 6
GCFLAG_CARDS_SET = first_gcflag << 7     # <- at least one card bit is set
# note that GCFLAG_CARDS_SET is the most significant bit of a byte:
# this is required for the JIT (x86)

# The following flag is set on surviving raw-malloced young objects during
# a minor collection.
GCFLAG_VISITED_RMY = first_gcflag << 8

# The following flag is set on nursery objects to keep them in the nursery.
# This means that a young object with this flag is not moved out
# of the nursery during a minor collection.  See pin()/unpin() for further
# details.
GCFLAG_PINNED = first_gcflag << 9

# The following flag is set only on objects outside the nursery
# (i.e. old objects).  Therefore we can reuse GCFLAG_PINNED as it is used for
# the same feature (object pinning) and GCFLAG_PINNED is only used on nursery
# objects.
# If this flag is set, the flagged object is already an element of
# 'old_objects_pointing_to_pinned' and doesn't have to be added again.
GCFLAG_PINNED_OBJECT_PARENT_KNOWN = GCFLAG_PINNED

_GCFLAG_FIRST_UNUSED = first_gcflag << 10    # the first unused bit

# States for the incremental GC

# The scanning phase, next step call will scan the current roots
# This state must complete in a single step
STATE_SCANNING = 0

# The marking phase.  We walk the list 'objects_to_trace' of all gray objects
# and mark all of the things they point to gray.  This step lasts until there
# are no more gray objects.  ('objects_to_trace' never contains pinned objs.)
STATE_MARKING = 1

# here we kill all the unvisited objects
STATE_SWEEPING = 2

# here we call all the finalizers
STATE_FINALIZING = 3

GC_STATES = ['SCANNING', 'MARKING', 'SWEEPING', 'FINALIZING']

# Header used to overwrite a nursery object that was moved out during a
# minor collection: 'forw' holds the new address of the object.
FORWARDSTUB = lltype.GcStruct('forwarding_stub',
                              ('forw', llmemory.Address))
FORWARDSTUBPTR = lltype.Ptr(FORWARDSTUB)
NURSARRAY = lltype.Array(llmemory.Address)

# ____________________________________________________________
class IncrementalMiniMarkGC(MovingGCBase):
    _alloc_flavor_ = "raw"
    inline_simple_malloc = True
    inline_simple_malloc_varsize = True
    needs_write_barrier = True
    prebuilt_gc_objects_are_static_roots = False
    can_usually_pin_objects = True
    malloc_zero_filled = False
    gcflag_extra = GCFLAG_EXTRA

    # All objects start with a HDR, i.e. with a field 'tid' which contains
    # a word.  This word is divided in two halves: the lower half contains
    # the typeid, and the upper half contains various flags, as defined
    # by GCFLAG_xxx above.
    HDR = lltype.Struct('header', ('tid', lltype.Signed))
    typeid_is_in_field = 'tid'
    withhash_flag_is_in_field = 'tid', GCFLAG_HAS_SHADOW
    # ^^^ prebuilt objects may have the flag GCFLAG_HAS_SHADOW;
    #     then they are one word longer, the extra word storing the hash.

    # During a minor collection, the objects in the nursery that are
    # moved outside are changed in-place: their header is replaced with
    # the value -42, and the following word is set to the address of
    # where the object was moved.  This means that all objects in the
    # nursery need to be at least 2 words long, but objects outside the
    # nursery don't need to.
    minimal_size_in_nursery = (
        llmemory.sizeof(HDR) + llmemory.sizeof(llmemory.Address))

    # Parameter values used by the translated (production) GC; the
    # defaults in __init__ below are only small values for tests.
    TRANSLATION_PARAMS = {
        # Automatically adjust the size of the nursery and the
        # 'major_collection_threshold' from the environment.
        # See docstring at the start of the file.
        "read_from_env": True,

        # The size of the nursery.  Note that this is only used as a
        # fall-back number.
        "nursery_size": 896*1024,

        # The system page size.  Like malloc, we assume that it is 4K
        # for 32-bit systems; unlike malloc, we assume that it is 8K
        # for 64-bit systems, for consistent results.
        "page_size": 1024*WORD,

        # The size of an arena.  Arenas are groups of pages allocated
        # together.
        "arena_size": 65536*WORD,

        # The maximum size of an object allocated compactly.  All objects
        # that are larger are just allocated with raw_malloc().  Note that
        # the size limit for being first allocated in the nursery is much
        # larger; see below.
        "small_request_threshold": 35*WORD,

        # Full collection threshold: after a major collection, we record
        # the total size consumed; and after every minor collection, if the
        # total size is now more than 'major_collection_threshold' times,
        # we trigger the next major collection.
        "major_collection_threshold": 1.82,

        # Threshold to avoid that the total heap size grows by a factor of
        # major_collection_threshold at every collection: it can only
        # grow at most by the following factor from one collection to the
        # next.  Used e.g. when there is a sudden, temporary peak in memory
        # usage; this avoids that the upper bound grows too fast.
        "growth_rate_max": 1.4,

        # The number of array indices that are mapped to a single bit in
        # write_barrier_from_array().  Must be a power of two.  The default
        # value of 128 means that card pages are 512 bytes (1024 on 64-bits)
        # in regular arrays of pointers; more in arrays whose items are
        # larger.  A value of 0 disables card marking.
        "card_page_indices": 128,

        # Objects whose total size is at least 'large_object' bytes are
        # allocated out of the nursery immediately, as old objects.  The
        # minimal allocated size of the nursery is 2x the following
        # number (by default, at least 132KB on 32-bit and 264KB on 64-bit).
        "large_object": (16384+512)*WORD,
        }
    def __init__(self, config,
                 read_from_env=False,
                 nursery_size=32*WORD,
                 nursery_cleanup=9*WORD,
                 page_size=16*WORD,
                 arena_size=64*WORD,
                 small_request_threshold=5*WORD,
                 major_collection_threshold=2.5,
                 growth_rate_max=2.5,   # for tests
                 card_page_indices=0,
                 large_object=8*WORD,
                 ArenaCollectionClass=None,
                 **kwds):
        "NOT_RPYTHON"
        # Note: the tiny default sizes above are for tests only; the
        # translated GC uses TRANSLATION_PARAMS instead.
        MovingGCBase.__init__(self, config, **kwds)
        assert small_request_threshold % WORD == 0
        self.read_from_env = read_from_env
        self.nursery_size = nursery_size
        self.small_request_threshold = small_request_threshold
        self.major_collection_threshold = major_collection_threshold
        self.growth_rate_max = growth_rate_max
        self.num_major_collects = 0
        self.min_heap_size = 0.0
        self.max_heap_size = 0.0
        self.max_heap_size_already_raised = False
        # max_delta defaults to "unlimited" (largest unsigned word);
        # setup() may lower it from PYPY_GC_MAX_DELTA or total RAM.
        self.max_delta = float(r_uint(-1))
        self.max_number_of_pinned_objects = 0   # computed later
        #
        self.card_page_indices = card_page_indices
        if self.card_page_indices > 0:
            # card_page_shift = log2(card_page_indices); assumes the
            # value is a power of two, as documented above.
            self.card_page_shift = 0
            while (1 << self.card_page_shift) < self.card_page_indices:
                self.card_page_shift += 1
        #
        # 'large_object' limit how big objects can be in the nursery, so
        # it gives a lower bound on the allowed size of the nursery.
        self.nonlarge_max = large_object - 1
        #
        # Nursery pointers stay NULL until allocate_nursery() runs.
        self.nursery = llmemory.NULL
        self.nursery_free = llmemory.NULL
        self.nursery_top = llmemory.NULL
        self.debug_tiny_nursery = -1
        self.debug_rotating_nurseries = lltype.nullptr(NURSARRAY)
        self.extra_threshold = 0
        #
        # The ArenaCollection() handles the nonmovable objects allocation.
        if ArenaCollectionClass is None:
            from rpython.memory.gc import minimarkpage
            ArenaCollectionClass = minimarkpage.ArenaCollection
        self.ac = ArenaCollectionClass(arena_size, page_size,
                                       small_request_threshold)
        #
        # Used by minor collection: a list of (mostly non-young) objects that
        # (may) contain a pointer to a young object.  Populated by
        # the write barrier: when we clear GCFLAG_TRACK_YOUNG_PTRS, we
        # add it to this list.
        # Note that young array objects may (by temporary "mistake") be added
        # to this list, but will be removed again at the start of the next
        # minor collection.
        self.old_objects_pointing_to_young = self.AddressStack()
        #
        # Similar to 'old_objects_pointing_to_young', but lists objects
        # that have the GCFLAG_CARDS_SET bit.  For large arrays.  Note
        # that it is possible for an object to be listed both in here
        # and in 'old_objects_pointing_to_young', in which case we
        # should just clear the cards and trace it fully, as usual.
        # Note also that young array objects are never listed here.
        self.old_objects_with_cards_set = self.AddressStack()
        #
        # A list of all prebuilt GC objects that contain pointers to the heap
        self.prebuilt_root_objects = self.AddressStack()
        #
        self._init_writebarrier_logic()
        #
        # The size of all the objects turned from 'young' to 'old'
        # since we started the last major collection cycle.  This is
        # used to track progress of the incremental GC: normally, we
        # run one major GC step after each minor collection, but if a
        # lot of objects are made old, we need run two or more steps.
        # Otherwise the risk is that we create old objects faster than
        # we're collecting them.  The 'threshold' is incremented after
        # each major GC step at a fixed rate; the idea is that as long
        # as 'size_objects_made_old > threshold_objects_made_old' then
        # we must do more major GC steps.  See major_collection_step()
        # for more details.
        self.size_objects_made_old = r_uint(0)
        self.threshold_objects_made_old = r_uint(0)
    def setup(self):
        """Called at run-time to initialize the GC."""
        #
        # Hack: MovingGCBase.setup() sets up stuff related to id(), which
        # we implement differently anyway.  So directly call GCBase.setup().
        GCBase.setup(self)
        #
        # Two lists of all raw_malloced objects (the objects too large)
        self.young_rawmalloced_objects = self.null_address_dict()
        self.old_rawmalloced_objects = self.AddressStack()
        self.raw_malloc_might_sweep = self.AddressStack()
        self.rawmalloced_total_size = r_uint(0)

        # Incremental major-collection state machine starts at SCANNING.
        self.gc_state = STATE_SCANNING
        #
        # Two lists of all objects with finalizers.  Actually they are lists
        # of pairs (finalization_queue_nr, object).  "probably young objects"
        # are all traced and moved to the "old" list by the next minor
        # collection.
        self.probably_young_objects_with_finalizers = self.AddressDeque()
        self.old_objects_with_finalizers = self.AddressDeque()
        # One-address scratch buffer, kept alive for the GC's lifetime.
        p = lltype.malloc(self._ADDRARRAY, 1, flavor='raw',
                          track_allocation=False)
        self.singleaddr = llmemory.cast_ptr_to_adr(p)
        #
        # Two lists of all objects with destructors.
        self.young_objects_with_destructors = self.AddressStack()
        self.old_objects_with_destructors = self.AddressStack()
        #
        # Two lists of the objects with weakrefs.  No weakref can be an
        # old object weakly pointing to a young object: indeed, weakrefs
        # are immutable so they cannot point to an object that was
        # created after it.
        self.young_objects_with_weakrefs = self.AddressStack()
        self.old_objects_with_weakrefs = self.AddressStack()
        #
        # Support for id and identityhash: map nursery objects with
        # GCFLAG_HAS_SHADOW to their future location at the next
        # minor collection.
        self.nursery_objects_shadows = self.AddressDict()
        #
        # A sorted deque containing addresses of pinned objects.
        # This collection is used to make sure we don't overwrite pinned objects.
        # Each minor collection creates a new deque containing the active pinned
        # objects.  The addresses are used to set the next 'nursery_top'.
        self.nursery_barriers = self.AddressDeque()
        #
        # Counter tracking how many pinned objects currently reside inside
        # the nursery.
        self.pinned_objects_in_nursery = 0
        #
        # This flag is set if the previous minor collection found at least
        # one pinned object alive.
        self.any_pinned_object_kept = False
        #
        # Keeps track of old objects pointing to pinned objects.  These objects
        # must be traced every minor collection.  Without tracing them the
        # referenced pinned object wouldn't be visited and therefore collected.
        self.old_objects_pointing_to_pinned = self.AddressStack()
        self.updated_old_objects_pointing_to_pinned = False
        #
        # Allocate a nursery.  In case of auto_nursery_size, start by
        # allocating a very small nursery, enough to do things like look
        # up the env var, which requires the GC; and then really
        # allocate the nursery of the final size.
        if not self.read_from_env:
            self.allocate_nursery()
            self.gc_increment_step = self.nursery_size * 4
            self.gc_nursery_debug = False
        else:
            #
            defaultsize = self.nursery_size
            # 'minsize' must hold two maximally-sized nonlarge objects.
            minsize = 2 * (self.nonlarge_max + 1)
            self.nursery_size = minsize
            self.allocate_nursery()
            #
            # From there on, the GC is fully initialized and the code
            # below can use it
            newsize = env.read_from_env('PYPY_GC_NURSERY')
            # PYPY_GC_NURSERY=smallvalue means that minor collects occur
            # very frequently; the extreme case is PYPY_GC_NURSERY=1, which
            # forces a minor collect for every malloc.  Useful to debug
            # external factors, like trackgcroot or the handling of the write
            # barrier.  Implemented by still using 'minsize' for the nursery
            # size (needed to handle mallocs just below 'large_objects') but
            # hacking at the current nursery position in collect_and_reserve().
            if newsize <= 0:
                newsize = env.estimate_best_nursery_size()
                if newsize <= 0:
                    newsize = defaultsize
            if newsize < minsize:
                self.debug_tiny_nursery = newsize & ~(WORD-1)
                newsize = minsize
            #
            major_coll = env.read_float_from_env('PYPY_GC_MAJOR_COLLECT')
            if major_coll > 1.0:
                self.major_collection_threshold = major_coll
            #
            growth = env.read_float_from_env('PYPY_GC_GROWTH')
            if growth > 1.0:
                self.growth_rate_max = growth
            #
            min_heap_size = env.read_uint_from_env('PYPY_GC_MIN')
            if min_heap_size > 0:
                self.min_heap_size = float(min_heap_size)
            else:
                # defaults to 8 times the nursery
                self.min_heap_size = newsize * 8
            #
            max_heap_size = env.read_uint_from_env('PYPY_GC_MAX')
            if max_heap_size > 0:
                self.max_heap_size = float(max_heap_size)
            #
            max_delta = env.read_uint_from_env('PYPY_GC_MAX_DELTA')
            if max_delta > 0:
                self.max_delta = float(max_delta)
            else:
                self.max_delta = 0.125 * env.get_total_memory()

            gc_increment_step = env.read_uint_from_env('PYPY_GC_INCREMENT_STEP')
            if gc_increment_step > 0:
                self.gc_increment_step = gc_increment_step
            else:
                self.gc_increment_step = newsize * 4
            #
            nursery_debug = env.read_uint_from_env('PYPY_GC_NURSERY_DEBUG')
            if nursery_debug > 0:
                self.gc_nursery_debug = True
            else:
                self.gc_nursery_debug = False
            # Replace the bootstrap nursery with the real-sized one.
            self._minor_collection()    # to empty the nursery
            llarena.arena_free(self.nursery)
            self.nursery_size = newsize
            self.allocate_nursery()
        #
        env_max_number_of_pinned_objects = os.environ.get('PYPY_GC_MAX_PINNED')
        if env_max_number_of_pinned_objects:
            try:
                env_max_number_of_pinned_objects = int(env_max_number_of_pinned_objects)
            except ValueError:
                env_max_number_of_pinned_objects = 0
            #
            if env_max_number_of_pinned_objects >= 0: # 0 allows to disable pinning completely
                self.max_number_of_pinned_objects = env_max_number_of_pinned_objects
        else:
            # Estimate this number conservatively
            bigobj = self.nonlarge_max + 1
            self.max_number_of_pinned_objects = self.nursery_size / (bigobj * 2)
  453. def _nursery_memory_size(self):
  454. extra = self.nonlarge_max + 1
  455. return self.nursery_size + extra
  456. def _alloc_nursery(self):
  457. # the start of the nursery: we actually allocate a bit more for
  458. # the nursery than really needed, to simplify pointer arithmetic
  459. # in malloc_fixedsize(). The few extra pages are never used
  460. # anyway so it doesn't even count.
  461. nursery = llarena.arena_malloc(self._nursery_memory_size(), 0)
  462. if not nursery:
  463. out_of_memory("cannot allocate nursery")
  464. return nursery
    def allocate_nursery(self):
        """Allocate the nursery arena and (re)initialize the collection
        thresholds that are derived from its size."""
        debug_start("gc-set-nursery-size")
        debug_print("nursery size:", self.nursery_size)
        self.nursery = self._alloc_nursery()
        # the current position in the nursery:
        self.nursery_free = self.nursery
        # the end of the nursery:
        self.nursery_top = self.nursery + self.nursery_size
        # initialize the threshold
        self.min_heap_size = max(self.min_heap_size, self.nursery_size *
                                              self.major_collection_threshold)
        # the following two values are usually equal, but during raw mallocs
        # with memory pressure accounting, next_major_collection_threshold
        # is decremented to make the next major collection arrive earlier.
        # See translator/c/test/test_newgc, test_nongc_attached_to_gc
        self.next_major_collection_initial = self.min_heap_size
        self.next_major_collection_threshold = self.min_heap_size
        self.set_major_threshold_from(0.0)
        ll_assert(self.extra_threshold == 0, "extra_threshold set too early")
        debug_stop("gc-set-nursery-size")
  485. def set_major_threshold_from(self, threshold, reserving_size=0):
  486. # Set the next_major_collection_threshold.
  487. threshold_max = (self.next_major_collection_initial *
  488. self.growth_rate_max)
  489. if threshold > threshold_max:
  490. threshold = threshold_max
  491. #
  492. threshold += reserving_size
  493. if threshold < self.min_heap_size:
  494. threshold = self.min_heap_size
  495. #
  496. if self.max_heap_size > 0.0 and threshold > self.max_heap_size:
  497. threshold = self.max_heap_size
  498. bounded = True
  499. else:
  500. bounded = False
  501. #
  502. self.next_major_collection_initial = threshold
  503. self.next_major_collection_threshold = threshold
  504. return bounded
  505. def post_setup(self):
  506. # set up extra stuff for PYPY_GC_DEBUG.
  507. MovingGCBase.post_setup(self)
  508. if self.DEBUG and llarena.has_protect:
  509. # gc debug mode: allocate 7 nurseries instead of just 1,
  510. # and use them alternatively, while mprotect()ing the unused
  511. # ones to detect invalid access.
  512. debug_start("gc-debug")
  513. self.debug_rotating_nurseries = lltype.malloc(
  514. NURSARRAY, 6, flavor='raw', track_allocation=False)
  515. i = 0
  516. while i < 6:
  517. nurs = self._alloc_nursery()
  518. llarena.arena_protect(nurs, self._nursery_memory_size(), True)
  519. self.debug_rotating_nurseries[i] = nurs
  520. i += 1
  521. debug_print("allocated", len(self.debug_rotating_nurseries),
  522. "extra nurseries")
  523. debug_stop("gc-debug")
    def debug_rotate_nursery(self):
        # Under PYPY_GC_DEBUG: retire the current nursery (mprotect'ed so
        # that any stale pointer into it faults) and switch to the least
        # recently used spare nursery, un-protecting it first.
        if self.debug_rotating_nurseries:
            debug_start("gc-debug")
            oldnurs = self.nursery
            llarena.arena_protect(oldnurs, self._nursery_memory_size(), True)
            #
            newnurs = self.debug_rotating_nurseries[0]
            # Shift the queue left by one slot; the just-retired nursery
            # goes to the last slot.
            i = 0
            while i < len(self.debug_rotating_nurseries) - 1:
                self.debug_rotating_nurseries[i] = (
                    self.debug_rotating_nurseries[i + 1])
                i += 1
            self.debug_rotating_nurseries[i] = oldnurs
            #
            llarena.arena_protect(newnurs, self._nursery_memory_size(), False)
            self.nursery = newnurs
            self.nursery_top = self.nursery + self.nursery_size
            debug_print("switching from nursery", oldnurs,
                        "to nursery", self.nursery,
                        "size", self.nursery_size)
            debug_stop("gc-debug")
    def malloc_fixedsize(self, typeid, size,
                         needs_finalizer=False,
                         is_finalizer_light=False,
                         contains_weakptr=False):
        # Fast-path allocator for fixed-size objects: bump-allocate in
        # the nursery; fall back to external_malloc() for objects with
        # old-style finalizers or objects larger than 'nonlarge_max'.
        size_gc_header = self.gcheaderbuilder.size_gc_header
        totalsize = size_gc_header + size
        rawtotalsize = raw_malloc_usage(totalsize)
        #
        # If the object needs a finalizer, ask for a rawmalloc.
        # The following check should be constant-folded.
        if needs_finalizer and not is_finalizer_light:
            # old-style finalizers only!
            ll_assert(not contains_weakptr,
                     "'needs_finalizer' and 'contains_weakptr' both specified")
            obj = self.external_malloc(typeid, 0, alloc_young=False)
            res = llmemory.cast_adr_to_ptr(obj, llmemory.GCREF)
            self.register_finalizer(-1, res)
            return res
        #
        # If totalsize is greater than nonlarge_max (which should never be
        # the case in practice), ask for a rawmalloc.  The following check
        # should be constant-folded.
        if rawtotalsize > self.nonlarge_max:
            ll_assert(not contains_weakptr,
                      "'contains_weakptr' specified for a large object")
            obj = self.external_malloc(typeid, 0, alloc_young=True)
            #
        else:
            # If totalsize is smaller than minimal_size_in_nursery, round it
            # up.  The following check should also be constant-folded.
            min_size = raw_malloc_usage(self.minimal_size_in_nursery)
            if rawtotalsize < min_size:
                totalsize = rawtotalsize = min_size
            #
            # Get the memory from the nursery.  If there is not enough space
            # there, do a collect first.
            result = self.nursery_free
            ll_assert(result != llmemory.NULL, "uninitialized nursery")
            self.nursery_free = new_free = result + totalsize
            if new_free > self.nursery_top:
                # collect_and_reserve() runs a minor collection and
                # returns a fresh 'result' with 'totalsize' reserved.
                result = self.collect_and_reserve(totalsize)
            #
            # Build the object.
            llarena.arena_reserve(result, totalsize)
            obj = result + size_gc_header
            self.init_gc_object(result, typeid, flags=0)
        #
        # If it is a weakref or has a lightweight destructor, record it
        # (checks constant-folded).
        if needs_finalizer:
            self.young_objects_with_destructors.append(obj)
        if contains_weakptr:
            self.young_objects_with_weakrefs.append(obj)
        return llmemory.cast_adr_to_ptr(obj, llmemory.GCREF)
    def malloc_varsize(self, typeid, length, size, itemsize,
                       offset_to_length):
        # Allocator for variable-sized objects (arrays): bump-allocate
        # in the nursery when the total size stays below 'nonlarge_max',
        # otherwise allocate externally.
        size_gc_header = self.gcheaderbuilder.size_gc_header
        nonvarsize = size_gc_header + size
        #
        # Compute the maximal length that makes the object still
        # below 'nonlarge_max'.  All the following logic is usually
        # constant-folded because self.nonlarge_max, size and itemsize
        # are all constants (the arguments are constant due to
        # inlining).
        maxsize = self.nonlarge_max - raw_malloc_usage(nonvarsize)
        if maxsize < 0:
            toobig = r_uint(0)    # the nonvarsize alone is too big
        elif raw_malloc_usage(itemsize):
            toobig = r_uint(maxsize // raw_malloc_usage(itemsize)) + 1
        else:
            # zero-sized items: any length fits
            toobig = r_uint(sys.maxint) + 1

        # Unsigned comparison: also catches a negative 'length'.
        if r_uint(length) >= r_uint(toobig):
            #
            # If the total size of the object would be larger than
            # 'nonlarge_max', then allocate it externally.  We also
            # go there if 'length' is actually negative.
            obj = self.external_malloc(typeid, length, alloc_young=True)
            #
        else:
            # With the above checks we know now that totalsize cannot be more
            # than 'nonlarge_max'; in particular, the + and * cannot overflow.
            totalsize = nonvarsize + itemsize * length
            totalsize = llarena.round_up_for_allocation(totalsize)
            #
            # 'totalsize' should contain at least the GC header and
            # the length word, so it should never be smaller than
            # 'minimal_size_in_nursery'
            ll_assert(raw_malloc_usage(totalsize) >=
                      raw_malloc_usage(self.minimal_size_in_nursery),
                      "malloc_varsize(): totalsize < minimalsize")
            #
            # Get the memory from the nursery.  If there is not enough space
            # there, do a collect first.
            result = self.nursery_free
            ll_assert(result != llmemory.NULL, "uninitialized nursery")
            self.nursery_free = new_free = result + totalsize
            if new_free > self.nursery_top:
                result = self.collect_and_reserve(totalsize)
            #
            # Build the object.
            llarena.arena_reserve(result, totalsize)
            self.init_gc_object(result, typeid, flags=0)
            #
            # Set the length and return the object.
            obj = result + size_gc_header
            (obj + offset_to_length).signed[0] = length
        #
        return llmemory.cast_adr_to_ptr(obj, llmemory.GCREF)
def malloc_fixed_or_varsize_nonmovable(self, typeid, length):
    """Allocate a non-movable object of type 'typeid' directly outside
    the nursery (via external_malloc), and return it as a GCREF.
    'length' is the number of var-sized items, or 0 for a fixed-size
    object.  The object starts out young (alloc_young=True)."""
    # length==0 for fixedsize
    obj = self.external_malloc(typeid, length, alloc_young=True)
    return llmemory.cast_adr_to_ptr(obj, llmemory.GCREF)
def collect(self, gen=2):
    """Do a minor (gen=0), start a major (gen=1), or do a full
    major (gen>=2) collection.

    gen < 0 runs only a raw minor collection, with no major-GC
    progress at all -- dangerous, as the comment below notes,
    because it lets the incremental major cycle fall behind.
    gen == 1 additionally performs one major collection step when
    no major cycle is currently running (gc_state == STATE_SCANNING).
    """
    if gen < 0:
        self._minor_collection()     # dangerous! no major GC cycle progress
    elif gen <= 1:
        self.minor_collection_with_major_progress()
        if gen == 1 and self.gc_state == STATE_SCANNING:
            self.major_collection_step()
    else:
        self.minor_and_major_collection()
    # notify the rawrefcount (cpyext) support, if any, after collecting
    self.rrc_invoke_callback()
def minor_collection_with_major_progress(self, extrasize=0):
    """Do a minor collection.  Then, if there is already a major GC
    in progress, run at least one major collection step.  If there is
    no major GC but the threshold is reached, start a major GC.

    'extrasize' is the number of bytes the caller is about to allocate
    externally; it biases both the threshold check and the pacing
    invariant below.
    """
    self._minor_collection()

    # If the gc_state is STATE_SCANNING, we're not in the middle
    # of an incremental major collection.  In that case, wait
    # until there is too much garbage before starting the next
    # major collection.  But if we are in the middle of an
    # incremental major collection, then always do (at least) one
    # step now.
    #
    # Within a major collection cycle, every call to
    # major_collection_step() increments
    # 'threshold_objects_made_old' by nursery_size/2.

    if self.gc_state != STATE_SCANNING or self.threshold_reached(extrasize):
        self.major_collection_step(extrasize)

        # See documentation in major_collection_step() for target invariants
        while self.gc_state != STATE_SCANNING:    # target (A1)
            threshold = self.threshold_objects_made_old
            if threshold >= r_uint(extrasize):
                threshold -= r_uint(extrasize)    # (*)
            if self.size_objects_made_old <= threshold:    # target (A2)
                break
            # Note that target (A2) is tweaked by (*); see
            # test_gc_set_max_heap_size in translator/c, test_newgc.py
            self._minor_collection()
            self.major_collection_step(extrasize)

    self.rrc_invoke_callback()
def collect_and_reserve(self, totalsize):
    """To call when nursery_free overflows nursery_top.
    First check if pinned objects are in front of nursery_top.  If so,
    jump over the pinned object and try again to reserve totalsize.
    Otherwise do a minor collection, and possibly some steps of a
    major collection, and finally reserve totalsize bytes.

    Returns the address of the reserved block of 'totalsize' bytes.
    """
    minor_collection_count = 0
    while True:
        # Invalidate nursery_free while we are collecting, so any
        # accidental allocation in the meantime crashes loudly.
        self.nursery_free = llmemory.NULL      # debug: don't use me
        # note: no "raise MemoryError" between here and the next time
        # we initialize nursery_free!

        if self.nursery_barriers.non_empty():
            # Pinned object in front of nursery_top. Try reserving totalsize
            # by jumping into the next, yet unused, area inside the
            # nursery. "Next area" in this case is the space between the
            # pinned object in front of nusery_top and the pinned object
            # after that. Graphically explained:
            #
            #     |- allocating totalsize failed in this area
            #     |     |- nursery_top
            #     |     |    |- pinned object in front of nursery_top,
            #     v     v    v  jump over this
            # +---------+--------+--------+--------+-----------+ }
            # | used    | pinned | empty  | pinned |  empty    | }- nursery
            # +---------+--------+--------+--------+-----------+ }
            #                       ^- try reserving totalsize in here next
            #
            # All pinned objects are represented by entries in
            # nursery_barriers (see minor_collection). The last entry is
            # always the end of the nursery. Therefore if nursery_barriers
            # contains only one element, we jump over a pinned object and
            # the "next area" (the space where we will try to allocate
            # totalsize) starts at the end of the pinned object and ends at
            # nursery's end.
            #
            # find the size of the pinned object after nursery_top
            size_gc_header = self.gcheaderbuilder.size_gc_header
            pinned_obj_size = size_gc_header + self.get_size(
                self.nursery_top + size_gc_header)
            #
            # update used nursery space to allocate objects
            self.nursery_free = self.nursery_top + pinned_obj_size
            self.nursery_top = self.nursery_barriers.popleft()
        else:
            minor_collection_count += 1
            if minor_collection_count == 1:
                self.minor_collection_with_major_progress()
            else:
                # Nursery too full again.  This is likely because of
                # execute_finalizers() or rrc_invoke_callback().
                # we need to fix it with another call to minor_collection()
                # ---this time only the minor part so that we are sure that
                # the nursery is empty (apart from pinned objects).
                #
                # Note that this still works with the counters:
                # 'size_objects_made_old' will be increased by
                # the _minor_collection() below.  We don't
                # immediately restore the target invariant that
                # 'size_objects_made_old <= threshold_objects_made_old'.
                # But we will do it in the next call to
                # minor_collection_with_major_progress().
                #
                ll_assert(minor_collection_count == 2,
                          "Calling minor_collection() twice is not "
                          "enough. Too many pinned objects?")
                self._minor_collection()
        #
        # Tried to do something about nursery_free overflowing
        # nursery_top before this point. Try to reserve totalsize now.
        # If this succeeds break out of loop.
        result = self.nursery_free
        if self.nursery_free + totalsize <= self.nursery_top:
            self.nursery_free = result + totalsize
            ll_assert(self.nursery_free <= self.nursery_top, "nursery overflow")
            break
        #
    #
    if self.debug_tiny_nursery >= 0:   # for debugging
        if self.nursery_top - self.nursery_free > self.debug_tiny_nursery:
            self.nursery_free = self.nursery_top - self.debug_tiny_nursery
    #
    return result

collect_and_reserve._dont_inline_ = True
  783. # XXX kill alloc_young and make it always True
def external_malloc(self, typeid, length, alloc_young):
    """Allocate a large object using the ArenaCollection or
    raw_malloc(), possibly as an object with card marking enabled,
    if it has gc pointers in its var-sized part.  'length' should be
    specified as 0 if the object is not varsized.  The returned
    object is fully initialized, but not zero-filled.

    Raises MemoryError on size overflow, negative 'length', or
    allocation failure.  Returns the address of the object (past the
    GC header).
    """
    #
    # Here we really need a valid 'typeid', not 0 (as the JIT might
    # try to send us if there is still a bug).
    ll_assert(bool(self.combine(typeid, 0)),
              "external_malloc: typeid == 0")
    #
    # Compute the total size, carefully checking for overflows.
    size_gc_header = self.gcheaderbuilder.size_gc_header
    nonvarsize = size_gc_header + self.fixed_size(typeid)
    if length == 0:
        # this includes the case of fixed-size objects, for which we
        # should not even ask for the varsize_item_sizes().
        totalsize = nonvarsize
    elif length > 0:
        # var-sized allocation with at least one item
        itemsize = self.varsize_item_sizes(typeid)
        try:
            varsize = ovfcheck(itemsize * length)
            totalsize = ovfcheck(nonvarsize + varsize)
        except OverflowError:
            raise MemoryError
    else:
        # negative length! This likely comes from an overflow
        # earlier.  We will just raise MemoryError here.
        raise MemoryError
    #
    # If somebody calls this function a lot, we must eventually
    # force a collection.  We use threshold_reached(), which might
    # be true now but become false at some point after a few calls
    # to major_collection_step().  If there is really no memory,
    # then when the major collection finishes it will raise
    # MemoryError.
    if self.threshold_reached(raw_malloc_usage(totalsize)):
        self.minor_collection_with_major_progress(
            raw_malloc_usage(totalsize) + self.nursery_size // 2)
    #
    # Check if the object would fit in the ArenaCollection.
    # Also, an object allocated from ArenaCollection must be old.
    if (raw_malloc_usage(totalsize) <= self.small_request_threshold
        and not alloc_young):
        #
        # Yes.  Round up 'totalsize' (it cannot overflow and it
        # must remain <= self.small_request_threshold.)
        totalsize = llarena.round_up_for_allocation(totalsize)
        ll_assert(raw_malloc_usage(totalsize) <=
                  self.small_request_threshold,
                  "rounding up made totalsize > small_request_threshold")
        #
        # Allocate from the ArenaCollection.  Don't clear it.
        result = self.ac.malloc(totalsize)
        #
        extra_flags = GCFLAG_TRACK_YOUNG_PTRS
        #
    else:
        # No, so proceed to allocate it externally with raw_malloc().
        # Check if we need to introduce the card marker bits area.
        if (self.card_page_indices <= 0  # <- this check is constant-folded
            or not self.has_gcptr_in_varsize(typeid) or
            raw_malloc_usage(totalsize) <= self.nonlarge_max):
            #
            # In these cases, we don't want a card marker bits area.
            # This case also includes all fixed-size objects.
            cardheadersize = 0
            extra_flags = 0
            #
        else:
            # Reserve N extra words containing card bits before the object.
            extra_words = self.card_marking_words_for_length(length)
            cardheadersize = WORD * extra_words
            extra_flags = GCFLAG_HAS_CARDS | GCFLAG_TRACK_YOUNG_PTRS
            # if 'alloc_young', then we also immediately set
            # GCFLAG_CARDS_SET, but without adding the object to
            # 'old_objects_with_cards_set'.  In this way it should
            # never be added to that list as long as it is young.
            if alloc_young:
                extra_flags |= GCFLAG_CARDS_SET
        #
        # Detect very rare cases of overflows
        if raw_malloc_usage(totalsize) > (sys.maxint - (WORD-1)
                                          - cardheadersize):
            raise MemoryError("rare case of overflow")
        #
        # Now we know that the following computations cannot overflow.
        # Note that round_up_for_allocation() is also needed to get the
        # correct number added to 'rawmalloced_total_size'.
        allocsize = (cardheadersize + raw_malloc_usage(
                        llarena.round_up_for_allocation(totalsize)))
        #
        # Allocate the object using arena_malloc(), which we assume here
        # is just the same as raw_malloc(), but allows the extra
        # flexibility of saying that we have extra words in the header.
        # The memory returned is not cleared.
        arena = llarena.arena_malloc(allocsize, 0)
        if not arena:
            raise MemoryError("cannot allocate large object")
        #
        # Reserve the card mark bits as a list of single bytes,
        # and clear these bytes.
        i = 0
        while i < cardheadersize:
            p = arena + i
            llarena.arena_reserve(p, llmemory.sizeof(lltype.Char))
            p.char[0] = '\x00'
            i += 1
        #
        # Reserve the actual object.  (This is a no-op in C).
        result = arena + cardheadersize
        llarena.arena_reserve(result, totalsize)
        #
        # Record the newly allocated object and its full malloced size.
        # The object is young or old depending on the argument.
        self.rawmalloced_total_size += r_uint(allocsize)
        if alloc_young:
            if not self.young_rawmalloced_objects:
                self.young_rawmalloced_objects = self.AddressDict()
            self.young_rawmalloced_objects.add(result + size_gc_header)
        else:
            self.old_rawmalloced_objects.append(result + size_gc_header)
            extra_flags |= GCFLAG_TRACK_YOUNG_PTRS
    #
    # Common code to fill the header and length of the object.
    self.init_gc_object(result, typeid, extra_flags)
    if self.is_varsize(typeid):
        offset_to_length = self.varsize_offset_to_length(typeid)
        (result + size_gc_header + offset_to_length).signed[0] = length
    return result + size_gc_header
  916. # ----------
  917. # Other functions in the GC API
  918. def set_max_heap_size(self, size):
  919. self.max_heap_size = float(size)
  920. if self.max_heap_size > 0.0:
  921. if self.max_heap_size < self.next_major_collection_initial:
  922. self.next_major_collection_initial = self.max_heap_size
  923. if self.max_heap_size < self.next_major_collection_threshold:
  924. self.next_major_collection_threshold = self.max_heap_size
def raw_malloc_memory_pressure(self, sizehint):
    """Account for 'sizehint' bytes of raw (non-GC-visible) memory,
    so that external allocations still push the GC towards a major
    collection."""
    # Decrement by 'sizehint' plus a very little bit extra.  This
    # is needed e.g. for _rawffi, which may allocate a lot of tiny
    # arrays.
    self.next_major_collection_threshold -= (sizehint + 2 * WORD)
    if self.next_major_collection_threshold < 0:
        # cannot trigger a full collection now, but we can ensure
        # that one will occur very soon
        self.nursery_free = self.nursery_top
  934. def can_optimize_clean_setarrayitems(self):
  935. if self.card_page_indices > 0:
  936. return False
  937. return MovingGCBase.can_optimize_clean_setarrayitems(self)
def can_move(self, obj):
    """Overrides the parent can_move().  Only nursery objects can
    still move (they get promoted out of the nursery at the next
    minor collection, unless pinned)."""
    return self.is_in_nursery(obj)
def pin(self, obj):
    """Try to pin 'obj' at its current nursery address.

    Returns True on success.  Returns False if the per-nursery pin
    limit is reached, if 'obj' is not in the nursery (old objects
    are already non-moving), if it is already pinned, or if its type
    cannot be pinned.
    """
    if self.pinned_objects_in_nursery >= self.max_number_of_pinned_objects:
        return False
    if not self.is_in_nursery(obj):
        # old objects are already non-moving, therefore pinning
        # makes no sense. If you run into this case, you may forgot
        # to check can_move(obj).
        return False
    if self._is_pinned(obj):
        # already pinned, we do not allow to pin it again.
        # Reason: It would be possible that the first caller unpins
        # while the second caller thinks it's still pinned.
        return False
    #
    obj_type_id = self.get_type_id(obj)
    if self.cannot_pin(obj_type_id):
        # objects containing GC pointers can't be pinned. If we would add
        # it, we would have to track all pinned objects and trace them
        # every minor collection to make sure the referenced object are
        # kept alive. Right now this is not a use case that's needed.
        # The check above also tests for being a less common kind of
        # object: a weakref, or one with any kind of finalizer.
        return False
    #
    # Mark the object and update the bookkeeping counter.
    self.header(obj).tid |= GCFLAG_PINNED
    self.pinned_objects_in_nursery += 1
    return True
def unpin(self, obj):
    """Unpin a previously pinned object (asserts it is pinned)."""
    ll_assert(self._is_pinned(obj),
        "unpin: object is already not pinned")
    #
    self.header(obj).tid &= ~GCFLAG_PINNED
    self.pinned_objects_in_nursery -= 1
def _is_pinned(self, obj):
    """Return True if 'obj' currently carries GCFLAG_PINNED."""
    return (self.header(obj).tid & GCFLAG_PINNED) != 0
def shrink_array(self, obj, smallerlength):
    """Try to shrink the var-sized nursery object 'obj' to
    'smallerlength' items.  Returns True on success, False if the
    object cannot be resized (not in the nursery, or has a shadow).
    """
    #
    # Only objects in the nursery can be "resized".  Resizing them
    # means recording that they have a smaller size, so that when
    # moved out of the nursery, they will consume less memory.
    # In particular, an array with GCFLAG_HAS_CARDS is never resized.
    # Also, a nursery object with GCFLAG_HAS_SHADOW is not resized
    # either, as this would potentially loose part of the memory in
    # the already-allocated shadow.
    if not self.is_in_nursery(obj):
        return False
    if self.header(obj).tid & GCFLAG_HAS_SHADOW:
        return False
    #
    size_gc_header = self.gcheaderbuilder.size_gc_header
    typeid = self.get_type_id(obj)
    totalsmallersize = (
        size_gc_header + self.fixed_size(typeid) +
        self.varsize_item_sizes(typeid) * smallerlength)
    llarena.arena_shrink_obj(obj - size_gc_header, totalsmallersize)
    #
    # update the stored length so later copies use the smaller size
    offset_to_length = self.varsize_offset_to_length(typeid)
    (obj + offset_to_length).signed[0] = smallerlength
    return True
  1000. # ----------
  1001. # Simple helpers
def get_type_id(self, obj):
    """Extract the 16-bit type id from the object's header tid."""
    tid = self.header(obj).tid
    return llop.extract_ushort(llgroup.HALFWORD, tid)
def combine(self, typeid16, flags):
    """Pack a 16-bit type id and GC flags into a single tid word."""
    return llop.combine_ushort(lltype.Signed, typeid16, flags)
def init_gc_object(self, addr, typeid16, flags=0):
    """Write the GC header (tid = typeid + flags) at address 'addr'."""
    # The default 'flags' is zero.  The flags GCFLAG_NO_xxx_PTRS
    # have been chosen to allow 'flags' to be zero in the common
    # case (hence the 'NO' in their name).
    hdr = llmemory.cast_adr_to_ptr(addr, lltype.Ptr(self.HDR))
    hdr.tid = self.combine(typeid16, flags)
def init_gc_object_immortal(self, addr, typeid16, flags=0):
    """Initialize the header of a prebuilt (immortal) GC object."""
    # For prebuilt GC objects, the flags must contain
    # GCFLAG_NO_xxx_PTRS, at least initially.
    flags |= GCFLAG_NO_HEAP_PTRS | GCFLAG_TRACK_YOUNG_PTRS
    self.init_gc_object(addr, typeid16, flags)
def is_in_nursery(self, addr):
    """Return True if 'addr' falls inside the nursery memory range.
    Tagged (odd-valued) pointers are not allowed here."""
    ll_assert(llmemory.cast_adr_to_int(addr) & 1 == 0,
              "odd-valued (i.e. tagged) pointer unexpected here")
    return self.nursery <= addr < self.nursery + self.nursery_size
def is_young_object(self, addr):
    """Check if the object at 'addr' is young: either in the
    allocated part of the nursery, or a young raw-malloced object."""
    if not self.is_valid_gc_object(addr):
        return False     # filter out tagged pointers explicitly.
    # NOTE: this compares against nursery_top (the currently usable
    # part), not nursery + nursery_size as is_in_nursery() does.
    if self.nursery <= addr < self.nursery_top:
        return True      # addr is in the nursery
    # Else, it may be in the set 'young_rawmalloced_objects'
    return (bool(self.young_rawmalloced_objects) and
            self.young_rawmalloced_objects.contains(addr))
  1031. def debug_is_old_object(self, addr):
  1032. return (self.is_valid_gc_object(addr)
  1033. and not self.is_young_object(addr))
def is_forwarded(self, obj):
    """Returns True if the nursery obj is marked as forwarded.
    Implemented a bit obscurely by checking an unrelated flag
    that can never be set on a young object -- except if tid == -42.
    """
    ll_assert(self.is_in_nursery(obj),
              "Can't forward an object outside the nursery.")
    tid = self.header(obj).tid
    result = (tid & GCFLAG_FINALIZATION_ORDERING != 0)
    if result:
        # a forwarded object has its whole tid overwritten with -42
        ll_assert(tid == -42, "bogus header for young obj")
    else:
        # sanity checks: a real young header is non-zero and uses
        # only known flag bits
        ll_assert(bool(tid), "bogus header (1)")
        ll_assert(tid & -_GCFLAG_FIRST_UNUSED == 0, "bogus header (2)")
    return result
def get_forwarding_address(self, obj):
    """Read the forwarding pointer stored in a forwarded nursery obj."""
    return llmemory.cast_adr_to_ptr(obj, FORWARDSTUBPTR).forw
  1051. def get_possibly_forwarded_type_id(self, obj):
  1052. if self.is_in_nursery(obj) and self.is_forwarded(obj):
  1053. obj = self.get_forwarding_address(obj)
  1054. return self.get_type_id(obj)
def get_total_memory_used(self):
    """Return the total memory used, not counting any object in the
    nursery: only objects in the ArenaCollection or raw-malloced.
    """
    return self.ac.total_memory_used + self.rawmalloced_total_size
  1060. def threshold_reached(self, extra=0):
  1061. return (self.next_major_collection_threshold -
  1062. float(self.get_total_memory_used())) < float(extra)
def card_marking_words_for_length(self, length):
    """Number of whole words of card-mark bits needed for an array
    of 'length' items (one bit per card page)."""
    # --- Unoptimized version:
    #num_bits = ((length-1) >> self.card_page_shift) + 1
    #return (num_bits + (LONG_BIT - 1)) >> LONG_BIT_SHIFT
    # --- Optimized version:
    # (relies on r_uint wrap-around semantics; do not "simplify")
    return intmask(
      ((r_uint(length) + r_uint((LONG_BIT << self.card_page_shift) - 1)) >>
       (self.card_page_shift + LONG_BIT_SHIFT)))
def card_marking_bytes_for_length(self, length):
    """Number of bytes of card-mark bits needed for an array of
    'length' items (one bit per card page, 8 bits per byte)."""
    # --- Unoptimized version:
    #num_bits = ((length-1) >> self.card_page_shift) + 1
    #return (num_bits + 7) >> 3
    # --- Optimized version:
    # (relies on r_uint wrap-around semantics; do not "simplify")
    return intmask(
      ((r_uint(length) + r_uint((8 << self.card_page_shift) - 1)) >>
       (self.card_page_shift + 3)))
def debug_check_consistency(self):
    """Debug-mode consistency check over the whole heap; extends the
    base-class check with incminimark-specific invariants."""
    if self.DEBUG:
        ll_assert(not self.young_rawmalloced_objects,
                  "young raw-malloced objects in a major collection")
        ll_assert(not self.young_objects_with_weakrefs.non_empty(),
                  "young objects with weakrefs in a major collection")

        if self.raw_malloc_might_sweep.non_empty():
            ll_assert(self.gc_state == STATE_SWEEPING,
                  "raw_malloc_might_sweep must be empty outside SWEEPING")

        if self.gc_state == STATE_MARKING:
            self.objects_to_trace.foreach(self._check_not_in_nursery, None)
            self.more_objects_to_trace.foreach(self._check_not_in_nursery,
                                               None)
            # expose the gray lists as dicts so that per-object checks
            # (_debug_check_not_white) can do fast membership tests
            self._debug_objects_to_trace_dict1 = \
                                        self.objects_to_trace.stack2dict()
            self._debug_objects_to_trace_dict2 = \
                                   self.more_objects_to_trace.stack2dict()
            MovingGCBase.debug_check_consistency(self)
            self._debug_objects_to_trace_dict2.delete()
            self._debug_objects_to_trace_dict1.delete()
        else:
            MovingGCBase.debug_check_consistency(self)
def _check_not_in_nursery(self, obj, ignore):
    """Callback for foreach(): assert a traced object is not young."""
    ll_assert(not self.is_in_nursery(obj),
              "'objects_to_trace' contains a nursery object")
def debug_check_object(self, obj):
    """Per-object debug check; dispatches to a gc_state-specific
    checker after verifying the nursery/pinned invariants."""
    # We are after a minor collection, and possibly after a major
    # collection step.  No object should be in the nursery (except
    # pinned ones)
    if not self._is_pinned(obj):
        ll_assert(not self.is_in_nursery(obj),
                  "object in nursery after collection")
        ll_assert(self.header(obj).tid & GCFLAG_VISITED_RMY == 0,
                  "GCFLAG_VISITED_RMY after collection")
        ll_assert(self.header(obj).tid & GCFLAG_PINNED == 0,
                  "GCFLAG_PINNED outside the nursery after collection")
    else:
        ll_assert(self.is_in_nursery(obj),
                  "pinned object not in nursery")

    if self.gc_state == STATE_SCANNING:
        self._debug_check_object_scanning(obj)
    elif self.gc_state == STATE_MARKING:
        self._debug_check_object_marking(obj)
    elif self.gc_state == STATE_SWEEPING:
        self._debug_check_object_sweeping(obj)
    elif self.gc_state == STATE_FINALIZING:
        self._debug_check_object_finalizing(obj)
    else:
        ll_assert(False, "unknown gc_state value")
def _debug_check_object_marking(self, obj):
    """Invariants that must hold during STATE_MARKING."""
    if self.header(obj).tid & GCFLAG_VISITED != 0:
        # A black object.  Should NEVER point to a white object.
        self.trace(obj, self._debug_check_not_white, None)
        # During marking, all visited (black) objects should always have
        # the GCFLAG_TRACK_YOUNG_PTRS flag set, for the write barrier to
        # trigger --- at least if they contain any gc ptr.  We are just
        # after a minor or major collection here, so we can't see the
        # object state VISITED & ~WRITE_BARRIER.
        typeid = self.get_type_id(obj)
        if self.has_gcptr(typeid):
            ll_assert(self.header(obj).tid & GCFLAG_TRACK_YOUNG_PTRS != 0,
                      "black object without GCFLAG_TRACK_YOUNG_PTRS")
def _debug_check_not_white(self, root, ignored):
    """Trace callback: assert that a reference out of a black object
    does not point to a white (unmarked, unqueued) object."""
    obj = root.address[0]
    if self.header(obj).tid & GCFLAG_VISITED != 0:
        pass    # black -> black
    elif (self._debug_objects_to_trace_dict1.contains(obj) or
          self._debug_objects_to_trace_dict2.contains(obj)):
        pass    # black -> gray
    elif self.header(obj).tid & GCFLAG_NO_HEAP_PTRS != 0:
        pass    # black -> white-but-prebuilt-so-dont-care
    elif self._is_pinned(obj):
        # black -> pinned: the pinned object is a white one as
        # every minor collection visits them and takes care of
        # visiting pinned objects.
        # XXX (groggi) double check with fijal/armin
        pass    # black -> pinned
    else:
        ll_assert(False, "black -> white pointer found")
def _debug_check_object_sweeping(self, obj):
    """Invariants that must hold during STATE_SWEEPING."""
    # We see only reachable objects here.  They all start as VISITED
    # but this flag is progressively removed in the sweeping phase.

    # All objects should have this flag, except if they
    # don't have any GC pointer or are pinned objects
    typeid = self.get_type_id(obj)
    if self.has_gcptr(typeid) and not self._is_pinned(obj):
        ll_assert(self.header(obj).tid & GCFLAG_TRACK_YOUNG_PTRS != 0,
                  "missing GCFLAG_TRACK_YOUNG_PTRS")
    # the GCFLAG_FINALIZATION_ORDERING should not be set between coll.
    ll_assert(self.header(obj).tid & GCFLAG_FINALIZATION_ORDERING == 0,
              "unexpected GCFLAG_FINALIZATION_ORDERING")
    # the GCFLAG_CARDS_SET should not be set between collections
    ll_assert(self.header(obj).tid & GCFLAG_CARDS_SET == 0,
              "unexpected GCFLAG_CARDS_SET")
    # if the GCFLAG_HAS_CARDS is set, check that all bits are zero now
    if self.header(obj).tid & GCFLAG_HAS_CARDS:
        if self.card_page_indices <= 0:
            ll_assert(False, "GCFLAG_HAS_CARDS but not using card marking")
            return
        typeid = self.get_type_id(obj)
        ll_assert(self.has_gcptr_in_varsize(typeid),
                  "GCFLAG_HAS_CARDS but not has_gcptr_in_varsize")
        ll_assert(self.header(obj).tid & GCFLAG_NO_HEAP_PTRS == 0,
                  "GCFLAG_HAS_CARDS && GCFLAG_NO_HEAP_PTRS")
        offset_to_length = self.varsize_offset_to_length(typeid)
        length = (obj + offset_to_length).signed[0]
        extra_words = self.card_marking_words_for_length(length)
        #
        # walk the card bytes, which are stored just before the header
        size_gc_header = self.gcheaderbuilder.size_gc_header
        p = llarena.getfakearenaaddress(obj - size_gc_header)
        i = extra_words * WORD
        while i > 0:
            p -= 1
            ll_assert(p.char[0] == '\x00',
                      "the card marker bits are not cleared")
            i -= 1
def _debug_check_object_finalizing(self, obj):
    # Same invariants as STATE_SCANNING.
    self._debug_check_object_scanning(obj)
def _debug_check_object_scanning(self, obj):
    """Invariants that must hold during STATE_SCANNING (i.e. between
    major collection cycles)."""
    # This check is called before scanning starts.
    # Scanning is done in a single step.
    # the GCFLAG_VISITED should not be set between collections
    ll_assert(self.header(obj).tid & GCFLAG_VISITED == 0,
              "unexpected GCFLAG_VISITED")

    # All other invariants from the sweeping phase should still be
    # satisfied.
    self._debug_check_object_sweeping(obj)
# ----------
# Write barrier

# for the JIT: a minimal description of the write_barrier() method
# (the JIT assumes it is of the shape
#  "if addr_struct.int0 & JIT_WB_IF_FLAG: remember_young_pointer()")
JIT_WB_IF_FLAG = GCFLAG_TRACK_YOUNG_PTRS

# for the JIT to generate custom code corresponding to the array
# write barrier for the simplest case of cards.  If JIT_CARDS_SET
# is already set on an object, it will execute code like this:
#    MOV eax, index
#    SHR eax, JIT_WB_CARD_PAGE_SHIFT
#    XOR eax, -8
#    BTS [object], eax
if TRANSLATION_PARAMS['card_page_indices'] > 0:
    JIT_WB_CARDS_SET = GCFLAG_CARDS_SET
    # JIT_WB_CARD_PAGE_SHIFT = log2(card_page_indices), computed at
    # class-definition time by searching for the matching power of two
    JIT_WB_CARD_PAGE_SHIFT = 1
    while ((1 << JIT_WB_CARD_PAGE_SHIFT) !=
           TRANSLATION_PARAMS['card_page_indices']):
        JIT_WB_CARD_PAGE_SHIFT += 1
@classmethod
def JIT_max_size_of_young_obj(cls):
    """For the JIT: largest size still allocated in the nursery."""
    return cls.TRANSLATION_PARAMS['large_object']
@classmethod
def JIT_minimal_size_in_nursery(cls):
    """For the JIT: smallest size of any nursery allocation."""
    return cls.minimal_size_in_nursery
def write_barrier(self, addr_struct):
    """Write barrier: called before writing a GC pointer into
    'addr_struct'; records the object if it still tracks young ptrs."""
    # see OP_GC_BIT in translator/c/gc.py
    if llop.gc_bit(lltype.Signed, self.header(addr_struct),
                   GCFLAG_TRACK_YOUNG_PTRS):
        self.remember_young_pointer(addr_struct)
def write_barrier_from_array(self, addr_array, index):
    """Write barrier for array stores: like write_barrier(), but uses
    the card-marking variant (with 'index') when cards are enabled."""
    if llop.gc_bit(lltype.Signed, self.header(addr_array),
                   GCFLAG_TRACK_YOUNG_PTRS):
        if self.card_page_indices > 0:
            self.remember_young_pointer_from_array2(addr_array, index)
        else:
            self.remember_young_pointer(addr_array)
def _init_writebarrier_logic(self):
    """Build and attach the slow path of the write barrier."""
    DEBUG = self.DEBUG
    # The purpose of attaching remember_young_pointer to the instance
    # instead of keeping it as a regular method is to
    # make the code in write_barrier() marginally smaller
    # (which is important because it is inlined *everywhere*).
    def remember_young_pointer(addr_struct):
        # 'addr_struct' is the address of the object in which we write.
        # We know that 'addr_struct' has GCFLAG_TRACK_YOUNG_PTRS so far.
        #
        if DEBUG:   # note: PYPY_GC_DEBUG=1 does not enable this
            ll_assert(self.debug_is_old_object(addr_struct) or
                      self.header(addr_struct).tid & GCFLAG_HAS_CARDS != 0,
                  "young object with GCFLAG_TRACK_YOUNG_PTRS and no cards")
        #
        # We need to remove the flag GCFLAG_TRACK_YOUNG_PTRS and add
        # the object to the list 'old_objects_pointing_to_young'.
        # We know that 'addr_struct' cannot be in the nursery,
        # because nursery objects never have the flag
        # GCFLAG_TRACK_YOUNG_PTRS to start with.  Note that in
        # theory we don't need to do that if the pointer that we're
        # writing into the object isn't pointing to a young object.
        # However, it isn't really a win, because then sometimes
        # we're going to call this function a lot of times for the
        # same object; moreover we'd need to pass the 'newvalue' as
        # an argument here.  The JIT has always called a
        # 'newvalue'-less version, too.  Moreover, the incremental
        # GC nowadays relies on this fact.
        self.old_objects_pointing_to_young.append(addr_struct)
        objhdr = self.header(addr_struct)
        objhdr.tid &= ~GCFLAG_TRACK_YOUNG_PTRS
        #
        # Second part: if 'addr_struct' is actually a prebuilt GC
        # object and it's the first time we see a write to it, we
        # add it to the list 'prebuilt_root_objects'.
        if objhdr.tid & GCFLAG_NO_HEAP_PTRS:
            objhdr.tid &= ~GCFLAG_NO_HEAP_PTRS
            self.prebuilt_root_objects.append(addr_struct)

    remember_young_pointer._dont_inline_ = True
    self.remember_young_pointer = remember_young_pointer
    #
    # the card-marking variant is only needed when cards are enabled
    if self.card_page_indices > 0:
        self._init_writebarrier_with_card_marker()
def _init_writebarrier_with_card_marker(self):
    """Build and attach the card-marking write-barrier slow paths."""
    DEBUG = self.DEBUG
    def remember_young_pointer_from_array2(addr_array, index):
        # 'addr_array' is the address of the object in which we write,
        # which must have an array part;  'index' is the index of the
        # item that is (or contains) the pointer that we write.
        # We know that 'addr_array' has GCFLAG_TRACK_YOUNG_PTRS so far.
        #
        objhdr = self.header(addr_array)
        if objhdr.tid & GCFLAG_HAS_CARDS == 0:
            #
            if DEBUG:   # note: PYPY_GC_DEBUG=1 does not enable this
                ll_assert(self.debug_is_old_object(addr_array),
                    "young array with no card but GCFLAG_TRACK_YOUNG_PTRS")
            #
            # no cards, use default logic.  Mostly copied from above.
            self.old_objects_pointing_to_young.append(addr_array)
            objhdr.tid &= ~GCFLAG_TRACK_YOUNG_PTRS
            if objhdr.tid & GCFLAG_NO_HEAP_PTRS:
                objhdr.tid &= ~GCFLAG_NO_HEAP_PTRS
                self.prebuilt_root_objects.append(addr_array)
            return
        #
        # 'addr_array' is a raw_malloc'ed array with card markers
        # in front.  Compute the index of the bit to set:
        bitindex = index >> self.card_page_shift
        byteindex = bitindex >> 3
        bitmask = 1 << (bitindex & 7)
        #
        # If the bit is already set, leave now.
        addr_byte = self.get_card(addr_array, byteindex)
        byte = ord(addr_byte.char[0])
        if byte & bitmask:
            return
        #
        # We set the flag (even if the newly written address does not
        # actually point to the nursery, which seems to be ok -- actually
        # it seems more important that remember_young_pointer_from_array2()
        # does not take 3 arguments).
        addr_byte.char[0] = chr(byte | bitmask)
        #
        if objhdr.tid & GCFLAG_CARDS_SET == 0:
            self.old_objects_with_cards_set.append(addr_array)
            objhdr.tid |= GCFLAG_CARDS_SET

    remember_young_pointer_from_array2._dont_inline_ = True
    ll_assert(self.card_page_indices > 0,
              "non-positive card_page_indices")
    self.remember_young_pointer_from_array2 = (
        remember_young_pointer_from_array2)

    def jit_remember_young_pointer_from_array(addr_array):
        # minimal version of the above, with just one argument,
        # called by the JIT when GCFLAG_TRACK_YOUNG_PTRS is set
        # but GCFLAG_CARDS_SET is cleared.  This tries to set
        # GCFLAG_CARDS_SET if possible; otherwise, it falls back
        # to remember_young_pointer().
        objhdr = self.header(addr_array)
        if objhdr.tid & GCFLAG_HAS_CARDS:
            self.old_objects_with_cards_set.append(addr_array)
            objhdr.tid |= GCFLAG_CARDS_SET
        else:
            self.remember_young_pointer(addr_array)

    self.jit_remember_young_pointer_from_array = (
        jit_remember_young_pointer_from_array)
  1350. def get_card(self, obj, byteindex):
  1351. size_gc_header = self.gcheaderbuilder.size_gc_header
  1352. addr_byte = obj - size_gc_header
  1353. return llarena.getfakearenaaddress(addr_byte) + (~byteindex)
    def writebarrier_before_copy(self, source_addr, dest_addr,
                                 source_start, dest_start, length):
        """ This has the same effect as calling writebarrier over
        each element in dest copied from source, except it might reset
        one of the following flags a bit too eagerly, which means we'll have
        a bit more objects to track, but being on the safe side.

        Returns True when the flags/cards have been fully dealt with here,
        or False to mean "the caller must apply the write barrier manually,
        element by element" (see ll_arraycopy).
        """
        # obscuuuure.  The flag 'updated_old_objects_pointing_to_pinned'
        # is set to True when 'old_objects_pointing_to_pinned' is modified.
        # Here, when it was modified, then we do a write_barrier() on
        # all items in that list (there should only be a small number,
        # so we don't care).  The goal is that the logic that follows below
        # works as expected...
        if self.updated_old_objects_pointing_to_pinned:
            self.old_objects_pointing_to_pinned.foreach(
                self._wb_old_object_pointing_to_pinned, None)
            self.updated_old_objects_pointing_to_pinned = False
        #
        source_hdr = self.header(source_addr)
        dest_hdr = self.header(dest_addr)
        if dest_hdr.tid & GCFLAG_TRACK_YOUNG_PTRS == 0:
            return True
        # ^^^ a fast path of write-barrier
        #
        if (self.card_page_indices > 0 and     # check constant-folded
            source_hdr.tid & GCFLAG_HAS_CARDS != 0):
            #
            if source_hdr.tid & GCFLAG_TRACK_YOUNG_PTRS == 0:
                # The source object may have random young pointers.
                # Return False to mean "do it manually in ll_arraycopy".
                return False
            #
            if source_hdr.tid & GCFLAG_CARDS_SET == 0:
                # The source object has no young pointers at all.  Done.
                return True
            #
            if dest_hdr.tid & GCFLAG_HAS_CARDS == 0:
                # The dest object doesn't have cards.  Do it manually.
                return False
            #
            if source_start != 0 or dest_start != 0:
                # Misaligned.  Do it manually.
                return False
            #
            # Both objects have cards and the copy is aligned: just OR
            # the source's card bits into the dest's card bits.
            self.manually_copy_card_bits(source_addr, dest_addr, length)
            return True
        #
        if source_hdr.tid & GCFLAG_TRACK_YOUNG_PTRS == 0:
            # there might be in source a pointer to a young object
            self.old_objects_pointing_to_young.append(dest_addr)
            dest_hdr.tid &= ~GCFLAG_TRACK_YOUNG_PTRS
        #
        if dest_hdr.tid & GCFLAG_NO_HEAP_PTRS:
            if source_hdr.tid & GCFLAG_NO_HEAP_PTRS == 0:
                # The source may contain heap pointers: the (prebuilt)
                # dest can no longer claim it has none.
                dest_hdr.tid &= ~GCFLAG_NO_HEAP_PTRS
                self.prebuilt_root_objects.append(dest_addr)
        return True
    def manually_copy_card_bits(self, source_addr, dest_addr, length):
        # manually copy the individual card marks from source to dest:
        # the source's card bytes are OR'ed into the dest's card bytes,
        # so no mark already present on the dest is ever lost.
        ll_assert(self.card_page_indices > 0,
                  "non-positive card_page_indices")
        bytes = self.card_marking_bytes_for_length(length)
        #
        anybyte = 0     # records whether any card bit at all was copied
        i = 0
        while i < bytes:
            addr_srcbyte = self.get_card(source_addr, i)
            addr_dstbyte = self.get_card(dest_addr, i)
            byte = ord(addr_srcbyte.char[0])
            anybyte |= byte
            addr_dstbyte.char[0] = chr(ord(addr_dstbyte.char[0]) | byte)
            i += 1
        #
        # If at least one bit was set, the dest object must also be
        # registered in 'old_objects_with_cards_set' (exactly once).
        if anybyte:
            dest_hdr = self.header(dest_addr)
            if dest_hdr.tid & GCFLAG_CARDS_SET == 0:
                self.old_objects_with_cards_set.append(dest_addr)
                dest_hdr.tid |= GCFLAG_CARDS_SET
    def _wb_old_object_pointing_to_pinned(self, obj, ignore):
        # Callback for 'old_objects_pointing_to_pinned.foreach()' (see
        # writebarrier_before_copy): re-apply the write barrier to 'obj'.
        self.write_barrier(obj)
  1434. def record_pinned_object_with_shadow(self, obj, new_shadow_object_dict):
  1435. # checks if the pinned object has a shadow and if so add it to the
  1436. # dict of shadows.
  1437. obj = obj + self.gcheaderbuilder.size_gc_header
  1438. shadow = self.nursery_objects_shadows.get(obj)
  1439. if shadow != llmemory.NULL:
  1440. # visit shadow to keep it alive
  1441. # XXX seems like it is save to set GCFLAG_VISITED, however
  1442. # should be double checked
  1443. self.header(shadow).tid |= GCFLAG_VISITED
  1444. new_shadow_object_dict.setitem(obj, shadow)
  1445. def register_finalizer(self, fq_index, gcobj):
  1446. from rpython.rtyper.lltypesystem import rffi
  1447. obj = llmemory.cast_ptr_to_adr(gcobj)
  1448. fq_index = rffi.cast(llmemory.Address, fq_index)
  1449. self.probably_young_objects_with_finalizers.append(obj)
  1450. self.probably_young_objects_with_finalizers.append(fq_index)
  1451. # ----------
  1452. # Nursery collection
    def _minor_collection(self):
        """Perform a minor collection: find the objects from the nursery
        that remain alive and move them out.

        Pinned objects that survive stay in place in the nursery; the
        free space around them is described by 'nursery_barriers', which
        is rebuilt at the end of this method.
        """
        #
        debug_start("gc-minor")
        #
        # All nursery barriers are invalid from this point on.  They
        # are evaluated anew as part of the minor collection.
        self.nursery_barriers.delete()
        #
        # Keeps track of surviving pinned objects. See also '_trace_drag_out()'
        # where this stack is filled.  Pinning an object only prevents it from
        # being moved, not from being collected if it is not reachable anymore.
        self.surviving_pinned_objects = self.AddressStack()
        # The following counter keeps track of alive and pinned young objects
        # inside the nursery. We reset it here and increase it in
        # '_trace_drag_out()'.
        any_pinned_object_from_earlier = self.any_pinned_object_kept
        self.pinned_objects_in_nursery = 0
        self.any_pinned_object_kept = False
        #
        # Before everything else, remove from 'old_objects_pointing_to_young'
        # the young arrays.
        if self.young_rawmalloced_objects:
            self.remove_young_arrays_from_old_objects_pointing_to_young()
        #
        # A special step in the STATE_MARKING phase.
        if self.gc_state == STATE_MARKING:
            # Copy the 'old_objects_pointing_to_young' list so far to
            # 'more_objects_to_trace'.  Turn black objects back to gray.
            # This is because these are precisely the old objects that
            # have been modified and need rescanning.
            self.old_objects_pointing_to_young.foreach(
                self._add_to_more_objects_to_trace_if_black, None)
            # Old black objects pointing to pinned objects that may no
            # longer be pinned now: careful,
            # _visit_old_objects_pointing_to_pinned() will move the
            # previously-pinned object, and that creates a white object.
            # We prevent the "black->white" situation by forcing the
            # old black object to become gray again.
            self.old_objects_pointing_to_pinned.foreach(
                self._add_to_more_objects_to_trace_if_black, None)
        #
        # First, find the roots that point to young objects.  All nursery
        # objects found are copied out of the nursery, and the occasional
        # young raw-malloced object is flagged with GCFLAG_VISITED_RMY.
        # Note that during this step, we ignore references to further
        # young objects; only objects directly referenced by roots
        # are copied out or flagged.  They are also added to the list
        # 'old_objects_pointing_to_young'.
        self.nursery_surviving_size = 0
        self.collect_roots_in_nursery(any_pinned_object_from_earlier)
        #
        # visit all objects that are known for pointing to pinned
        # objects. This way we populate 'surviving_pinned_objects'
        # with pinned object that are (only) visible from an old
        # object.
        # Additionally we create a new list as it may be that an old object
        # no longer points to a pinned one. Such old objects won't be added
        # again to 'old_objects_pointing_to_pinned'.
        if self.old_objects_pointing_to_pinned.non_empty():
            current_old_objects_pointing_to_pinned = \
                    self.old_objects_pointing_to_pinned
            self.old_objects_pointing_to_pinned = self.AddressStack()
            current_old_objects_pointing_to_pinned.foreach(
                self._visit_old_objects_pointing_to_pinned, None)
            current_old_objects_pointing_to_pinned.delete()
        #
        # visit the P list from rawrefcount, if enabled.
        if self.rrc_enabled:
            self.rrc_minor_collection_trace()
        #
        # visit the "probably young" objects with finalizers.  They
        # always all survive.
        if self.probably_young_objects_with_finalizers.non_empty():
            self.deal_with_young_objects_with_finalizers()
        #
        while True:
            # If we are using card marking, do a partial trace of the arrays
            # that are flagged with GCFLAG_CARDS_SET.
            if self.card_page_indices > 0:
                self.collect_cardrefs_to_nursery()
            #
            # Now trace objects from 'old_objects_pointing_to_young'.
            # All nursery objects they reference are copied out of the
            # nursery, and again added to 'old_objects_pointing_to_young'.
            # All young raw-malloced object found are flagged
            # GCFLAG_VISITED_RMY.
            # We proceed until 'old_objects_pointing_to_young' is empty.
            self.collect_oldrefs_to_nursery()
            #
            # We have to loop back if collect_oldrefs_to_nursery caused
            # new objects to show up in old_objects_with_cards_set
            if self.card_page_indices > 0:
                if self.old_objects_with_cards_set.non_empty():
                    continue
            break
        #
        # Now all live nursery objects should be out.  Update the young
        # weakrefs' targets.
        if self.young_objects_with_weakrefs.non_empty():
            self.invalidate_young_weakrefs()
        if self.young_objects_with_destructors.non_empty():
            self.deal_with_young_objects_with_destructors()
        #
        # Clear this mapping.  Without pinned objects we just clear the dict
        # as all objects in the nursery are dragged out of the nursery and, if
        # needed, into their shadow.  However, if we have pinned objects we have
        # to check if those pinned object have a shadow and keep a dictionary
        # filled with shadow information for them as they stay in the nursery.
        if self.nursery_objects_shadows.length() > 0:
            if self.surviving_pinned_objects.non_empty():
                new_shadows = self.AddressDict()
                self.surviving_pinned_objects.foreach(
                    self.record_pinned_object_with_shadow, new_shadows)
                self.nursery_objects_shadows.delete()
                self.nursery_objects_shadows = new_shadows
            else:
                self.nursery_objects_shadows.clear()
        #
        # visit the P and O lists from rawrefcount, if enabled.
        if self.rrc_enabled:
            self.rrc_minor_collection_free()
        #
        # Walk the list of young raw-malloced objects, and either free
        # them or make them old.
        if self.young_rawmalloced_objects:
            self.free_young_rawmalloced_objects()
        #
        # All live nursery objects are out of the nursery or pinned inside
        # the nursery.  Create nursery barriers to protect the pinned objects,
        # fill the rest of the nursery with zeros and reset the current nursery
        # pointer.
        size_gc_header = self.gcheaderbuilder.size_gc_header
        nursery_barriers = self.AddressDeque()
        prev = self.nursery
        self.surviving_pinned_objects.sort()
        ll_assert(
            self.pinned_objects_in_nursery == \
            self.surviving_pinned_objects.length(),
            "pinned_objects_in_nursery != surviving_pinned_objects.length()")
        while self.surviving_pinned_objects.non_empty():
            #
            cur = self.surviving_pinned_objects.pop()
            ll_assert(
                cur >= prev, "pinned objects encountered in backwards order")
            #
            # clear the arena between the last pinned object (or arena start)
            # and the pinned object
            pinned_obj_size = llarena.getfakearenaaddress(cur) - prev
            if self.gc_nursery_debug:
                llarena.arena_reset(prev, pinned_obj_size, 3)
            else:
                llarena.arena_reset(prev, pinned_obj_size, 0)
            #
            # clean up object's flags
            obj = cur + size_gc_header
            self.header(obj).tid &= ~GCFLAG_VISITED
            #
            # create a new nursery barrier for the pinned object
            nursery_barriers.append(cur)
            #
            # update 'prev' to the end of the 'cur' object
            prev = prev + pinned_obj_size + \
                   (size_gc_header + self.get_size(obj))
        #
        # reset everything after the last pinned object till the end of the arena
        if self.gc_nursery_debug:
            llarena.arena_reset(prev, self.nursery + self.nursery_size - prev, 3)
            if not nursery_barriers.non_empty():   # no pinned objects
                self.debug_rotate_nursery()
        else:
            llarena.arena_reset(prev, self.nursery + self.nursery_size - prev, 0)
        #
        # always add the end of the nursery to the list
        nursery_barriers.append(self.nursery + self.nursery_size)
        #
        self.nursery_barriers = nursery_barriers
        self.surviving_pinned_objects.delete()
        #
        self.nursery_free = self.nursery
        self.nursery_top = self.nursery_barriers.popleft()
        #
        # clear GCFLAG_PINNED_OBJECT_PARENT_KNOWN from all parents in the list.
        self.old_objects_pointing_to_pinned.foreach(
                self._reset_flag_old_objects_pointing_to_pinned, None)
        #
        # Accounting: 'nursery_surviving_size' is the size of objects
        # from the nursery that we just moved out.
        self.size_objects_made_old += r_uint(self.nursery_surviving_size)
        #
        debug_print("minor collect, total memory used:",
                    self.get_total_memory_used())
        debug_print("number of pinned objects:",
                    self.pinned_objects_in_nursery)
        if self.DEBUG >= 2:
            self.debug_check_consistency()     # expensive!
        #
        self.root_walker.finished_minor_collection()
        #
        debug_stop("gc-minor")
    def _reset_flag_old_objects_pointing_to_pinned(self, obj, ignore):
        # Foreach-callback: clear GCFLAG_PINNED_OBJECT_PARENT_KNOWN on
        # 'obj', which is required to currently carry the flag.
        ll_assert(self.header(obj).tid & GCFLAG_PINNED_OBJECT_PARENT_KNOWN != 0,
                  "!GCFLAG_PINNED_OBJECT_PARENT_KNOWN, but requested to reset.")
        self.header(obj).tid &= ~GCFLAG_PINNED_OBJECT_PARENT_KNOWN
    def _visit_old_objects_pointing_to_pinned(self, obj, ignore):
        # Foreach-callback: fully trace 'obj', dragging out of the nursery
        # any young object it references ('obj' itself acts as the parent).
        self.trace(obj, self._trace_drag_out, obj)
    def collect_roots_in_nursery(self, any_pinned_object_from_earlier):
        """Walk the roots and drag any young object they reference out
        of the nursery.

        'any_pinned_object_from_earlier' is True when the nursery still
        contains pinned objects that survived a previous minor collection;
        in that case the jit-frame-stopper optimization must be disabled
        (see the subtlety note below).
        """
        # we don't need to trace prebuilt GcStructs during a minor collect:
        # if a prebuilt GcStruct contains a pointer to a young object,
        # then the write_barrier must have ensured that the prebuilt
        # GcStruct is in the list self.old_objects_pointing_to_young.
        debug_start("gc-minor-walkroots")
        if self.gc_state == STATE_MARKING:
            callback = IncrementalMiniMarkGC._trace_drag_out1_marking_phase
        else:
            callback = IncrementalMiniMarkGC._trace_drag_out1
        #
        # Note a subtlety: if the nursery contains pinned objects "from
        # earlier", i.e. created earlier than the previous minor
        # collection, then we can't use the "is_minor=True" optimization.
        # We really need to walk the complete stack to be sure we still
        # see them.
        use_jit_frame_stoppers = not any_pinned_object_from_earlier
        #
        self.root_walker.walk_roots(
            callback,     # stack roots
            callback,     # static in prebuilt non-gc
            None,         # static in prebuilt gc
            is_minor=use_jit_frame_stoppers)
        debug_stop("gc-minor-walkroots")
    def collect_cardrefs_to_nursery(self):
        """Partially trace the arrays in 'old_objects_with_cards_set':
        only the array pages whose card bit is set are traced, and all
        card bits are reset afterwards."""
        size_gc_header = self.gcheaderbuilder.size_gc_header
        oldlist = self.old_objects_with_cards_set
        while oldlist.non_empty():
            obj = oldlist.pop()
            #
            # Remove the GCFLAG_CARDS_SET flag.
            ll_assert(self.header(obj).tid & GCFLAG_CARDS_SET != 0,
                "!GCFLAG_CARDS_SET but object in 'old_objects_with_cards_set'")
            self.header(obj).tid &= ~GCFLAG_CARDS_SET
            #
            # Get the number of card marker bytes in the header.
            typeid = self.get_type_id(obj)
            offset_to_length = self.varsize_offset_to_length(typeid)
            length = (obj + offset_to_length).signed[0]
            bytes = self.card_marking_bytes_for_length(length)
            # 'p' starts at the GC header; the card bytes are walked by
            # decrementing it (they are stored backwards in front).
            p = llarena.getfakearenaaddress(obj - size_gc_header)
            #
            # If the object doesn't have GCFLAG_TRACK_YOUNG_PTRS, then it
            # means that it is in 'old_objects_pointing_to_young' and
            # will be fully traced by collect_oldrefs_to_nursery() just
            # afterwards.
            if self.header(obj).tid & GCFLAG_TRACK_YOUNG_PTRS == 0:
                #
                # In that case, we just have to reset all card bits.
                while bytes > 0:
                    p -= 1
                    p.char[0] = '\x00'
                    bytes -= 1
                #
            else:
                # Walk the bytes encoding the card marker bits, and for
                # each bit set, call trace_and_drag_out_of_nursery_partial().
                interval_start = 0
                while bytes > 0:
                    p -= 1
                    cardbyte = ord(p.char[0])
                    p.char[0] = '\x00'    # reset the bits
                    bytes -= 1
                    next_byte_start = interval_start + 8*self.card_page_indices
                    #
                    while cardbyte != 0:
                        interval_stop = interval_start + self.card_page_indices
                        #
                        if cardbyte & 1:
                            # clamp the last interval to the array length
                            if interval_stop > length:
                                interval_stop = length
                                ll_assert(cardbyte <= 1 and bytes == 0,
                                          "premature end of object")
                            self.trace_and_drag_out_of_nursery_partial(
                                obj, interval_start, interval_stop)
                        #
                        interval_start = interval_stop
                        cardbyte >>= 1
                    interval_start = next_byte_start
            #
            # If we're incrementally marking right now, sorry, we also
            # need to add the object to 'more_objects_to_trace' and have
            # it fully traced once at the end of the current marking phase.
            ll_assert(not self.is_in_nursery(obj),
                      "expected nursery obj in collect_cardrefs_to_nursery")
            if self.gc_state == STATE_MARKING:
                self.header(obj).tid &= ~GCFLAG_VISITED
                self.more_objects_to_trace.append(obj)
    def collect_oldrefs_to_nursery(self):
        # Follow the old_objects_pointing_to_young list and move the
        # young objects they point to out of the nursery.  Loops until
        # the list is drained (tracing may append new entries to it).
        oldlist = self.old_objects_pointing_to_young
        while oldlist.non_empty():
            obj = oldlist.pop()
            #
            # Check that the flags are correct: we must not have
            # GCFLAG_TRACK_YOUNG_PTRS so far.
            ll_assert(self.header(obj).tid & GCFLAG_TRACK_YOUNG_PTRS == 0,
                      "old_objects_pointing_to_young contains obj with "
                      "GCFLAG_TRACK_YOUNG_PTRS")
            #
            # Add the flag GCFLAG_TRACK_YOUNG_PTRS.  All live objects should
            # have this flag set after a nursery collection.
            self.header(obj).tid |= GCFLAG_TRACK_YOUNG_PTRS
            #
            # Trace the 'obj' to replace pointers to nursery with pointers
            # outside the nursery, possibly forcing nursery objects out
            # and adding them to 'old_objects_pointing_to_young' as well.
            self.trace_and_drag_out_of_nursery(obj)
    def trace_and_drag_out_of_nursery(self, obj):
        """obj must not be in the nursery.  This copies all the
        young objects it references out of the nursery.
        """
        # 'obj' is passed both as the object to trace and as the 'parent'
        # argument of _trace_drag_out (needed for pinned-object tracking).
        self.trace(obj, self._trace_drag_out, obj)
    def trace_and_drag_out_of_nursery_partial(self, obj, start, stop):
        """Like trace_and_drag_out_of_nursery(), but limited to the array
        indices in range(start, stop).
        """
        ll_assert(start < stop, "empty or negative range "
                                "in trace_and_drag_out_of_nursery_partial()")
        #print 'trace_partial:', start, stop, '\t', obj
        self.trace_partial(obj, start, stop, self._trace_drag_out, obj)
    def _trace_drag_out1(self, root):
        # Root-walking callback (outside the marking phase): there is no
        # parent object, so pass NULL.
        self._trace_drag_out(root, llmemory.NULL)
    def _trace_drag_out1_marking_phase(self, root):
        # Root-walking callback used while gc_state == STATE_MARKING.
        self._trace_drag_out(root, llmemory.NULL)
        #
        # We are in the MARKING state: we must also record this object
        # if it was young.  Don't bother with old objects in general,
        # as they are anyway added to 'more_objects_to_trace' if they
        # are modified (see _add_to_more_objects_to_trace).  But we do
        # need to record the not-visited-yet (white) old objects.  So
        # as a conservative approximation, we need to add the object to
        # the list if and only if it doesn't have GCFLAG_VISITED yet.
        #
        # Additionally, ignore pinned objects.
        #
        # note: _trace_drag_out() above may have updated root.address[0]
        # to point to the moved copy of the object.
        obj = root.address[0]
        if (self.header(obj).tid & (GCFLAG_VISITED | GCFLAG_PINNED)) == 0:
            self.more_objects_to_trace.append(obj)
    def _trace_drag_out(self, root, parent):
        """Core copying step of the minor collection.

        'root' is the address of a pointer; if the pointed-to object is
        young, move it out of the nursery (or mark it, for pinned and
        raw-malloced objects) and update the pointer in place.  'parent'
        is the object containing 'root', or NULL for stack roots; it is
        only needed to track parents of pinned objects.
        """
        obj = root.address[0]
        #print '_trace_drag_out(%x: %r)' % (hash(obj.ptr._obj), obj)
        #
        # If 'obj' is not in the nursery, nothing to change -- expect
        # that we must set GCFLAG_VISITED_RMY on young raw-malloced objects.
        if not self.is_in_nursery(obj):
            # cache usage trade-off: I think that it is a better idea to
            # check if 'obj' is in young_rawmalloced_objects with an access
            # to this (small) dictionary, rather than risk a lot of cache
            # misses by reading a flag in the header of all the 'objs' that
            # arrive here.
            if (bool(self.young_rawmalloced_objects)
                and self.young_rawmalloced_objects.contains(obj)):
                self._visit_young_rawmalloced_object(obj)
            return
        #
        size_gc_header = self.gcheaderbuilder.size_gc_header
        if self.header(obj).tid & (GCFLAG_HAS_SHADOW | GCFLAG_PINNED) == 0:
            #
            # Common case: 'obj' was not already forwarded (otherwise
            # tid == -42, containing all flags), and it doesn't have the
            # HAS_SHADOW flag either.  We must move it out of the nursery,
            # into a new nonmovable location.
            totalsize = size_gc_header + self.get_size(obj)
            self.nursery_surviving_size += raw_malloc_usage(totalsize)
            newhdr = self._malloc_out_of_nursery(totalsize)
            #
        elif self.is_forwarded(obj):
            #
            # 'obj' was already forwarded.  Change the original reference
            # to point to its forwarding address, and we're done.
            root.address[0] = self.get_forwarding_address(obj)
            return
            #
        elif self._is_pinned(obj):
            hdr = self.header(obj)
            #
            # track parent of pinned object specially.  This must be done
            # before checking for GCFLAG_VISITED: it may be that the same
            # pinned object is reachable from multiple sources (e.g. two old
            # objects pointing to the same pinned object).  In such a case we
            # need all parents of the pinned object in the list.  Otherwise
            # the pinned object could become dead and be removed just because
            # the first parent of it is dead and collected.
            if parent != llmemory.NULL and \
                not self.header(parent).tid & GCFLAG_PINNED_OBJECT_PARENT_KNOWN:
                #
                self.old_objects_pointing_to_pinned.append(parent)
                self.updated_old_objects_pointing_to_pinned = True
                self.header(parent).tid |= GCFLAG_PINNED_OBJECT_PARENT_KNOWN
            #
            if hdr.tid & GCFLAG_VISITED:
                return
            #
            hdr.tid |= GCFLAG_VISITED
            #
            # the stack stores addresses before the GC header, hence the
            # subtraction of size_gc_header
            self.surviving_pinned_objects.append(
                llarena.getfakearenaaddress(obj - size_gc_header))
            self.pinned_objects_in_nursery += 1
            self.any_pinned_object_kept = True
            return
        else:
            # First visit to an object that has already a shadow.
            newobj = self.nursery_objects_shadows.get(obj)
            ll_assert(newobj != llmemory.NULL, "GCFLAG_HAS_SHADOW but no shadow found")
            newhdr = newobj - size_gc_header
            #
            # Remove the flag GCFLAG_HAS_SHADOW, so that it doesn't get
            # copied to the shadow itself.
            self.header(obj).tid &= ~GCFLAG_HAS_SHADOW
            #
            totalsize = size_gc_header + self.get_size(obj)
            self.nursery_surviving_size += raw_malloc_usage(totalsize)
        #
        # Copy it.  Note that references to other objects in the
        # nursery are kept unchanged in this step.
        llmemory.raw_memcopy(obj - size_gc_header, newhdr, totalsize)
        #
        # Set the old object's tid to -42 (containing all flags) and
        # replace the old object's content with the target address.
        # A bit of no-ops to convince llarena that we are changing
        # the layout, in non-translated versions.
        typeid = self.get_type_id(obj)
        obj = llarena.getfakearenaaddress(obj)
        llarena.arena_reset(obj - size_gc_header, totalsize, 0)
        llarena.arena_reserve(obj - size_gc_header,
                              size_gc_header + llmemory.sizeof(FORWARDSTUB))
        self.header(obj).tid = -42
        newobj = newhdr + size_gc_header
        llmemory.cast_adr_to_ptr(obj, FORWARDSTUBPTR).forw = newobj
        #
        # Change the original pointer to this object.
        root.address[0] = newobj
        #
        # Add the newobj to the list 'old_objects_pointing_to_young',
        # because it can contain further pointers to other young objects.
        # We will fix such references to point to the copy of the young
        # objects when we walk 'old_objects_pointing_to_young'.
        if self.has_gcptr(typeid):
            # we only have to do it if we have any gcptrs
            self.old_objects_pointing_to_young.append(newobj)
    _trace_drag_out._always_inline_ = True
    def _visit_young_rawmalloced_object(self, obj):
        # 'obj' points to a young, raw-malloced object.
        # Any young rawmalloced object never seen by the code here
        # will end up without GCFLAG_VISITED_RMY, and be freed at the
        # end of the current minor collection.  Note that there was
        # a bug in which dying young arrays with card marks would
        # still be scanned before being freed, keeping a lot of
        # objects unnecessarily alive.
        hdr = self.header(obj)
        if hdr.tid & GCFLAG_VISITED_RMY:
            return      # already seen during this minor collection
        hdr.tid |= GCFLAG_VISITED_RMY
        #
        # Accounting
        size_gc_header = self.gcheaderbuilder.size_gc_header
        size = size_gc_header + self.get_size(obj)
        self.size_objects_made_old += r_uint(raw_malloc_usage(size))
        #
        # we just made 'obj' old, so we need to add it to the correct lists
        added_somewhere = False
        #
        if hdr.tid & GCFLAG_TRACK_YOUNG_PTRS == 0:
            self.old_objects_pointing_to_young.append(obj)
            added_somewhere = True
        #
        if hdr.tid & GCFLAG_HAS_CARDS != 0:
            ll_assert(hdr.tid & GCFLAG_CARDS_SET != 0,
                      "young array: GCFLAG_HAS_CARDS without GCFLAG_CARDS_SET")
            self.old_objects_with_cards_set.append(obj)
            added_somewhere = True
        #
        ll_assert(added_somewhere, "wrong flag combination on young array")
    def _malloc_out_of_nursery(self, totalsize):
        """Allocate non-movable memory for an object of the given
        'totalsize' that lives so far in the nursery."""
        if raw_malloc_usage(totalsize) <= self.small_request_threshold:
            # most common path: served by the ArenaCollection allocator
            return self.ac.malloc(totalsize)
        else:
            # for nursery objects that are not small
            return self._malloc_out_of_nursery_nonsmall(totalsize)
    _malloc_out_of_nursery._always_inline_ = True
    def _malloc_out_of_nursery_nonsmall(self, totalsize):
        # Raw-malloc a block for a nursery object too large for 'self.ac'.
        # Returns the address where the GC header goes; the object itself
        # lives at (result + size_gc_header), which is also the address
        # recorded in 'old_rawmalloced_objects'.
        # 'totalsize' should be aligned.
        ll_assert(raw_malloc_usage(totalsize) & (WORD-1) == 0,
                  "misaligned totalsize in _malloc_out_of_nursery_nonsmall")
        #
        arena = llarena.arena_malloc(raw_malloc_usage(totalsize), False)
        if not arena:
            out_of_memory("out of memory: couldn't allocate a few KB more")
        llarena.arena_reserve(arena, totalsize)
        #
        size_gc_header = self.gcheaderbuilder.size_gc_header
        self.rawmalloced_total_size += r_uint(raw_malloc_usage(totalsize))
        self.old_rawmalloced_objects.append(arena + size_gc_header)
        return arena
    def free_young_rawmalloced_objects(self):
        # Free every young raw-malloced object not flagged
        # GCFLAG_VISITED_RMY, then reset the set to empty.
        self.young_rawmalloced_objects.foreach(
            self._free_young_rawmalloced_obj, None)
        self.young_rawmalloced_objects.delete()
        self.young_rawmalloced_objects = self.null_address_dict()
    def _free_young_rawmalloced_obj(self, obj, ignored1, ignored2):
        # Foreach-callback over 'young_rawmalloced_objects'.
        # If 'obj' has GCFLAG_VISITED_RMY, it was seen by _trace_drag_out
        # and survives.  Otherwise, it dies.
        self.free_rawmalloced_object_if_unvisited(obj, GCFLAG_VISITED_RMY)
  1968. def remove_young_arrays_from_old_objects_pointing_to_young(self):
  1969. old = self.old_objects_pointing_to_young
  1970. new = self.AddressStack()
  1971. while old.non_empty():
  1972. obj = old.pop()
  1973. if not self.young_rawmalloced_objects.contains(obj):
  1974. new.append(obj)
  1975. # an extra copy, to avoid assignments to
  1976. # 'self.old_objects_pointing_to_young'
  1977. while new.non_empty():
  1978. old.append(new.pop())
  1979. new.delete()
    def _add_to_more_objects_to_trace(self, obj, ignored):
        # Schedule 'obj' (which must be outside the nursery) to be traced
        # again: clear GCFLAG_VISITED (black -> gray) and queue it.
        ll_assert(not self.is_in_nursery(obj), "unexpected nursery obj here")
        self.header(obj).tid &= ~GCFLAG_VISITED
        self.more_objects_to_trace.append(obj)
  1984. def _add_to_more_objects_to_trace_if_black(self, obj, ignored):
  1985. if self.header(obj).tid & GCFLAG_VISITED:
  1986. self._add_to_more_objects_to_trace(obj, ignored)
    def minor_and_major_collection(self):
        """Finish any major GC cycle in progress, then run one complete
        major collection cycle (with minor collections interleaved)."""
        # First, finish the current major gc, if there is one in progress.
        # This is a no-op if the gc_state is already STATE_SCANNING.
        self.gc_step_until(STATE_SCANNING)
        #
        # Then do a complete collection again.
        self.gc_step_until(STATE_MARKING)
        self.gc_step_until(STATE_SCANNING)
    def gc_step_until(self, state):
        """Run (minor collection + one major collection step) pairs until
        'gc_state' reaches the requested target state."""
        while self.gc_state != state:
            self._minor_collection()
            self.major_collection_step()

    debug_gc_step_until = gc_step_until   # xxx
  2000. def debug_gc_step(self, n=1):
  2001. while n > 0:
  2002. self._minor_collection()
  2003. self.major_collection_step()
  2004. n -= 1
    # Note - minor collections seem fast enough so that one
    # is done before every major collection step
    def major_collection_step(self, reserving_size=0):
        """Run one bounded step of the incremental major collection.

        The major collection is a state machine:
        STATE_SCANNING (collect the roots) -> STATE_MARKING (trace the
        heap incrementally) -> STATE_SWEEPING (free unmarked objects
        incrementally) -> STATE_FINALIZING (queue finalizers) -> back
        to STATE_SCANNING.  Each call advances at most one state,
        doing a bounded amount of work in MARKING and SWEEPING.
        'reserving_size' is the size of the allocation that triggered
        this step, used when recomputing the next collection threshold.
        May raise MemoryError when the max heap size is exceeded.
        """
        debug_start("gc-collect-step")
        debug_print("starting gc state: ", GC_STATES[self.gc_state])
        # Debugging checks
        if self.pinned_objects_in_nursery == 0:
            ll_assert(self.nursery_free == self.nursery,
                      "nursery not empty in major_collection_step()")
        else:
            # XXX try to add some similar check to the above one for the case
            # that the nursery still contains some pinned objects (groggi)
            pass
        self.debug_check_consistency()

        #
        # 'threshold_objects_made_old', is used inside comparisons
        # with 'size_objects_made_old' to know when we must do
        # several major GC steps (i.e. several consecurive calls
        # to the present function).  Here is the target that
        # we try to aim to: either (A1) or (A2)
        #
        #  (A1)  gc_state == STATE_SCANNING   (i.e. major GC cycle ended)
        #  (A2)  size_objects_made_old <= threshold_objects_made_old
        #
        # Every call to major_collection_step() adds nursery_size//2
        # to 'threshold_objects_made_old'.
        # In the common case, this is larger than the size of all
        # objects that survive a minor collection.  After a few
        # minor collections (each followed by one call to
        # major_collection_step()) the threshold is much higher than
        # the 'size_objects_made_old', making the target invariant (A2)
        # true by a large margin.
        #
        # However there are less common cases:
        #
        # * if more than half of the nursery consistently survives:
        #   then we need two calls to major_collection_step() after
        #   some minor collection;
        #
        # * or if we're allocating a large number of bytes in
        #   external_malloc() and some of them survive the following
        #   minor collection.  In that case, more than two major
        #   collection steps must be done immediately, until we
        #   restore the target invariant (A2).
        #
        self.threshold_objects_made_old += r_uint(self.nursery_size // 2)

        if self.gc_state == STATE_SCANNING:
            # starting a major GC cycle: reset these two counters
            self.size_objects_made_old = r_uint(0)
            self.threshold_objects_made_old = r_uint(self.nursery_size // 2)

            self.objects_to_trace = self.AddressStack()
            self.collect_roots()
            self.gc_state = STATE_MARKING
            self.more_objects_to_trace = self.AddressStack()
            #END SCANNING

        elif self.gc_state == STATE_MARKING:
            debug_print("number of objects to mark",
                        self.objects_to_trace.length(),
                        "plus",
                        self.more_objects_to_trace.length())
            # The marking budget: at least 'gc_increment_step' bytes,
            # but never less than twice what survived the last minor
            # collection (so marking always outpaces promotion).
            estimate = self.gc_increment_step
            estimate_from_nursery = self.nursery_surviving_size * 2
            if estimate_from_nursery > estimate:
                estimate = estimate_from_nursery
            estimate = intmask(estimate)
            remaining = self.visit_all_objects_step(estimate)
            #
            if remaining >= estimate // 2:
                if self.more_objects_to_trace.non_empty():
                    # We consumed less than 1/2 of our step's time, and
                    # there are more objects added during the marking steps
                    # of this major collection.  Visit them all now.
                    # The idea is to ensure termination at the cost of some
                    # incrementality, in theory.
                    swap = self.objects_to_trace
                    self.objects_to_trace = self.more_objects_to_trace
                    self.more_objects_to_trace = swap
                    self.visit_all_objects()

            # XXX A simplifying assumption that should be checked,
            # finalizers/weak references are rare and short which means that
            # they do not need a separate state and do not need to be
            # made incremental.
            # For now, the same applies to rawrefcount'ed objects.
            if (not self.objects_to_trace.non_empty() and
                not self.more_objects_to_trace.non_empty()):
                #
                # First, 'prebuilt_root_objects' might have grown since
                # we scanned it in collect_roots() (rare case).  Rescan.
                self.collect_nonstack_roots()
                self.visit_all_objects()
                #
                if self.rrc_enabled:
                    self.rrc_major_collection_trace()
                #
                ll_assert(not (self.probably_young_objects_with_finalizers
                               .non_empty()),
                    "probably_young_objects_with_finalizers should be empty")
                if self.old_objects_with_finalizers.non_empty():
                    self.deal_with_objects_with_finalizers()
                elif self.old_objects_with_weakrefs.non_empty():
                    # Weakref support: clear the weak pointers to dying objects
                    # (if we call deal_with_objects_with_finalizers(), it will
                    # invoke invalidate_old_weakrefs() itself directly)
                    self.invalidate_old_weakrefs()

                ll_assert(not self.objects_to_trace.non_empty(),
                          "objects_to_trace should be empty")
                ll_assert(not self.more_objects_to_trace.non_empty(),
                          "more_objects_to_trace should be empty")
                self.objects_to_trace.delete()
                self.more_objects_to_trace.delete()

                #
                # Destructors
                if self.old_objects_with_destructors.non_empty():
                    self.deal_with_old_objects_with_destructors()
                # objects_to_trace processed fully, can move on to sweeping
                self.ac.mass_free_prepare()
                self.start_free_rawmalloc_objects()
                #
                # get rid of objects pointing to pinned objects that were not
                # visited
                if self.old_objects_pointing_to_pinned.non_empty():
                    new_old_objects_pointing_to_pinned = self.AddressStack()
                    self.old_objects_pointing_to_pinned.foreach(
                            self._sweep_old_objects_pointing_to_pinned,
                            new_old_objects_pointing_to_pinned)
                    self.old_objects_pointing_to_pinned.delete()
                    self.old_objects_pointing_to_pinned = \
                            new_old_objects_pointing_to_pinned
                    self.updated_old_objects_pointing_to_pinned = True
                #
                if self.rrc_enabled:
                    self.rrc_major_collection_free()
                #
                self.gc_state = STATE_SWEEPING
            #END MARKING

        elif self.gc_state == STATE_SWEEPING:
            #
            if self.raw_malloc_might_sweep.non_empty():
                # Walk all rawmalloced objects and free the ones that don't
                # have the GCFLAG_VISITED flag.  Visit at most 'limit' objects.
                # This limit is conservatively high enough to guarantee that
                # a total object size of at least '3 * nursery_size' bytes
                # is processed.
                limit = 3 * self.nursery_size // self.small_request_threshold
                self.free_unvisited_rawmalloc_objects_step(limit)
                done = False    # the 2nd half below must still be done
            else:
                # Ask the ArenaCollection to visit a fraction of the objects.
                # Free the ones that have not been visited above, and reset
                # GCFLAG_VISITED on the others.  Visit at most '3 *
                # nursery_size' bytes.
                limit = 3 * self.nursery_size // self.ac.page_size
                done = self.ac.mass_free_incremental(self._free_if_unvisited,
                                                     limit)
            # XXX tweak the limits above
            #
            if done:
                self.num_major_collects += 1
                #
                # We also need to reset the GCFLAG_VISITED on prebuilt GC objects.
                self.prebuilt_root_objects.foreach(self._reset_gcflag_visited, None)
                #
                # Set the threshold for the next major collection to be when we
                # have allocated 'major_collection_threshold' times more than
                # we currently have -- but no more than 'max_delta' more than
                # we currently have.
                total_memory_used = float(self.get_total_memory_used())
                bounded = self.set_major_threshold_from(
                    min(total_memory_used * self.major_collection_threshold,
                        total_memory_used + self.max_delta),
                    reserving_size)
                #
                # Max heap size: gives an upper bound on the threshold.  If we
                # already have at least this much allocated, raise MemoryError.
                if bounded and self.threshold_reached(reserving_size):
                    #
                    # First raise MemoryError, giving the program a chance to
                    # quit cleanly.  It might still allocate in the nursery,
                    # which might eventually be emptied, triggering another
                    # major collect and (possibly) reaching here again with an
                    # even higher memory consumption.  To prevent it, if it's
                    # the second time we are here, then abort the program.
                    if self.max_heap_size_already_raised:
                        out_of_memory("using too much memory, aborting")
                    self.max_heap_size_already_raised = True
                    self.gc_state = STATE_SCANNING
                    raise MemoryError

                self.gc_state = STATE_FINALIZING
            # FINALIZING not yet incrementalised
            # but it seems safe to allow mutator to run after sweeping and
            # before finalizers are called. This is because run_finalizers
            # is a different list to objects_with_finalizers.
            # END SWEEPING

        elif self.gc_state == STATE_FINALIZING:
            # XXX This is considered rare,
            # so should we make the calling incremental? or leave as is

            # Must be ready to start another scan
            # just in case finalizer calls collect again.
            self.gc_state = STATE_SCANNING

            self.execute_finalizers()
            #END FINALIZING
        else:
            pass #XXX which exception to raise here. Should be unreachable.

        debug_print("stopping, now in gc state: ", GC_STATES[self.gc_state])
        debug_stop("gc-collect-step")
  2210. def _sweep_old_objects_pointing_to_pinned(self, obj, new_list):
  2211. if self.header(obj).tid & GCFLAG_VISITED:
  2212. new_list.append(obj)
  2213. def _free_if_unvisited(self, hdr):
  2214. size_gc_header = self.gcheaderbuilder.size_gc_header
  2215. obj = hdr + size_gc_header
  2216. if self.header(obj).tid & GCFLAG_VISITED:
  2217. self.header(obj).tid &= ~GCFLAG_VISITED
  2218. return False # survives
  2219. return True # dies
    def _reset_gcflag_visited(self, obj, ignored):
        # foreach() callback: clear the mark bit left on 'obj' by the
        # major collection.
        self.header(obj).tid &= ~GCFLAG_VISITED
    def free_rawmalloced_object_if_unvisited(self, obj, check_flag):
        # If the raw-malloced object 'obj' carries 'check_flag'
        # (GCFLAG_VISITED or GCFLAG_VISITED_RMY), it survives: clear the
        # flag and keep it in 'old_rawmalloced_objects'.  Otherwise free
        # its arena, including the card marker area preceding it, if any.
        if self.header(obj).tid & check_flag:
            self.header(obj).tid &= ~check_flag   # survives
            self.old_rawmalloced_objects.append(obj)
        else:
            size_gc_header = self.gcheaderbuilder.size_gc_header
            totalsize = size_gc_header + self.get_size(obj)
            allocsize = raw_malloc_usage(totalsize)
            arena = llarena.getfakearenaaddress(obj - size_gc_header)
            #
            # Must also include the card marker area, if any
            if (self.card_page_indices > 0    # <- this is constant-folded
                and self.header(obj).tid & GCFLAG_HAS_CARDS):
                #
                # Get the length and compute the number of extra bytes
                typeid = self.get_type_id(obj)
                ll_assert(self.has_gcptr_in_varsize(typeid),
                          "GCFLAG_HAS_CARDS but not has_gcptr_in_varsize")
                offset_to_length = self.varsize_offset_to_length(typeid)
                length = (obj + offset_to_length).signed[0]
                extra_words = self.card_marking_words_for_length(length)
                # the card marks live *before* the object's header
                arena -= extra_words * WORD
                allocsize += extra_words * WORD
            #
            llarena.arena_free(arena)
            self.rawmalloced_total_size -= r_uint(allocsize)
  2248. def start_free_rawmalloc_objects(self):
  2249. ll_assert(not self.raw_malloc_might_sweep.non_empty(),
  2250. "raw_malloc_might_sweep must be empty")
  2251. swap = self.raw_malloc_might_sweep
  2252. self.raw_malloc_might_sweep = self.old_rawmalloced_objects
  2253. self.old_rawmalloced_objects = swap
  2254. # Returns true when finished processing objects
  2255. def free_unvisited_rawmalloc_objects_step(self, nobjects):
  2256. while self.raw_malloc_might_sweep.non_empty() and nobjects > 0:
  2257. obj = self.raw_malloc_might_sweep.pop()
  2258. self.free_rawmalloced_object_if_unvisited(obj, GCFLAG_VISITED)
  2259. nobjects -= 1
  2260. return nobjects
    def collect_nonstack_roots(self):
        # Push every non-stack root onto 'objects_to_trace'.
        # Non-stack roots: first, the objects from 'prebuilt_root_objects'
        self.prebuilt_root_objects.foreach(self._collect_obj, None)
        #
        # Add the roots from static prebuilt non-gc structures
        self.root_walker.walk_roots(
            None,
            IncrementalMiniMarkGC._collect_ref_stk,
            None)   # we don't need the static in all prebuilt gc objects
        #
        # If we are in an inner collection caused by a call to a finalizer,
        # the 'run_finalizers' objects also need to be kept alive.
        self.enum_pending_finalizers(self._collect_obj, None)
    def collect_roots(self):
        # Collect all roots.  Starts from the non-stack roots.
        self.collect_nonstack_roots()
        #
        # Add the stack roots.
        self.root_walker.walk_roots(
            IncrementalMiniMarkGC._collect_ref_stk, # stack roots
            None,
            None)
    def enumerate_all_roots(self, callback, arg):
        # Invoke 'callback(root, arg)' on every root: first the prebuilt
        # root objects, then whatever the base class enumerates.
        self.prebuilt_root_objects.foreach(callback, arg)
        MovingGCBase.enumerate_all_roots(self, callback, arg)
    # specialize on the callback so the annotator sees each call site
    enumerate_all_roots._annspecialcase_ = 'specialize:arg(1)'
    def _collect_obj(self, obj, ignored):
        # Schedule 'obj' (a root) for tracing by the major collection.
        # Ignore pinned objects, which are the ones still in the nursery here.
        # Cache effects: don't read any flag out of 'obj' at this point.
        # But only checking if it is in the nursery or not is fine.
        llop.debug_nonnull_pointer(lltype.Void, obj)
        if not self.is_in_nursery(obj):
            self.objects_to_trace.append(obj)
        else:
            # A pinned object can be found here. Such an object is handled
            # by minor collections and shouldn't be specially handled by
            # major collections. Therefore we only add non-pinned objects
            # to the 'objects_to_trace' list.
            ll_assert(self._is_pinned(obj),
                      "non-pinned nursery obj in _collect_obj")
    _collect_obj._always_inline_ = True
    def _collect_ref_stk(self, root):
        # Root-walker callback: 'root' is the address of a GC pointer
        # found on the stack (or in static structures).
        self._collect_obj(root.address[0], None)

    def _collect_ref_rec(self, root, ignored):
        # Trace callback used while visiting objects during marking.
        self._collect_obj(root.address[0], None)
    def visit_all_objects(self):
        # Non-incremental tracing: keep stepping with an unlimited
        # budget until 'objects_to_trace' is completely drained.
        while self.objects_to_trace.non_empty():
            self.visit_all_objects_step(sys.maxint)

    TEST_VISIT_SINGLE_STEP = False  # for tests: stop after one object
  2310. def visit_all_objects_step(self, size_to_track):
  2311. # Objects can be added to pending by visit
  2312. pending = self.objects_to_trace
  2313. while pending.non_empty():
  2314. obj = pending.pop()
  2315. size_to_track -= self.visit(obj)
  2316. if size_to_track < 0 or self.TEST_VISIT_SINGLE_STEP:
  2317. return 0
  2318. return size_to_track
    def visit(self, obj):
        # Mark and trace one live object; returns the number of bytes
        # processed (0 if 'obj' was already visited or is a
        # NO_HEAP_PTRS prebuilt object).
        #
        # 'obj' is a live object.  Check GCFLAG_VISITED to know if we
        # have already seen it before.
        #
        # Moreover, we can ignore prebuilt objects with GCFLAG_NO_HEAP_PTRS.
        # If they have this flag set, then they cannot point to heap
        # objects, so ignoring them is fine.  If they don't have this
        # flag set, then the object should be in 'prebuilt_root_objects',
        # and the GCFLAG_VISITED will be reset at the end of the
        # collection.
        # We shouldn't see an object with GCFLAG_PINNED here (the pinned
        # objects are never added to 'objects_to_trace').  The same-valued
        # flag GCFLAG_PINNED_OBJECT_PARENT_KNOWN is used during minor
        # collections and shouldn't be set here either.
        #
        hdr = self.header(obj)
        ll_assert((hdr.tid & GCFLAG_PINNED) == 0,
                  "pinned object in 'objects_to_trace'")
        ll_assert(not self.is_in_nursery(obj),
                  "nursery object in 'objects_to_trace'")
        if hdr.tid & (GCFLAG_VISITED | GCFLAG_NO_HEAP_PTRS):
            return 0
        #
        # It's the first time.  We set the flag VISITED.  The trick is
        # to also set TRACK_YOUNG_PTRS here, for the write barrier.
        hdr.tid |= GCFLAG_VISITED | GCFLAG_TRACK_YOUNG_PTRS

        if self.has_gcptr(llop.extract_ushort(llgroup.HALFWORD, hdr.tid)):
            #
            # Trace the content of the object and put all objects it references
            # into the 'objects_to_trace' list.
            self.trace(obj, self._collect_ref_rec, None)

        size_gc_header = self.gcheaderbuilder.size_gc_header
        totalsize = size_gc_header + self.get_size(obj)
        return raw_malloc_usage(totalsize)
    # ----------
    # id() and identityhash() support

    def _allocate_shadow(self, obj):
        # Allocate an out-of-nursery "shadow" for the nursery object
        # 'obj': the place where the object will be moved by the next
        # minor collection, so that its id()/identityhash stays stable.
        size_gc_header = self.gcheaderbuilder.size_gc_header
        size = self.get_size(obj)
        shadowhdr = self._malloc_out_of_nursery(size_gc_header +
                                                size)
        # Initialize the shadow enough to be considered a
        # valid gc object.  If the original object stays
        # alive at the next minor collection, it will anyway
        # be copied over the shadow and overwrite the
        # following fields.  But if the object dies, then
        # the shadow will stay around and only be freed at
        # the next major collection, at which point we want
        # it to look valid (but ready to be freed).
        shadow = shadowhdr + size_gc_header
        self.header(shadow).tid = self.header(obj).tid
        typeid = self.get_type_id(obj)
        if self.is_varsize(typeid):
            lenofs = self.varsize_offset_to_length(typeid)
            (shadow + lenofs).signed[0] = (obj + lenofs).signed[0]
        #
        self.header(obj).tid |= GCFLAG_HAS_SHADOW
        self.nursery_objects_shadows.setitem(obj, shadow)
        return shadow
  2379. def _find_shadow(self, obj):
  2380. #
  2381. # The object is not a tagged pointer, and it is still in the
  2382. # nursery. Find or allocate a "shadow" object, which is
  2383. # where the object will be moved by the next minor
  2384. # collection
  2385. if self.header(obj).tid & GCFLAG_HAS_SHADOW:
  2386. shadow = self.nursery_objects_shadows.get(obj)
  2387. ll_assert(shadow != llmemory.NULL,
  2388. "GCFLAG_HAS_SHADOW but no shadow found")
  2389. else:
  2390. shadow = self._allocate_shadow(obj)
  2391. #
  2392. # The answer is the address of the shadow.
  2393. return shadow
  2394. _find_shadow._dont_inline_ = True
    @specialize.arg(2)
    def id_or_identityhash(self, gcobj, is_hash):
        """Implement the common logic of id() and identityhash()
        of an object, given as a GCREF.
        """
        obj = llmemory.cast_ptr_to_adr(gcobj)
        #
        if self.is_valid_gc_object(obj):
            if self.is_in_nursery(obj):
                # use the address of the (future) out-of-nursery copy
                obj = self._find_shadow(obj)
            elif is_hash:
                if self.header(obj).tid & GCFLAG_HAS_SHADOW:
                    #
                    # For identityhash(), we need a special case for some
                    # prebuilt objects: their hash must be the same before
                    # and after translation.  It is stored as an extra word
                    # after the object.  But we cannot use it for id()
                    # because the stored value might clash with a real one.
                    size = self.get_size(obj)
                    i = (obj + size).signed[0]
                    # Important: the returned value is not mangle_hash()ed!
                    return i
        #
        i = llmemory.cast_adr_to_int(obj)
        if is_hash:
            i = mangle_hash(i)
        return i
    id_or_identityhash._always_inline_ = True
    def id(self, gcobj):
        # Return a stable identifier for 'gcobj'.
        return self.id_or_identityhash(gcobj, False)

    def identityhash(self, gcobj):
        # Return the identity hash of 'gcobj' (mangled, except for
        # prebuilt objects with a stored pre-translation hash).
        return self.id_or_identityhash(gcobj, True)
  2427. # ----------
  2428. # Finalizers
  2429. def deal_with_young_objects_with_destructors(self):
  2430. """We can reasonably assume that destructors don't do
  2431. anything fancy and *just* call them. Among other things
  2432. they won't resurrect objects
  2433. """
  2434. while self.young_objects_with_destructors.non_empty():
  2435. obj = self.young_objects_with_destructors.pop()
  2436. if not self.is_forwarded(obj):
  2437. self.call_destructor(obj)
  2438. else:
  2439. obj = self.get_forwarding_address(obj)
  2440. self.old_objects_with_destructors.append(obj)
  2441. def deal_with_old_objects_with_destructors(self):
  2442. """We can reasonably assume that destructors don't do
  2443. anything fancy and *just* call them. Among other things
  2444. they won't resurrect objects
  2445. """
  2446. new_objects = self.AddressStack()
  2447. while self.old_objects_with_destructors.non_empty():
  2448. obj = self.old_objects_with_destructors.pop()
  2449. if self.header(obj).tid & GCFLAG_VISITED:
  2450. # surviving
  2451. new_objects.append(obj)
  2452. else:
  2453. # dying
  2454. self.call_destructor(obj)
  2455. self.old_objects_with_destructors.delete()
  2456. self.old_objects_with_destructors = new_objects
    def deal_with_young_objects_with_finalizers(self):
        # Called during a minor collection: force every young object
        # registered with a finalizer to survive (dragging it out of
        # the nursery), and move the (obj, finalizer-queue) pairs to
        # 'old_objects_with_finalizers'.
        while self.probably_young_objects_with_finalizers.non_empty():
            obj = self.probably_young_objects_with_finalizers.popleft()
            fq_nr = self.probably_young_objects_with_finalizers.popleft()
            self.singleaddr.address[0] = obj
            self._trace_drag_out1(self.singleaddr)
            obj = self.singleaddr.address[0]   # possibly moved
            self.old_objects_with_finalizers.append(obj)
            self.old_objects_with_finalizers.append(fq_nr)
    def deal_with_objects_with_finalizers(self):
        # Walk over list of objects with finalizers.
        # If it is not surviving, add it to the list of to-be-called
        # finalizers and make it survive, to make the finalizer runnable.
        # We try to run the finalizers in a "reasonable" order, like
        # CPython does.  The details of this algorithm are in
        # pypy/doc/discussion/finalizer-order.txt.
        new_with_finalizer = self.AddressDeque()
        marked = self.AddressDeque()
        pending = self.AddressStack()
        self.tmpstack = self.AddressStack()
        while self.old_objects_with_finalizers.non_empty():
            x = self.old_objects_with_finalizers.popleft()
            fq_nr = self.old_objects_with_finalizers.popleft()
            ll_assert(self._finalization_state(x) != 1,
                      "bad finalization state 1")
            if self.header(x).tid & GCFLAG_VISITED:
                # already alive: keep the (obj, finalizer-queue) pair
                new_with_finalizer.append(x)
                new_with_finalizer.append(fq_nr)
                continue
            marked.append(x)
            marked.append(fq_nr)
            pending.append(x)
            while pending.non_empty():
                y = pending.pop()
                state = self._finalization_state(y)
                if state == 0:
                    self._bump_finalization_state_from_0_to_1(y)
                    self.trace(y, self._append_if_nonnull, pending)
                elif state == 2:
                    self._recursively_bump_finalization_state_from_2_to_3(y)
            self._recursively_bump_finalization_state_from_1_to_2(x)

        # Clear the weak pointers to dying objects.  Also clears them if
        # they point to objects which have the GCFLAG_FINALIZATION_ORDERING
        # bit set here.  These are objects which will be added to
        # run_finalizers().
        self.invalidate_old_weakrefs()

        while marked.non_empty():
            x = marked.popleft()
            fq_nr = marked.popleft()
            state = self._finalization_state(x)
            ll_assert(state >= 2, "unexpected finalization state < 2")
            if state == 2:
                from rpython.rtyper.lltypesystem import rffi
                fq_index = rffi.cast(lltype.Signed, fq_nr)
                self.mark_finalizer_to_run(fq_index, x)
                # we must also fix the state from 2 to 3 here, otherwise
                # we leave the GCFLAG_FINALIZATION_ORDERING bit behind
                # which will confuse the next collection
                self._recursively_bump_finalization_state_from_2_to_3(x)
            else:
                new_with_finalizer.append(x)
                new_with_finalizer.append(fq_nr)

        self.tmpstack.delete()
        pending.delete()
        marked.delete()
        self.old_objects_with_finalizers.delete()
        self.old_objects_with_finalizers = new_with_finalizer
    def _append_if_nonnull(pointer, stack):
        # Trace callback: push the address stored at 'pointer'.
        # NOTE(review): despite the name, no NULL check is performed
        # here -- presumably trace() only invokes the callback on
        # non-null pointers; confirm before relying on it.
        stack.append(pointer.address[0])
    _append_if_nonnull = staticmethod(_append_if_nonnull)
  2527. def _finalization_state(self, obj):
  2528. tid = self.header(obj).tid
  2529. if tid & GCFLAG_VISITED:
  2530. if tid & GCFLAG_FINALIZATION_ORDERING:
  2531. return 2
  2532. else:
  2533. return 3
  2534. else:
  2535. if tid & GCFLAG_FINALIZATION_ORDERING:
  2536. return 1
  2537. else:
  2538. return 0
    def _bump_finalization_state_from_0_to_1(self, obj):
        # State 0 -> 1: set GCFLAG_FINALIZATION_ORDERING on an object
        # that is not yet marked.
        ll_assert(self._finalization_state(obj) == 0,
                  "unexpected finalization state != 0")
        hdr = self.header(obj)
        hdr.tid |= GCFLAG_FINALIZATION_ORDERING
    def _recursively_bump_finalization_state_from_2_to_3(self, obj):
        # State 2 -> 3 for 'obj' and, transitively, for everything it
        # references that is also in state 2: clear the
        # GCFLAG_FINALIZATION_ORDERING bit.  Uses 'self.tmpstack' as
        # scratch space; it must be empty on entry and is empty on exit.
        ll_assert(self._finalization_state(obj) == 2,
                  "unexpected finalization state != 2")
        pending = self.tmpstack
        ll_assert(not pending.non_empty(), "tmpstack not empty")
        pending.append(obj)
        while pending.non_empty():
            y = pending.pop()
            hdr = self.header(y)
            if hdr.tid & GCFLAG_FINALIZATION_ORDERING:     # state 2 ?
                hdr.tid &= ~GCFLAG_FINALIZATION_ORDERING   # change to state 3
                self.trace(y, self._append_if_nonnull, pending)
    def _recursively_bump_finalization_state_from_1_to_2(self, obj):
        # recursively convert objects from state 1 to state 2.
        # The call to visit_all_objects() will add the GCFLAG_VISITED
        # recursively.
        ll_assert(not self.is_in_nursery(obj), "pinned finalizer object??")
        self.objects_to_trace.append(obj)
        self.visit_all_objects()
    # ----------
    # Weakrefs

    # XXX (groggi): weakref pointing to pinned object not supported.
    # XXX (groggi): missing asserts/checks for the missing feature.

    # The code relies on the fact that no weakref can be an old object
    # weakly pointing to a young object.  Indeed, weakrefs are immutable
    # so they cannot point to an object that was created after it.
    # Thanks to this, during a minor collection, we don't have to fix
    # or clear the address stored in old weakrefs.
    def invalidate_young_weakrefs(self):
        """Called during a nursery collection."""
        # walk over the list of objects that contain weakrefs and are in the
        # nursery.  if the object it references survives then update the
        # weakref; otherwise invalidate the weakref
        while self.young_objects_with_weakrefs.non_empty():
            obj = self.young_objects_with_weakrefs.pop()
            if not self.is_forwarded(obj):
                continue # weakref itself dies
            obj = self.get_forwarding_address(obj)
            offset = self.weakpointer_offset(self.get_type_id(obj))
            pointing_to = (obj + offset).address[0]
            if self.is_in_nursery(pointing_to):
                if self.is_forwarded(pointing_to):
                    # target survived and moved: update the weak pointer
                    (obj + offset).address[0] = self.get_forwarding_address(
                        pointing_to)
                else:
                    # If the target is pinned, then we reach this point too.
                    # It means that a hypothetical RPython interpreter that
                    # would let you take a weakref to a pinned object (strange
                    # thing not possible at all in PyPy) might see these
                    # weakrefs marked as dead too early.
                    (obj + offset).address[0] = llmemory.NULL
                    continue    # no need to remember this weakref any longer
            #
            elif (bool(self.young_rawmalloced_objects) and
                  self.young_rawmalloced_objects.contains(pointing_to)):
                # young weakref to a young raw-malloced object
                if self.header(pointing_to).tid & GCFLAG_VISITED_RMY:
                    pass    # survives, but does not move
                else:
                    (obj + offset).address[0] = llmemory.NULL
                    continue    # no need to remember this weakref any longer
            #
            elif self.header(pointing_to).tid & GCFLAG_NO_HEAP_PTRS:
                # see test_weakref_to_prebuilt: it's not useful to put
                # weakrefs into 'old_objects_with_weakrefs' if they point
                # to a prebuilt object (they are immortal).  If moreover
                # the 'pointing_to' prebuilt object still has the
                # GCFLAG_NO_HEAP_PTRS flag, then it's even wrong, because
                # 'pointing_to' will not get the GCFLAG_VISITED during
                # the next major collection.  Solve this by not registering
                # the weakref into 'old_objects_with_weakrefs'.
                continue
            #
            self.old_objects_with_weakrefs.append(obj)
    def invalidate_old_weakrefs(self):
        """Called during a major collection."""
        # walk over list of objects that contain weakrefs
        # if the object it references does not survive, invalidate the weakref
        new_with_weakref = self.AddressStack()
        while self.old_objects_with_weakrefs.non_empty():
            obj = self.old_objects_with_weakrefs.pop()
            if self.header(obj).tid & GCFLAG_VISITED == 0:
                continue # weakref itself dies
            offset = self.weakpointer_offset(self.get_type_id(obj))
            pointing_to = (obj + offset).address[0]
            ll_assert((self.header(pointing_to).tid & GCFLAG_NO_HEAP_PTRS)
                      == 0, "registered old weakref should not "
                            "point to a NO_HEAP_PTRS obj")
            tid = self.header(pointing_to).tid
            # the target must be marked AND not be kept alive only via
            # finalization ordering; otherwise the weakref is cleared
            if ((tid & (GCFLAG_VISITED | GCFLAG_FINALIZATION_ORDERING)) ==
                GCFLAG_VISITED):
                new_with_weakref.append(obj)
            else:
                (obj + offset).address[0] = llmemory.NULL
        self.old_objects_with_weakrefs.delete()
        self.old_objects_with_weakrefs = new_with_weakref
    # ----------
    # RawRefCount

    # Whether the rawrefcount subsystem was initialized; set to True by
    # rawrefcount_init().
    rrc_enabled = False

    # Raw array of addresses, without a length field.
    _ADDRARRAY = lltype.Array(llmemory.Address, hints={'nolength': True})
    # Minimal CPython-like object header: the refcount and the link
    # (stored as a Signed) back to the corresponding pypy object.
    PYOBJ_HDR = lltype.Struct('GCHdr_PyObject',
                              ('ob_refcnt', lltype.Signed),
                              ('ob_pypy_link', lltype.Signed))
    PYOBJ_HDR_PTR = lltype.Ptr(PYOBJ_HDR)
    # Type of the callback fired when there are pyobjects to deallocate.
    RAWREFCOUNT_DEALLOC_TRIGGER = lltype.Ptr(lltype.FuncType([], lltype.Void))
    def _pyobj(self, pyobjaddr):
        # Cast the raw address 'pyobjaddr' to a PYOBJ_HDR_PTR.
        return llmemory.cast_adr_to_ptr(pyobjaddr, self.PYOBJ_HDR_PTR)
    def rawrefcount_init(self, dealloc_trigger_callback):
        # Set up the rawrefcount bookkeeping structures (idempotent).
        # see pypy/doc/discussion/rawrefcount.rst
        if not self.rrc_enabled:
            self.rrc_p_list_young = self.AddressStack()
            self.rrc_p_list_old = self.AddressStack()
            self.rrc_o_list_young = self.AddressStack()
            self.rrc_o_list_old = self.AddressStack()
            self.rrc_p_dict = self.AddressDict()       # non-nursery keys only
            self.rrc_p_dict_nurs = self.AddressDict()  # nursery keys only
            self.rrc_dealloc_trigger_callback = dealloc_trigger_callback
            self.rrc_dealloc_pending = self.AddressStack()
            self.rrc_enabled = True
    def check_no_more_rawrefcount_state(self):
        "NOT_RPYTHON: for tests"
        # Assert that all rawrefcount lists are empty and the dicts
        # contain only NULL values.
        assert self.rrc_p_list_young.length() == 0
        assert self.rrc_p_list_old .length() == 0
        assert self.rrc_o_list_young.length() == 0
        assert self.rrc_o_list_old .length() == 0
        def check_value_is_null(key, value, ignore):
            assert value == llmemory.NULL
        self.rrc_p_dict.foreach(check_value_is_null, None)
        self.rrc_p_dict_nurs.foreach(check_value_is_null, None)
    def rawrefcount_create_link_pypy(self, gcobj, pyobject):
        # Create a "pypy-owned" link: remember 'pyobject' in the p_list
        # matching the age of 'gcobj', and record the pair in the
        # p_dict keyed by the gc object's address (separate dict for
        # nursery keys, since those addresses move).
        ll_assert(self.rrc_enabled, "rawrefcount.init not called")
        obj = llmemory.cast_ptr_to_adr(gcobj)
        objint = llmemory.cast_adr_to_int(obj, "symbolic")
        self._pyobj(pyobject).ob_pypy_link = objint
        #
        lst = self.rrc_p_list_young
        if self.is_in_nursery(obj):
            dct = self.rrc_p_dict_nurs
        else:
            dct = self.rrc_p_dict
            # out of the nursery but possibly still young (e.g. young
            # raw-malloced): only really-old objects go to the old list
            if not self.is_young_object(obj):
                lst = self.rrc_p_list_old
        lst.append(pyobject)
        dct.setitem(obj, pyobject)
  2688. def rawrefcount_create_link_pyobj(self, gcobj, pyobject):
  2689. ll_assert(self.rrc_enabled, "rawrefcount.init not called")
  2690. obj = llmemory.cast_ptr_to_adr(gcobj)
  2691. if self.is_young_object(obj):
  2692. self.rrc_o_list_young.append(pyobject)
  2693. else:
  2694. self.rrc_o_list_old.append(pyobject)
  2695. objint = llmemory.cast_adr_to_int(obj, "symbolic")
  2696. self._pyobj(pyobject).ob_pypy_link = objint
  2697. # there is no rrc_o_dict
  2698. def rawrefcount_from_obj(self, gcobj):
  2699. obj = llmemory.cast_ptr_to_adr(gcobj)
  2700. if self.is_in_nursery(obj):
  2701. dct = self.rrc_p_dict_nurs
  2702. else:
  2703. dct = self.rrc_p_dict
  2704. return dct.get(obj)
  2705. def rawrefcount_to_obj(self, pyobject):
  2706. obj = llmemory.cast_int_to_adr(self._pyobj(pyobject).ob_pypy_link)
  2707. return llmemory.cast_adr_to_ptr(obj, llmemory.GCREF)
  2708. def rawrefcount_next_dead(self):
  2709. if self.rrc_dealloc_pending.non_empty():
  2710. return self.rrc_dealloc_pending.pop()
  2711. return llmemory.NULL
  2712. def rrc_invoke_callback(self):
  2713. if self.rrc_enabled and self.rrc_dealloc_pending.non_empty():
  2714. self.rrc_dealloc_trigger_callback()
  2715. def rrc_minor_collection_trace(self):
  2716. length_estimate = self.rrc_p_dict_nurs.length()
  2717. self.rrc_p_dict_nurs.delete()
  2718. self.rrc_p_dict_nurs = self.AddressDict(length_estimate)
  2719. self.rrc_p_list_young.foreach(self._rrc_minor_trace,
  2720. self.singleaddr)
  2721. def _rrc_minor_trace(self, pyobject, singleaddr):
  2722. from rpython.rlib.rawrefcount import REFCNT_FROM_PYPY
  2723. from rpython.rlib.rawrefcount import REFCNT_FROM_PYPY_LIGHT
  2724. #
  2725. rc = self._pyobj(pyobject).ob_refcnt
  2726. if rc == REFCNT_FROM_PYPY or rc == REFCNT_FROM_PYPY_LIGHT:
  2727. pass # the corresponding object may die
  2728. else:
  2729. # force the corresponding object to be alive
  2730. intobj = self._pyobj(pyobject).ob_pypy_link
  2731. singleaddr.address[0] = llmemory.cast_int_to_adr(intobj)
  2732. self._trace_drag_out1(singleaddr)
  2733. def rrc_minor_collection_free(self):
  2734. ll_assert(self.rrc_p_dict_nurs.length() == 0, "p_dict_nurs not empty 1")
  2735. lst = self.rrc_p_list_young
  2736. while lst.non_empty():
  2737. self._rrc_minor_free(lst.pop(), self.rrc_p_list_old,
  2738. self.rrc_p_dict)
  2739. lst = self.rrc_o_list_young
  2740. no_o_dict = self.null_address_dict()
  2741. while lst.non_empty():
  2742. self._rrc_minor_free(lst.pop(), self.rrc_o_list_old,
  2743. no_o_dict)
    def _rrc_minor_free(self, pyobject, surviving_list, surviving_dict):
        # Handle one young pyobject after a minor collection: if the
        # linked gc object survived, update the link (it may have moved)
        # and keep 'pyobject' in 'surviving_list'/'surviving_dict';
        # otherwise free it with _rrc_free().  'surviving_dict' is NULL
        # for 'o'-kind pyobjects, which have no dict.
        intobj = self._pyobj(pyobject).ob_pypy_link
        obj = llmemory.cast_int_to_adr(intobj)
        if self.is_in_nursery(obj):
            if self.is_forwarded(obj):
                # Common case: survives and moves
                obj = self.get_forwarding_address(obj)
                intobj = llmemory.cast_adr_to_int(obj, "symbolic")
                self._pyobj(pyobject).ob_pypy_link = intobj
                surviving = True
                if surviving_dict:
                    # Surviving nursery object: was originally in
                    # rrc_p_dict_nurs and now must be put into rrc_p_dict
                    surviving_dict.setitem(obj, pyobject)
            else:
                surviving = False
        elif (bool(self.young_rawmalloced_objects) and
              self.young_rawmalloced_objects.contains(obj)):
            # young link to a young raw-malloced object (does not move)
            if self.header(obj).tid & GCFLAG_VISITED_RMY:
                surviving = True    # survives, but does not move
            else:
                surviving = False
                if surviving_dict:
                    # Dying young large object: was in rrc_p_dict,
                    # must be deleted
                    surviving_dict.setitem(obj, llmemory.NULL)
        else:
            ll_assert(False, "rrc_X_list_young contains non-young obj")
            return
        #
        if surviving:
            surviving_list.append(pyobject)
        else:
            self._rrc_free(pyobject)
    def _rrc_free(self, pyobject):
        # The linked gc object died: drop the REFCNT_FROM_PYPY(_LIGHT)
        # bias from the pyobject's refcount, then either free the
        # pyobject directly (LIGHT, rc drops to 0), queue it for
        # tp_dealloc (non-LIGHT, rc drops to 0), or leave it with the
        # remaining external references.
        from rpython.rlib.rawrefcount import REFCNT_FROM_PYPY
        from rpython.rlib.rawrefcount import REFCNT_FROM_PYPY_LIGHT
        #
        rc = self._pyobj(pyobject).ob_refcnt
        if rc >= REFCNT_FROM_PYPY_LIGHT:
            rc -= REFCNT_FROM_PYPY_LIGHT
            if rc == 0:
                # no external references left: raw-free the pyobject now
                lltype.free(self._pyobj(pyobject), flavor='raw')
            else:
                # can only occur if LIGHT is used in create_link_pyobj()
                self._pyobj(pyobject).ob_refcnt = rc
                self._pyobj(pyobject).ob_pypy_link = 0
        else:
            ll_assert(rc >= REFCNT_FROM_PYPY, "refcount underflow?")
            ll_assert(rc < int(REFCNT_FROM_PYPY_LIGHT * 0.99),
                      "refcount underflow from REFCNT_FROM_PYPY_LIGHT?")
            rc -= REFCNT_FROM_PYPY
            self._pyobj(pyobject).ob_pypy_link = 0
            if rc == 0:
                self.rrc_dealloc_pending.append(pyobject)
                # an object with refcnt == 0 cannot stay around waiting
                # for its deallocator to be called.  Some code (lxml)
                # expects that tp_dealloc is called immediately when
                # the refcnt drops to 0.  If it isn't, we get some
                # uncleared raw pointer that can still be used to access
                # the object; but (PyObject *)raw_pointer is then bogus
                # because after a Py_INCREF()/Py_DECREF() on it, its
                # tp_dealloc is also called!
                rc = 1
            self._pyobj(pyobject).ob_refcnt = rc
    _rrc_free._always_inline_ = True
    def rrc_major_collection_trace(self):
        # Major-collection tracing step: visit every old 'p' pyobject and
        # keep its linked gc object alive if external references exist.
        self.rrc_p_list_old.foreach(self._rrc_major_trace, None)
  2813. def _rrc_major_trace(self, pyobject, ignore):
  2814. from rpython.rlib.rawrefcount import REFCNT_FROM_PYPY
  2815. from rpython.rlib.rawrefcount import REFCNT_FROM_PYPY_LIGHT
  2816. #
  2817. rc = self._pyobj(pyobject).ob_refcnt
  2818. if rc == REFCNT_FROM_PYPY or rc == REFCNT_FROM_PYPY_LIGHT:
  2819. pass # the corresponding object may die
  2820. else:
  2821. # force the corresponding object to be alive
  2822. intobj = self._pyobj(pyobject).ob_pypy_link
  2823. obj = llmemory.cast_int_to_adr(intobj)
  2824. self.objects_to_trace.append(obj)
  2825. self.visit_all_objects()
  2826. def rrc_major_collection_free(self):
  2827. ll_assert(self.rrc_p_dict_nurs.length() == 0, "p_dict_nurs not empty 2")
  2828. length_estimate = self.rrc_p_dict.length()
  2829. self.rrc_p_dict.delete()
  2830. self.rrc_p_dict = new_p_dict = self.AddressDict(length_estimate)
  2831. new_p_list = self.AddressStack()
  2832. while self.rrc_p_list_old.non_empty():
  2833. self._rrc_major_free(self.rrc_p_list_old.pop(), new_p_list,
  2834. new_p_dict)
  2835. self.rrc_p_list_old.delete()
  2836. self.rrc_p_list_old = new_p_list
  2837. #
  2838. new_o_list = self.AddressStack()
  2839. no_o_dict = self.null_address_dict()
  2840. while self.rrc_o_list_old.non_empty():
  2841. self._rrc_major_free(self.rrc_o_list_old.pop(), new_o_list,
  2842. no_o_dict)
  2843. self.rrc_o_list_old.delete()
  2844. self.rrc_o_list_old = new_o_list
  2845. def _rrc_major_free(self, pyobject, surviving_list, surviving_dict):
  2846. # The pyobject survives if the corresponding obj survives.
  2847. # This is true if the obj has one of the following two flags:
  2848. # * GCFLAG_VISITED: was seen during tracing
  2849. # * GCFLAG_NO_HEAP_PTRS: immortal object never traced (so far)
  2850. intobj = self._pyobj(pyobject).ob_pypy_link
  2851. obj = llmemory.cast_int_to_adr(intobj)
  2852. if self.header(obj).tid & (GCFLAG_VISITED | GCFLAG_NO_HEAP_PTRS):
  2853. surviving_list.append(pyobject)
  2854. if surviving_dict:
  2855. surviving_dict.insertclean(obj, pyobject)
  2856. else:
  2857. self._rrc_free(pyobject)