PageRenderTime 52ms CodeModel.GetById 15ms RepoModel.GetById 0ms app.codeStats 0ms

/rpython/memory/gc/base.py

https://bitbucket.org/pypy/pypy/
Python | 541 lines | 467 code | 28 blank | 46 comment | 14 complexity | 92507d7ca1e6f2e1d31e94b90f03e32b MD5 | raw file
Possible License(s): AGPL-3.0, BSD-3-Clause, Apache-2.0
  1. from rpython.rtyper.lltypesystem import lltype, llmemory, llarena, rffi
  2. from rpython.rtyper.lltypesystem.lloperation import llop
  3. from rpython.rlib.debug import ll_assert
  4. from rpython.memory.gcheader import GCHeaderBuilder
  5. from rpython.memory.support import DEFAULT_CHUNK_SIZE
  6. from rpython.memory.support import get_address_stack, get_address_deque
  7. from rpython.memory.support import AddressDict, null_address_dict
  8. from rpython.rtyper.lltypesystem.llmemory import NULL, raw_malloc_usage
  9. from rpython.rtyper.annlowlevel import cast_adr_to_nongc_instance
# Low-level structures used to report per-type-id statistics about the heap
# (a 'count' of instances and their total 'size', plus an array of Signed
# 'links'presumably indices of related type ids — TODO confirm against the
# code that fills these structures, which is not in this file).
TYPEID_MAP = lltype.GcStruct('TYPEID_MAP', ('count', lltype.Signed),
                             ('size', lltype.Signed),
                             ('links', lltype.Array(lltype.Signed)))
ARRAY_TYPEID_MAP = lltype.GcArray(lltype.Ptr(TYPEID_MAP))
class GCBase(object):
    """Abstract base class of all RPython garbage collectors.

    Concrete subclasses must provide self.HDR (the object header type,
    used by __init__) plus the actual allocation and collection logic.
    This class supplies the shared machinery: the type-introspection
    query functions, object tracing, finalizer queues, and the optional
    heavy consistency checks (see DEBUG below).
    """
    _alloc_flavor_ = "raw"
    moving_gc = False
    needs_write_barrier = False
    malloc_zero_filled = False
    prebuilt_gc_objects_are_static_roots = True
    can_usually_pin_objects = False
    object_minimal_size = 0
    gcflag_extra = 0   # or a real GC flag that is always 0 when not collecting

    def __init__(self, config, chunk_size=DEFAULT_CHUNK_SIZE,
                 translated_to_c=True):
        # Store the per-chunk-size container classes on the instance so
        # that subclasses and setup() can instantiate them on demand.
        self.gcheaderbuilder = GCHeaderBuilder(self.HDR)
        self.AddressStack = get_address_stack(chunk_size)
        self.AddressDeque = get_address_deque(chunk_size)
        self.AddressDict = AddressDict
        self.null_address_dict = null_address_dict
        self.config = config
        assert isinstance(translated_to_c, bool)
        self.translated_to_c = translated_to_c

    def setup(self):
        # all runtime mutable values' setup should happen here
        # and in its overridden versions! for the benefit of test_transformed_gc
        self.finalizer_lock = False
        self.run_old_style_finalizers = self.AddressDeque()

    def mark_finalizer_to_run(self, fq_index, obj):
        """Queue 'obj' so that its finalizer runs at the next
        execute_finalizers().  'fq_index' selects the finalizer queue;
        -1 means the old-style (pre-FinalizerQueue) mechanism."""
        if fq_index == -1:   # backward compatibility with old-style finalizer
            self.run_old_style_finalizers.append(obj)
            return
        handlers = self.finalizer_handlers()
        self._adr2deque(handlers[fq_index].deque).append(obj)

    def post_setup(self):
        # More stuff that needs to be initialized when the GC is already
        # fully working.  (Only called by gctransform/framework for now.)
        from rpython.memory.gc import env
        self.DEBUG = env.read_from_env('PYPY_GC_DEBUG')

    def _teardown(self):
        # Hook for tests/subclasses; no-op by default.
        pass

    def can_optimize_clean_setarrayitems(self):
        return True     # False in case of card marking

    # The following flag enables costly consistency checks after each
    # collection.  It is automatically set to True by test_gc.py.  The
    # checking logic is translatable, so the flag can be set to True
    # here before translation.  At run-time, if PYPY_GC_DEBUG is set,
    # then it is also set to True.
    DEBUG = False

    def set_query_functions(self, is_varsize, has_gcptr_in_varsize,
                            is_gcarrayofgcptr,
                            finalizer_handlers,
                            destructor_or_custom_trace,
                            is_old_style_finalizer,
                            offsets_to_gc_pointers,
                            fixed_size, varsize_item_sizes,
                            varsize_offset_to_variable_part,
                            varsize_offset_to_length,
                            varsize_offsets_to_gcpointers_in_var_part,
                            weakpointer_offset,
                            member_index,
                            is_rpython_class,
                            has_custom_trace,
                            fast_path_tracing,
                            has_gcptr,
                            cannot_pin):
        """Install the type-introspection callbacks.  Each argument is a
        function taking a typeid (plus extra args where noted) and is
        stored as-is on the instance; the rest of this class calls them
        to learn about object layout without depending on gctypelayout."""
        self.finalizer_handlers = finalizer_handlers
        self.destructor_or_custom_trace = destructor_or_custom_trace
        self.is_old_style_finalizer = is_old_style_finalizer
        self.is_varsize = is_varsize
        self.has_gcptr_in_varsize = has_gcptr_in_varsize
        self.is_gcarrayofgcptr = is_gcarrayofgcptr
        self.offsets_to_gc_pointers = offsets_to_gc_pointers
        self.fixed_size = fixed_size
        self.varsize_item_sizes = varsize_item_sizes
        self.varsize_offset_to_variable_part = varsize_offset_to_variable_part
        self.varsize_offset_to_length = varsize_offset_to_length
        self.varsize_offsets_to_gcpointers_in_var_part = varsize_offsets_to_gcpointers_in_var_part
        self.weakpointer_offset = weakpointer_offset
        self.member_index = member_index
        self.is_rpython_class = is_rpython_class
        self.has_custom_trace = has_custom_trace
        self.fast_path_tracing = fast_path_tracing
        self.has_gcptr = has_gcptr
        self.cannot_pin = cannot_pin

    def get_member_index(self, type_id):
        return self.member_index(type_id)

    def set_root_walker(self, root_walker):
        self.root_walker = root_walker

    def write_barrier(self, addr_struct):
        # No-op by default; GCs with needs_write_barrier override this.
        pass

    def size_gc_header(self, typeid=0):
        # NOTE: 'typeid' is accepted but ignored — the header size is the
        # same for all types in this base implementation.
        return self.gcheaderbuilder.size_gc_header

    def header(self, addr):
        """Return a pointer to the GC header of the object at 'addr'
        (the header sits immediately before the object)."""
        addr -= self.gcheaderbuilder.size_gc_header
        return llmemory.cast_adr_to_ptr(addr, lltype.Ptr(self.HDR))

    def _get_size_for_typeid(self, obj, typeid):
        """Compute the total allocated size of 'obj', reading the length
        field from the object itself for varsized types."""
        size = self.fixed_size(typeid)
        if self.is_varsize(typeid):
            lenaddr = obj + self.varsize_offset_to_length(typeid)
            length = lenaddr.signed[0]
            size += length * self.varsize_item_sizes(typeid)
            size = llarena.round_up_for_allocation(size)
            # XXX maybe we should parametrize round_up_for_allocation()
            # per GC; if we do, we also need to fix the call in
            # gctypelayout.encode_type_shape()
        return size

    def get_size(self, obj):
        return self._get_size_for_typeid(obj, self.get_type_id(obj))

    def get_type_id_cast(self, obj):
        # typeid as a plain Signed, for code that needs an integer.
        return rffi.cast(lltype.Signed, self.get_type_id(obj))

    def get_size_incl_hash(self, obj):
        # Overridable: some GCs store an extra hash field after the object.
        return self.get_size(obj)

    def malloc(self, typeid, length=0, zero=False):
        """NOT_RPYTHON
        For testing. The interface used by the gctransformer is
        the four malloc_[fixed,var]size[_clear]() functions.
        """
        size = self.fixed_size(typeid)
        # a "finalizer" here means a destructor-or-custom-trace entry that
        # is actually a destructor (i.e. not a custom trace hook)
        needs_finalizer = (bool(self.destructor_or_custom_trace(typeid))
                           and not self.has_custom_trace(typeid))
        finalizer_is_light = (needs_finalizer and
                              not self.is_old_style_finalizer(typeid))
        contains_weakptr = self.weakpointer_offset(typeid) >= 0
        assert not (needs_finalizer and contains_weakptr)
        if self.is_varsize(typeid):
            assert not contains_weakptr
            assert not needs_finalizer
            itemsize = self.varsize_item_sizes(typeid)
            offset_to_length = self.varsize_offset_to_length(typeid)
            if self.malloc_zero_filled:
                malloc_varsize = self.malloc_varsize_clear
            else:
                malloc_varsize = self.malloc_varsize
            ref = malloc_varsize(typeid, length, size, itemsize,
                                 offset_to_length)
            # grow 'size' to the full object size, for the raw_memclear below
            size += itemsize * length
        else:
            if self.malloc_zero_filled:
                malloc_fixedsize = self.malloc_fixedsize_clear
            else:
                malloc_fixedsize = self.malloc_fixedsize
            ref = malloc_fixedsize(typeid, size, needs_finalizer,
                                   finalizer_is_light,
                                   contains_weakptr)
        # lots of cast and reverse-cast around...
        ref = llmemory.cast_ptr_to_adr(ref)
        if zero and not self.malloc_zero_filled:
            llmemory.raw_memclear(ref, size)
        return ref

    def id(self, ptr):
        # Non-moving default: the address itself is a stable id.
        return lltype.cast_ptr_to_int(ptr)

    def can_move(self, addr):
        return False

    def malloc_fixed_or_varsize_nonmovable(self, typeid, length):
        # Not supported by default; GCs with nonmovable allocation override.
        raise MemoryError

    def pin(self, addr):
        # Pinning unsupported by default (see can_usually_pin_objects).
        return False

    def unpin(self, addr):
        pass

    def _is_pinned(self, addr):
        return False

    def set_max_heap_size(self, size):
        raise NotImplementedError

    def trace(self, obj, callback, arg):
        """Enumerate the locations inside the given obj that can contain
        GC pointers. For each such location, callback(pointer, arg) is
        called, where 'pointer' is an address inside the object.
        Typically, 'callback' is a bound method and 'arg' can be None.
        """
        typeid = self.get_type_id(obj)
        #
        # First, look if we need more than the simple fixed-size tracing
        if not self.fast_path_tracing(typeid):
            #
            # Yes.  Two cases: either we are just a GcArray(gcptr), for
            # which we have a special case for performance, or we call
            # the slow path version.
            if self.is_gcarrayofgcptr(typeid):
                length = (obj + llmemory.gcarrayofptr_lengthoffset).signed[0]
                item = obj + llmemory.gcarrayofptr_itemsoffset
                while length > 0:
                    if self.points_to_valid_gc_object(item):
                        callback(item, arg)
                    item += llmemory.gcarrayofptr_singleitemoffset
                    length -= 1
                return
            self._trace_slow_path(obj, callback, arg)
        #
        # Do the tracing on the fixed-size part of the object.
        offsets = self.offsets_to_gc_pointers(typeid)
        i = 0
        while i < len(offsets):
            item = obj + offsets[i]
            if self.points_to_valid_gc_object(item):
                callback(item, arg)
            i += 1
    trace._annspecialcase_ = 'specialize:arg(2)'

    def _trace_slow_path(self, obj, callback, arg):
        # Trace the varsized part (if it holds GC pointers) and dispatch
        # to the custom trace hook (if any).  The len(offsets)==1 and ==2
        # cases are unrolled by hand for speed.
        typeid = self.get_type_id(obj)
        if self.has_gcptr_in_varsize(typeid):
            length = (obj + self.varsize_offset_to_length(typeid)).signed[0]
            if length > 0:
                item = obj + self.varsize_offset_to_variable_part(typeid)
                offsets = self.varsize_offsets_to_gcpointers_in_var_part(typeid)
                itemlength = self.varsize_item_sizes(typeid)
                len_offsets = len(offsets)
                if len_offsets == 1:     # common path #1
                    offsets0 = offsets[0]
                    while length > 0:
                        itemobj0 = item + offsets0
                        if self.points_to_valid_gc_object(itemobj0):
                            callback(itemobj0, arg)
                        item += itemlength
                        length -= 1
                elif len_offsets == 2:   # common path #2
                    offsets0 = offsets[0]
                    offsets1 = offsets[1]
                    while length > 0:
                        itemobj0 = item + offsets0
                        if self.points_to_valid_gc_object(itemobj0):
                            callback(itemobj0, arg)
                        itemobj1 = item + offsets1
                        if self.points_to_valid_gc_object(itemobj1):
                            callback(itemobj1, arg)
                        item += itemlength
                        length -= 1
                else:                    # general path
                    while length > 0:
                        j = 0
                        while j < len_offsets:
                            itemobj = item + offsets[j]
                            if self.points_to_valid_gc_object(itemobj):
                                callback(itemobj, arg)
                            j += 1
                        item += itemlength
                        length -= 1
        if self.has_custom_trace(typeid):
            self.custom_trace_dispatcher(obj, typeid, callback, arg)
    _trace_slow_path._annspecialcase_ = 'specialize:arg(2)'

    def _trace_callback(self, callback, arg, addr):
        # Helper: invoke 'callback' only if *addr points to a valid object.
        if self.is_valid_gc_object(addr.address[0]):
            callback(addr, arg)
    _trace_callback._annspecialcase_ = 'specialize:arg(1)'

    def trace_partial(self, obj, start, stop, callback, arg):
        """Like trace(), but only walk the array part, for indices in
        range(start, stop). Must only be called if has_gcptr_in_varsize().
        """
        length = stop - start
        typeid = self.get_type_id(obj)
        if self.is_gcarrayofgcptr(typeid):
            # a performance shortcut for GcArray(gcptr)
            item = obj + llmemory.gcarrayofptr_itemsoffset
            item += llmemory.gcarrayofptr_singleitemoffset * start
            while length > 0:
                if self.points_to_valid_gc_object(item):
                    callback(item, arg)
                item += llmemory.gcarrayofptr_singleitemoffset
                length -= 1
            return
        ll_assert(self.has_gcptr_in_varsize(typeid),
                  "trace_partial() on object without has_gcptr_in_varsize()")
        item = obj + self.varsize_offset_to_variable_part(typeid)
        offsets = self.varsize_offsets_to_gcpointers_in_var_part(typeid)
        itemlength = self.varsize_item_sizes(typeid)
        item += itemlength * start
        while length > 0:
            j = 0
            while j < len(offsets):
                itemobj = item + offsets[j]
                if self.points_to_valid_gc_object(itemobj):
                    callback(itemobj, arg)
                j += 1
            item += itemlength
            length -= 1
    trace_partial._annspecialcase_ = 'specialize:arg(4)'

    def points_to_valid_gc_object(self, addr):
        # 'addr' is the address of a pointer field; check what it points to.
        return self.is_valid_gc_object(addr.address[0])

    def is_valid_gc_object(self, addr):
        # Non-NULL, and not a tagged pointer (odd address) if tagged
        # pointers are enabled in the config.
        return (addr != NULL and
                (not self.config.taggedpointers or
                 llmemory.cast_adr_to_int(addr) & 1 == 0))

    def enumerate_all_roots(self, callback, arg):
        """For each root object, invoke callback(obj, arg).
        'callback' should not be a bound method.
        Note that this method is not suitable for actually doing the
        collection in a moving GC, because you cannot write back a
        modified address. It is there only for inspection.
        """
        # overridden in some subclasses, for GCs which have an additional
        # list of last generation roots
        callback2, attrname = _convert_callback_formats(callback)    # :-/
        setattr(self, attrname, arg)
        self.root_walker.walk_roots(callback2, callback2, callback2)
        self.enum_pending_finalizers(callback, arg)
    enumerate_all_roots._annspecialcase_ = 'specialize:arg(1)'

    def enum_pending_finalizers(self, callback, arg):
        # Objects queued for finalization are also roots.
        self.run_old_style_finalizers.foreach(callback, arg)
        handlers = self.finalizer_handlers()
        i = 0
        while i < len(handlers):
            self._adr2deque(handlers[i].deque).foreach(callback, arg)
            i += 1
    enum_pending_finalizers._annspecialcase_ = 'specialize:arg(1)'

    def _copy_pending_finalizers_deque(self, deque, copy_fn):
        # Rebuild 'deque' in place with copy_fn applied to every element,
        # preserving order (drain into tmp, then drain back).
        tmp = self.AddressDeque()
        while deque.non_empty():
            obj = deque.popleft()
            tmp.append(copy_fn(obj))
        while tmp.non_empty():
            deque.append(tmp.popleft())
        tmp.delete()

    def copy_pending_finalizers(self, copy_fn):
        "NOTE: not very efficient, but only for SemiSpaceGC and subclasses"
        self._copy_pending_finalizers_deque(
            self.run_old_style_finalizers, copy_fn)
        handlers = self.finalizer_handlers()
        i = 0
        while i < len(handlers):
            h = handlers[i]
            self._copy_pending_finalizers_deque(
                self._adr2deque(h.deque), copy_fn)
            i += 1

    def call_destructor(self, obj):
        destructor = self.destructor_or_custom_trace(self.get_type_id(obj))
        ll_assert(bool(destructor), "no destructor found")
        destructor(obj)

    def debug_check_consistency(self):
        """To use after a collection.  If self.DEBUG is set, this
        enumerates all roots and traces all objects to check if we didn't
        accidentally free a reachable object or forgot to update a pointer
        to an object that moved.
        """
        if self.DEBUG:
            from rpython.rlib.objectmodel import we_are_translated
            from rpython.memory.support import AddressDict
            self._debug_seen = AddressDict()
            self._debug_pending = self.AddressStack()
            if not we_are_translated():
                self.root_walker._walk_prebuilt_gc(self._debug_record)
            self.enumerate_all_roots(GCBase._debug_callback, self)
            pending = self._debug_pending
            while pending.non_empty():
                obj = pending.pop()
                self.trace(obj, self._debug_callback2, None)
            self._debug_seen.delete()
            self._debug_pending.delete()

    def _debug_record(self, obj):
        # Check each object once, then queue it so its children get checked.
        seen = self._debug_seen
        if not seen.contains(obj):
            seen.add(obj)
            self.debug_check_object(obj)
            self._debug_pending.append(obj)

    @staticmethod
    def _debug_callback(obj, self):
        # Unbound form required by enumerate_all_roots ('callback' must
        # not be a bound method); 'self' is passed as the arg.
        self._debug_record(obj)

    def _debug_callback2(self, pointer, ignored):
        obj = pointer.address[0]
        ll_assert(bool(obj), "NULL address from self.trace()")
        self._debug_record(obj)

    def debug_check_object(self, obj):
        # Hook for subclasses: per-object consistency checks.
        pass

    def _adr2deque(self, adr):
        # Cast the raw address stored in a finalizer handler back to the
        # AddressDeque instance it denotes.
        return cast_adr_to_nongc_instance(self.AddressDeque, adr)

    def execute_finalizers(self):
        """Run pending finalizer triggers and old-style destructors.
        'finalizer_lock' prevents reentrant execution: a nested call
        returns immediately and lets the outer invocation finish the job."""
        if self.finalizer_lock:
            return  # the outer invocation of execute_finalizers() will do it
        self.finalizer_lock = True
        try:
            handlers = self.finalizer_handlers()
            i = 0
            while i < len(handlers):
                if self._adr2deque(handlers[i].deque).non_empty():
                    handlers[i].trigger()
                i += 1
            while self.run_old_style_finalizers.non_empty():
                obj = self.run_old_style_finalizers.popleft()
                self.call_destructor(obj)
        finally:
            self.finalizer_lock = False
class MovingGCBase(GCBase):
    """Base class for GCs that may move objects.

    Adds the machinery to hand out stable ids for objects that can move:
    a dict mapping objects to their ids, plus a free list of ids whose
    objects died.  Relies on subclass-provided helpers (_is_external,
    surviving, get_forwarding_address — not defined in this file).
    """
    moving_gc = True

    def setup(self):
        GCBase.setup(self)
        # object -> id mapping and recycled-id pool for the id() support
        self.objects_with_id = self.AddressDict()
        self.id_free_list = self.AddressStack()
        self.next_free_id = 1

    def can_move(self, addr):
        return True

    def id(self, ptr):
        # Default implementation for id(), assuming that "external" objects
        # never move. Overridden in the HybridGC.
        obj = llmemory.cast_ptr_to_adr(ptr)
        # is it a tagged pointer? or an external object?
        if not self.is_valid_gc_object(obj) or self._is_external(obj):
            return llmemory.cast_adr_to_int(obj)
        # tagged pointers have ids of the form 2n + 1
        # external objects have ids of the form 4n (due to word alignment)
        # self._compute_id returns addresses of the form 2n + 1
        # if we multiply by 2, we get ids of the form 4n + 2, thus we get no
        # clashes
        return llmemory.cast_adr_to_int(self._compute_id(obj)) * 2

    def _next_id(self):
        # return an id not currently in use (as an address instead of an int)
        if self.id_free_list.non_empty():
            result = self.id_free_list.pop()    # reuse a dead id
        else:
            # make up a fresh id number
            result = llmemory.cast_int_to_adr(self.next_free_id)
            self.next_free_id += 2    # only odd numbers, to make lltype
                                      # and llmemory happy and to avoid
                                      # clashes with real addresses
        return result

    def _compute_id(self, obj):
        # look if the object is listed in objects_with_id
        result = self.objects_with_id.get(obj)
        if not result:
            result = self._next_id()
            self.objects_with_id.setitem(obj, result)
        return result

    def update_objects_with_id(self):
        # After a collection: rebuild objects_with_id, following each
        # surviving object to its new address and recycling dead ids.
        old = self.objects_with_id
        new_objects_with_id = self.AddressDict(old.length())
        old.foreach(self._update_object_id_FAST,
                    new_objects_with_id)
        old.delete()
        self.objects_with_id = new_objects_with_id

    def _update_object_id(self, obj, id, new_objects_with_id):
        # safe version (used by subclasses)
        if self.surviving(obj):
            newobj = self.get_forwarding_address(obj)
            new_objects_with_id.setitem(newobj, id)
        else:
            self.id_free_list.append(id)

    def _update_object_id_FAST(self, obj, id, new_objects_with_id):
        # unsafe version, assumes that the new_objects_with_id is large enough
        if self.surviving(obj):
            newobj = self.get_forwarding_address(obj)
            new_objects_with_id.insertclean(newobj, id)
        else:
            self.id_free_list.append(id)
  450. def choose_gc_from_config(config):
  451. """Return a (GCClass, GC_PARAMS) from the given config object.
  452. """
  453. if config.translation.gctransformer != "framework":
  454. raise AssertionError("fix this test")
  455. classes = {"semispace": "semispace.SemiSpaceGC",
  456. "generation": "generation.GenerationGC",
  457. "hybrid": "hybrid.HybridGC",
  458. "minimark" : "minimark.MiniMarkGC",
  459. "incminimark" : "incminimark.IncrementalMiniMarkGC",
  460. }
  461. try:
  462. modulename, classname = classes[config.translation.gc].split('.')
  463. except KeyError:
  464. raise ValueError("unknown value for translation.gc: %r" % (
  465. config.translation.gc,))
  466. module = __import__("rpython.memory.gc." + modulename,
  467. globals(), locals(), [classname])
  468. GCClass = getattr(module, classname)
  469. return GCClass, GCClass.TRANSLATION_PARAMS
  470. def _convert_callback_formats(callback):
  471. callback = getattr(callback, 'im_func', callback)
  472. if callback not in _converted_callback_formats:
  473. def callback2(gc, root):
  474. obj = root.address[0]
  475. ll_assert(bool(obj), "NULL address from walk_roots()")
  476. callback(obj, getattr(gc, attrname))
  477. attrname = '_callback2_arg%d' % len(_converted_callback_formats)
  478. _converted_callback_formats[callback] = callback2, attrname
  479. return _converted_callback_formats[callback]
  480. _convert_callback_formats._annspecialcase_ = 'specialize:memo'
  481. _converted_callback_formats = {}