PageRenderTime 53ms CodeModel.GetById 18ms RepoModel.GetById 1ms app.codeStats 0ms

/rpython/jit/backend/llsupport/rewrite.py

https://bitbucket.org/pypy/pypy/
Python | 991 lines | 821 code | 50 blank | 120 comment | 126 complexity | 7170762b58d71b2ab2f7ac96589f6aeb MD5 | raw file
Possible License(s): AGPL-3.0, BSD-3-Clause, Apache-2.0
  1. from rpython.rlib import rgc
  2. from rpython.rlib.objectmodel import we_are_translated, r_dict
  3. from rpython.rlib.rarithmetic import ovfcheck, highest_bit
  4. from rpython.rtyper.lltypesystem import llmemory, lltype, rstr
  5. from rpython.rtyper.annlowlevel import cast_instance_to_gcref
  6. from rpython.jit.metainterp import history
  7. from rpython.jit.metainterp.history import ConstInt, ConstPtr
  8. from rpython.jit.metainterp.resoperation import ResOperation, rop, OpHelpers
  9. from rpython.jit.metainterp.typesystem import rd_eq, rd_hash
  10. from rpython.jit.codewriter import heaptracker
  11. from rpython.jit.backend.llsupport.symbolic import (WORD,
  12. get_array_token)
  13. from rpython.jit.backend.llsupport.descr import SizeDescr, ArrayDescr,\
  14. FLAG_POINTER
  15. from rpython.jit.metainterp.history import JitCellToken
  16. from rpython.jit.backend.llsupport.descr import (unpack_arraydescr,
  17. unpack_fielddescr, unpack_interiorfielddescr)
# Kind tags for variable-size allocations, passed to handle_new_array()
# and clear_varsize_gc_fields(): plain GC array, string, unicode string.
FLAG_ARRAY = 0
FLAG_STR = 1
FLAG_UNICODE = 2
  21. class BridgeExceptionNotFirst(Exception):
  22. pass
class GcRewriterAssembler(object):
    """ This class performs the following rewrites on the list of operations:

     - Turn all NEW_xxx to either a CALL_R/CHECK_MEMORY_ERROR,
       or a CALL_MALLOC_NURSERY, followed by SETFIELDs in order to
       initialize their GC fields.  The two advantages of
       CALL_MALLOC_NURSERY are that it inlines the common path, and we
       need only one such operation to allocate several blocks of memory
       at once.

     - Add COND_CALLs to the write barrier before SETFIELD_GC and
       SETARRAYITEM_GC operations.

    '_write_barrier_applied' contains a dictionary of variable -> None.
    If a variable is in the dictionary, next setfields can be called without
    a write barrier.  The idea is that an object that was freshly allocated
    or already write_barrier'd doesn't need another write_barrier if there
    was no potentially collecting resop inbetween.
    """
    # State used to collapse several known-size nursery mallocs into one
    # (see the class docstring above); reset whenever an operation that
    # can collect is emitted.
    _previous_size = -1
    _op_malloc_nursery = None
    _v_last_malloced_nursery = None
    # Shared constants, to avoid re-allocating them all the time.
    c_zero = ConstInt(0)
    c_null = ConstPtr(lltype.nullptr(llmemory.GCREF.TO))
    def __init__(self, gc_ll_descr, cpu):
        self.gc_ll_descr = gc_ll_descr
        self.cpu = cpu
        self._newops = []                  # the rewritten operations, in order
        self._known_lengths = {}           # {box: known constant array length}
        self._write_barrier_applied = {}   # set-as-dict of boxes needing no wb
        self._delayed_zero_setfields = {}  # {box: {field-offset: None}} to zero
        self.last_zero_arrays = []         # ZERO_ARRAY ops that may be shrunk
        self._setarrayitems_occurred = {} # {box: {set-of-indexes}}
    def remember_known_length(self, op, val):
        # Record that the array allocated by 'op' has constant length 'val'.
        self._known_lengths[op] = val
  55. def remember_setarrayitem_occured(self, op, index):
  56. op = self.get_box_replacement(op)
  57. try:
  58. subs = self._setarrayitems_occurred[op]
  59. except KeyError:
  60. subs = {}
  61. self._setarrayitems_occurred[op] = subs
  62. subs[index] = None
    def setarrayitems_occurred(self, op):
        # Return the set (as a dict with None values) of constant indexes
        # already written into array 'op'.  Raises KeyError if none were
        # recorded; emit_pending_zeros() relies on that exception.
        return self._setarrayitems_occurred[self.get_box_replacement(op)]
    def known_length(self, op, default):
        # Return the constant length recorded for array 'op', or 'default'.
        return self._known_lengths.get(op, default)
  67. def delayed_zero_setfields(self, op):
  68. op = self.get_box_replacement(op)
  69. try:
  70. d = self._delayed_zero_setfields[op]
  71. except KeyError:
  72. d = {}
  73. self._delayed_zero_setfields[op] = d
  74. return d
    def get_box_replacement(self, op, allow_none=False):
        # Follow the chain of forwarded boxes and return the final one.
        if allow_none and op is None:
            return None # for failargs
        while op.get_forwarded():
            op = op.get_forwarded()
        return op
    def emit_op(self, op):
        """Append 'op' to self._newops after replacing its arguments (and,
        for guards, its failargs) with their forwarded boxes, and moving
        non-null ConstPtr arguments out of line via remove_constptr()."""
        op = self.get_box_replacement(op)
        orig_op = op
        replaced = False
        opnum = op.getopnum()
        # JIT_DEBUG keeps its ConstPtr arguments inline
        keep = (opnum == rop.JIT_DEBUG)
        for i in range(op.numargs()):
            orig_arg = op.getarg(i)
            arg = self.get_box_replacement(orig_arg)
            if isinstance(arg, ConstPtr) and bool(arg.value) and not keep:
                arg = self.remove_constptr(arg)
            if orig_arg is not arg:
                if not replaced:
                    # copy the op the first time an argument changes, and
                    # forward the original op to the copy
                    op = op.copy_and_change(opnum)
                    orig_op.set_forwarded(op)
                    replaced = True
                op.setarg(i, arg)
        if rop.is_guard(opnum):
            if not replaced:
                op = op.copy_and_change(opnum)
                orig_op.set_forwarded(op)
            op.setfailargs([self.get_box_replacement(a, True)
                            for a in op.getfailargs()])
        if rop.is_guard(opnum) or opnum == rop.FINISH:
            # record the descr so it is kept alive by gcrefs_output_list
            llref = cast_instance_to_gcref(op.getdescr())
            self.gcrefs_output_list.append(llref)
        self._newops.append(op)
    def replace_op_with(self, op, newop):
        # Forward 'op' to 'newop'; 'op' must not already be forwarded.
        assert not op.get_forwarded()
        op.set_forwarded(newop)
  111. def handle_setarrayitem(self, op):
  112. itemsize, basesize, _ = unpack_arraydescr(op.getdescr())
  113. ptr_box = op.getarg(0)
  114. index_box = op.getarg(1)
  115. value_box = op.getarg(2)
  116. self.emit_gc_store_or_indexed(op, ptr_box, index_box, value_box,
  117. itemsize, itemsize, basesize)
    def emit_gc_store_or_indexed(self, op, ptr_box, index_box, value_box,
                                 itemsize, factor, offset):
        """Build a GC_STORE (when the index folds into a constant offset)
        or a GC_STORE_INDEXED.  If 'op' is not None it is forwarded to the
        new operation (the caller emits it later via emit_op); otherwise
        the new operation is emitted right away."""
        factor, offset, index_box = \
                self._emit_mul_if_factor_offset_not_supported(index_box,
                        factor, offset)
        #
        if index_box is None:
            args = [ptr_box, ConstInt(offset), value_box, ConstInt(itemsize)]
            newload = ResOperation(rop.GC_STORE, args)
        else:
            args = [ptr_box, index_box, value_box, ConstInt(factor),
                    ConstInt(offset), ConstInt(itemsize)]
            newload = ResOperation(rop.GC_STORE_INDEXED, args)
        if op is not None:
            self.replace_op_with(op, newload)
        else:
            self.emit_op(newload)
  135. def handle_getarrayitem(self, op):
  136. itemsize, ofs, sign = unpack_arraydescr(op.getdescr())
  137. ptr_box = op.getarg(0)
  138. index_box = op.getarg(1)
  139. self.emit_gc_load_or_indexed(op, ptr_box, index_box, itemsize, itemsize, ofs, sign)
    def handle_rawload(self, op):
        # Like handle_getarrayitem, but with a fixed scaling factor of 1:
        # the index of a raw load is not scaled by the item size here.
        itemsize, ofs, sign = unpack_arraydescr(op.getdescr())
        ptr_box = op.getarg(0)
        index_box = op.getarg(1)
        self.emit_gc_load_or_indexed(op, ptr_box, index_box, itemsize, 1, ofs, sign)
    def _emit_mul_if_factor_offset_not_supported(self, index_box,
                                                 factor, offset):
        # Returns (factor, offset, index_box) where index_box is either
        # a non-constant BoxInt or None.  A constant index is folded
        # entirely into the offset.  If the cpu cannot apply the scaling
        # factor itself, an explicit INT_LSHIFT or INT_MUL is emitted and
        # factor 1 is returned.
        if isinstance(index_box, ConstInt):
            return 1, index_box.value * factor + offset, None
        else:
            if factor != 1 and factor not in self.cpu.load_supported_factors:
                # the factor is NOT supported by the cpu: scale explicitly.
                # x & (x - 1) == 0 is a quick test for power of 2
                assert factor > 0
                if (factor & (factor - 1)) == 0:
                    index_box = ResOperation(rop.INT_LSHIFT,
                            [index_box, ConstInt(highest_bit(factor))])
                else:
                    index_box = ResOperation(rop.INT_MUL,
                            [index_box, ConstInt(factor)])
                self.emit_op(index_box)
                factor = 1
            return factor, offset, index_box
    def emit_gc_load_or_indexed(self, op, ptr_box, index_box, itemsize,
                                factor, offset, sign, type='i'):
        """Build and return a GC_LOAD_x (constant offset) or
        GC_LOAD_INDEXED_x.  If 'op' is not None, it supplies the result
        type and is forwarded to the new operation (emitted later by the
        caller); otherwise the new operation uses result type 'type' and
        is emitted right away."""
        factor, offset, index_box = \
                self._emit_mul_if_factor_offset_not_supported(index_box,
                        factor, offset)
        #
        if sign:
            # encode signed into the itemsize value
            itemsize = -itemsize
        #
        optype = type
        if op is not None:
            optype = op.type
        if index_box is None:
            args = [ptr_box, ConstInt(offset), ConstInt(itemsize)]
            newload = ResOperation(OpHelpers.get_gc_load(optype), args)
        else:
            args = [ptr_box, index_box, ConstInt(factor),
                    ConstInt(offset), ConstInt(itemsize)]
            newload = ResOperation(OpHelpers.get_gc_load_indexed(optype), args)
        if op is None:
            self.emit_op(newload)
        else:
            self.replace_op_with(op, newload)
        return newload
    def transform_to_gc_load(self, op):
        """Rewrite 'op' into GC_LOAD/GC_STORE form if it is one of the
        memory-access operations.  Returns True only in the GETFIELD_GC
        case where the op was already emitted here (so the caller must
        not emit it again); returns False otherwise."""
        NOT_SIGNED = 0
        # NOTE(review): CINT_ZERO appears to be unused in this method;
        # ConstInt(0) is built inline below instead.
        CINT_ZERO = ConstInt(0)
        opnum = op.getopnum()
        #if opnum == rop.CALL_MALLOC_NURSERY_VARSIZE:
        #    v_length = op.getarg(2)
        #    scale = op.getarg(1).getint()
        #    if scale not in self.cpu.load_supported_factors:
        #        scale, offset, v_length = \
        #                self._emit_mul_if_factor_offset_not_supported(v_length, scale, 0)
        #        op.setarg(1, ConstInt(scale))
        #        op.setarg(2, v_length)
        if rop.is_getarrayitem(opnum) or \
           opnum in (rop.GETARRAYITEM_RAW_I,
                     rop.GETARRAYITEM_RAW_F):
            self.handle_getarrayitem(op)
        elif opnum in (rop.SETARRAYITEM_GC, rop.SETARRAYITEM_RAW):
            self.handle_setarrayitem(op)
        elif opnum == rop.RAW_STORE:
            itemsize, ofs, _ = unpack_arraydescr(op.getdescr())
            ptr_box = op.getarg(0)
            index_box = op.getarg(1)
            value_box = op.getarg(2)
            self.emit_gc_store_or_indexed(op, ptr_box, index_box, value_box, itemsize, 1, ofs)
        elif opnum in (rop.RAW_LOAD_I, rop.RAW_LOAD_F):
            itemsize, ofs, sign = unpack_arraydescr(op.getdescr())
            ptr_box = op.getarg(0)
            index_box = op.getarg(1)
            self.emit_gc_load_or_indexed(op, ptr_box, index_box, itemsize, 1, ofs, sign)
        elif opnum in (rop.GETINTERIORFIELD_GC_I, rop.GETINTERIORFIELD_GC_R,
                       rop.GETINTERIORFIELD_GC_F):
            ofs, itemsize, fieldsize, sign = unpack_interiorfielddescr(op.getdescr())
            ptr_box = op.getarg(0)
            index_box = op.getarg(1)
            self.emit_gc_load_or_indexed(op, ptr_box, index_box, fieldsize, itemsize, ofs, sign)
        elif opnum in (rop.SETINTERIORFIELD_RAW, rop.SETINTERIORFIELD_GC):
            ofs, itemsize, fieldsize, sign = unpack_interiorfielddescr(op.getdescr())
            ptr_box = op.getarg(0)
            index_box = op.getarg(1)
            value_box = op.getarg(2)
            self.emit_gc_store_or_indexed(op, ptr_box, index_box, value_box,
                                          fieldsize, itemsize, ofs)
        elif opnum in (rop.GETFIELD_GC_I, rop.GETFIELD_GC_F, rop.GETFIELD_GC_R,
                       rop.GETFIELD_RAW_I, rop.GETFIELD_RAW_F, rop.GETFIELD_RAW_R):
            ofs, itemsize, sign = unpack_fielddescr(op.getdescr())
            ptr_box = op.getarg(0)
            if op.getopnum() in (rop.GETFIELD_GC_F, rop.GETFIELD_GC_I, rop.GETFIELD_GC_R):
                # See test_zero_ptr_field_before_getfield().  We hope there is
                # no getfield_gc in the middle of initialization code, but there
                # shouldn't be, given that a 'new' is already delayed by previous
                # optimization steps.  In practice it should immediately be
                # followed by a bunch of 'setfields', and the 'pending_zeros'
                # optimization we do here is meant for this case.
                self.emit_pending_zeros()
                self.emit_gc_load_or_indexed(op, ptr_box, ConstInt(0), itemsize, 1, ofs, sign)
                self.emit_op(op)
                return True
            self.emit_gc_load_or_indexed(op, ptr_box, ConstInt(0), itemsize, 1, ofs, sign)
        elif opnum in (rop.SETFIELD_GC, rop.SETFIELD_RAW):
            ofs, itemsize, sign = unpack_fielddescr(op.getdescr())
            ptr_box = op.getarg(0)
            value_box = op.getarg(1)
            self.emit_gc_store_or_indexed(op, ptr_box, ConstInt(0), value_box, itemsize, 1, ofs)
        elif opnum == rop.ARRAYLEN_GC:
            descr = op.getdescr()
            assert isinstance(descr, ArrayDescr)
            ofs = descr.lendescr.offset
            self.emit_gc_load_or_indexed(op, op.getarg(0), ConstInt(0),
                                         WORD, 1, ofs, NOT_SIGNED)
        elif opnum == rop.STRLEN:
            basesize, itemsize, ofs_length = get_array_token(rstr.STR,
                self.cpu.translate_support_code)
            self.emit_gc_load_or_indexed(op, op.getarg(0), ConstInt(0),
                                         WORD, 1, ofs_length, NOT_SIGNED)
        elif opnum == rop.UNICODELEN:
            basesize, itemsize, ofs_length = get_array_token(rstr.UNICODE,
                self.cpu.translate_support_code)
            self.emit_gc_load_or_indexed(op, op.getarg(0), ConstInt(0),
                                         WORD, 1, ofs_length, NOT_SIGNED)
        elif opnum == rop.STRGETITEM:
            basesize, itemsize, ofs_length = get_array_token(rstr.STR,
                self.cpu.translate_support_code)
            assert itemsize == 1
            basesize -= 1     # for the extra null character
            self.emit_gc_load_or_indexed(op, op.getarg(0), op.getarg(1),
                                         itemsize, itemsize, basesize, NOT_SIGNED)
        elif opnum == rop.UNICODEGETITEM:
            basesize, itemsize, ofs_length = get_array_token(rstr.UNICODE,
                self.cpu.translate_support_code)
            self.emit_gc_load_or_indexed(op, op.getarg(0), op.getarg(1),
                                         itemsize, itemsize, basesize, NOT_SIGNED)
        elif opnum == rop.STRSETITEM:
            basesize, itemsize, ofs_length = get_array_token(rstr.STR,
                self.cpu.translate_support_code)
            assert itemsize == 1
            basesize -= 1     # for the extra null character
            self.emit_gc_store_or_indexed(op, op.getarg(0), op.getarg(1), op.getarg(2),
                                          itemsize, itemsize, basesize)
        elif opnum == rop.UNICODESETITEM:
            basesize, itemsize, ofs_length = get_array_token(rstr.UNICODE,
                self.cpu.translate_support_code)
            self.emit_gc_store_or_indexed(op, op.getarg(0), op.getarg(1), op.getarg(2),
                                          itemsize, itemsize, basesize)
        return False
    def rewrite(self, operations, gcrefs_output_list):
        """Main entry point: rewrite 'operations' and return the new list
        (self._newops).  Any gcrefs that must be kept alive are appended
        to 'gcrefs_output_list'."""
        # we can only remember one malloc since the next malloc can possibly
        # collect; but we can try to collapse several known-size mallocs into
        # one, both for performance and to reduce the number of write
        # barriers.  We do this on each "basic block" of operations, which in
        # this case means between CALLs or unknown-size mallocs.
        #
        self.gcrefs_output_list = gcrefs_output_list
        self.gcrefs_map = None
        self.gcrefs_recently_loaded = None
        operations = self.remove_bridge_exception(operations)
        self._changed_op = None
        for i in range(len(operations)):
            op = operations[i]
            assert op.get_forwarded() is None
            if op.getopnum() == rop.DEBUG_MERGE_POINT:
                continue
            if op is self._changed_op:
                # replacement installed by remove_tested_failarg()
                op = self._changed_op_to
            # ---------- GC_LOAD/STORE transformations --------------
            if self.transform_to_gc_load(op):
                continue
            # ---------- turn NEWxxx into CALL_MALLOC_xxx ----------
            if rop.is_malloc(op.opnum):
                self.handle_malloc_operation(op)
                continue
            if (rop.is_guard(op.opnum) or
                    self.could_merge_with_next_guard(op, i, operations)):
                self.emit_pending_zeros()
            elif rop.can_malloc(op.opnum):
                self.emitting_an_operation_that_can_collect()
            elif op.getopnum() == rop.LABEL:
                self.emit_label()
            # ---------- write barriers ----------
            if self.gc_ll_descr.write_barrier_descr is not None:
                if op.getopnum() == rop.SETFIELD_GC:
                    self.consider_setfield_gc(op)
                    self.handle_write_barrier_setfield(op)
                    continue
                if op.getopnum() == rop.SETINTERIORFIELD_GC:
                    self.handle_write_barrier_setinteriorfield(op)
                    continue
                if op.getopnum() == rop.SETARRAYITEM_GC:
                    self.consider_setarrayitem_gc(op)
                    self.handle_write_barrier_setarrayitem(op)
                    continue
            else:
                # this is dead code, but in case we have a gc that does
                # not have a write barrier and does not zero memory, we would
                # need to call it
                if op.getopnum() == rop.SETFIELD_GC:
                    self.consider_setfield_gc(op)
                elif op.getopnum() == rop.SETARRAYITEM_GC:
                    self.consider_setarrayitem_gc(op)
            # ---------- call assembler -----------
            if OpHelpers.is_call_assembler(op.getopnum()):
                self.handle_call_assembler(op)
                continue
            if op.getopnum() == rop.JUMP or op.getopnum() == rop.FINISH:
                self.emit_pending_zeros()
            #
            self.emit_op(op)
        return self._newops
    def could_merge_with_next_guard(self, op, i, operations):
        # Return True in cases where the operation and the following guard
        # should likely remain together.  Simplified version of
        # can_merge_with_next_guard() in llsupport/regalloc.py.
        # NOTE: when it returns True for a comparison, it also calls
        # remove_tested_failarg() on the following guard as a side effect.
        if not rop.is_comparison(op.opnum):
            return rop.is_ovf(op.opnum)    # int_xxx_ovf() / guard_no_overflow()
        if i + 1 >= len(operations):
            return False
        next_op = operations[i + 1]
        opnum = next_op.getopnum()
        if not (opnum == rop.GUARD_TRUE or
                opnum == rop.GUARD_FALSE or
                opnum == rop.COND_CALL):
            return False
        if next_op.getarg(0) is not op:
            return False
        self.remove_tested_failarg(next_op)
        return True
    def remove_tested_failarg(self, op):
        """If the value tested by guard 'op' also appears in its failargs,
        replace it there with a constant (via a SAME_AS_I) and register a
        rewritten copy of the guard in self._changed_op_to."""
        opnum = op.getopnum()
        if not (opnum == rop.GUARD_TRUE or opnum == rop.GUARD_FALSE):
            return
        if op.getarg(0).is_vector():
            return
        try:
            i = op.getfailargs().index(op.getarg(0))
        except ValueError:
            return
        # The checked value is also in the failargs.  The front-end
        # tries not to produce it, but doesn't always succeed (and
        # it's hard to test all cases).  Rewrite it away.
        value = int(opnum == rop.GUARD_FALSE)
        op1 = ResOperation(rop.SAME_AS_I, [ConstInt(value)])
        self.emit_op(op1)
        lst = op.getfailargs()[:]
        lst[i] = op1
        newop = op.copy_and_change(opnum)
        newop.setfailargs(lst)
        self._changed_op = op
        self._changed_op_to = newop
  397. # ----------
    def handle_malloc_operation(self, op):
        """Dispatch a NEWxxx operation to the matching malloc rewriter."""
        opnum = op.getopnum()
        if opnum == rop.NEW:
            self.handle_new_fixedsize(op.getdescr(), op)
        elif opnum == rop.NEW_WITH_VTABLE:
            descr = op.getdescr()
            self.handle_new_fixedsize(descr, op)
            if self.gc_ll_descr.fielddescr_vtable is not None:
                # also store the vtable in the freshly-allocated object
                self.emit_setfield(op, ConstInt(descr.get_vtable()),
                                   descr=self.gc_ll_descr.fielddescr_vtable)
        elif opnum == rop.NEW_ARRAY or opnum == rop.NEW_ARRAY_CLEAR:
            descr = op.getdescr()
            assert isinstance(descr, ArrayDescr)
            self.handle_new_array(descr, op)
        elif opnum == rop.NEWSTR:
            self.handle_new_array(self.gc_ll_descr.str_descr, op,
                                  kind=FLAG_STR)
        elif opnum == rop.NEWUNICODE:
            self.handle_new_array(self.gc_ll_descr.unicode_descr, op,
                                  kind=FLAG_UNICODE)
        else:
            raise NotImplementedError(op.getopname())
  420. def clear_gc_fields(self, descr, result):
  421. if self.gc_ll_descr.malloc_zero_filled:
  422. return
  423. d = self.delayed_zero_setfields(result)
  424. for fielddescr in descr.gc_fielddescrs:
  425. ofs = self.cpu.unpack_fielddescr(fielddescr)
  426. d[ofs] = None
    def consider_setfield_gc(self, op):
        # A setfield writing this offset makes the corresponding delayed
        # zero-setfield unnecessary: drop it if it was pending.
        offset = self.cpu.unpack_fielddescr(op.getdescr())
        try:
            del self._delayed_zero_setfields[
                self.get_box_replacement(op.getarg(0))][offset]
        except KeyError:
            pass
  434. def consider_setarrayitem_gc(self, op):
  435. array_box = op.getarg(0)
  436. index_box = op.getarg(1)
  437. if not isinstance(array_box, ConstPtr) and index_box.is_constant():
  438. self.remember_setarrayitem_occured(array_box, index_box.getint())
    def clear_varsize_gc_fields(self, kind, descr, result, v_length, opnum):
        """After a variable-size allocation: clear the array contents for
        NEW_ARRAY_CLEAR, or zero the hash field of a new str/unicode.
        No-op if the GC already returns zeroed memory."""
        if self.gc_ll_descr.malloc_zero_filled:
            return
        if kind == FLAG_ARRAY:
            # arrays of structs or pointers must always have been emitted
            # as NEW_ARRAY_CLEAR
            if descr.is_array_of_structs() or descr.is_array_of_pointers():
                assert opnum == rop.NEW_ARRAY_CLEAR
            if opnum == rop.NEW_ARRAY_CLEAR:
                self.handle_clear_array_contents(descr, result, v_length)
            return
        if kind == FLAG_STR:
            hash_descr = self.gc_ll_descr.str_hash_descr
        elif kind == FLAG_UNICODE:
            hash_descr = self.gc_ll_descr.unicode_hash_descr
        else:
            return
        self.emit_setfield(result, self.c_zero, descr=hash_descr)
    def handle_new_fixedsize(self, descr, op):
        """Rewrite a fixed-size NEW into a nursery malloc when possible,
        falling back to an external malloc call; then schedule the
        zeroing of the GC fields."""
        assert isinstance(descr, SizeDescr)
        size = descr.size
        if self.gen_malloc_nursery(size, op):
            self.gen_initialize_tid(op, descr.tid)
        else:
            self.gen_malloc_fixedsize(size, descr.tid, op)
        self.clear_gc_fields(descr, op)
    def handle_new_array(self, arraydescr, op, kind=FLAG_ARRAY):
        """Rewrite a NEW_ARRAY/NEWSTR/NEWUNICODE into the most specific
        malloc available: constant-total-size nursery malloc, varsize
        nursery malloc, Boehm malloc, or an external malloc call."""
        v_length = self.get_box_replacement(op.getarg(0))
        total_size = -1
        if isinstance(v_length, ConstInt):
            num_elem = v_length.getint()
            self.remember_known_length(op, num_elem)
            try:
                var_size = ovfcheck(arraydescr.itemsize * num_elem)
                total_size = ovfcheck(arraydescr.basesize + var_size)
            except OverflowError:
                pass    # total_size is still -1
        elif arraydescr.itemsize == 0:
            total_size = arraydescr.basesize
        elif (self.gc_ll_descr.can_use_nursery_malloc(1) and
              self.gen_malloc_nursery_varsize(arraydescr.itemsize,
                  v_length, op, arraydescr, kind=kind)):
            # note that we cannot initialize tid here, because the array
            # might end up being allocated by malloc_external or some
            # stuff that initializes GC header fields differently
            self.gen_initialize_len(op, v_length, arraydescr.lendescr)
            self.clear_varsize_gc_fields(kind, op.getdescr(), op,
                                         v_length, op.getopnum())
            return
        if (total_size >= 0 and
                self.gen_malloc_nursery(total_size, op)):
            self.gen_initialize_tid(op, arraydescr.tid)
            self.gen_initialize_len(op, v_length, arraydescr.lendescr)
        elif self.gc_ll_descr.kind == 'boehm':
            self.gen_boehm_malloc_array(arraydescr, v_length, op)
        else:
            opnum = op.getopnum()
            if opnum == rop.NEW_ARRAY or opnum == rop.NEW_ARRAY_CLEAR:
                self.gen_malloc_array(arraydescr, v_length, op)
            elif opnum == rop.NEWSTR:
                self.gen_malloc_str(v_length, op)
            elif opnum == rop.NEWUNICODE:
                self.gen_malloc_unicode(v_length, op)
            else:
                raise NotImplementedError(op.getopname())
        self.clear_varsize_gc_fields(kind, op.getdescr(), op, v_length,
                                     op.getopnum())
    def handle_clear_array_contents(self, arraydescr, v_arr, v_length):
        """Emit a ZERO_ARRAY clearing the whole contents of array 'v_arr'
        of length 'v_length' (skipped if that is constant zero)."""
        assert v_length is not None
        if isinstance(v_length, ConstInt) and v_length.getint() == 0:
            return
        # the ZERO_ARRAY operation will be optimized according to what
        # SETARRAYITEM_GC we see before the next allocation operation.
        # See emit_pending_zeros().  (This optimization is done by
        # hacking the object 'o' in-place: e.g., o.getarg(1) may be
        # replaced with another constant greater than 0.)
        assert isinstance(arraydescr, ArrayDescr)
        scale = arraydescr.itemsize
        v_length_scaled = v_length
        if not isinstance(v_length, ConstInt):
            scale, offset, v_length_scaled = \
                self._emit_mul_if_factor_offset_not_supported(v_length, scale, 0)
        v_scale = ConstInt(scale)
        # there is probably no point in doing _emit_mul_if.. for c_zero!
        # NOTE that the scale might be != 1 for e.g. v_length_scaled if it
        # is a constant; it is applied later, in emit_pending_zeros
        args = [v_arr, self.c_zero, v_length_scaled, ConstInt(scale), v_scale]
        o = ResOperation(rop.ZERO_ARRAY, args, descr=arraydescr)
        self.emit_op(o)
        if isinstance(v_length, ConstInt):
            # only constant-length ZERO_ARRAYs can be shrunk later
            self.last_zero_arrays.append(self._newops[-1])
    def gen_malloc_frame(self, frame_info):
        """Allocate a jitframe whose size is read out of 'frame_info'
        (an address constant) and return the box holding it."""
        descrs = self.gc_ll_descr.getframedescrs(self.cpu)
        if self.gc_ll_descr.kind == 'boehm':
            # Boehm: read the frame depth and allocate via NEW_ARRAY
            ofs, size, sign = unpack_fielddescr(descrs.jfi_frame_depth)
            if sign:
                size = -size
            args = [ConstInt(frame_info), ConstInt(ofs), ConstInt(size)]
            size = ResOperation(rop.GC_LOAD_I, args)
            self.emit_op(size)
            frame = ResOperation(rop.NEW_ARRAY, [size],
                                 descr=descrs.arraydescr)
            self.handle_new_array(descrs.arraydescr, frame)
            return self.get_box_replacement(frame)
        else:
            # we read size in bytes here, not the length
            ofs, size, sign = unpack_fielddescr(descrs.jfi_frame_size)
            if sign:
                size = -size
            args = [ConstInt(frame_info), ConstInt(ofs), ConstInt(size)]
            size = ResOperation(rop.GC_LOAD_I, args)
            self.emit_op(size)
            frame = self.gen_malloc_nursery_varsize_frame(size)
            self.gen_initialize_tid(frame, descrs.arraydescr.tid)
            # we need to explicitly zero all the gc fields, because
            # of the unusual malloc pattern
            length = self.emit_getfield(ConstInt(frame_info),
                                        descr=descrs.jfi_frame_depth, raw=True)
            self.emit_setfield(frame, self.c_zero,
                               descr=descrs.jf_extra_stack_depth)
            self.emit_setfield(frame, self.c_null,
                               descr=descrs.jf_savedata)
            self.emit_setfield(frame, self.c_null,
                               descr=descrs.jf_force_descr)
            self.emit_setfield(frame, self.c_null,
                               descr=descrs.jf_descr)
            self.emit_setfield(frame, self.c_null,
                               descr=descrs.jf_guard_exc)
            self.emit_setfield(frame, self.c_null,
                               descr=descrs.jf_forward)
            self.gen_initialize_len(frame, length,
                                    descrs.arraydescr.lendescr)
            return self.get_box_replacement(frame)
  570. def emit_getfield(self, ptr, descr, type='i', raw=False):
  571. ofs, size, sign = unpack_fielddescr(descr)
  572. op = self.emit_gc_load_or_indexed(None, ptr, ConstInt(0), size, 1, ofs, sign)
  573. return op
    def emit_setfield(self, ptr, value, descr):
        # Emit a GC_STORE writing 'value' into field 'descr' of 'ptr'.
        ofs, size, sign = unpack_fielddescr(descr)
        self.emit_gc_store_or_indexed(None, ptr, ConstInt(0), value,
                                      size, 1, ofs)
  578. def handle_call_assembler(self, op):
  579. descrs = self.gc_ll_descr.getframedescrs(self.cpu)
  580. loop_token = op.getdescr()
  581. assert isinstance(loop_token, history.JitCellToken)
  582. jfi = loop_token.compiled_loop_token.frame_info
  583. llfi = heaptracker.adr2int(llmemory.cast_ptr_to_adr(jfi))
  584. frame = self.gen_malloc_frame(llfi)
  585. self.emit_setfield(frame, history.ConstInt(llfi),
  586. descr=descrs.jf_frame_info)
  587. arglist = op.getarglist()
  588. index_list = loop_token.compiled_loop_token._ll_initial_locs
  589. for i, arg in enumerate(arglist):
  590. descr = self.cpu.getarraydescr_for_frame(arg.type)
  591. assert self.cpu.JITFRAME_FIXED_SIZE & 1 == 0
  592. _, itemsize, _ = self.cpu.unpack_arraydescr_size(descr)
  593. array_offset = index_list[i] # index, already measured in bytes
  594. # emit GC_STORE
  595. _, basesize, _ = unpack_arraydescr(descr)
  596. offset = basesize + array_offset
  597. args = [frame, ConstInt(offset), arg, ConstInt(itemsize)]
  598. self.emit_op(ResOperation(rop.GC_STORE, args))
  599. descr = op.getdescr()
  600. assert isinstance(descr, JitCellToken)
  601. jd = descr.outermost_jitdriver_sd
  602. args = [frame]
  603. if jd and jd.index_of_virtualizable >= 0:
  604. args = [frame, arglist[jd.index_of_virtualizable]]
  605. else:
  606. args = [frame]
  607. call_asm = ResOperation(op.getopnum(), args, descr=op.getdescr())
  608. self.replace_op_with(self.get_box_replacement(op), call_asm)
  609. self.emit_op(call_asm)
  610. # ----------
    def emitting_an_operation_that_can_collect(self):
        # must be called whenever we emit an operation that can collect:
        # forgets the previous MALLOC_NURSERY, if any; and empty the
        # set 'write_barrier_applied', so that future SETFIELDs will generate
        # a write barrier as usual.
        # it also writes down all the pending zero ptr fields
        self._op_malloc_nursery = None
        self._write_barrier_applied.clear()
        self.emit_pending_zeros()
    def write_barrier_applied(self, op):
        # True if 'op' already got a write barrier (or was freshly
        # allocated) since the last potentially-collecting operation.
        return self.get_box_replacement(op) in self._write_barrier_applied
    def remember_write_barrier(self, op):
        # Mark 'op' as not needing another write barrier for now.
        self._write_barrier_applied[self.get_box_replacement(op)] = None
    def emit_pending_zeros(self):
        """Flush the delayed zeroing work: shrink pending ZERO_ARRAY ops
        according to the setarrayitems seen since, and emit GC_STOREs for
        the pointer fields that still need zero-initialization."""
        # First, try to rewrite the existing ZERO_ARRAY operations from
        # the 'last_zero_arrays' list.  Note that these operation objects
        # are also already in 'newops', which is the point.
        for op in self.last_zero_arrays:
            assert op.getopnum() == rop.ZERO_ARRAY
            descr = op.getdescr()
            assert isinstance(descr, ArrayDescr)
            scale = descr.itemsize
            box = op.getarg(0)
            try:
                intset = self.setarrayitems_occurred(box)
            except KeyError:
                # no setarrayitem recorded on this array: just apply the
                # scale to the constant start/length arguments
                start_box = op.getarg(1)
                length_box = op.getarg(2)
                if isinstance(start_box, ConstInt):
                    start = start_box.getint()
                    op.setarg(1, ConstInt(start * scale))
                    op.setarg(3, ConstInt(1))
                if isinstance(length_box, ConstInt):
                    stop = length_box.getint()
                    scaled_len = stop * scale
                    op.setarg(2, ConstInt(scaled_len))
                    op.setarg(4, ConstInt(1))
                continue
            assert op.getarg(1).getint() == 0   # always 'start=0' initially
            # trim the leading and trailing indexes that were already
            # overwritten by setarrayitems
            start = 0
            while start in intset:
                start += 1
            op.setarg(1, ConstInt(start * scale))
            stop = op.getarg(2).getint()
            assert start <= stop
            while stop > start and (stop - 1) in intset:
                stop -= 1
            op.setarg(2, ConstInt((stop - start) * scale))
            # ^^ may be ConstInt(0); then the operation becomes a no-op
            op.setarg(3, ConstInt(1))   # set scale to 1
            op.setarg(4, ConstInt(1))   # set scale to 1
        del self.last_zero_arrays[:]
        self._setarrayitems_occurred.clear()
        #
        # Then write the NULL-pointer-writing ops that are still pending
        for v, d in self._delayed_zero_setfields.iteritems():
            v = self.get_box_replacement(v)
            for ofs in d.iterkeys():
                self.emit_gc_store_or_indexed(None, v, ConstInt(ofs), ConstInt(0),
                                              WORD, 1, 0)
        self._delayed_zero_setfields.clear()
    def _gen_call_malloc_gc(self, args, v_result, descr):
        """Generate a CALL_R/CHECK_MEMORY_ERROR with the given args;
        'v_result' is forwarded to the new CALL_R."""
        self.emitting_an_operation_that_can_collect()
        op = ResOperation(rop.CALL_R, args, descr=descr)
        self.replace_op_with(v_result, op)
        self.emit_op(op)
        self.emit_op(ResOperation(rop.CHECK_MEMORY_ERROR, [op]))
        # In general, don't add v_result to write_barrier_applied:
        # v_result might be a large young array.
    def gen_malloc_fixedsize(self, size, typeid, v_result):
        """Generate a CALL_R(malloc_fixedsize_fn, ...).
        Used on Boehm, and on the framework GC for large fixed-size
        mallocs.  (For all I know this latter case never occurs in
        practice, but better safe than sorry.)
        """
        if self.gc_ll_descr.fielddescr_tid is not None:      # framework GC
            assert (size & (WORD-1)) == 0, "size not aligned?"
            addr = self.gc_ll_descr.get_malloc_fn_addr('malloc_big_fixedsize')
            args = [ConstInt(addr), ConstInt(size), ConstInt(typeid)]
            descr = self.gc_ll_descr.malloc_big_fixedsize_descr
        else:                                                # Boehm
            addr = self.gc_ll_descr.get_malloc_fn_addr('malloc_fixedsize')
            args = [ConstInt(addr), ConstInt(size)]
            descr = self.gc_ll_descr.malloc_fixedsize_descr
        self._gen_call_malloc_gc(args, v_result, descr)
        # mark 'v_result' as freshly malloced, so not needing a write barrier
        # (this is always true because it's a fixed-size object)
        self.remember_write_barrier(v_result)
  700. def gen_boehm_malloc_array(self, arraydescr, v_num_elem, v_result):
  701. """Generate a CALL_R(malloc_array_fn, ...) for Boehm."""
  702. addr = self.gc_ll_descr.get_malloc_fn_addr('malloc_array')
  703. self._gen_call_malloc_gc([ConstInt(addr),
  704. ConstInt(arraydescr.basesize),
  705. v_num_elem,
  706. ConstInt(arraydescr.itemsize),
  707. ConstInt(arraydescr.lendescr.offset)],
  708. v_result,
  709. self.gc_ll_descr.malloc_array_descr)
    def gen_malloc_array(self, arraydescr, v_num_elem, v_result):
        """Generate a CALL_R(malloc_array_fn, ...) going either
        to the standard or the nonstandard version of the function."""
        #
        if (arraydescr.basesize == self.gc_ll_descr.standard_array_basesize
                and arraydescr.lendescr.offset ==
                    self.gc_ll_descr.standard_array_length_ofs):
            # this is a standard-looking array, common case
            addr = self.gc_ll_descr.get_malloc_fn_addr('malloc_array')
            args = [ConstInt(addr),
                    ConstInt(arraydescr.itemsize),
                    ConstInt(arraydescr.tid),
                    v_num_elem]
            calldescr = self.gc_ll_descr.malloc_array_descr
        else:
            # rare case, so don't care too much about the number of arguments
            addr = self.gc_ll_descr.get_malloc_fn_addr(
                'malloc_array_nonstandard')
            args = [ConstInt(addr),
                    ConstInt(arraydescr.basesize),
                    ConstInt(arraydescr.itemsize),
                    ConstInt(arraydescr.lendescr.offset),
                    ConstInt(arraydescr.tid),
                    v_num_elem]
            calldescr = self.gc_ll_descr.malloc_array_nonstandard_descr
        self._gen_call_malloc_gc(args, v_result, calldescr)
  736. def gen_malloc_str(self, v_num_elem, v_result):
  737. """Generate a CALL_R(malloc_str_fn, ...)."""
  738. addr = self.gc_ll_descr.get_malloc_fn_addr('malloc_str')
  739. self._gen_call_malloc_gc([ConstInt(addr), v_num_elem], v_result,
  740. self.gc_ll_descr.malloc_str_descr)
  741. def gen_malloc_unicode(self, v_num_elem, v_result):
  742. """Generate a CALL_R(malloc_unicode_fn, ...)."""
  743. addr = self.gc_ll_descr.get_malloc_fn_addr('malloc_unicode')
  744. self._gen_call_malloc_gc([ConstInt(addr), v_num_elem], v_result,
  745. self.gc_ll_descr.malloc_unicode_descr)
  746. def gen_malloc_nursery_varsize(self, itemsize, v_length, v_result,
  747. arraydescr, kind=FLAG_ARRAY):
  748. """ itemsize is an int, v_length and v_result are boxes
  749. """
  750. gc_descr = self.gc_ll_descr
  751. if (kind == FLAG_ARRAY and
  752. (arraydescr.basesize != gc_descr.standard_array_basesize or
  753. arraydescr.lendescr.offset != gc_descr.standard_array_length_ofs)):
  754. return False
  755. self.emitting_an_operation_that_can_collect()
  756. #scale = itemsize
  757. #if scale not in self.cpu.load_supported_factors:
  758. # scale, offset, v_length = \
  759. # self._emit_mul_if_factor_offset_not_supported(v_length, scale, 0)
  760. op = ResOperation(rop.CALL_MALLOC_NURSERY_VARSIZE,
  761. [ConstInt(kind), ConstInt(itemsize), v_length],
  762. descr=arraydescr)
  763. self.replace_op_with(v_result, op)
  764. self.emit_op(op)
  765. # don't record v_result into self.write_barrier_applied:
  766. # it can be a large, young array with card marking, and then
  767. # the GC relies on the write barrier being called
  768. return True
  769. def gen_malloc_nursery_varsize_frame(self, sizebox):
  770. """ Generate CALL_MALLOC_NURSERY_VARSIZE_FRAME
  771. """
  772. self.emitting_an_operation_that_can_collect()
  773. op = ResOperation(rop.CALL_MALLOC_NURSERY_VARSIZE_FRAME,
  774. [sizebox])
  775. self.emit_op(op)
  776. self.remember_write_barrier(op)
  777. return op
  778. def gen_malloc_nursery(self, size, v_result):
  779. """Try to generate or update a CALL_MALLOC_NURSERY.
  780. If that succeeds, return True; you still need to write the tid.
  781. If that fails, return False.
  782. """
  783. size = self.round_up_for_allocation(size)
  784. if not self.gc_ll_descr.can_use_nursery_malloc(size):
  785. return False
  786. #
  787. op = None
  788. if self._op_malloc_nursery is not None:
  789. # already a MALLOC_NURSERY: increment its total size
  790. total_size = self._op_malloc_nursery.getarg(0).getint()
  791. total_size += size
  792. if self.gc_ll_descr.can_use_nursery_malloc(total_size):
  793. # if the total size is still reasonable, merge it
  794. self._op_malloc_nursery.setarg(0, ConstInt(total_size))
  795. op = ResOperation(rop.NURSERY_PTR_INCREMENT,
  796. [self._v_last_malloced_nursery,
  797. ConstInt(self._previous_size)])
  798. self.replace_op_with(v_result, op)
  799. if op is None:
  800. # if we failed to merge with a previous MALLOC_NURSERY, emit one
  801. self.emitting_an_operation_that_can_collect()
  802. op = ResOperation(rop.CALL_MALLOC_NURSERY,
  803. [ConstInt(size)])
  804. self.replace_op_with(v_result, op)
  805. self._op_malloc_nursery = op
  806. #
  807. self.emit_op(op)
  808. self._previous_size = size
  809. self._v_last_malloced_nursery = op
  810. self.remember_write_barrier(op)
  811. return True
  812. def gen_initialize_tid(self, v_newgcobj, tid):
  813. if self.gc_ll_descr.fielddescr_tid is not None:
  814. # produce a SETFIELD to initialize the GC header
  815. self.emit_setfield(v_newgcobj, ConstInt(tid),
  816. descr=self.gc_ll_descr.fielddescr_tid)
  817. def gen_initialize_len(self, v_newgcobj, v_length, arraylen_descr):
  818. # produce a SETFIELD to initialize the array length
  819. self.emit_setfield(v_newgcobj, v_length, descr=arraylen_descr)
  820. # ----------
  821. def handle_write_barrier_setfield(self, op):
  822. val = op.getarg(0)
  823. if not self.write_barrier_applied(val):
  824. v = op.getarg(1)
  825. if (v.type == 'r' and (not isinstance(v, ConstPtr) or
  826. rgc.needs_write_barrier(v.value))):
  827. self.gen_write_barrier(val)
  828. #op = op.copy_and_change(rop.SETFIELD_RAW)
  829. self.emit_op(op)
  830. def handle_write_barrier_setarrayitem(self, op):
  831. val = op.getarg(0)
  832. if not self.write_barrier_applied(val):
  833. v = op.getarg(2)
  834. if (v.type == 'r' and (not isinstance(v, ConstPtr) or
  835. rgc.needs_write_barrier(v.value))):
  836. self.gen_write_barrier_array(val, op.getarg(1))
  837. #op = op.copy_and_change(rop.SET{ARRAYITEM,INTERIORFIELD}_RAW)
  838. self.emit_op(op)
  839. handle_write_barrier_setinteriorfield = handle_write_barrier_setarrayitem
  840. def gen_write_barrier(self, v_base):
  841. write_barrier_descr = self.gc_ll_descr.write_barrier_descr
  842. args = [v_base]
  843. self.emit_op(ResOperation(rop.COND_CALL_GC_WB, args,
  844. descr=write_barrier_descr))
  845. self.remember_write_barrier(v_base)
  846. def gen_write_barrier_array(self, v_base, v_index):
  847. write_barrier_descr = self.gc_ll_descr.write_barrier_descr
  848. if write_barrier_descr.has_write_barrier_from_array(self.cpu):
  849. # If we know statically the length of 'v', and it is not too
  850. # big, then produce a regular write_barrier. If it's unknown or
  851. # too big, produce instead a write_barrier_from_array.
  852. LARGE = 130
  853. length = self.known_length(v_base, LARGE)
  854. if length >= LARGE:
  855. # unknown or too big: produce a write_barrier_from_array
  856. args = [v_base, v_index]
  857. self.emit_op(
  858. ResOperation(rop.COND_CALL_GC_WB_ARRAY, args,
  859. descr=write_barrier_descr))
  860. # a WB_ARRAY is not enough to prevent any future write
  861. # barriers, so don't add to 'write_barrier_applied'!
  862. return
  863. # fall-back case: produce a write_barrier
  864. self.gen_write_barrier(v_base)
  865. def round_up_for_allocation(self, size):
  866. if not self.gc_ll_descr.round_up:
  867. return size
  868. if self.gc_ll_descr.translate_support_code:
  869. from rpython.rtyper.lltypesystem import llarena
  870. return llarena.round_up_for_allocation(
  871. size, self.gc_ll_descr.minimal_size_in_nursery)
  872. else:
  873. # non-translated: do it manually
  874. # assume that "self.gc_ll_descr.minimal_size_in_nursery" is 2 WORDs
  875. size = max(size, 2 * WORD)
  876. return (size + WORD-1) & ~(WORD-1) # round up
  877. def remove_bridge_exception(self, operations):
  878. """Check a common case: 'save_exception' immediately followed by
  879. 'restore_exception' at the start of the bridge."""
  880. # XXX should check if the boxes are used later; but we just assume
  881. # they aren't for now
  882. start = 0
  883. if operations[0].getopnum() == rop.INCREMENT_DEBUG_COUNTER:
  884. start = 1
  885. if len(operations) >= start + 3:
  886. if (operations[start+0].getopnum() == rop.SAVE_EXC_CLASS and
  887. operations[start+1].getopnum() == rop.SAVE_EXCEPTION and
  888. operations[start+2].getopnum() == rop.RESTORE_EXCEPTION):
  889. return operations[:start] + operations[start+3:]
  890. return operations
  891. def emit_label(self):
  892. self.emitting_an_operation_that_can_collect()
  893. self._known_lengths.clear()
  894. self.gcrefs_recently_loaded = None
  895. def _gcref_index(self, gcref):
  896. if self.gcrefs_map is None:
  897. self.gcrefs_map = r_dict(rd_eq, rd_hash)
  898. try:
  899. return self.gcrefs_map[gcref]
  900. except KeyError:
  901. pass
  902. index = len(self.gcrefs_output_list)
  903. self.gcrefs_map[gcref] = index
  904. self.gcrefs_output_list.append(gcref)
  905. return index
  906. def remove_constptr(self, c):
  907. """Remove all ConstPtrs, and replace them with load_from_gc_table.
  908. """
  909. # Note: currently, gcrefs_recently_loaded is only cleared in
  910. # LABELs. We'd like something better, like "don't spill it",
  911. # but that's the wrong level...
  912. index = self._gcref_index(c.value)
  913. if self.gcrefs_recently_loaded is None:
  914. self.gcrefs_recently_loaded = {}
  915. try:
  916. load_op = self.gcrefs_recently_loaded[index]
  917. except KeyError:
  918. load_op = ResOperation(rop.LOAD_FROM_GC_TABLE, [ConstInt(index)])
  919. self._newops.append(load_op)
  920. self.gcrefs_recently_loaded[index] = load_op
  921. return load_op