
/rpython/jit/backend/llsupport/assembler.py

https://bitbucket.org/pypy/pypy/
Python | 506 lines | 385 code | 52 blank | 69 comment | 82 complexity | 3b08f96a04c622787ab623a9009e7ea8 MD5
Possible License(s): AGPL-3.0, BSD-3-Clause, Apache-2.0
from rpython.jit.backend.llsupport import jitframe
from rpython.jit.backend.llsupport.memcpy import memcpy_fn, memset_fn
from rpython.jit.backend.llsupport.symbolic import WORD
from rpython.jit.backend.llsupport.codemap import CodemapBuilder
from rpython.jit.metainterp.history import (INT, REF, FLOAT, JitCellToken,
    ConstInt, AbstractFailDescr, VECTOR)
from rpython.jit.metainterp.resoperation import ResOperation, rop
from rpython.rlib import rgc
from rpython.rlib.debug import (debug_start, debug_stop, have_debug_prints_for,
                                debug_print)
from rpython.rlib.rarithmetic import r_uint
from rpython.rlib.objectmodel import specialize, compute_unique_id
from rpython.rtyper.annlowlevel import cast_instance_to_gcref, llhelper
from rpython.rtyper.lltypesystem import rffi, lltype
from rpython.rlib.rjitlog import rjitlog as jl


DEBUG_COUNTER = lltype.Struct('DEBUG_COUNTER',
    # 'b'ridge, 'l'abel or # 'e'ntry point
    ('i', lltype.Signed),      # first field, at offset 0
    ('type', lltype.Char),
    ('number', lltype.Signed)
)
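
# Editorial sketch (not part of the original file): DEBUG_COUNTER is a raw,
# GC-unmanaged struct; _register_counter() further down allocates one per
# entry point, bridge or label, and the generated machine code increments its
# 'i' field (the first field, at offset 0) directly.  A minimal illustration
# of that allocation pattern, assuming a 'b'ridge counter:
def _example_new_debug_counter(number):
    struct = lltype.malloc(DEBUG_COUNTER, flavor='raw', track_allocation=False)
    struct.i = 0            # execution count, bumped by the emitted code
    struct.type = 'b'       # 'b'ridge, 'l'abel or 'e'ntry point
    struct.number = number
    return struct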


class GuardToken(object):
    def __init__(self, cpu, gcmap, faildescr, failargs, fail_locs,
                 guard_opnum, frame_depth, faildescrindex):
        assert isinstance(faildescr, AbstractFailDescr)
        self.cpu = cpu
        self.faildescr = faildescr
        self.faildescrindex = faildescrindex
        self.failargs = failargs
        self.fail_locs = fail_locs
        self.gcmap = self.compute_gcmap(gcmap, failargs,
                                        fail_locs, frame_depth)
        self.guard_opnum = guard_opnum

    def guard_not_invalidated(self):
        return self.guard_opnum == rop.GUARD_NOT_INVALIDATED

    def must_save_exception(self):
        guard_opnum = self.guard_opnum
        return (guard_opnum == rop.GUARD_EXCEPTION or
                guard_opnum == rop.GUARD_NO_EXCEPTION or
                guard_opnum == rop.GUARD_NOT_FORCED)

    def compute_gcmap(self, gcmap, failargs, fail_locs, frame_depth):
        # note that regalloc has a very similar compute, but
        # one that does iteration over all bindings, so slightly different,
        # eh
        input_i = 0
        for i in range(len(failargs)):
            arg = failargs[i]
            if arg is None:
                continue
            loc = fail_locs[input_i]
            input_i += 1
            if arg.type == REF:
                loc = fail_locs[i]
                if loc.is_core_reg():
                    val = self.cpu.all_reg_indexes[loc.value]
                else:
                    val = loc.get_position() + self.cpu.JITFRAME_FIXED_SIZE
                gcmap[val // WORD // 8] |= r_uint(1) << (val % (WORD * 8))
        return gcmap
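

# Editorial sketch (not part of the original file): the bitmap update at the
# end of compute_gcmap() above, written out on its own.  WORD is the machine
# word size in bytes, so each entry of 'gcmap' covers WORD * 8 frame slots;
# setting bit 'val' marks frame slot 'val' as holding a GC reference.
def _example_mark_gc_slot(gcmap, val):
    # word index: val // (WORD * 8), spelled val // WORD // 8 above;
    # bit index inside that word: val % (WORD * 8)
    gcmap[val // WORD // 8] |= r_uint(1) << (val % (WORD * 8))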


class BaseAssembler(object):
    """ Base class for Assembler generator in real backends
    """

    def __init__(self, cpu, translate_support_code=False):
        self.cpu = cpu
        self.memcpy_addr = 0
        self.memset_addr = 0
        self.rtyper = cpu.rtyper
        # do not rely on this attribute if you test for jitlog
        self._debug = False
        self.loop_run_counters = []

    def stitch_bridge(self, faildescr, target):
        raise NotImplementedError

    def setup_once(self):
        # the address of the function called by 'new'
        gc_ll_descr = self.cpu.gc_ll_descr
        gc_ll_descr.initialize()
        if hasattr(gc_ll_descr, 'minimal_size_in_nursery'):
            self.gc_minimal_size_in_nursery = gc_ll_descr.minimal_size_in_nursery
        else:
            self.gc_minimal_size_in_nursery = 0
        if hasattr(gc_ll_descr, 'gcheaderbuilder'):
            self.gc_size_of_header = gc_ll_descr.gcheaderbuilder.size_gc_header
        else:
            self.gc_size_of_header = WORD     # for tests
        self.memcpy_addr = rffi.cast(lltype.Signed, memcpy_fn)
        self.memset_addr = rffi.cast(lltype.Signed, memset_fn)
        self._build_failure_recovery(False, withfloats=False)
        self._build_failure_recovery(True, withfloats=False)
        self._build_wb_slowpath(False)
        self._build_wb_slowpath(True)
        self._build_wb_slowpath(False, for_frame=True)
        # only one of those
        self.build_frame_realloc_slowpath()
        if self.cpu.supports_floats:
            self._build_failure_recovery(False, withfloats=True)
            self._build_failure_recovery(True, withfloats=True)
            self._build_wb_slowpath(False, withfloats=True)
            self._build_wb_slowpath(True, withfloats=True)
        self._build_propagate_exception_path()
        if gc_ll_descr.get_malloc_slowpath_addr is not None:
            # generate a few slowpaths for various cases
            self.malloc_slowpath = self._build_malloc_slowpath(kind='fixed')
            self.malloc_slowpath_varsize = self._build_malloc_slowpath(
                kind='var')
        if hasattr(gc_ll_descr, 'malloc_str'):
            self.malloc_slowpath_str = self._build_malloc_slowpath(kind='str')
        else:
            self.malloc_slowpath_str = None
        if hasattr(gc_ll_descr, 'malloc_unicode'):
            self.malloc_slowpath_unicode = self._build_malloc_slowpath(
                kind='unicode')
        else:
            self.malloc_slowpath_unicode = None
        lst = [0, 0, 0, 0]
        lst[0] = self._build_cond_call_slowpath(False, False)
        lst[1] = self._build_cond_call_slowpath(False, True)
        if self.cpu.supports_floats:
            lst[2] = self._build_cond_call_slowpath(True, False)
            lst[3] = self._build_cond_call_slowpath(True, True)
        self.cond_call_slowpath = lst

        self._build_stack_check_slowpath()
        self._build_release_gil(gc_ll_descr.gcrootmap)
        # do not rely on the attribute _debug for jitlog
        if not self._debug:
            # if self._debug is already set it means that someone called
            # set_debug by hand before initializing the assembler.  Leave it
            # as it is.
            should_debug = have_debug_prints_for('jit-backend-counts')
            self.set_debug(should_debug)
        # when finishing, we only have one value at [0], the rest dies
        self.gcmap_for_finish = lltype.malloc(jitframe.GCMAP, 1,
                                              flavor='raw',
                                              track_allocation=False)
        self.gcmap_for_finish[0] = r_uint(1)
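
    # Editorial note (not in the original file): 'gcmap_for_finish' uses the
    # same bit encoding as GuardToken.compute_gcmap() above -- r_uint(1) has
    # only bit 0 set, so at a FINISH only frame slot 0 is marked as holding a
    # live GC reference, matching the comment "the rest dies".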

    def setup(self, looptoken):
        if self.cpu.HAS_CODEMAP:
            self.codemap_builder = CodemapBuilder()
        self._finish_gcmap = lltype.nullptr(jitframe.GCMAP)

    def setup_gcrefs_list(self, allgcrefs):
        self._allgcrefs = allgcrefs
        self._allgcrefs_faildescr_next = 0

    def teardown_gcrefs_list(self):
        self._allgcrefs = None

    def get_gcref_from_faildescr(self, descr):
        """This assumes that it is called in order for all faildescrs."""
        search = cast_instance_to_gcref(descr)
        while not _safe_eq(
                self._allgcrefs[self._allgcrefs_faildescr_next], search):
            self._allgcrefs_faildescr_next += 1
            assert self._allgcrefs_faildescr_next < len(self._allgcrefs)
        return self._allgcrefs_faildescr_next
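
    # Editorial usage sketch (not in the original file), assuming faildescrs
    # are looked up in the same order in which they occur in 'allgcrefs':
    #
    #     self.setup_gcrefs_list(allgcrefs)
    #     index = self.get_gcref_from_faildescr(faildescr)   # once per guard,
    #     ...                                                # in trace order
    #     self.teardown_gcrefs_list()
    #
    # get_gcref_from_faildescr() only ever scans forward, which is why its
    # docstring insists on the in-order call discipline.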

    def get_asmmemmgr_blocks(self, looptoken):
        clt = looptoken.compiled_loop_token
        if clt.asmmemmgr_blocks is None:
            clt.asmmemmgr_blocks = []
        return clt.asmmemmgr_blocks

    def get_asmmemmgr_gcreftracers(self, looptoken):
        clt = looptoken.compiled_loop_token
        if clt.asmmemmgr_gcreftracers is None:
            clt.asmmemmgr_gcreftracers = []
        return clt.asmmemmgr_gcreftracers

    def set_debug(self, v):
        r = self._debug
        self._debug = v
        return r

    def rebuild_faillocs_from_descr(self, descr, inputargs):
        locs = []
        GPR_REGS = len(self.cpu.gen_regs)
        XMM_REGS = len(self.cpu.float_regs)
        input_i = 0
        if self.cpu.IS_64_BIT:
            coeff = 1
        else:
            coeff = 2
        for pos in descr.rd_locs:
            pos = rffi.cast(lltype.Signed, pos)
            if pos == 0xFFFF:
                continue
            elif pos < GPR_REGS:
                locs.append(self.cpu.gen_regs[pos])
            elif pos < GPR_REGS + XMM_REGS * coeff:
                pos = (pos - GPR_REGS) // coeff
                locs.append(self.cpu.float_regs[pos])
            else:
                i = pos - self.cpu.JITFRAME_FIXED_SIZE
                assert i >= 0
                tp = inputargs[input_i].type
                locs.append(self.new_stack_loc(i, tp))
            input_i += 1
        return locs
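
    # Editorial note (not in the original file): rebuild_faillocs_from_descr()
    # decodes the compact rd_locs encoding produced by store_info_on_descr()
    # below.  For each USHORT position 'pos':
    #
    #     pos == 0xFFFF                           -> hole (argument is None)
    #     pos <  len(gen_regs)                    -> general-purpose register
    #     pos <  len(gen_regs) + n_floats * coeff -> float register
    #                                                (coeff is 2 on 32-bit)
    #     otherwise                               -> frame slot at
    #                                                pos - JITFRAME_FIXED_SIZE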

    _previous_rd_locs = []

    def store_info_on_descr(self, startspos, guardtok):
        withfloats = False
        for box in guardtok.failargs:
            if box is not None and \
                    (box.type == FLOAT or box.type == VECTOR):
                withfloats = True
                break
        exc = guardtok.must_save_exception()
        target = self.failure_recovery_code[exc + 2 * withfloats]
        faildescrindex = guardtok.faildescrindex
        base_ofs = self.cpu.get_baseofs_of_frame_field()
        #
        # in practice, about 2/3rd of 'positions' lists that we build are
        # exactly the same as the previous one, so share the lists to
        # conserve memory
        if len(self._previous_rd_locs) == len(guardtok.fail_locs):
            positions = self._previous_rd_locs     # tentatively
            shared = True
        else:
            positions = [rffi.cast(rffi.USHORT, 0)] * len(guardtok.fail_locs)
            shared = False
        #
        for i, loc in enumerate(guardtok.fail_locs):
            if loc is None:
                position = 0xFFFF
            elif loc.is_stack():
                assert (loc.value & (WORD - 1)) == 0, \
                    "store_info_on_descr: misaligned"
                position = (loc.value - base_ofs) // WORD
                assert 0 < position < 0xFFFF, "store_info_on_descr: overflow!"
            else:
                assert loc is not self.cpu.frame_reg    # for now
                if self.cpu.IS_64_BIT:
                    coeff = 1
                else:
                    coeff = 2
                if loc.is_float():
                    position = len(self.cpu.gen_regs) + loc.value * coeff
                else:
                    position = self.cpu.all_reg_indexes[loc.value]

            if shared:
                if (rffi.cast(lltype.Signed, self._previous_rd_locs[i]) ==
                        rffi.cast(lltype.Signed, position)):
                    continue    # still equal
                positions = positions[:]
                shared = False
            positions[i] = rffi.cast(rffi.USHORT, position)
        self._previous_rd_locs = positions
        # write down the positions of locs
        guardtok.faildescr.rd_locs = positions
        return faildescrindex, target
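
    # Editorial note (not in the original file): 'target' above is picked by
    # exc + 2 * withfloats, i.e. out of a four-entry table indexed by
    # (must the exception be saved?, are float registers involved?).  This
    # presumably mirrors the four _build_failure_recovery(exc, withfloats)
    # calls made in setup_once().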

    def enter_portal_frame(self, op):
        if self.cpu.HAS_CODEMAP:
            self.codemap_builder.enter_portal_frame(op.getarg(0).getint(),
                                                    op.getarg(1).getint(),
                                                    self.mc.get_relative_pos())

    def leave_portal_frame(self, op):
        if self.cpu.HAS_CODEMAP:
            self.codemap_builder.leave_portal_frame(op.getarg(0).getint(),
                                                    self.mc.get_relative_pos())

    def call_assembler(self, op, argloc, vloc, result_loc, tmploc):
        """
        * argloc: location of the frame argument that we're passing to
                  the called assembler (this is the first return value
                  of locs_for_call_assembler())

        * vloc: location of the virtualizable (not in a register;
                this is the optional second return value of
                locs_for_call_assembler(), or imm(0) if none returned)

        * result_loc: location of op.result (which is not to be
                      confused with the next one)

        * tmploc: location where the actual call to the other piece
                  of assembler will return its jitframe result
                  (which is always a REF), before the helper may be
                  called
        """
        descr = op.getdescr()
        assert isinstance(descr, JitCellToken)
        #
        # Write a call to the target assembler
        # we need to allocate the frame, keep in sync with runner's
        # execute_token
        jd = descr.outermost_jitdriver_sd
        self._call_assembler_emit_call(self.imm(descr._ll_function_addr),
                                       argloc, tmploc)
        if op.type == 'v':
            assert result_loc is None
            value = self.cpu.done_with_this_frame_descr_void
        else:
            kind = op.type
            if kind == INT:
                assert result_loc is tmploc
                value = self.cpu.done_with_this_frame_descr_int
            elif kind == REF:
                assert result_loc is tmploc
                value = self.cpu.done_with_this_frame_descr_ref
            elif kind == FLOAT:
                value = self.cpu.done_with_this_frame_descr_float
            else:
                raise AssertionError(kind)

        gcref = cast_instance_to_gcref(value)
        if gcref:
            rgc._make_sure_does_not_move(gcref)    # but should be prebuilt
        value = rffi.cast(lltype.Signed, gcref)
        je_location = self._call_assembler_check_descr(value, tmploc)
        #
        # Path A: use assembler_helper_adr
        assert jd is not None
        asm_helper_adr = self.cpu.cast_adr_to_int(jd.assembler_helper_adr)
        self._call_assembler_emit_helper_call(self.imm(asm_helper_adr),
                                              [tmploc, vloc], result_loc)
        jmp_location = self._call_assembler_patch_je(result_loc, je_location)
        # Path B: fast path.  Must load the return value
        #
        self._call_assembler_load_result(op, result_loc)
        #
        # Here we join Path A and Path B again
        self._call_assembler_patch_jmp(jmp_location)
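
    # Editorial control-flow sketch for call_assembler() (not in the original
    # file):
    #
    #     call the target assembler           -> jitframe result in tmploc
    #     compare its descr against done_with_this_frame_descr_*  (je_location)
    #       not equal: Path A -- call jd.assembler_helper_adr(frame, vloc)
    #       equal:     Path B -- load the return value straight from the frame
    #     both paths join again at jmp_location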

    def get_loop_run_counters(self, index):
        return self.loop_run_counters[index]

    @specialize.argtype(1)
    def _inject_debugging_code(self, looptoken, operations, tp, number):
        if self._debug or jl.jitlog_enabled():
            newoperations = []
            self._append_debugging_code(newoperations, tp, number, None)
            for op in operations:
                newoperations.append(op)
                if op.getopnum() == rop.LABEL:
                    self._append_debugging_code(newoperations, 'l', number,
                                                op.getdescr())
            operations = newoperations
        return operations

    def _append_debugging_code(self, operations, tp, number, token):
        counter = self._register_counter(tp, number, token)
        c_adr = ConstInt(rffi.cast(lltype.Signed, counter))
        operations.append(
            ResOperation(rop.INCREMENT_DEBUG_COUNTER, [c_adr]))

    def _register_counter(self, tp, number, token):
        # XXX the numbers here are ALMOST unique, but not quite, use a counter
        # or something
        struct = lltype.malloc(DEBUG_COUNTER, flavor='raw',
                               track_allocation=False)
        struct.i = 0
        struct.type = tp
        if tp == 'b' or tp == 'e':
            struct.number = number
        else:
            assert token
            struct.number = compute_unique_id(token)
        # YYY very minor leak -- we need the counters to stay alive
        # forever, just because we want to report them at the end
        # of the process
        self.loop_run_counters.append(struct)
        return struct

    def finish_once(self):
        if self._debug:
            # TODO remove the old logging system when jitlog is complete
            debug_start('jit-backend-counts')
            length = len(self.loop_run_counters)
            for i in range(length):
                struct = self.loop_run_counters[i]
                if struct.type == 'l':
                    prefix = 'TargetToken(%d)' % struct.number
                else:
                    num = struct.number
                    if num == -1:
                        num = '-1'
                    else:
                        num = str(r_uint(num))
                    if struct.type == 'b':
                        prefix = 'bridge %s' % num
                    else:
                        prefix = 'entry %s' % num
                debug_print(prefix + ':' + str(struct.i))
            debug_stop('jit-backend-counts')
        self.flush_trace_counters()

    def flush_trace_counters(self):
        # this is always called; the jitlog knows if it is enabled
        length = len(self.loop_run_counters)
        for i in range(length):
            struct = self.loop_run_counters[i]
            # only log if it has been executed
            if struct.i > 0:
                jl._log_jit_counter(struct)
                # reset the counter; a later flush would otherwise
                # add up the counters!
                struct.i = 0
        # here would be the point to free some counters
        # (see the YYY comment above!) but first we should run this every once
        # in a while, not just when jitlog_disable is called

    @staticmethod
    @rgc.no_collect
    def _reacquire_gil_asmgcc(css, old_rpy_fastgil):
        # Before doing an external call, 'rpy_fastgil' is initialized to
        # be equal to css.  This function is called if we find out after
        # the call that it is no longer equal to css.  See description
        # in translator/c/src/thread_pthread.c.

        # XXX some duplicated logic here, but note that rgil.acquire()
        # does more than just RPyGilAcquire()
        if old_rpy_fastgil == 0:
            # this case occurs if some other thread stole the GIL but
            # released it again.  What occurred here is that we changed
            # 'rpy_fastgil' from 0 to 1, thus successfully reacquiring the
            # GIL.
            pass
        elif old_rpy_fastgil == 1:
            # 'rpy_fastgil' was (and still is) locked by someone else.
            # We need to wait for the regular mutex.
            from rpython.rlib import rgil
            rgil.acquire()
        else:
            # we stole the GIL from a different thread that is also
            # currently in an external call from the jit.  Attach
            # the 'old_rpy_fastgil' into the chained list.
            from rpython.memory.gctransform import asmgcroot
            oth = rffi.cast(asmgcroot.ASM_FRAMEDATA_HEAD_PTR, old_rpy_fastgil)
            next = asmgcroot.gcrootanchor.next
            oth.next = next
            oth.prev = asmgcroot.gcrootanchor
            asmgcroot.gcrootanchor.next = oth
            next.prev = oth

        # similar to trackgcroot.py:pypy_asm_stackwalk, second part:
        # detach the 'css' from the chained list
        from rpython.memory.gctransform import asmgcroot
        old = rffi.cast(asmgcroot.ASM_FRAMEDATA_HEAD_PTR, css)
        prev = old.prev
        next = old.next
        prev.next = next
        next.prev = prev

    @staticmethod
    @rgc.no_collect
    def _reacquire_gil_shadowstack():
        # Simplified version of _reacquire_gil_asmgcc(): in shadowstack mode,
        # 'rpy_fastgil' contains only zero or non-zero, and this is only
        # called when the old value stored in 'rpy_fastgil' was non-zero
        # (i.e. still locked, must wait with the regular mutex)
        from rpython.rlib import rgil
        rgil.acquire()

    _REACQGIL0_FUNC = lltype.Ptr(lltype.FuncType([], lltype.Void))
    _REACQGIL2_FUNC = lltype.Ptr(lltype.FuncType([rffi.CCHARP, lltype.Signed],
                                                 lltype.Void))

    def _build_release_gil(self, gcrootmap):
        if gcrootmap is None or gcrootmap.is_shadow_stack:
            reacqgil_func = llhelper(self._REACQGIL0_FUNC,
                                     self._reacquire_gil_shadowstack)
            self.reacqgil_addr = self.cpu.cast_ptr_to_int(reacqgil_func)
        else:
            reacqgil_func = llhelper(self._REACQGIL2_FUNC,
                                     self._reacquire_gil_asmgcc)
            self.reacqgil_addr = self.cpu.cast_ptr_to_int(reacqgil_func)

    def _is_asmgcc(self):
        gcrootmap = self.cpu.gc_ll_descr.gcrootmap
        return bool(gcrootmap) and not gcrootmap.is_shadow_stack


def debug_bridge(descr_number, rawstart, codeendpos):
    debug_start("jit-backend-addr")
    debug_print("bridge out of Guard 0x%x has address 0x%x to 0x%x" %
                (r_uint(descr_number), r_uint(rawstart),
                 r_uint(rawstart + codeendpos)))
    debug_stop("jit-backend-addr")


def _safe_eq(x, y):
    try:
        return x == y
    except AttributeError:    # minor mess
        return False