
/rpython/jit/backend/x86/regalloc.py

https://bitbucket.org/pypy/pypy/
Python | 1517 lines | 1201 code | 154 blank | 162 comment | 200 complexity | 63e5d74d22b08bc3e565187de51680ab MD5
Possible License(s): AGPL-3.0, BSD-3-Clause, Apache-2.0

Large files are truncated; view the full file at the repository.

""" Register allocation scheme.
"""

import os, sys

from rpython.jit.backend.llsupport import symbolic
from rpython.jit.backend.llsupport.descr import CallDescr, unpack_arraydescr
from rpython.jit.backend.llsupport.gcmap import allocate_gcmap
from rpython.jit.backend.llsupport.regalloc import (FrameManager, BaseRegalloc,
    RegisterManager, TempVar, compute_vars_longevity, is_comparison_or_ovf_op,
    valid_addressing_size, get_scale)
from rpython.jit.backend.x86 import rx86
from rpython.jit.backend.x86.arch import (WORD, JITFRAME_FIXED_SIZE, IS_X86_32,
    IS_X86_64, DEFAULT_FRAME_BYTES)
from rpython.jit.backend.x86.jump import remap_frame_layout_mixed
from rpython.jit.backend.x86.regloc import (FrameLoc, RegLoc, ConstFloatLoc,
    FloatImmedLoc, ImmedLoc, imm, imm0, imm1, ecx, eax, edx, ebx, esi, edi,
    ebp, r8, r9, r10, r11, r12, r13, r14, r15, xmm0, xmm1, xmm2, xmm3, xmm4,
    xmm5, xmm6, xmm7, xmm8, xmm9, xmm10, xmm11, xmm12, xmm13, xmm14,
    X86_64_SCRATCH_REG, X86_64_XMM_SCRATCH_REG)
from rpython.jit.backend.x86.regloc import AddressLoc
from rpython.jit.backend.x86.vector_ext import VectorRegallocMixin
from rpython.jit.codewriter import longlong
from rpython.jit.codewriter.effectinfo import EffectInfo
from rpython.jit.metainterp.history import (Const, ConstInt, ConstPtr,
    ConstFloat, INT, REF, FLOAT, VECTOR, TargetToken, AbstractFailDescr)
from rpython.jit.metainterp.resoperation import rop, ResOperation
from rpython.jit.metainterp.resume import AccumInfo
from rpython.rlib import rgc
from rpython.rlib.objectmodel import we_are_translated
from rpython.rlib.rarithmetic import r_longlong, r_uint
from rpython.rtyper.annlowlevel import cast_instance_to_gcref
from rpython.rtyper.lltypesystem import lltype, rffi, rstr
from rpython.rtyper.lltypesystem.lloperation import llop
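
# This file implements the register allocator of the x86/x86-64 JIT backend.
# General-purpose and XMM registers are handled by two separate families of
# RegisterManager subclasses below; the 32-bit and 64-bit variants differ
# only in which registers are available and which are reserved as scratch.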

class X86RegisterManager(RegisterManager):
    box_types = [INT, REF]
    all_regs = [ecx, eax, edx, ebx, esi, edi]
    no_lower_byte_regs = [esi, edi]
    save_around_call_regs = [eax, edx, ecx]
    frame_reg = ebp

    def call_result_location(self, v):
        return eax

    def convert_to_imm(self, c):
        if isinstance(c, ConstInt):
            return imm(c.value)
        elif isinstance(c, ConstPtr):
            if we_are_translated() and c.value and rgc.can_move(c.value):
                not_implemented("convert_to_imm: ConstPtr needs special care")
            return imm(rffi.cast(lltype.Signed, c.value))
        else:
            not_implemented("convert_to_imm: got a %s" % c)


class X86_64_RegisterManager(X86RegisterManager):
    # r11 omitted because it's used as scratch
    all_regs = [ecx, eax, edx, ebx, esi, edi, r8, r9, r10, r12, r13, r14, r15]
    no_lower_byte_regs = []
    save_around_call_regs = [eax, ecx, edx, esi, edi, r8, r9, r10]


class X86XMMRegisterManager(RegisterManager):
    box_types = [FLOAT, INT]        # yes INT!
    all_regs = [xmm0, xmm1, xmm2, xmm3, xmm4, xmm5, xmm6, xmm7]
    # we never need lower byte I hope
    save_around_call_regs = all_regs

    def convert_to_imm(self, c):
        adr = self.assembler.datablockwrapper.malloc_aligned(8, 8)
        x = c.getfloatstorage()
        rffi.cast(rffi.CArrayPtr(longlong.FLOATSTORAGE), adr)[0] = x
        return ConstFloatLoc(adr)

    def convert_to_imm_16bytes_align(self, c):
        adr = self.assembler.datablockwrapper.malloc_aligned(16, 16)
        x = c.getfloatstorage()
        y = longlong.ZEROF
        rffi.cast(rffi.CArrayPtr(longlong.FLOATSTORAGE), adr)[0] = x
        rffi.cast(rffi.CArrayPtr(longlong.FLOATSTORAGE), adr)[1] = y
        return ConstFloatLoc(adr)

    def expand_float(self, size, const):
        if size == 4:
            loc = self.expand_single_float(const)
        else:
            loc = self.expand_double_float(const)
        return loc

    def expand_double_float(self, f):
        adr = self.assembler.datablockwrapper.malloc_aligned(16, 16)
        fs = f.getfloatstorage()
        rffi.cast(rffi.CArrayPtr(longlong.FLOATSTORAGE), adr)[0] = fs
        rffi.cast(rffi.CArrayPtr(longlong.FLOATSTORAGE), adr)[1] = fs
        return ConstFloatLoc(adr)

    def expand_single_float(self, f):
        adr = self.assembler.datablockwrapper.malloc_aligned(16, 16)
        fs = rffi.cast(lltype.SingleFloat, f.getfloatstorage())
        rffi.cast(rffi.CArrayPtr(lltype.SingleFloat), adr)[0] = fs
        rffi.cast(rffi.CArrayPtr(lltype.SingleFloat), adr)[1] = fs
        rffi.cast(rffi.CArrayPtr(lltype.SingleFloat), adr)[2] = fs
        rffi.cast(rffi.CArrayPtr(lltype.SingleFloat), adr)[3] = fs
        return ConstFloatLoc(adr)

    def call_result_location(self, v):
        return xmm0


class X86_64_XMMRegisterManager(X86XMMRegisterManager):
    # xmm15 reserved for scratch use
    all_regs = [xmm0, xmm1, xmm2, xmm3, xmm4, xmm5, xmm6, xmm7, xmm8,
                xmm9, xmm10, xmm11, xmm12, xmm13, xmm14]
    save_around_call_regs = all_regs


class X86FrameManager(FrameManager):
    def __init__(self, base_ofs):
        FrameManager.__init__(self)
        self.base_ofs = base_ofs

    def frame_pos(self, i, box_type):
        return FrameLoc(i, get_ebp_ofs(self.base_ofs, i), box_type)

    @staticmethod
    def frame_size(box_type):
        if IS_X86_32 and box_type == FLOAT:
            return 2
        else:
            return 1

    @staticmethod
    def get_loc_index(loc):
        assert isinstance(loc, FrameLoc)
        return loc.position


if WORD == 4:
    gpr_reg_mgr_cls = X86RegisterManager
    xmm_reg_mgr_cls = X86XMMRegisterManager
elif WORD == 8:
    gpr_reg_mgr_cls = X86_64_RegisterManager
    xmm_reg_mgr_cls = X86_64_XMMRegisterManager
else:
    raise AssertionError("Word size should be 4 or 8")

gpr_reg_mgr_cls.all_reg_indexes = [-1] * WORD * 2   # eh, happens to be true
for _i, _reg in enumerate(gpr_reg_mgr_cls.all_regs):
    gpr_reg_mgr_cls.all_reg_indexes[_reg.value] = _i
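
# all_reg_indexes maps a register's hardware encoding (reg.value) back to its
# position in all_regs; get_gcmap() further down relies on it to set the right
# bit for a register holding a GC pointer.  WORD * 2 entries "happens to be
# true" because there are 8 GPR encodings on x86-32 (WORD == 4) and 16 on
# x86-64 (WORD == 8).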

class RegAlloc(BaseRegalloc, VectorRegallocMixin):

    def __init__(self, assembler, translate_support_code=False):
        assert isinstance(translate_support_code, bool)
        # variables that have place in register
        self.assembler = assembler
        self.translate_support_code = translate_support_code
        # to be read/used by the assembler too
        self.jump_target_descr = None
        self.final_jump_op = None

    def _prepare(self, inputargs, operations, allgcrefs):
        for box in inputargs:
            assert box.get_forwarded() is None
        cpu = self.assembler.cpu
        self.fm = X86FrameManager(cpu.get_baseofs_of_frame_field())
        operations = cpu.gc_ll_descr.rewrite_assembler(cpu, operations,
                                                       allgcrefs)
        # compute longevity of variables
        longevity, last_real_usage = compute_vars_longevity(
            inputargs, operations)
        self.longevity = longevity
        self.last_real_usage = last_real_usage
        self.rm = gpr_reg_mgr_cls(self.longevity,
                                  frame_manager=self.fm,
                                  assembler=self.assembler)
        self.xrm = xmm_reg_mgr_cls(self.longevity, frame_manager=self.fm,
                                   assembler=self.assembler)
        return operations

    def prepare_loop(self, inputargs, operations, looptoken, allgcrefs):
        operations = self._prepare(inputargs, operations, allgcrefs)
        self._set_initial_bindings(inputargs, looptoken)
        # note: we need to make a copy of inputargs because possibly_free_vars
        # is also used on op args, which is a non-resizable list
        self.possibly_free_vars(list(inputargs))
        if WORD == 4:       # see redirect_call_assembler()
            self.min_bytes_before_label = 5
        else:
            self.min_bytes_before_label = 13
        return operations
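
    # Note on the 5/13 byte minimum above: redirect_call_assembler() later
    # patches the start of the loop with a jump to the new target, so the
    # first label must leave room for that patch.  The figures presumably
    # match the patch sequence (an assumption about the exact encoding): a
    # 5-byte JMP rel32 on x86-32, and 13 bytes on x86-64 for a
    # MOV r11, imm64 followed by JMP *r11.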

    def prepare_bridge(self, inputargs, arglocs, operations, allgcrefs,
                       frame_info):
        operations = self._prepare(inputargs, operations, allgcrefs)
        self._update_bindings(arglocs, inputargs)
        self.min_bytes_before_label = 0
        return operations

    def ensure_next_label_is_at_least_at_position(self, at_least_position):
        self.min_bytes_before_label = max(self.min_bytes_before_label,
                                          at_least_position)

    def get_final_frame_depth(self):
        return self.fm.get_frame_depth()

    def possibly_free_var(self, var):
        if var.type == FLOAT or var.is_vector():
            self.xrm.possibly_free_var(var)
        else:
            self.rm.possibly_free_var(var)

    def possibly_free_vars_for_op(self, op):
        for i in range(op.numargs()):
            var = op.getarg(i)
            if var is not None:  # xxx kludgy
                self.possibly_free_var(var)
        if op.type != 'v':
            self.possibly_free_var(op)

    def possibly_free_vars(self, vars):
        for var in vars:
            if var is not None:  # xxx kludgy
                self.possibly_free_var(var)

    def make_sure_var_in_reg(self, var, forbidden_vars=[],
                             selected_reg=None, need_lower_byte=False):
        if var.type == FLOAT or var.is_vector():
            if isinstance(var, ConstFloat):
                return FloatImmedLoc(var.getfloatstorage())
            return self.xrm.make_sure_var_in_reg(var, forbidden_vars,
                                                 selected_reg, need_lower_byte)
        else:
            return self.rm.make_sure_var_in_reg(var, forbidden_vars,
                                                selected_reg, need_lower_byte)

    def force_allocate_reg(self, var, forbidden_vars=[], selected_reg=None,
                           need_lower_byte=False):
        if var.type == FLOAT or var.is_vector():
            return self.xrm.force_allocate_reg(var, forbidden_vars,
                                               selected_reg, need_lower_byte)
        else:
            return self.rm.force_allocate_reg(var, forbidden_vars,
                                              selected_reg, need_lower_byte)

    def force_allocate_reg_or_cc(self, var):
        assert var.type == INT
        if self.next_op_can_accept_cc(self.operations, self.rm.position):
            # hack: return the ebp location to mean "lives in CC".  This
            # ebp will not actually be used, and the location will be freed
            # after the next op as usual.
            self.rm.force_allocate_frame_reg(var)
            return ebp
        else:
            # else, return a regular register (not ebp).
            return self.rm.force_allocate_reg(var, need_lower_byte=True)
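
    # Example of the "lives in CC" trick: in a trace fragment like
    #     i3 = int_lt(i1, i2)
    #     guard_true(i3)
    # next_op_can_accept_cc() notices that the guard immediately consumes
    # the comparison, so i3 is left in the condition codes and the guard
    # branches on the flags directly; i3 never occupies a real register.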

    def force_spill_var(self, var):
        if var.type == FLOAT:
            return self.xrm.force_spill_var(var)
        else:
            return self.rm.force_spill_var(var)

    def load_xmm_aligned_16_bytes(self, var, forbidden_vars=[]):
        # Load 'var' in a register; but if it is a constant, we can return
        # a 16-bytes-aligned ConstFloatLoc.
        if isinstance(var, Const):
            return self.xrm.convert_to_imm_16bytes_align(var)
        else:
            return self.xrm.make_sure_var_in_reg(var, forbidden_vars)

    def _update_bindings(self, locs, inputargs):
        # XXX this should probably go to llsupport/regalloc.py
        used = {}
        i = 0
        for loc in locs:
            if loc is None:  # xxx bit kludgy
                loc = ebp
            arg = inputargs[i]
            i += 1
            if isinstance(loc, RegLoc):
                if arg.type == FLOAT:
                    self.xrm.reg_bindings[arg] = loc
                    used[loc] = None
                else:
                    if loc is ebp:
                        self.rm.bindings_to_frame_reg[arg] = None
                    else:
                        self.rm.reg_bindings[arg] = loc
                        used[loc] = None
            else:
                self.fm.bind(arg, loc)
        self.rm.free_regs = []
        for reg in self.rm.all_regs:
            if reg not in used:
                self.rm.free_regs.append(reg)
        self.xrm.free_regs = []
        for reg in self.xrm.all_regs:
            if reg not in used:
                self.xrm.free_regs.append(reg)
        self.possibly_free_vars(list(inputargs))
        self.fm.finish_binding()
        self.rm._check_invariants()
        self.xrm._check_invariants()

    def perform(self, op, arglocs, result_loc):
        if not we_are_translated():
            self.assembler.dump('%s <- %s(%s)' % (result_loc, op, arglocs))
        self.assembler.regalloc_perform(op, arglocs, result_loc)

    def perform_llong(self, op, arglocs, result_loc):
        if not we_are_translated():
            self.assembler.dump('%s <- %s(%s)' % (result_loc, op, arglocs))
        self.assembler.regalloc_perform_llong(op, arglocs, result_loc)

    def perform_math(self, op, arglocs, result_loc):
        if not we_are_translated():
            self.assembler.dump('%s <- %s(%s)' % (result_loc, op, arglocs))
        self.assembler.regalloc_perform_math(op, arglocs, result_loc)

    def locs_for_fail(self, guard_op):
        faillocs = [self.loc(arg) for arg in guard_op.getfailargs()]
        descr = guard_op.getdescr()
        if not descr:
            return faillocs
        assert isinstance(descr, AbstractFailDescr)
        if descr.rd_vector_info:
            accuminfo = descr.rd_vector_info
            while accuminfo:
                accuminfo.location = faillocs[accuminfo.getpos_in_failargs()]
                loc = self.loc(accuminfo.getoriginal())
                faillocs[accuminfo.getpos_in_failargs()] = loc
                accuminfo = accuminfo.next()
        return faillocs

    def perform_guard(self, guard_op, arglocs, result_loc):
        faillocs = self.locs_for_fail(guard_op)
        if not we_are_translated():
            if result_loc is not None:
                self.assembler.dump('%s <- %s(%s)' % (result_loc, guard_op,
                                                      arglocs))
            else:
                self.assembler.dump('%s(%s)' % (guard_op, arglocs))
        self.assembler.regalloc_perform_guard(guard_op, faillocs, arglocs,
                                              result_loc,
                                              self.fm.get_frame_depth())
        self.possibly_free_vars(guard_op.getfailargs())

    def perform_discard(self, op, arglocs):
        if not we_are_translated():
            self.assembler.dump('%s(%s)' % (op, arglocs))
        self.assembler.regalloc_perform_discard(op, arglocs)

    def walk_operations(self, inputargs, operations):
        i = 0
        self.operations = operations
        while i < len(operations):
            op = operations[i]
            self.assembler.mc.mark_op(op)
            assert self.assembler.mc._frame_size == DEFAULT_FRAME_BYTES
            self.rm.position = i
            self.xrm.position = i
            if rop.has_no_side_effect(op.opnum) and op not in self.longevity:
                i += 1
                self.possibly_free_vars_for_op(op)
                continue
            if not we_are_translated() and op.getopnum() == rop.FORCE_SPILL:
                self._consider_force_spill(op)
            else:
                oplist[op.getopnum()](self, op)
            self.possibly_free_vars_for_op(op)
            self.rm._check_invariants()
            self.xrm._check_invariants()
            i += 1
        assert not self.rm.reg_bindings
        assert not self.xrm.reg_bindings
        if not we_are_translated():
            self.assembler.mc.UD2()
        self.flush_loop()
        self.assembler.mc.mark_op(None)   # end of the loop
        self.operations = None
        for arg in inputargs:
            self.possibly_free_var(arg)

    def flush_loop(self):
        # Force the code to be aligned to a multiple of 16.  Also,
        # rare case: if the loop is too short, or if we are just after
        # a GUARD_NOT_INVALIDATED, we need to make sure we insert enough
        # NOPs.  This is important to ensure that there are enough bytes
        # produced, because GUARD_NOT_INVALIDATED or
        # redirect_call_assembler() will maybe overwrite them.  (In that
        # rare case we don't worry too much about alignment.)
        mc = self.assembler.mc
        current_pos = mc.get_relative_pos()
        target_pos = (current_pos + 15) & ~15
        target_pos = max(target_pos, self.min_bytes_before_label)
        insert_nops = target_pos - current_pos
        assert 0 <= insert_nops <= 15
        for c in mc.MULTIBYTE_NOPs[insert_nops]:
            mc.writechar(c)
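
    # Alignment arithmetic, worked through: with current_pos = 9, rounding
    # gives target_pos = (9 + 15) & ~15 = 16, so 7 NOP bytes are emitted;
    # a position already on a 16-byte boundary needs none, unless
    # min_bytes_before_label pushes the target further out.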

    def loc(self, v):
        if v is None:  # xxx kludgy
            return None
        if v.type == FLOAT or v.is_vector():
            return self.xrm.loc(v)
        return self.rm.loc(v)

    def load_condition_into_cc(self, box):
        if self.assembler.guard_success_cc == rx86.cond_none:
            self.assembler.test_location(self.loc(box))
            self.assembler.guard_success_cc = rx86.Conditions['NZ']

    def _consider_guard_cc(self, op):
        arg = op.getarg(0)
        self.load_condition_into_cc(arg)
        self.perform_guard(op, [], None)

    consider_guard_true = _consider_guard_cc
    consider_guard_false = _consider_guard_cc
    consider_guard_nonnull = _consider_guard_cc
    consider_guard_isnull = _consider_guard_cc

    def consider_finish(self, op):
        # the frame is in ebp, but we have to point where in the frame is
        # the potential argument to FINISH
        if op.numargs() == 1:
            loc = self.make_sure_var_in_reg(op.getarg(0))
            locs = [loc]
        else:
            locs = []
        self.perform(op, locs, None)

    def consider_guard_no_exception(self, op):
        self.perform_guard(op, [], None)

    def consider_guard_not_invalidated(self, op):
        mc = self.assembler.mc
        n = mc.get_relative_pos()
        self.perform_guard(op, [], None)
        assert n == mc.get_relative_pos()
        # ensure that the next label is at least 5 bytes farther than
        # the current position.  Otherwise, when invalidating the guard,
        # we would overwrite randomly the next label's position.
        self.ensure_next_label_is_at_least_at_position(n + 5)
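
    # GUARD_NOT_INVALIDATED emits no code here (hence the assert above);
    # invalidating it later patches a jump over this position, which is
    # presumably why exactly 5 bytes -- the size of a JMP rel32 -- must
    # stay free before the next label.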

    def consider_guard_exception(self, op):
        loc = self.rm.make_sure_var_in_reg(op.getarg(0))
        box = TempVar()
        args = op.getarglist()
        loc1 = self.rm.force_allocate_reg(box, args)
        if op in self.longevity:
            # this means, is it ever used
            resloc = self.rm.force_allocate_reg(op, args + [box])
        else:
            resloc = None
        self.perform_guard(op, [loc, loc1], resloc)
        self.rm.possibly_free_var(box)

    def consider_save_exception(self, op):
        resloc = self.rm.force_allocate_reg(op)
        self.perform(op, [], resloc)
    consider_save_exc_class = consider_save_exception

    def consider_restore_exception(self, op):
        args = op.getarglist()
        loc0 = self.rm.make_sure_var_in_reg(op.getarg(0), args)  # exc class
        loc1 = self.rm.make_sure_var_in_reg(op.getarg(1), args)  # exc instance
        self.perform_discard(op, [loc0, loc1])

    consider_guard_no_overflow = consider_guard_no_exception
    consider_guard_overflow = consider_guard_no_exception
    consider_guard_not_forced = consider_guard_no_exception

    def consider_guard_value(self, op):
        x = self.make_sure_var_in_reg(op.getarg(0))
        loc = self.assembler.cpu.all_reg_indexes[x.value]
        op.getdescr().make_a_counter_per_value(op, loc)
        y = self.loc(op.getarg(1))
        self.perform_guard(op, [x, y], None)

    def consider_guard_class(self, op):
        assert not isinstance(op.getarg(0), Const)
        x = self.rm.make_sure_var_in_reg(op.getarg(0))
        y = self.loc(op.getarg(1))
        self.perform_guard(op, [x, y], None)

    consider_guard_nonnull_class = consider_guard_class
    consider_guard_gc_type = consider_guard_class

    def consider_guard_is_object(self, op):
        x = self.make_sure_var_in_reg(op.getarg(0))
        tmp_box = TempVar()
        y = self.rm.force_allocate_reg(tmp_box, [op.getarg(0)])
        self.rm.possibly_free_var(tmp_box)
        self.perform_guard(op, [x, y], None)

    def consider_guard_subclass(self, op):
        x = self.make_sure_var_in_reg(op.getarg(0))
        tmp_box = TempVar()
        z = self.rm.force_allocate_reg(tmp_box, [op.getarg(0)])
        y = self.loc(op.getarg(1))
        self.rm.possibly_free_var(tmp_box)
        self.perform_guard(op, [x, y, z], None)

    def _consider_binop_part(self, op, symm=False):
        x = op.getarg(0)
        y = op.getarg(1)
        argloc = self.loc(y)
        #
        # For symmetrical operations, if 'y' is already in a register
        # and won't be used after the current operation finishes,
        # then swap the role of 'x' and 'y'
        if (symm and isinstance(argloc, RegLoc) and
                self.rm.longevity[y][1] == self.rm.position):
            x, y = y, x
            argloc = self.loc(y)
        #
        args = op.getarglist()
        loc = self.rm.force_result_in_reg(op, x, args)
        return loc, argloc
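
    # force_result_in_reg() reflects the two-operand x86 instruction format:
    # ADD, SUB, AND, ... overwrite their first operand, so the result is
    # made to share a register with 'x' (inserting a move first if 'x' is
    # still needed afterwards).  The swap above uses commutativity to pick
    # as destination an argument that is about to die anyway.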

    def _consider_binop(self, op):
        loc, argloc = self._consider_binop_part(op)
        self.perform(op, [loc, argloc], loc)

    def _consider_binop_symm(self, op):
        loc, argloc = self._consider_binop_part(op, symm=True)
        self.perform(op, [loc, argloc], loc)

    def _consider_lea(self, op, loc):
        argloc = self.loc(op.getarg(1))
        resloc = self.force_allocate_reg(op)
        self.perform(op, [loc, argloc], resloc)

    def consider_int_add(self, op):
        loc = self.loc(op.getarg(0))
        y = op.getarg(1)
        if (isinstance(loc, RegLoc) and
                isinstance(y, ConstInt) and rx86.fits_in_32bits(y.value)):
            self._consider_lea(op, loc)
        else:
            self._consider_binop_symm(op)

    consider_nursery_ptr_increment = consider_int_add
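
    # The special case above compiles 'int_add(i1, <small const>)' to
    # LEA r_res, [r_i1 + const] instead of ADD: LEA writes to a freely
    # chosen destination register without destroying i1 and without
    # touching the flags, so no extra MOV is needed while i1 stays alive.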

    def consider_int_sub(self, op):
        loc = self.loc(op.getarg(0))
        y = op.getarg(1)
        if (isinstance(loc, RegLoc) and
                isinstance(y, ConstInt) and rx86.fits_in_32bits(-y.value)):
            self._consider_lea(op, loc)
        else:
            self._consider_binop(op)

    consider_int_mul = _consider_binop_symm
    consider_int_and = _consider_binop_symm
    consider_int_or = _consider_binop_symm
    consider_int_xor = _consider_binop_symm

    consider_int_mul_ovf = _consider_binop_symm
    consider_int_sub_ovf = _consider_binop
    consider_int_add_ovf = _consider_binop_symm

    def consider_uint_mul_high(self, op):
        arg1, arg2 = op.getarglist()
        # should support all cases, but is optimized for (box, const)
        if isinstance(arg1, Const):
            arg1, arg2 = arg2, arg1
        self.rm.make_sure_var_in_reg(arg2, selected_reg=eax)
        l1 = self.loc(arg1)
        # l1 is a register != eax, or stack_bp; or, just possibly, it
        # can be == eax if arg1 is arg2
        assert not isinstance(l1, ImmedLoc)
        assert l1 is not eax or arg1 is arg2
        #
        # eax will be trash after the operation
        self.rm.possibly_free_var(arg2)
        tmpvar = TempVar()
        self.rm.force_allocate_reg(tmpvar, selected_reg=eax)
        self.rm.possibly_free_var(tmpvar)
        #
        self.rm.force_allocate_reg(op, selected_reg=edx)
        self.perform(op, [l1], edx)
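
    # The eax/edx pinning above mirrors the x86 MUL instruction: one operand
    # is implicitly eax, and the double-width product comes back in edx:eax.
    # uint_mul_high only wants the high half, so the result is forced into
    # edx, while the TempVar makes sure eax is treated as clobbered.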

    def consider_int_neg(self, op):
        res = self.rm.force_result_in_reg(op, op.getarg(0))
        self.perform(op, [res], res)

    consider_int_invert = consider_int_neg

    def consider_int_signext(self, op):
        argloc = self.loc(op.getarg(0))
        numbytesloc = self.loc(op.getarg(1))
        resloc = self.force_allocate_reg(op)
        self.perform(op, [argloc, numbytesloc], resloc)

    def consider_int_lshift(self, op):
        if isinstance(op.getarg(1), Const):
            loc2 = self.rm.convert_to_imm(op.getarg(1))
        else:
            loc2 = self.rm.make_sure_var_in_reg(op.getarg(1), selected_reg=ecx)
        args = op.getarglist()
        loc1 = self.rm.force_result_in_reg(op, op.getarg(0), args)
        self.perform(op, [loc1, loc2], loc1)

    consider_int_rshift = consider_int_lshift
    consider_uint_rshift = consider_int_lshift
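
    # A variable shift count must live in ecx: the x86 SHL/SHR/SAR
    # instructions take their count either as an immediate or in cl, never
    # in another register.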

    def _consider_compop(self, op):
        vx = op.getarg(0)
        vy = op.getarg(1)
        arglocs = [self.loc(vx), self.loc(vy)]
        args = op.getarglist()
        if (vx in self.rm.reg_bindings or vy in self.rm.reg_bindings or
                isinstance(vx, Const) or isinstance(vy, Const)):
            pass
        else:
            arglocs[0] = self.rm.make_sure_var_in_reg(vx)
        loc = self.force_allocate_reg_or_cc(op)
        self.perform(op, arglocs, loc)

    consider_int_lt = _consider_compop
    consider_int_gt = _consider_compop
    consider_int_ge = _consider_compop
    consider_int_le = _consider_compop
    consider_int_ne = _consider_compop
    consider_int_eq = _consider_compop
    consider_uint_gt = _consider_compop
    consider_uint_lt = _consider_compop
    consider_uint_le = _consider_compop
    consider_uint_ge = _consider_compop
    consider_ptr_eq = consider_instance_ptr_eq = _consider_compop
    consider_ptr_ne = consider_instance_ptr_ne = _consider_compop

    def _consider_float_op(self, op):
        loc1 = self.xrm.loc(op.getarg(1))
        args = op.getarglist()
        loc0 = self.xrm.force_result_in_reg(op, op.getarg(0), args)
        self.perform(op, [loc0, loc1], loc0)

    consider_float_add = _consider_float_op        # xxx could be _symm
    consider_float_sub = _consider_float_op
    consider_float_mul = _consider_float_op        # xxx could be _symm
    consider_float_truediv = _consider_float_op

    def _consider_float_cmp(self, op):
        vx = op.getarg(0)
        vy = op.getarg(1)
        arglocs = [self.loc(vx), self.loc(vy)]
        if not (isinstance(arglocs[0], RegLoc) or
                isinstance(arglocs[1], RegLoc)):
            if isinstance(vx, Const):
                arglocs[1] = self.xrm.make_sure_var_in_reg(vy)
            else:
                arglocs[0] = self.xrm.make_sure_var_in_reg(vx)
        loc = self.force_allocate_reg_or_cc(op)
        self.perform(op, arglocs, loc)

    consider_float_lt = _consider_float_cmp
    consider_float_le = _consider_float_cmp
    consider_float_eq = _consider_float_cmp
    consider_float_ne = _consider_float_cmp
    consider_float_gt = _consider_float_cmp
    consider_float_ge = _consider_float_cmp

    def _consider_float_unary_op(self, op):
        loc0 = self.xrm.force_result_in_reg(op, op.getarg(0))
        self.perform(op, [loc0], loc0)

    consider_float_neg = _consider_float_unary_op
    consider_float_abs = _consider_float_unary_op

    def consider_cast_float_to_int(self, op):
        loc0 = self.xrm.make_sure_var_in_reg(op.getarg(0))
        loc1 = self.rm.force_allocate_reg(op)
        self.perform(op, [loc0], loc1)

    def consider_cast_int_to_float(self, op):
        loc0 = self.rm.make_sure_var_in_reg(op.getarg(0))
        loc1 = self.xrm.force_allocate_reg(op)
        self.perform(op, [loc0], loc1)

    def consider_cast_float_to_singlefloat(self, op):
        loc0 = self.xrm.make_sure_var_in_reg(op.getarg(0))
        loc1 = self.rm.force_allocate_reg(op)
        tmpxvar = TempVar()
        loctmp = self.xrm.force_allocate_reg(tmpxvar)  # may be equal to loc0
        self.xrm.possibly_free_var(tmpxvar)
        self.perform(op, [loc0, loctmp], loc1)

    consider_cast_singlefloat_to_float = consider_cast_int_to_float

    def consider_convert_float_bytes_to_longlong(self, op):
        if longlong.is_64_bit:
            loc0 = self.xrm.make_sure_var_in_reg(op.getarg(0))
            loc1 = self.rm.force_allocate_reg(op)
            self.perform(op, [loc0], loc1)
        else:
            arg0 = op.getarg(0)
            loc0 = self.xrm.loc(arg0)
            loc1 = self.xrm.force_allocate_reg(op, forbidden_vars=[arg0])
            self.perform(op, [loc0], loc1)

    def consider_convert_longlong_bytes_to_float(self, op):
        if longlong.is_64_bit:
            loc0 = self.rm.make_sure_var_in_reg(op.getarg(0))
            loc1 = self.xrm.force_allocate_reg(op)
            self.perform(op, [loc0], loc1)
        else:
            arg0 = op.getarg(0)
            loc0 = self.xrm.make_sure_var_in_reg(arg0)
            loc1 = self.xrm.force_allocate_reg(op, forbidden_vars=[arg0])
            self.perform(op, [loc0], loc1)

    def _consider_llong_binop_xx(self, op):
        # must force both arguments into xmm registers, because we don't
        # know if they will be suitably aligned.  Exception: if the second
        # argument is a constant, we can ask it to be aligned to 16 bytes.
        # xxx some of these operations could be '_symm'.
        args = [op.getarg(1), op.getarg(2)]
        loc1 = self.load_xmm_aligned_16_bytes(args[1])
        loc0 = self.xrm.force_result_in_reg(op, args[0], args)
        self.perform_llong(op, [loc0, loc1], loc0)

    def _consider_llong_eq_ne_xx(self, op):
        # must force both arguments into xmm registers, because we don't
        # know if they will be suitably aligned.  Exception: if they are
        # constants, we can ask them to be aligned to 16 bytes.
        args = [op.getarg(1), op.getarg(2)]
        loc1 = self.load_xmm_aligned_16_bytes(args[0])
        loc2 = self.load_xmm_aligned_16_bytes(args[1], args)
        tmpxvar = TempVar()
        loc3 = self.xrm.force_allocate_reg(tmpxvar, args)
        self.xrm.possibly_free_var(tmpxvar)
        loc0 = self.rm.force_allocate_reg(op, need_lower_byte=True)
        self.perform_llong(op, [loc1, loc2, loc3], loc0)

    def _maybe_consider_llong_lt(self, op):
        # XXX just a special case for now
        box = op.getarg(2)
        if not isinstance(box, ConstFloat):
            return False
        if box.getfloat() != 0.0:   # NaNs are also != 0.0
            return False
        # "x < 0.0" or maybe "x < -0.0" which is the same
        box = op.getarg(1)
        assert box.type == FLOAT
        loc1 = self.xrm.make_sure_var_in_reg(box)
        loc0 = self.rm.force_allocate_reg(op)
        self.perform_llong(op, [loc1], loc0)
        return True

    def _consider_llong_to_int(self, op):
        # accept an argument in a xmm register or in the stack
        loc1 = self.xrm.loc(op.getarg(1))
        loc0 = self.rm.force_allocate_reg(op)
        self.perform_llong(op, [loc1], loc0)

    def _loc_of_const_longlong(self, value64):
        c = ConstFloat(value64)
        return self.xrm.convert_to_imm(c)

    def _consider_llong_from_int(self, op):
        assert IS_X86_32
        loc0 = self.xrm.force_allocate_reg(op)
        box = op.getarg(1)
        if isinstance(box, ConstInt):
            loc1 = self._loc_of_const_longlong(r_longlong(box.value))
            loc2 = None    # unused
        else:
            loc1 = self.rm.make_sure_var_in_reg(box)
            tmpxvar = TempVar()
            loc2 = self.xrm.force_allocate_reg(tmpxvar, [op])
            self.xrm.possibly_free_var(tmpxvar)
        self.perform_llong(op, [loc1, loc2], loc0)

    def _consider_llong_from_uint(self, op):
        assert IS_X86_32
        loc0 = self.xrm.force_allocate_reg(op)
        loc1 = self.rm.make_sure_var_in_reg(op.getarg(1))
        self.perform_llong(op, [loc1], loc0)

    def _consider_math_sqrt(self, op):
        loc0 = self.xrm.force_result_in_reg(op, op.getarg(1))
        self.perform_math(op, [loc0], loc0)

    def _consider_threadlocalref_get(self, op):
        if self.translate_support_code:
            offset = op.getarg(1).getint()  # getarg(0) == 'threadlocalref_get'
            calldescr = op.getdescr()
            size = calldescr.get_result_size()
            sign = calldescr.is_result_signed()
            resloc = self.force_allocate_reg(op)
            self.assembler.threadlocalref_get(offset, resloc, size, sign)
        else:
            self._consider_call(op)

    def _call(self, op, arglocs, gc_level):
        # we need to save registers on the stack:
        #
        #  - at least the non-callee-saved registers
        #
        #  - if gc_level > 0, we save also the callee-saved registers that
        #    contain GC pointers
        #
        #  - gc_level == 2 for CALL_MAY_FORCE or CALL_ASSEMBLER.  We
        #    have to save all regs anyway, in case we need to do
        #    cpu.force().  The issue is that grab_frame_values() would
        #    not be able to locate values in callee-saved registers.
        #
        save_all_regs = gc_level == 2
        self.xrm.before_call(save_all_regs=save_all_regs)
        if gc_level == 1:
            gcrootmap = self.assembler.cpu.gc_ll_descr.gcrootmap
            # we save all the registers for shadowstack and asmgcc for now
            # --- for asmgcc too: we can't say "register x is a gc ref"
            # without distinguishing call sites, which we don't do any
            # more for now.
            if gcrootmap:  # and gcrootmap.is_shadow_stack:
                save_all_regs = 2
        self.rm.before_call(save_all_regs=save_all_regs)
        if op.type != 'v':
            if op.type == FLOAT:
                resloc = self.xrm.after_call(op)
            else:
                resloc = self.rm.after_call(op)
        else:
            resloc = None
        self.perform(op, arglocs, resloc)

    def _consider_call(self, op, guard_not_forced=False, first_arg_index=1):
        calldescr = op.getdescr()
        assert isinstance(calldescr, CallDescr)
        assert len(calldescr.arg_classes) == op.numargs() - first_arg_index
        size = calldescr.get_result_size()
        sign = calldescr.is_result_signed()
        if sign:
            sign_loc = imm1
        else:
            sign_loc = imm0
        #
        effectinfo = calldescr.get_extra_info()
        if guard_not_forced:
            gc_level = 2
        elif effectinfo is None or effectinfo.check_can_collect():
            gc_level = 1
        else:
            gc_level = 0
        #
        self._call(op, [imm(size), sign_loc] +
                       [self.loc(op.getarg(i)) for i in range(op.numargs())],
                   gc_level=gc_level)

    def _consider_real_call(self, op):
        effectinfo = op.getdescr().get_extra_info()
        assert effectinfo is not None
        oopspecindex = effectinfo.oopspecindex
        if oopspecindex != EffectInfo.OS_NONE:
            if IS_X86_32:
                # support for some of the llong operations,
                # which only exist on x86-32
                if oopspecindex in (EffectInfo.OS_LLONG_ADD,
                                    EffectInfo.OS_LLONG_SUB,
                                    EffectInfo.OS_LLONG_AND,
                                    EffectInfo.OS_LLONG_OR,
                                    EffectInfo.OS_LLONG_XOR):
                    return self._consider_llong_binop_xx(op)
                if oopspecindex == EffectInfo.OS_LLONG_TO_INT:
                    return self._consider_llong_to_int(op)
                if oopspecindex == EffectInfo.OS_LLONG_FROM_INT:
                    return self._consider_llong_from_int(op)
                if oopspecindex == EffectInfo.OS_LLONG_FROM_UINT:
                    return self._consider_llong_from_uint(op)
                if (oopspecindex == EffectInfo.OS_LLONG_EQ or
                    oopspecindex == EffectInfo.OS_LLONG_NE):
                    return self._consider_llong_eq_ne_xx(op)
                if oopspecindex == EffectInfo.OS_LLONG_LT:
                    if self._maybe_consider_llong_lt(op):
                        return
            if oopspecindex == EffectInfo.OS_MATH_SQRT:
                return self._consider_math_sqrt(op)
            if oopspecindex == EffectInfo.OS_THREADLOCALREF_GET:
                return self._consider_threadlocalref_get(op)
            if oopspecindex == EffectInfo.OS_MATH_READ_TIMESTAMP:
                return self._consider_math_read_timestamp(op)
        self._consider_call(op)

    consider_call_i = _consider_real_call
    consider_call_r = _consider_real_call
    consider_call_f = _consider_real_call
    consider_call_n = _consider_real_call

    def _consider_call_may_force(self, op):
        self._consider_call(op, guard_not_forced=True)

    consider_call_may_force_i = _consider_call_may_force
    consider_call_may_force_r = _consider_call_may_force
    consider_call_may_force_f = _consider_call_may_force
    consider_call_may_force_n = _consider_call_may_force

    def _consider_call_release_gil(self, op):
        # [Const(save_err), func_addr, args...]
        self._consider_call(op, guard_not_forced=True, first_arg_index=2)

    consider_call_release_gil_i = _consider_call_release_gil
    consider_call_release_gil_f = _consider_call_release_gil
    consider_call_release_gil_n = _consider_call_release_gil

    def consider_check_memory_error(self, op):
        x = self.rm.make_sure_var_in_reg(op.getarg(0))
        self.perform_discard(op, [x])

    def _consider_call_assembler(self, op):
        locs = self.locs_for_call_assembler(op)
        self._call(op, locs, gc_level=2)

    consider_call_assembler_i = _consider_call_assembler
    consider_call_assembler_r = _consider_call_assembler
    consider_call_assembler_f = _consider_call_assembler
    consider_call_assembler_n = _consider_call_assembler

    def consider_cond_call_gc_wb(self, op):
        assert op.type == 'v'
        args = op.getarglist()
        N = len(args)
        # we force all arguments in a reg (unless they are Consts),
        # because they will be needed anyway by the following gc_load;
        # this avoids loading them twice from memory.
        arglocs = [self.rm.make_sure_var_in_reg(op.getarg(i), args)
                   for i in range(N)]
        self.perform_discard(op, arglocs)

    consider_cond_call_gc_wb_array = consider_cond_call_gc_wb

    def consider_cond_call(self, op):
        # A 32-bit-only, asmgcc-only issue: 'cond_call_register_arguments'
        # contains edi and esi, which are also in asmgcroot.py:ASM_FRAMEDATA.
        # We must make sure that edi and esi do not contain GC pointers.
        if IS_X86_32 and self.assembler._is_asmgcc():
            for box, loc in self.rm.reg_bindings.items():
                if (loc == edi or loc == esi) and box.type == REF:
                    self.rm.force_spill_var(box)
                    assert box not in self.rm.reg_bindings
        #
        assert op.type == 'v'
        args = op.getarglist()
        assert 2 <= len(args) <= 4 + 2    # maximum 4 arguments
        v = args[1]
        assert isinstance(v, Const)
        imm_func = self.rm.convert_to_imm(v)
        arglocs = [self.loc(args[i]) for i in range(2, len(args))]
        gcmap = self.get_gcmap()
        self.load_condition_into_cc(op.getarg(0))
        self.assembler.cond_call(op, gcmap, imm_func, arglocs)

    def consider_call_malloc_nursery(self, op):
        size_box = op.getarg(0)
        assert isinstance(size_box, ConstInt)
        size = size_box.getint()
        # hint: try to move unrelated registers away from ecx and edx now
        self.rm.spill_or_move_registers_before_call([ecx, edx])
        # the result will be in ecx
        self.rm.force_allocate_reg(op, selected_reg=ecx)
        #
        # We need edx as a temporary, but otherwise don't save any more
        # register.  See comments in _build_malloc_slowpath().
        tmp_box = TempVar()
        self.rm.force_allocate_reg(tmp_box, selected_reg=edx)
        gcmap = self.get_gcmap([ecx, edx])   # allocate the gcmap *before*
        self.rm.possibly_free_var(tmp_box)
        #
        gc_ll_descr = self.assembler.cpu.gc_ll_descr
        self.assembler.malloc_cond(
            gc_ll_descr.get_nursery_free_addr(),
            gc_ll_descr.get_nursery_top_addr(),
            size, gcmap)
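
    # malloc_cond() emits the usual nursery fast path: roughly, bump the
    # nursery_free pointer by 'size', compare it against nursery_top, and
    # fall into a slow-path call (which may trigger a collection) on
    # overflow.  The gcmap computed beforehand tells that slow path which
    # registers and frame slots hold GC pointers at this point.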

    def consider_call_malloc_nursery_varsize_frame(self, op):
        size_box = op.getarg(0)
        assert not isinstance(size_box, Const)  # we cannot have a const here!
        # sizeloc must be in a register, but we can free it now
        # (we take care explicitly of conflicts with ecx or edx)
        sizeloc = self.rm.make_sure_var_in_reg(size_box)
        self.rm.spill_or_move_registers_before_call([ecx, edx])  # sizeloc safe
        self.rm.possibly_free_var(size_box)
        # the result will be in ecx
        self.rm.force_allocate_reg(op, selected_reg=ecx)
        # we need edx as a temporary
        tmp_box = TempVar()
        self.rm.force_allocate_reg(tmp_box, selected_reg=edx)
        gcmap = self.get_gcmap([ecx, edx])   # allocate the gcmap *before*
        self.rm.possibly_free_var(tmp_box)
        #
        gc_ll_descr = self.assembler.cpu.gc_ll_descr
        self.assembler.malloc_cond_varsize_frame(
            gc_ll_descr.get_nursery_free_addr(),
            gc_ll_descr.get_nursery_top_addr(),
            sizeloc, gcmap)

    def consider_call_malloc_nursery_varsize(self, op):
        gc_ll_descr = self.assembler.cpu.gc_ll_descr
        if not hasattr(gc_ll_descr, 'max_size_of_young_obj'):
            raise Exception("unreachable code")
            # for boehm, this function should never be called
        arraydescr = op.getdescr()
        length_box = op.getarg(2)
        assert not isinstance(length_box, Const)  # we cannot have a const here!
        # can only use spill_or_move_registers_before_call() as a hint if
        # we are sure that length_box stays alive and won't be freed now
        # (it should always be the case, see below, but better safe than sorry)
        if self.rm.stays_alive(length_box):
            self.rm.spill_or_move_registers_before_call([ecx, edx])
        # the result will be in ecx
        self.rm.force_allocate_reg(op, selected_reg=ecx)
        # we need edx as a temporary
        tmp_box = TempVar()
        self.rm.force_allocate_reg(tmp_box, selected_reg=edx)
        gcmap = self.get_gcmap([ecx, edx])   # allocate the gcmap *before*
        self.rm.possibly_free_var(tmp_box)
        # length_box always survives: it's typically also present in the
        # next operation that will copy it inside the new array.  It's
        # fine to load it from the stack too, as long as it is != ecx, edx.
        lengthloc = self.rm.loc(length_box)
        self.rm.possibly_free_var(length_box)
        #
        itemsize = op.getarg(1).getint()
        maxlength = (gc_ll_descr.max_size_of_young_obj - WORD * 2)
        self.assembler.malloc_cond_varsize(
            op.getarg(0).getint(),
            gc_ll_descr.get_nursery_free_addr(),
            gc_ll_descr.get_nursery_top_addr(),
            lengthloc, itemsize, maxlength, gcmap, arraydescr)

    def get_gcmap(self, forbidden_regs=[], noregs=False):
        frame_depth = self.fm.get_frame_depth()
        gcmap = allocate_gcmap(self.assembler, frame_depth,
                               JITFRAME_FIXED_SIZE)
        for box, loc in self.rm.reg_bindings.iteritems():
            if loc in forbidden_regs:
                continue
            if box.type == REF and self.rm.is_still_alive(box):
                assert not noregs
                assert isinstance(loc, RegLoc)
                val = gpr_reg_mgr_cls.all_reg_indexes[loc.value]
                gcmap[val // WORD // 8] |= r_uint(1) << (val % (WORD * 8))
        for box, loc in self.fm.bindings.iteritems():
            if box.type == REF and self.rm.is_still_alive(box):
                assert isinstance(loc, FrameLoc)
                val = loc.position + JITFRAME_FIXED_SIZE
                gcmap[val // WORD // 8] |= r_uint(1) << (val % (WORD * 8))
        return gcmap
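
    # The gcmap is an array of machine words forming one big bitmap: bit
    # 'val' (registers first, then frame slots offset by JITFRAME_FIXED_SIZE)
    # is set when that location holds a live GC pointer.  For example, on
    # x86-64 (WORD == 8) val == 70 sets bit 6 of gcmap[1], since
    # 70 // 64 == 1 and 70 % 64 == 6.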

    def consider_gc_store(self, op):
        args = op.getarglist()
        base_loc = self.rm.make_sure_var_in_reg(op.getarg(0), args)
        size_box = op.getarg(3)
        assert isinstance(size_box, ConstInt)
        size = size_box.value
        assert size >= 1
        if size == 1:
            need_lower_byte = True
        else:
            need_lower_byte = False
        value_loc = self.make_sure_var_in_reg(op.getarg(2), args,
                                              need_lower_byte=need_lower_byte)
        ofs_loc = self.rm.make_sure_var_in_reg(op.getarg(1), args)
        self.perform_discard(op, [base_loc, ofs_loc, value_loc,
                                  imm(size)])

    def consider_gc_store_indexed(self, op):
        args = op.getarglist()
        base_loc = self.rm.make_sure_var_in_reg(op.getarg(0), args)
        scale_box = op.getarg(3)
        offset_box = op.getarg(4)
        size_box = op.getarg(5)
        assert isinstance(scale_box, ConstInt)
        assert isinstance(offset_box, ConstInt)
        assert isinstance(size_box, ConstInt)
        factor = scale_box.value
        offset = offset_box.value
        size = size_box.value
        assert size >= 1
        if size == 1:
            need_lower_byte = True
        else:
            need_lower_byte = False
        value_loc = self.make_sure_var_in_reg(op.getarg(2), args,
                                              need_lower_byte=need_lower_byte)
        ofs_loc = self.rm.make_sure_var_in_reg(op.getarg(1), args)
        self.perform_discard(op, [base_loc, ofs_loc, value_loc,
                                  imm(factor), imm(offset), imm(size)])

    def consider_increment_debug_counter(self, op):
        base_loc = self.loc(op.getarg(0))
        self.perform_discard(op, [base_loc])

    def _consider_gc_load(self, op):
        args = op.getarglist()
        base_loc = self.rm.make_sure_var_in_reg(op.getarg(0), args)
        ofs_loc = self.rm.make_sure_var_in_reg(op.getarg(1), args)
        result_loc = self.force_allocate_reg(op)
        size_box = op.getarg(2)
        assert isinstance(size_box, ConstInt)
        nsize = size_box.value      # negative for "signed"
        size_loc = imm(abs(nsize))
        if nsize < 0:
            sign_loc = imm1
        else:
            sign_loc = imm0
        self.perform(op, [base_loc, ofs_loc, size_loc, sign_loc], result_loc)

    consider_gc_load_i = _consider_gc_load
    consider_gc_load_r = _consider_gc_load
    consider_gc_load_f = _consider_gc_load
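
    # The size argument of the gc_load family encodes signedness in its
    # sign: abs(nsize) is the number of bytes to load, and nsize < 0
    # requests a sign-extending load (e.g. -2 loads a signed 16-bit value,
    # +2 a zero-extended one).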

    def _consider_gc_load_indexed(self, op):
        args = op.getarglist()
        base_loc = self.rm.make_sure_var_in_reg(op.getarg(0), args)
        ofs_loc = self.rm.make_sure_var_in_reg(op.getarg(1), args)
        result_loc = self.force_allocate_reg(op)
        scale_box = op.getarg(2)
        offset_box = op.getarg(3)
        size_box = op.getarg(4)
        assert isinstance(scale_box, ConstInt)
        assert isinstance(offset_box, ConstInt)
        assert isinstance(size_box, ConstInt)
        scale = scale_box.value
        offset = offset_box.value
        nsize = size_box.value      # negative for "signed"
        size_loc = imm(abs(nsize))
        if nsize < 0:
            sign_loc = imm1
        else:
            sign_loc = imm0
        locs = [base_loc, ofs_loc, imm(scale), imm(offset), size_loc,
                sign_loc]
        self.perform(op, locs, result_loc)

    consider_gc_load_indexed_i = _consider_gc_load_indexed
    consider_gc_load_indexed_r = _consider_gc_load_indexed
    consider_gc_load_indexed_f = _consider_gc_load_indexed

    def consider_int_is_true(self, op):
        # doesn't need arg to be in a register
        argloc = self.loc(op.getarg(0))
        resloc = self.force_allocate_reg_or_cc(op)
        self.perform(op, [argloc], resloc)

    consider_int_is_zero = consider_int_is_true

    def _consider_same_as(self, op):
        argloc = self.loc(op.getarg(0))
        resloc = self.force_allocate_reg(op)
        self.perform(op, [argloc], resloc)

    consider_cast_ptr_to_int = _consider_same_as
    consider_cast_int_to_ptr = _consider_same_as
    consider_same_as_i = _consider_same_as
    consider_same_as_r = _consider_same_as
    consider_same_as_f = _consider_same_as

    def consider_load_from_gc_table(self, op):
        resloc = self.rm.force_allocate_reg(op)
        self.perform(op, [], resloc)

    def consider_int_force_ge_zero(self, op):
        argloc = self.make_sure_var_in_reg(op.getarg(0))
        resloc = self.force_allocate_reg(op, [op.getarg(0)])
        self.perform(op, [argloc], resloc)

    def consider_copystrcontent(self, op):
        self._consider_copystrcontent(op, is_unicode=False)

    def consider_copyunicodecontent(self, op):
        self._consider_copystrcontent(op, is_unicode=True)

    def _consider_copystrcontent(self, op, is_unicode):
        # compute the source address
        args = op.getarglist()
        base_loc = self.rm.make_sure_var_in_reg(args[0], args)
        ofs_loc = self.rm.make_sure_var_in_reg(args[2], args)
        assert args[0] is not args[1]    # forbidden case of aliasing
        srcaddr_box = TempVar()
        forbidden_vars = [args[1], args[3], args[4], srcaddr_box]
        srcaddr_loc = self.rm.force_allocate_reg(srcaddr_box, forbidden_vars)
        self._gen_address_inside_string(base_loc, ofs_loc, srcaddr_loc,
                                        is_unicode=is_unicode)
        # compute the destination address
        base_loc = self.rm.make_sure_var_in_reg(args[1], forbidden_vars)
        ofs_loc = self.rm.make_sure_var_in_reg(args[3], forbidden_vars)
        forbidden_vars = [args[4], srcaddr_box]
        dstaddr_box = TempVar()
        dstaddr_loc = self.rm.force_allocate_reg(dstaddr_box, forbidden_vars)
        self._gen_address_inside_string(base_loc, ofs_loc, dstaddr_loc,
                                        is_unicode=is_unicode)
        # compute the length in bytes
        length_box = args[4]
        length_loc = self.loc(length_box)
        if is_unicode:
            forbidden_vars = [srcaddr_box, dstaddr_box]
            bytes_box = TempVar()
            bytes_loc = self.rm.force_allocate_reg(bytes_box, forbidden_vars
