
/pypy/jit/backend/x86/regalloc.py

https://bitbucket.org/pypy/pypy/


""" Register allocation scheme.
"""

import os
from pypy.jit.metainterp.history import (Box, Const, ConstInt, ConstPtr,
                                         ResOperation, BoxPtr, ConstFloat,
                                         BoxFloat, INT, REF, FLOAT,
                                         TargetToken, JitCellToken)
from pypy.jit.backend.x86.regloc import *
from pypy.rpython.lltypesystem import lltype, rffi, rstr
from pypy.rlib.objectmodel import we_are_translated
from pypy.rlib import rgc
from pypy.jit.backend.llsupport import symbolic
from pypy.jit.backend.x86.jump import remap_frame_layout_mixed
from pypy.jit.codewriter import heaptracker, longlong
from pypy.jit.codewriter.effectinfo import EffectInfo
from pypy.jit.metainterp.resoperation import rop
from pypy.jit.backend.llsupport.descr import FieldDescr, ArrayDescr
from pypy.jit.backend.llsupport.descr import CallDescr, SizeDescr
from pypy.jit.backend.llsupport.descr import InteriorFieldDescr
from pypy.jit.backend.llsupport.regalloc import FrameManager, RegisterManager,\
     TempBox
from pypy.jit.backend.x86.arch import WORD, FRAME_FIXED_SIZE
from pypy.jit.backend.x86.arch import IS_X86_32, IS_X86_64, MY_COPY_OF_REGS
from pypy.rlib.rarithmetic import r_longlong

class X86RegisterManager(RegisterManager):

    box_types = [INT, REF]
    all_regs = [ecx, eax, edx, ebx, esi, edi]
    no_lower_byte_regs = [esi, edi]
    save_around_call_regs = [eax, edx, ecx]
    frame_reg = ebp

    REGLOC_TO_GCROOTMAP_REG_INDEX = {
        ebx: 1,
        esi: 2,
        edi: 3,
    }
    REGLOC_TO_COPY_AREA_OFS = {
        ecx: MY_COPY_OF_REGS + 0 * WORD,
        ebx: MY_COPY_OF_REGS + 1 * WORD,
        esi: MY_COPY_OF_REGS + 2 * WORD,
        edi: MY_COPY_OF_REGS + 3 * WORD,
    }

    def call_result_location(self, v):
        return eax

    def convert_to_imm(self, c):
        if isinstance(c, ConstInt):
            return imm(c.value)
        elif isinstance(c, ConstPtr):
            if we_are_translated() and c.value and rgc.can_move(c.value):
                not_implemented("convert_to_imm: ConstPtr needs special care")
            return imm(rffi.cast(lltype.Signed, c.value))
        else:
            not_implemented("convert_to_imm: got a %s" % c)

class X86_64_RegisterManager(X86RegisterManager):
    # r11 omitted because it's used as scratch
    all_regs = [ecx, eax, edx, ebx, esi, edi, r8, r9, r10, r12, r13, r14, r15]
    no_lower_byte_regs = []
    save_around_call_regs = [eax, ecx, edx, esi, edi, r8, r9, r10]

    REGLOC_TO_GCROOTMAP_REG_INDEX = {
        ebx: 1,
        r12: 2,
        r13: 3,
        r14: 4,
        r15: 5,
    }
    REGLOC_TO_COPY_AREA_OFS = {
        ecx: MY_COPY_OF_REGS + 0 * WORD,
        ebx: MY_COPY_OF_REGS + 1 * WORD,
        esi: MY_COPY_OF_REGS + 2 * WORD,
        edi: MY_COPY_OF_REGS + 3 * WORD,
        r8:  MY_COPY_OF_REGS + 4 * WORD,
        r9:  MY_COPY_OF_REGS + 5 * WORD,
        r10: MY_COPY_OF_REGS + 6 * WORD,
        r12: MY_COPY_OF_REGS + 7 * WORD,
        r13: MY_COPY_OF_REGS + 8 * WORD,
        r14: MY_COPY_OF_REGS + 9 * WORD,
        r15: MY_COPY_OF_REGS + 10 * WORD,
    }

class X86XMMRegisterManager(RegisterManager):

    box_types = [FLOAT]
    all_regs = [xmm0, xmm1, xmm2, xmm3, xmm4, xmm5, xmm6, xmm7]
    # we never need lower byte I hope
    save_around_call_regs = all_regs

    def convert_to_imm(self, c):
        adr = self.assembler.datablockwrapper.malloc_aligned(8, 8)
        x = c.getfloatstorage()
        rffi.cast(rffi.CArrayPtr(longlong.FLOATSTORAGE), adr)[0] = x
        return ConstFloatLoc(adr)

    def convert_to_imm_16bytes_align(self, c):
        adr = self.assembler.datablockwrapper.malloc_aligned(16, 16)
        x = c.getfloatstorage()
        y = longlong.ZEROF
        rffi.cast(rffi.CArrayPtr(longlong.FLOATSTORAGE), adr)[0] = x
        rffi.cast(rffi.CArrayPtr(longlong.FLOATSTORAGE), adr)[1] = y
        return ConstFloatLoc(adr)

    def after_call(self, v):
        # the result is stored in st0, but we don't have this around,
        # so genop_call will move it to some frame location immediately
        # after the call
        return self.frame_manager.loc(v)

class X86_64_XMMRegisterManager(X86XMMRegisterManager):
    # xmm15 reserved for scratch use
    all_regs = [xmm0, xmm1, xmm2, xmm3, xmm4, xmm5, xmm6, xmm7, xmm8, xmm9, xmm10, xmm11, xmm12, xmm13, xmm14]
    save_around_call_regs = all_regs

    def call_result_location(self, v):
        return xmm0

    def after_call(self, v):
        # We use RegisterManager's implementation, since X86XMMRegisterManager
        # places the result on the stack, which we don't need to do when the
        # calling convention places the result in xmm0
        return RegisterManager.after_call(self, v)

class X86FrameManager(FrameManager):

    @staticmethod
    def frame_pos(i, box_type):
        if IS_X86_32 and box_type == FLOAT:
            return StackLoc(i, get_ebp_ofs(i+1), box_type)
        else:
            return StackLoc(i, get_ebp_ofs(i), box_type)

    @staticmethod
    def frame_size(box_type):
        if IS_X86_32 and box_type == FLOAT:
            return 2
        else:
            return 1

    @staticmethod
    def get_loc_index(loc):
        assert isinstance(loc, StackLoc)
        return loc.position
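
    # Note on the FLOAT cases above: on 32-bit x86 (WORD == 4) a double does
    # not fit in a single frame slot, so frame_size() reports 2 slots and
    # frame_pos() computes the ebp offset from i+1, letting the value span
    # two consecutive stack slots.  On 64-bit, one slot is always enough.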

if WORD == 4:
    gpr_reg_mgr_cls = X86RegisterManager
    xmm_reg_mgr_cls = X86XMMRegisterManager
elif WORD == 8:
    gpr_reg_mgr_cls = X86_64_RegisterManager
    xmm_reg_mgr_cls = X86_64_XMMRegisterManager
else:
    raise AssertionError("Word size should be 4 or 8")

class RegAlloc(object):

    def __init__(self, assembler, translate_support_code=False):
        assert isinstance(translate_support_code, bool)
        # variables that have place in register
        self.assembler = assembler
        self.translate_support_code = translate_support_code
        # to be read/used by the assembler too
        self.jump_target_descr = None
        self.close_stack_struct = 0
        self.final_jump_op = None
        self.min_bytes_before_label = 0

    def _prepare(self, inputargs, operations, allgcrefs):
        self.fm = X86FrameManager()
        self.param_depth = 0
        cpu = self.assembler.cpu
        operations = cpu.gc_ll_descr.rewrite_assembler(cpu, operations,
                                                       allgcrefs)
        # compute longevity of variables
        self._compute_vars_longevity(inputargs, operations)
        self.rm = gpr_reg_mgr_cls(self.longevity,
                                  frame_manager = self.fm,
                                  assembler = self.assembler)
        self.xrm = xmm_reg_mgr_cls(self.longevity, frame_manager = self.fm,
                                   assembler = self.assembler)
        return operations

    def prepare_loop(self, inputargs, operations, looptoken, allgcrefs):
        operations = self._prepare(inputargs, operations, allgcrefs)
        self._set_initial_bindings(inputargs)
        # note: we need to make a copy of inputargs because possibly_free_vars
        # is also used on op args, which is a non-resizable list
        self.possibly_free_vars(list(inputargs))
        if WORD == 4:       # see redirect_call_assembler()
            self.min_bytes_before_label = 5
        else:
            self.min_bytes_before_label = 13
        return operations

    def prepare_bridge(self, prev_depths, inputargs, arglocs, operations,
                       allgcrefs):
        operations = self._prepare(inputargs, operations, allgcrefs)
        self._update_bindings(arglocs, inputargs)
        self.param_depth = prev_depths[1]
        return operations

    def reserve_param(self, n):
        self.param_depth = max(self.param_depth, n)

    def _set_initial_bindings(self, inputargs):
        if IS_X86_64:
            inputargs = self._set_initial_bindings_regs_64(inputargs)
        #                   ...
        # stack layout:     arg2
        #                   arg1
        #                   arg0
        #                   return address
        #                   saved ebp        <-- ebp points here
        #                   ...
        cur_frame_pos = - 1 - FRAME_FIXED_SIZE
        assert get_ebp_ofs(cur_frame_pos-1) == 2*WORD
        assert get_ebp_ofs(cur_frame_pos-2) == 3*WORD
        #
        for box in inputargs:
            assert isinstance(box, Box)
            #
            if IS_X86_32 and box.type == FLOAT:
                cur_frame_pos -= 2
            else:
                cur_frame_pos -= 1
            loc = self.fm.frame_pos(cur_frame_pos, box.type)
            self.fm.set_binding(box, loc)
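
        # Concretely, and consistently with the asserts above: the first
        # stack-passed argument sits at [ebp + 2*WORD], just above the return
        # address, the next one at [ebp + 3*WORD], and so on; on 32-bit x86 a
        # float argument consumes two consecutive WORD-sized slots.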
  203. def _set_initial_bindings_regs_64(self, inputargs):
  204. # In reverse order for use with pop()
  205. unused_gpr = [r9, r8, ecx, edx, esi, edi]
  206. unused_xmm = [xmm7, xmm6, xmm5, xmm4, xmm3, xmm2, xmm1, xmm0]
  207. #
  208. pass_on_stack = []
  209. #
  210. for box in inputargs:
  211. assert isinstance(box, Box)
  212. #
  213. if box.type == FLOAT:
  214. if len(unused_xmm) > 0:
  215. ask = unused_xmm.pop()
  216. got = self.xrm.try_allocate_reg(box, selected_reg=ask)
  217. assert ask == got
  218. else:
  219. pass_on_stack.append(box)
  220. else:
  221. if len(unused_gpr) > 0:
  222. ask = unused_gpr.pop()
  223. got = self.rm.try_allocate_reg(box, selected_reg=ask)
  224. assert ask == got
  225. else:
  226. pass_on_stack.append(box)
  227. #
  228. return pass_on_stack
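
        # The two lists above hold the x86-64 argument registers -- edi, esi,
        # edx, ecx, r8, r9 for integers/pointers and xmm0..xmm7 for floats --
        # stored in reverse so that pop() hands them out in calling-convention
        # order; any argument that does not fit is returned in pass_on_stack
        # and bound to a stack slot by _set_initial_bindings().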

    def possibly_free_var(self, var):
        if var.type == FLOAT:
            self.xrm.possibly_free_var(var)
        else:
            self.rm.possibly_free_var(var)

    def possibly_free_vars_for_op(self, op):
        for i in range(op.numargs()):
            var = op.getarg(i)
            if var is not None: # xxx kludgy
                self.possibly_free_var(var)

    def possibly_free_vars(self, vars):
        for var in vars:
            if var is not None: # xxx kludgy
                self.possibly_free_var(var)

    def make_sure_var_in_reg(self, var, forbidden_vars=[],
                             selected_reg=None, need_lower_byte=False):
        if var.type == FLOAT:
            if isinstance(var, ConstFloat):
                return FloatImmedLoc(var.getfloatstorage())
            return self.xrm.make_sure_var_in_reg(var, forbidden_vars,
                                                 selected_reg, need_lower_byte)
        else:
            return self.rm.make_sure_var_in_reg(var, forbidden_vars,
                                                selected_reg, need_lower_byte)

    def force_allocate_reg(self, var, forbidden_vars=[], selected_reg=None,
                           need_lower_byte=False):
        if var.type == FLOAT:
            return self.xrm.force_allocate_reg(var, forbidden_vars,
                                               selected_reg, need_lower_byte)
        else:
            return self.rm.force_allocate_reg(var, forbidden_vars,
                                              selected_reg, need_lower_byte)

    def force_spill_var(self, var):
        if var.type == FLOAT:
            return self.xrm.force_spill_var(var)
        else:
            return self.rm.force_spill_var(var)

    def load_xmm_aligned_16_bytes(self, var, forbidden_vars=[]):
        # Load 'var' in a register; but if it is a constant, we can return
        # a 16-bytes-aligned ConstFloatLoc.
        if isinstance(var, Const):
            return self.xrm.convert_to_imm_16bytes_align(var)
        else:
            return self.xrm.make_sure_var_in_reg(var, forbidden_vars)

    #def _compute_loop_consts(self, inputargs, jump, looptoken):
    #    if jump.getopnum() != rop.JUMP or jump.getdescr() is not looptoken:
    #        loop_consts = {}
    #    else:
    #        loop_consts = {}
    #        for i in range(len(inputargs)):
    #            if inputargs[i] is jump.getarg(i):
    #                loop_consts[inputargs[i]] = i
    #    return loop_consts

    def _update_bindings(self, locs, inputargs):
        # XXX this should probably go to llsupport/regalloc.py
        used = {}
        i = 0
        for loc in locs:
            if loc is None: # xxx bit kludgy
                continue
            arg = inputargs[i]
            i += 1
            if arg.type == FLOAT:
                if isinstance(loc, RegLoc):
                    self.xrm.reg_bindings[arg] = loc
                    used[loc] = None
                else:
                    self.fm.set_binding(arg, loc)
            else:
                if isinstance(loc, RegLoc):
                    if loc is ebp:
                        self.rm.bindings_to_frame_reg[arg] = None
                    else:
                        self.rm.reg_bindings[arg] = loc
                        used[loc] = None
                else:
                    self.fm.set_binding(arg, loc)
        self.rm.free_regs = []
        for reg in self.rm.all_regs:
            if reg not in used:
                self.rm.free_regs.append(reg)
        self.xrm.free_regs = []
        for reg in self.xrm.all_regs:
            if reg not in used:
                self.xrm.free_regs.append(reg)
        # note: we need to make a copy of inputargs because possibly_free_vars
        # is also used on op args, which is a non-resizable list
        self.possibly_free_vars(list(inputargs))
        self.rm._check_invariants()
        self.xrm._check_invariants()

    def Perform(self, op, arglocs, result_loc):
        if not we_are_translated():
            self.assembler.dump('%s <- %s(%s)' % (result_loc, op, arglocs))
        self.assembler.regalloc_perform(op, arglocs, result_loc)

    def PerformLLong(self, op, arglocs, result_loc):
        if not we_are_translated():
            self.assembler.dump('%s <- %s(%s)' % (result_loc, op, arglocs))
        self.assembler.regalloc_perform_llong(op, arglocs, result_loc)

    def PerformMath(self, op, arglocs, result_loc):
        if not we_are_translated():
            self.assembler.dump('%s <- %s(%s)' % (result_loc, op, arglocs))
        self.assembler.regalloc_perform_math(op, arglocs, result_loc)

    def locs_for_fail(self, guard_op):
        return [self.loc(v) for v in guard_op.getfailargs()]

    def get_current_depth(self):
        # return (self.fm.frame_depth, self.param_depth), but trying to share
        # the resulting tuple among several calls
        arg0 = self.fm.get_frame_depth()
        arg1 = self.param_depth
        result = self.assembler._current_depths_cache
        if result[0] != arg0 or result[1] != arg1:
            result = (arg0, arg1)
            self.assembler._current_depths_cache = result
        return result

    def perform_with_guard(self, op, guard_op, arglocs, result_loc):
        faillocs = self.locs_for_fail(guard_op)
        self.rm.position += 1
        self.xrm.position += 1
        current_depths = self.get_current_depth()
        self.assembler.regalloc_perform_with_guard(op, guard_op, faillocs,
                                                   arglocs, result_loc,
                                                   current_depths)
        if op.result is not None:
            self.possibly_free_var(op.result)
        self.possibly_free_vars(guard_op.getfailargs())

    def perform_guard(self, guard_op, arglocs, result_loc):
        faillocs = self.locs_for_fail(guard_op)
        if not we_are_translated():
            if result_loc is not None:
                self.assembler.dump('%s <- %s(%s)' % (result_loc, guard_op,
                                                      arglocs))
            else:
                self.assembler.dump('%s(%s)' % (guard_op, arglocs))
        current_depths = self.get_current_depth()
        self.assembler.regalloc_perform_guard(guard_op, faillocs, arglocs,
                                              result_loc,
                                              current_depths)
        self.possibly_free_vars(guard_op.getfailargs())

    def PerformDiscard(self, op, arglocs):
        if not we_are_translated():
            self.assembler.dump('%s(%s)' % (op, arglocs))
        self.assembler.regalloc_perform_discard(op, arglocs)

    def can_merge_with_next_guard(self, op, i, operations):
        if (op.getopnum() == rop.CALL_MAY_FORCE or
            op.getopnum() == rop.CALL_ASSEMBLER or
            op.getopnum() == rop.CALL_RELEASE_GIL):
            assert operations[i + 1].getopnum() == rop.GUARD_NOT_FORCED
            return True
        if not op.is_comparison():
            if op.is_ovf():
                if (operations[i + 1].getopnum() != rop.GUARD_NO_OVERFLOW and
                    operations[i + 1].getopnum() != rop.GUARD_OVERFLOW):
                    not_implemented("int_xxx_ovf not followed by "
                                    "guard_(no)_overflow")
                return True
            return False
        if (operations[i + 1].getopnum() != rop.GUARD_TRUE and
            operations[i + 1].getopnum() != rop.GUARD_FALSE):
            return False
        if operations[i + 1].getarg(0) is not op.result:
            return False
        if (self.longevity[op.result][1] > i + 1 or
            op.result in operations[i + 1].getfailargs()):
            return False
        return True
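
    # For example, in a trace fragment like
    #     i3 = int_lt(i1, i2)
    #     guard_true(i3) [...]
    # can_merge_with_next_guard() lets the comparison and the guard be handled
    # by a single consider_* entry (through oplist_with_guard below), so the
    # assembler can emit one compare plus a conditional jump instead of first
    # materializing i3 in a register -- but only if i3 dies right after the
    # guard and does not appear among the failargs.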

    def walk_operations(self, operations):
        i = 0
        #self.operations = operations
        while i < len(operations):
            op = operations[i]
            self.assembler.mc.mark_op(op)
            self.rm.position = i
            self.xrm.position = i
            if op.has_no_side_effect() and op.result not in self.longevity:
                i += 1
                self.possibly_free_vars_for_op(op)
                continue
            if self.can_merge_with_next_guard(op, i, operations):
                oplist_with_guard[op.getopnum()](self, op, operations[i + 1])
                i += 1
            elif not we_are_translated() and op.getopnum() == -124:
                self._consider_force_spill(op)
            else:
                oplist[op.getopnum()](self, op)
            if op.result is not None:
                self.possibly_free_var(op.result)
            self.rm._check_invariants()
            self.xrm._check_invariants()
            i += 1
        assert not self.rm.reg_bindings
        assert not self.xrm.reg_bindings
        self.flush_loop()
        self.assembler.mc.mark_op(None) # end of the loop

    def flush_loop(self):
        # rare case: if the loop is too short, pad with NOPs
        mc = self.assembler.mc
        while mc.get_relative_pos() < self.min_bytes_before_label:
            mc.NOP()

    def _compute_vars_longevity(self, inputargs, operations):
        # compute a dictionary that maps variables to index in
        # operations that is a "last-time-seen"
        # returns a pair longevity/useful. Non-useful variables are ones that
        # never appear in the assembler or it does not matter if they appear on
        # stack or in registers. Main example is loop arguments that go
        # only to guard operations or to jump or to finish
        produced = {}
        last_used = {}
        last_real_usage = {}
        for i in range(len(operations)-1, -1, -1):
            op = operations[i]
            if op.result:
                if op.result not in last_used and op.has_no_side_effect():
                    continue
                assert op.result not in produced
                produced[op.result] = i
            opnum = op.getopnum()
            for j in range(op.numargs()):
                arg = op.getarg(j)
                if not isinstance(arg, Box):
                    continue
                if arg not in last_used:
                    last_used[arg] = i
                if opnum != rop.JUMP and opnum != rop.LABEL:
                    if arg not in last_real_usage:
                        last_real_usage[arg] = i
            if op.is_guard():
                for arg in op.getfailargs():
                    if arg is None: # hole
                        continue
                    assert isinstance(arg, Box)
                    if arg not in last_used:
                        last_used[arg] = i
        self.last_real_usage = last_real_usage
        #
        longevity = {}
        for arg in produced:
            if arg in last_used:
                assert isinstance(arg, Box)
                assert produced[arg] < last_used[arg]
                longevity[arg] = (produced[arg], last_used[arg])
                del last_used[arg]
        for arg in inputargs:
            assert isinstance(arg, Box)
            if arg not in last_used:
                longevity[arg] = (-1, -1)
            else:
                longevity[arg] = (0, last_used[arg])
                del last_used[arg]
        assert len(last_used) == 0
        self.longevity = longevity
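
    # Small worked example of the result: for inputargs [i0] and operations
    #     0: i1 = int_add(i0, i0)
    #     1: i2 = int_mul(i1, i1)
    #     2: finish(i2)
    # the computed longevity is {i0: (0, 0), i1: (0, 1), i2: (1, 2)}, i.e.
    # each box maps to (index where it is produced, index of its last use);
    # inputargs count as produced at index 0, or get (-1, -1) if never used.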

    def loc(self, v):
        if v is None: # xxx kludgy
            return None
        if v.type == FLOAT:
            return self.xrm.loc(v)
        return self.rm.loc(v)

    def _consider_guard(self, op):
        loc = self.rm.make_sure_var_in_reg(op.getarg(0))
        self.perform_guard(op, [loc], None)
        self.rm.possibly_free_var(op.getarg(0))
    consider_guard_true = _consider_guard
    consider_guard_false = _consider_guard
    consider_guard_nonnull = _consider_guard
    consider_guard_isnull = _consider_guard

    def consider_finish(self, op):
        locs = [self.loc(op.getarg(i)) for i in range(op.numargs())]
        locs_are_ref = [op.getarg(i).type == REF for i in range(op.numargs())]
        fail_index = self.assembler.cpu.get_fail_descr_number(op.getdescr())
        # note: no exception should currently be set in llop.get_exception_addr
        # even if this finish may be an exit_frame_with_exception (in this case
        # the exception instance is in locs[0]).
        self.assembler.generate_failure(fail_index, locs, False,
                                        locs_are_ref)
        self.possibly_free_vars_for_op(op)

    def consider_guard_no_exception(self, op):
        self.perform_guard(op, [], None)
    consider_guard_not_invalidated = consider_guard_no_exception

    def consider_guard_exception(self, op):
        loc = self.rm.make_sure_var_in_reg(op.getarg(0))
        box = TempBox()
        args = op.getarglist()
        loc1 = self.rm.force_allocate_reg(box, args)
        if op.result in self.longevity:
            # this means, is it ever used
            resloc = self.rm.force_allocate_reg(op.result, args + [box])
        else:
            resloc = None
        self.perform_guard(op, [loc, loc1], resloc)
        self.rm.possibly_free_vars_for_op(op)
        self.rm.possibly_free_var(box)
    consider_guard_no_overflow = consider_guard_no_exception
    consider_guard_overflow = consider_guard_no_exception

    def consider_guard_value(self, op):
        x = self.make_sure_var_in_reg(op.getarg(0))
        y = self.loc(op.getarg(1))
        self.perform_guard(op, [x, y], None)
        self.possibly_free_vars_for_op(op)

    def consider_guard_class(self, op):
        assert isinstance(op.getarg(0), Box)
        x = self.rm.make_sure_var_in_reg(op.getarg(0))
        y = self.loc(op.getarg(1))
        self.perform_guard(op, [x, y], None)
        self.rm.possibly_free_vars_for_op(op)
    consider_guard_nonnull_class = consider_guard_class

    def _consider_binop_part(self, op):
        x = op.getarg(0)
        argloc = self.loc(op.getarg(1))
        args = op.getarglist()
        loc = self.rm.force_result_in_reg(op.result, x, args)
        self.rm.possibly_free_var(op.getarg(1))
        return loc, argloc

    def _consider_binop(self, op):
        loc, argloc = self._consider_binop_part(op)
        self.Perform(op, [loc, argloc], loc)
    consider_int_add = _consider_binop
    consider_int_mul = _consider_binop
    consider_int_sub = _consider_binop
    consider_int_and = _consider_binop
    consider_int_or = _consider_binop
    consider_int_xor = _consider_binop

    def _consider_binop_with_guard(self, op, guard_op):
        loc, argloc = self._consider_binop_part(op)
        self.perform_with_guard(op, guard_op, [loc, argloc], loc)
    consider_int_mul_ovf = _consider_binop_with_guard
    consider_int_sub_ovf = _consider_binop_with_guard
    consider_int_add_ovf = _consider_binop_with_guard

    def consider_int_neg(self, op):
        res = self.rm.force_result_in_reg(op.result, op.getarg(0))
        self.Perform(op, [res], res)
    consider_int_invert = consider_int_neg

    def consider_int_lshift(self, op):
        if isinstance(op.getarg(1), Const):
            loc2 = self.rm.convert_to_imm(op.getarg(1))
        else:
            loc2 = self.rm.make_sure_var_in_reg(op.getarg(1), selected_reg=ecx)
        args = op.getarglist()
        loc1 = self.rm.force_result_in_reg(op.result, op.getarg(0), args)
        self.Perform(op, [loc1, loc2], loc1)
        self.rm.possibly_free_vars_for_op(op)
    consider_int_rshift = consider_int_lshift
    consider_uint_rshift = consider_int_lshift

    def _consider_int_div_or_mod(self, op, resultreg, trashreg):
        l0 = self.rm.make_sure_var_in_reg(op.getarg(0), selected_reg=eax)
        l1 = self.rm.make_sure_var_in_reg(op.getarg(1), selected_reg=ecx)
        l2 = self.rm.force_allocate_reg(op.result, selected_reg=resultreg)
        # the register (eax or edx) not holding what we are looking for
        # will be just trash after that operation
        tmpvar = TempBox()
        self.rm.force_allocate_reg(tmpvar, selected_reg=trashreg)
        assert l0 is eax
        assert l1 is ecx
        assert l2 is resultreg
        self.rm.possibly_free_vars_for_op(op)
        self.rm.possibly_free_var(tmpvar)
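
    # Background for the fixed registers above: x86 signed division takes the
    # dividend in edx:eax and leaves the quotient in eax and the remainder in
    # edx, with the divisor in an arbitrary register (ecx here).  So
    # consider_int_floordiv asks for its result in eax and trashes edx, while
    # consider_int_mod asks for edx and trashes eax.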

    def consider_int_mod(self, op):
        self._consider_int_div_or_mod(op, edx, eax)
        self.Perform(op, [eax, ecx], edx)

    def consider_int_floordiv(self, op):
        self._consider_int_div_or_mod(op, eax, edx)
        self.Perform(op, [eax, ecx], eax)
    consider_uint_floordiv = consider_int_floordiv

    def _consider_compop(self, op, guard_op):
        vx = op.getarg(0)
        vy = op.getarg(1)
        arglocs = [self.loc(vx), self.loc(vy)]
        if (vx in self.rm.reg_bindings or vy in self.rm.reg_bindings or
            isinstance(vx, Const) or isinstance(vy, Const)):
            pass
        else:
            arglocs[0] = self.rm.make_sure_var_in_reg(vx)
        args = op.getarglist()
        self.rm.possibly_free_vars(args)
        if guard_op is None:
            loc = self.rm.force_allocate_reg(op.result, args,
                                             need_lower_byte=True)
            self.Perform(op, arglocs, loc)
        else:
            self.perform_with_guard(op, guard_op, arglocs, None)
    consider_int_lt = _consider_compop
    consider_int_gt = _consider_compop
    consider_int_ge = _consider_compop
    consider_int_le = _consider_compop
    consider_int_ne = _consider_compop
    consider_int_eq = _consider_compop
    consider_uint_gt = _consider_compop
    consider_uint_lt = _consider_compop
    consider_uint_le = _consider_compop
    consider_uint_ge = _consider_compop
    consider_ptr_eq = consider_instance_ptr_eq = _consider_compop
    consider_ptr_ne = consider_instance_ptr_ne = _consider_compop

    def _consider_float_op(self, op):
        loc1 = self.xrm.loc(op.getarg(1))
        args = op.getarglist()
        loc0 = self.xrm.force_result_in_reg(op.result, op.getarg(0), args)
        self.Perform(op, [loc0, loc1], loc0)
        self.xrm.possibly_free_vars_for_op(op)
    consider_float_add = _consider_float_op
    consider_float_sub = _consider_float_op
    consider_float_mul = _consider_float_op
    consider_float_truediv = _consider_float_op

    def _consider_float_cmp(self, op, guard_op):
        vx = op.getarg(0)
        vy = op.getarg(1)
        arglocs = [self.loc(vx), self.loc(vy)]
        if not (isinstance(arglocs[0], RegLoc) or
                isinstance(arglocs[1], RegLoc)):
            if isinstance(vx, Const):
                arglocs[1] = self.xrm.make_sure_var_in_reg(vy)
            else:
                arglocs[0] = self.xrm.make_sure_var_in_reg(vx)
        self.xrm.possibly_free_vars_for_op(op)
        if guard_op is None:
            res = self.rm.force_allocate_reg(op.result, need_lower_byte=True)
            self.Perform(op, arglocs, res)
        else:
            self.perform_with_guard(op, guard_op, arglocs, None)
    consider_float_lt = _consider_float_cmp
    consider_float_le = _consider_float_cmp
    consider_float_eq = _consider_float_cmp
    consider_float_ne = _consider_float_cmp
    consider_float_gt = _consider_float_cmp
    consider_float_ge = _consider_float_cmp

    def _consider_float_unary_op(self, op):
        loc0 = self.xrm.force_result_in_reg(op.result, op.getarg(0))
        self.Perform(op, [loc0], loc0)
        self.xrm.possibly_free_var(op.getarg(0))
    consider_float_neg = _consider_float_unary_op
    consider_float_abs = _consider_float_unary_op

    def consider_cast_float_to_int(self, op):
        loc0 = self.xrm.make_sure_var_in_reg(op.getarg(0))
        loc1 = self.rm.force_allocate_reg(op.result)
        self.Perform(op, [loc0], loc1)
        self.xrm.possibly_free_var(op.getarg(0))

    def consider_cast_int_to_float(self, op):
        loc0 = self.rm.make_sure_var_in_reg(op.getarg(0))
        loc1 = self.xrm.force_allocate_reg(op.result)
        self.Perform(op, [loc0], loc1)
        self.rm.possibly_free_var(op.getarg(0))

    def consider_cast_float_to_singlefloat(self, op):
        loc0 = self.xrm.make_sure_var_in_reg(op.getarg(0))
        loc1 = self.rm.force_allocate_reg(op.result)
        self.xrm.possibly_free_var(op.getarg(0))
        tmpxvar = TempBox()
        loctmp = self.xrm.force_allocate_reg(tmpxvar) # may be equal to loc0
        self.xrm.possibly_free_var(tmpxvar)
        self.Perform(op, [loc0, loctmp], loc1)
    consider_cast_singlefloat_to_float = consider_cast_int_to_float

    def _consider_llong_binop_xx(self, op):
        # must force both arguments into xmm registers, because we don't
        # know if they will be suitably aligned.  Exception: if the second
        # argument is a constant, we can ask it to be aligned to 16 bytes.
        args = [op.getarg(1), op.getarg(2)]
        loc1 = self.load_xmm_aligned_16_bytes(args[1])
        loc0 = self.xrm.force_result_in_reg(op.result, args[0], args)
        self.PerformLLong(op, [loc0, loc1], loc0)
        self.xrm.possibly_free_vars(args)

    def _consider_llong_eq_ne_xx(self, op):
        # must force both arguments into xmm registers, because we don't
        # know if they will be suitably aligned.  Exception: if they are
        # constants, we can ask them to be aligned to 16 bytes.
        args = [op.getarg(1), op.getarg(2)]
        loc1 = self.load_xmm_aligned_16_bytes(args[0])
        loc2 = self.load_xmm_aligned_16_bytes(args[1], args)
        tmpxvar = TempBox()
        loc3 = self.xrm.force_allocate_reg(tmpxvar, args)
        self.xrm.possibly_free_var(tmpxvar)
        loc0 = self.rm.force_allocate_reg(op.result, need_lower_byte=True)
        self.PerformLLong(op, [loc1, loc2, loc3], loc0)
        self.xrm.possibly_free_vars(args)

    def _maybe_consider_llong_lt(self, op):
        # XXX just a special case for now
        box = op.getarg(2)
        if not isinstance(box, ConstFloat):
            return False
        if box.getlonglong() != 0:
            return False
        # "x < 0"
        box = op.getarg(1)
        assert isinstance(box, BoxFloat)
        loc1 = self.xrm.make_sure_var_in_reg(box)
        loc0 = self.rm.force_allocate_reg(op.result)
        self.PerformLLong(op, [loc1], loc0)
        self.xrm.possibly_free_var(box)
        return True

    def _consider_llong_to_int(self, op):
        # accept an argument in a xmm register or in the stack
        loc1 = self.xrm.loc(op.getarg(1))
        loc0 = self.rm.force_allocate_reg(op.result)
        self.PerformLLong(op, [loc1], loc0)
        self.xrm.possibly_free_var(op.getarg(1))

    def _loc_of_const_longlong(self, value64):
        c = ConstFloat(value64)
        return self.xrm.convert_to_imm(c)

    def _consider_llong_from_int(self, op):
        assert IS_X86_32
        loc0 = self.xrm.force_allocate_reg(op.result)
        box = op.getarg(1)
        if isinstance(box, ConstInt):
            loc1 = self._loc_of_const_longlong(r_longlong(box.value))
            loc2 = None    # unused
        else:
            loc1 = self.rm.make_sure_var_in_reg(box)
            tmpxvar = TempBox()
            loc2 = self.xrm.force_allocate_reg(tmpxvar, [op.result])
            self.xrm.possibly_free_var(tmpxvar)
        self.PerformLLong(op, [loc1, loc2], loc0)
        self.rm.possibly_free_var(box)

    def _consider_llong_from_uint(self, op):
        assert IS_X86_32
        loc0 = self.xrm.force_allocate_reg(op.result)
        loc1 = self.rm.make_sure_var_in_reg(op.getarg(1))
        self.PerformLLong(op, [loc1], loc0)
        self.rm.possibly_free_vars_for_op(op)

    def _consider_math_sqrt(self, op):
        loc0 = self.xrm.force_result_in_reg(op.result, op.getarg(1))
        self.PerformMath(op, [loc0], loc0)
        self.xrm.possibly_free_var(op.getarg(1))

    def _call(self, op, arglocs, force_store=[], guard_not_forced_op=None):
        # we need to save registers on the stack:
        #
        #  - at least the non-callee-saved registers
        #
        #  - for shadowstack, we assume that any call can collect, and we
        #    save also the callee-saved registers that contain GC pointers,
        #    so that they can be found by follow_stack_frame_of_assembler()
        #
        #  - for CALL_MAY_FORCE or CALL_ASSEMBLER, we have to save all regs
        #    anyway, in case we need to do cpu.force().  The issue is that
        #    grab_frame_values() would not be able to locate values in
        #    callee-saved registers.
        #
        save_all_regs = guard_not_forced_op is not None
        self.xrm.before_call(force_store, save_all_regs=save_all_regs)
        if not save_all_regs:
            gcrootmap = self.assembler.cpu.gc_ll_descr.gcrootmap
            if gcrootmap and gcrootmap.is_shadow_stack:
                save_all_regs = 2
        self.rm.before_call(force_store, save_all_regs=save_all_regs)
        if op.result is not None:
            if op.result.type == FLOAT:
                resloc = self.xrm.after_call(op.result)
            else:
                resloc = self.rm.after_call(op.result)
        else:
            resloc = None
        if guard_not_forced_op is not None:
            self.perform_with_guard(op, guard_not_forced_op, arglocs, resloc)
        else:
            self.Perform(op, arglocs, resloc)
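
    # Note on the save_all_regs values passed above: False means spill only
    # the caller-saved (save_around_call_regs) registers, True means spill
    # everything because cpu.force() may need to read any value, and 2 is the
    # intermediate shadow-stack mode where, as the comment explains, the
    # callee-saved registers holding GC pointers must be spilled as well.
    # The distinction itself is interpreted by RegisterManager.before_call()
    # in llsupport/regalloc.py.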

    def _consider_call(self, op, guard_not_forced_op=None):
        calldescr = op.getdescr()
        assert isinstance(calldescr, CallDescr)
        assert len(calldescr.arg_classes) == op.numargs() - 1
        size = calldescr.get_result_size()
        sign = calldescr.is_result_signed()
        if sign:
            sign_loc = imm1
        else:
            sign_loc = imm0
        self._call(op, [imm(size), sign_loc] +
                       [self.loc(op.getarg(i)) for i in range(op.numargs())],
                   guard_not_forced_op=guard_not_forced_op)

    def consider_call(self, op):
        effectinfo = op.getdescr().get_extra_info()
        oopspecindex = effectinfo.oopspecindex
        if oopspecindex != EffectInfo.OS_NONE:
            if IS_X86_32:
                # support for some of the llong operations,
                # which only exist on x86-32
                if oopspecindex in (EffectInfo.OS_LLONG_ADD,
                                    EffectInfo.OS_LLONG_SUB,
                                    EffectInfo.OS_LLONG_AND,
                                    EffectInfo.OS_LLONG_OR,
                                    EffectInfo.OS_LLONG_XOR):
                    return self._consider_llong_binop_xx(op)
                if oopspecindex == EffectInfo.OS_LLONG_TO_INT:
                    return self._consider_llong_to_int(op)
                if oopspecindex == EffectInfo.OS_LLONG_FROM_INT:
                    return self._consider_llong_from_int(op)
                if oopspecindex == EffectInfo.OS_LLONG_FROM_UINT:
                    return self._consider_llong_from_uint(op)
                if (oopspecindex == EffectInfo.OS_LLONG_EQ or
                    oopspecindex == EffectInfo.OS_LLONG_NE):
                    return self._consider_llong_eq_ne_xx(op)
                if oopspecindex == EffectInfo.OS_LLONG_LT:
                    if self._maybe_consider_llong_lt(op):
                        return
            if oopspecindex == EffectInfo.OS_MATH_SQRT:
                return self._consider_math_sqrt(op)
        self._consider_call(op)

    def consider_call_may_force(self, op, guard_op):
        assert guard_op is not None
        self._consider_call(op, guard_op)
    consider_call_release_gil = consider_call_may_force

    def consider_call_malloc_gc(self, op):
        self._consider_call(op)

    def consider_call_assembler(self, op, guard_op):
        descr = op.getdescr()
        assert isinstance(descr, JitCellToken)
        jd = descr.outermost_jitdriver_sd
        assert jd is not None
        size = jd.portal_calldescr.get_result_size()
        vable_index = jd.index_of_virtualizable
        if vable_index >= 0:
            self.rm._sync_var(op.getarg(vable_index))
            vable = self.fm.loc(op.getarg(vable_index))
        else:
            vable = imm0
        self._call(op, [imm(size), vable] +
                       [self.loc(op.getarg(i)) for i in range(op.numargs())],
                   guard_not_forced_op=guard_op)

    def consider_cond_call_gc_wb(self, op):
        assert op.result is None
        args = op.getarglist()
        N = len(args)
        # we force all arguments in a reg (unless they are Consts),
        # because it will be needed anyway by the following setfield_gc
        # or setarrayitem_gc. It avoids loading it twice from the memory.
        arglocs = [self.rm.make_sure_var_in_reg(op.getarg(i), args)
                   for i in range(N)]
        # add eax, ecx and edx as extra "arguments" to ensure they are
        # saved and restored.  Fish in self.rm to know which of these
        # registers really need to be saved (a bit of a hack).  Moreover,
        # we don't save and restore any SSE register because the called
        # function, a GC write barrier, is known not to touch them.
        # See remember_young_pointer() in rpython/memory/gc/generation.py.
        for v, reg in self.rm.reg_bindings.items():
            if (reg in self.rm.save_around_call_regs
                and self.rm.stays_alive(v)):
                arglocs.append(reg)
        self.PerformDiscard(op, arglocs)
        self.rm.possibly_free_vars_for_op(op)
    consider_cond_call_gc_wb_array = consider_cond_call_gc_wb

    def consider_call_malloc_nursery(self, op):
        size_box = op.getarg(0)
        assert isinstance(size_box, ConstInt)
        size = size_box.getint()
        self.rm.force_allocate_reg(op.result, selected_reg=eax)
        #
        # We need edx as a temporary, but otherwise don't save any more
        # register.  See comments in _build_malloc_slowpath().
        tmp_box = TempBox()
        self.rm.force_allocate_reg(tmp_box, selected_reg=edx)
        self.rm.possibly_free_var(tmp_box)
        #
        gc_ll_descr = self.assembler.cpu.gc_ll_descr
        self.assembler.malloc_cond(
            gc_ll_descr.get_nursery_free_addr(),
            gc_ll_descr.get_nursery_top_addr(),
            size)

    def _unpack_arraydescr(self, arraydescr):
        assert isinstance(arraydescr, ArrayDescr)
        ofs = arraydescr.basesize
        size = arraydescr.itemsize
        sign = arraydescr.is_item_signed()
        return size, ofs, sign

    def _unpack_fielddescr(self, fielddescr):
        assert isinstance(fielddescr, FieldDescr)
        ofs = fielddescr.offset
        size = fielddescr.field_size
        sign = fielddescr.is_field_signed()
        return imm(ofs), imm(size), sign
    _unpack_fielddescr._always_inline_ = True

    def _unpack_interiorfielddescr(self, descr):
        assert isinstance(descr, InteriorFieldDescr)
        arraydescr = descr.arraydescr
        ofs = arraydescr.basesize
        itemsize = arraydescr.itemsize
        fieldsize = descr.fielddescr.field_size
        sign = descr.fielddescr.is_field_signed()
        ofs += descr.fielddescr.offset
        return imm(ofs), imm(itemsize), imm(fieldsize), sign

    def consider_setfield_gc(self, op):
        ofs_loc, size_loc, _ = self._unpack_fielddescr(op.getdescr())
        assert isinstance(size_loc, ImmedLoc)
        if size_loc.value == 1:
            need_lower_byte = True
        else:
            need_lower_byte = False
        args = op.getarglist()
        base_loc = self.rm.make_sure_var_in_reg(op.getarg(0), args)
        value_loc = self.make_sure_var_in_reg(op.getarg(1), args,
                                              need_lower_byte=need_lower_byte)
        self.possibly_free_vars(args)
        self.PerformDiscard(op, [base_loc, ofs_loc, size_loc, value_loc])
    consider_setfield_raw = consider_setfield_gc

    def consider_setinteriorfield_gc(self, op):
        t = self._unpack_interiorfielddescr(op.getdescr())
        ofs, itemsize, fieldsize, _ = t
        args = op.getarglist()
        if fieldsize.value == 1:
            need_lower_byte = True
        else:
            need_lower_byte = False
        box_base, box_index, box_value = args
        base_loc = self.rm.make_sure_var_in_reg(box_base, args)
        index_loc = self.rm.make_sure_var_in_reg(box_index, args)
        value_loc = self.make_sure_var_in_reg(box_value, args,
                                              need_lower_byte=need_lower_byte)
        # If 'index_loc' is not an immediate, then we need a 'temp_loc' that
        # is a register whose value will be destroyed.  It's fine to destroy
        # the same register as 'index_loc', but not the other ones.
        self.rm.possibly_free_var(box_index)
        if not isinstance(index_loc, ImmedLoc):
            tempvar = TempBox()
            temp_loc = self.rm.force_allocate_reg(tempvar, [box_base,
                                                            box_value])
            self.rm.possibly_free_var(tempvar)
        else:
            temp_loc = None
        self.rm.possibly_free_var(box_base)
        self.possibly_free_var(box_value)
        self.PerformDiscard(op, [base_loc, ofs, itemsize, fieldsize,
                                 index_loc, temp_loc, value_loc])
    consider_setinteriorfield_raw = consider_setinteriorfield_gc

    def consider_strsetitem(self, op):
        args = op.getarglist()
        base_loc = self.rm.make_sure_var_in_reg(op.getarg(0), args)
        ofs_loc = self.rm.make_sure_var_in_reg(op.getarg(1), args)
        value_loc = self.rm.make_sure_var_in_reg(op.getarg(2), args,
                                                 need_lower_byte=True)
        self.rm.possibly_free_vars_for_op(op)
        self.PerformDiscard(op, [base_loc, ofs_loc, value_loc])
    consider_unicodesetitem = consider_strsetitem

    def consider_setarrayitem_gc(self, op):
        itemsize, ofs, _ = self._unpack_arraydescr(op.getdescr())
        args = op.getarglist()
        base_loc = self.rm.make_sure_var_in_reg(op.getarg(0), args)
        if itemsize == 1:
            need_lower_byte = True
        else:
            need_lower_byte = False
        value_loc = self.make_sure_var_in_reg(op.getarg(2), args,
                                              need_lower_byte=need_lower_byte)
        ofs_loc = self.rm.make_sure_var_in_reg(op.getarg(1), args)
        self.possibly_free_vars(args)
        self.PerformDiscard(op, [base_loc, ofs_loc, value_loc,
                                 imm(itemsize), imm(ofs)])
    consider_setarrayitem_raw = consider_setarrayitem_gc

    def consider_getfield_gc(self, op):
        ofs_loc, size_loc, sign = self._unpack_fielddescr(op.getdescr())
        args = op.getarglist()
        base_loc = self.rm.make_sure_var_in_reg(op.getarg(0), args)
        self.rm.possibly_free_vars(args)
        result_loc = self.force_allocate_reg(op.result)
        if sign:
            sign_loc = imm1
        else:
            sign_loc = imm0
        self.Perform(op, [base_loc, ofs_loc, size_loc, sign_loc], result_loc)
    consider_getfield_raw = consider_getfield_gc
    consider_getfield_raw_pure = consider_getfield_gc
    consider_getfield_gc_pure = consider_getfield_gc

    def consider_getarrayitem_gc(self, op):
        itemsize, ofs, sign = self._unpack_arraydescr(op.getdescr())
        args = op.getarglist()
        base_loc = self.rm.make_sure_var_in_reg(op.getarg(0), args)
        ofs_loc = self.rm.make_sure_var_in_reg(op.getarg(1), args)
        self.rm.possibly_free_vars_for_op(op)
        result_loc = self.force_allocate_reg(op.result)
        if sign:
            sign_loc = imm1
        else:
            sign_loc = imm0
        self.Perform(op, [base_loc, ofs_loc, imm(itemsize), imm(ofs),
                          sign_loc], result_loc)
    consider_getarrayitem_raw = consider_getarrayitem_gc
    consider_getarrayitem_gc_pure = consider_getarrayitem_gc

    def consider_getinteriorfield_gc(self, op):
        t = self._unpack_interiorfielddescr(op.getdescr())
        ofs, itemsize, fieldsize, sign = t
        if sign:
            sign_loc = imm1
        else:
            sign_loc = imm0
        args = op.getarglist()
        base_loc = self.rm.make_sure_var_in_reg(op.getarg(0), args)
        index_loc = self.rm.make_sure_var_in_reg(op.getarg(1), args)
        # 'base' and 'index' are put in two registers (or one if 'index'
        # is an immediate).  'result' can be in the same register as
        # 'index' but must be in a different register than 'base'.
        self.rm.possibly_free_var(op.getarg(1))
        result_loc = self.force_allocate_reg(op.result, [op.getarg(0)])
        assert isinstance(result_loc, RegLoc)
        # two cases: 1) if result_loc is a normal register, use it as temp_loc
        if not result_loc.is_xmm:
            temp_loc = result_loc
        else:
            # 2) if result_loc is an xmm register, we (likely) need another
            # temp_loc that is a normal register.  It can be in the same
            # register as 'index' but not 'base'.
            tempvar = TempBox()
            temp_loc = self.rm.force_allocate_reg(tempvar, [op.getarg(0)])
            self.rm.possibly_free_var(tempvar)
        self.rm.possibly_free_var(op.getarg(0))
        self.Perform(op, [base_loc, ofs, itemsize, fieldsize,
                          index_loc, temp_loc, sign_loc], result_loc)
    consider_getinteriorfield_raw = consider_getinteriorfield_gc

    def consider_int_is_true(self, op, guard_op):
        # doesn't need arg to be in a register
        argloc = self.loc(op.getarg(0))
        self.rm.possibly_free_var(op.getarg(0))
        if guard_op is not None:
            self.perform_with_guard(op, guard_op, [argloc], None)
        else:
            resloc = self.rm.force_allocate_reg(op.result, need_lower_byte=True)
            self.Perform(op, [argloc], resloc)
    consider_int_is_zero = consider_int_is_true

    def consider_same_as(self, op):
        argloc = self.loc(op.getarg(0))
        self.possibly_free_var(op.getarg(0))
        resloc = self.force_allocate_reg(op.result)
        self.Perform(op, [argloc], resloc)
    consider_cast_ptr_to_int = consider_same_as
    consider_cast_int_to_ptr = consider_same_as

    def consider_strlen(self, op):
        args = op.getarglist()
        base_loc = self.rm.make_sure_var_in_reg(op.getarg(0), args)
        self.rm.possibly_free_vars_for_op(op)
        result_loc = self.rm.force_allocate_reg(op.result)
        self.Perform(op, [base_loc], result_loc)
    consider_unicodelen = consider_strlen

    def consider_arraylen_gc(self, op):
        arraydescr = op.getdescr()
        assert isinstance(arraydescr, ArrayDescr)
        ofs = arraydescr.lendescr.offset
        args = op.getarglist()
        base_loc = self.rm.make_sure_var_in_reg(op.getarg(0), args)
        self.rm.possibly_free_vars_for_op(op)
        result_loc = self.rm.force_allocate_reg(op.result)
        self.Perform(op, [base_loc, imm(ofs)], result_loc)

    def consider_strgetitem(self, op):
        args = op.getarglist()
        base_loc = self.rm.make_sure_var_in_reg(op.getarg(0), args)
        ofs_loc = self.rm.make_sure_var_in_reg(op.getarg(1), args)
        self.rm.possibly_free_vars_for_op(op)
        result_loc = self.rm.force_allocate_reg(op.result)
        self.Perform(op, [base_loc, ofs_loc], result_loc)
    consider_unicodegetitem = consider_strgetitem

    def consider_copystrcontent(self, op):
        self._consider_copystrcontent(op, is_unicode=False)

    def consider_copyunicodecontent(self, op):
        self._consider_copystrcontent(op, is_unicode=True)

    def _consider_copystrcontent(self, op, is_unicode):
        # compute the source address
        args = op.getarglist()
        base_loc = self.rm.make_sure_var_in_reg(args[0], args)
        ofs_loc = self.rm.make_sure_var_in_reg(args[2], args)
        assert args[0] is not args[1]    # forbidden case of aliasing
        self.rm.possibly_free_var(args[0])
        if args[3] is not args[2] is not args[4]:  # MESS MESS MESS: don't free
            self.rm.possibly_free_var(args[2])     # it if ==args[3] or args[4]
        srcaddr_box = TempBox()
        forbidden_vars = [args[1], args[3], args[4], srcaddr_box]
        srcaddr_loc = self.rm.force_allocate_reg(srcaddr_box, forbidden_vars)
        self._gen_address_i

Note: this large file is truncated by the viewer at this point; the remainder of regalloc.py is not shown.