PageRenderTime 58ms CodeModel.GetById 18ms RepoModel.GetById 0ms app.codeStats 1ms

/rpython/jit/backend/x86/regalloc.py

https://bitbucket.org/kcr/pypy
Python | 1369 lines | 1065 code | 158 blank | 146 comment | 182 complexity | f100deee2862f55d78f055b6e2180b8e MD5 | raw file
Possible License(s): Apache-2.0

Large files are truncated, but you can click here to view the full file

  1. """ Register allocation scheme.
  2. """
  3. import os
  4. from rpython.jit.metainterp.history import (Box, Const, ConstInt, ConstPtr,
  5. ConstFloat, BoxInt,
  6. BoxFloat, INT, REF, FLOAT,
  7. TargetToken)
  8. from rpython.jit.backend.x86.regloc import *
  9. from rpython.rtyper.lltypesystem import lltype, rffi, rstr
  10. from rpython.rtyper.annlowlevel import cast_instance_to_gcref
  11. from rpython.rlib.objectmodel import we_are_translated
  12. from rpython.rlib import rgc
  13. from rpython.jit.backend.llsupport import symbolic
  14. from rpython.jit.backend.x86.jump import remap_frame_layout_mixed
  15. from rpython.jit.codewriter import longlong
  16. from rpython.jit.codewriter.effectinfo import EffectInfo
  17. from rpython.jit.metainterp.resoperation import rop
  18. from rpython.jit.backend.llsupport.descr import ArrayDescr
  19. from rpython.jit.backend.llsupport.descr import CallDescr
  20. from rpython.jit.backend.llsupport.descr import unpack_arraydescr
  21. from rpython.jit.backend.llsupport.descr import unpack_fielddescr
  22. from rpython.jit.backend.llsupport.descr import unpack_interiorfielddescr
  23. from rpython.jit.backend.llsupport.gcmap import allocate_gcmap
  24. from rpython.jit.backend.llsupport.regalloc import FrameManager, BaseRegalloc,\
  25. RegisterManager, TempBox, compute_vars_longevity, is_comparison_or_ovf_op
  26. from rpython.jit.backend.x86.arch import WORD, JITFRAME_FIXED_SIZE
  27. from rpython.jit.backend.x86.arch import IS_X86_32, IS_X86_64
  28. from rpython.jit.backend.x86 import rx86
  29. from rpython.rlib.rarithmetic import r_longlong, r_uint
class X86RegisterManager(RegisterManager):
    """Manager for the integer/pointer (GPR) registers on 32-bit x86."""

    box_types = [INT, REF]
    all_regs = [ecx, eax, edx, ebx, esi, edi]
    # esi/edi have no addressable 8-bit low part on x86-32, so they cannot
    # be used for operations needing a byte register (e.g. SETcc)
    no_lower_byte_regs = [esi, edi]
    # caller-saved registers: their content may be clobbered across calls
    save_around_call_regs = [eax, edx, ecx]
    frame_reg = ebp

    def call_result_location(self, v):
        # integer call results come back in eax
        return eax

    def convert_to_imm(self, c):
        """Return an immediate location for the constant 'c'.

        Only non-moving ConstPtrs are supported when translated; a movable
        GC pointer cannot be embedded as a plain immediate.
        """
        if isinstance(c, ConstInt):
            return imm(c.value)
        elif isinstance(c, ConstPtr):
            if we_are_translated() and c.value and rgc.can_move(c.value):
                not_implemented("convert_to_imm: ConstPtr needs special care")
            return imm(rffi.cast(lltype.Signed, c.value))
        else:
            not_implemented("convert_to_imm: got a %s" % c)
class X86_64_RegisterManager(X86RegisterManager):
    """GPR manager for x86-64: more registers, all with low-byte parts."""
    # r11 omitted because it's used as scratch
    all_regs = [ecx, eax, edx, ebx, esi, edi, r8, r9, r10, r12, r13, r14, r15]
    # on x86-64 every register has an addressable low byte (with REX prefix)
    no_lower_byte_regs = []
    save_around_call_regs = [eax, ecx, edx, esi, edi, r8, r9, r10]
class X86XMMRegisterManager(RegisterManager):
    """Manager for the xmm (float) registers on 32-bit x86."""

    box_types = [FLOAT]
    all_regs = [xmm0, xmm1, xmm2, xmm3, xmm4, xmm5, xmm6, xmm7]
    # we never need lower byte I hope
    # all xmm registers are caller-saved
    save_around_call_regs = all_regs

    def convert_to_imm(self, c):
        # Materialize the float constant in an 8-byte-aligned data block
        # and return a location pointing at it.
        adr = self.assembler.datablockwrapper.malloc_aligned(8, 8)
        x = c.getfloatstorage()
        rffi.cast(rffi.CArrayPtr(longlong.FLOATSTORAGE), adr)[0] = x
        return ConstFloatLoc(adr)

    def convert_to_imm_16bytes_align(self, c):
        # Same as convert_to_imm(), but 16-byte-aligned (as required by
        # some SSE instructions); the upper 8 bytes are zeroed.
        adr = self.assembler.datablockwrapper.malloc_aligned(16, 16)
        x = c.getfloatstorage()
        y = longlong.ZEROF
        rffi.cast(rffi.CArrayPtr(longlong.FLOATSTORAGE), adr)[0] = x
        rffi.cast(rffi.CArrayPtr(longlong.FLOATSTORAGE), adr)[1] = y
        return ConstFloatLoc(adr)

    def after_call(self, v):
        # the result is stored in st0, but we don't have this around,
        # so genop_call will move it to some frame location immediately
        # after the call
        return self.frame_manager.loc(v)
class X86_64_XMMRegisterManager(X86XMMRegisterManager):
    """xmm manager for x86-64, where float results come back in xmm0."""
    # xmm15 reserved for scratch use
    all_regs = [xmm0, xmm1, xmm2, xmm3, xmm4, xmm5, xmm6, xmm7, xmm8, xmm9,
                xmm10, xmm11, xmm12, xmm13, xmm14]
    save_around_call_regs = all_regs

    def call_result_location(self, v):
        # float call results come back in xmm0 on x86-64
        return xmm0

    def after_call(self, v):
        # We use RegisterManager's implementation, since X86XMMRegisterManager
        # places the result on the stack, which we don't need to do when the
        # calling convention places the result in xmm0
        return RegisterManager.after_call(self, v)
class X86FrameManager(FrameManager):
    """Maps spilled variables to slots in the JIT frame (relative to ebp)."""

    def __init__(self, base_ofs):
        FrameManager.__init__(self)
        # byte offset of the first frame slot inside the jitframe object
        self.base_ofs = base_ofs

    def frame_pos(self, i, box_type):
        # Return the location of frame slot 'i' for a box of 'box_type'.
        return FrameLoc(i, get_ebp_ofs(self.base_ofs, i), box_type)

    @staticmethod
    def frame_size(box_type):
        # A float needs two WORD-sized slots on 32-bit, one otherwise.
        if IS_X86_32 and box_type == FLOAT:
            return 2
        else:
            return 1

    @staticmethod
    def get_loc_index(loc):
        assert isinstance(loc, FrameLoc)
        return loc.position
# Select the concrete register manager classes for the host word size:
# 4 bytes -> x86-32, 8 bytes -> x86-64.
if WORD == 4:
    gpr_reg_mgr_cls = X86RegisterManager
    xmm_reg_mgr_cls = X86XMMRegisterManager
elif WORD == 8:
    gpr_reg_mgr_cls = X86_64_RegisterManager
    xmm_reg_mgr_cls = X86_64_XMMRegisterManager
else:
    raise AssertionError("Word size should be 4 or 8")

# Build a reverse map from machine register number to the index of that
# register inside all_regs (-1 for registers not managed at all).
gpr_reg_mgr_cls.all_reg_indexes = [-1] * WORD * 2    # eh, happens to be true
for _i, _reg in enumerate(gpr_reg_mgr_cls.all_regs):
    gpr_reg_mgr_cls.all_reg_indexes[_reg.value] = _i
  112. class RegAlloc(BaseRegalloc):
    def __init__(self, assembler, translate_support_code=False):
        assert isinstance(translate_support_code, bool)
        # variables that have place in register
        self.assembler = assembler
        self.translate_support_code = translate_support_code
        # to be read/used by the assembler too
        self.jump_target_descr = None
        self.final_jump_op = None

    def _prepare(self, inputargs, operations, allgcrefs):
        """Common setup for loops and bridges: rewrite the operations for
        the GC, compute variable longevity, and create the register and
        frame managers.  Returns the (possibly rewritten) operations."""
        cpu = self.assembler.cpu
        self.fm = X86FrameManager(cpu.get_baseofs_of_frame_field())
        operations = cpu.gc_ll_descr.rewrite_assembler(cpu, operations,
                                                       allgcrefs)
        # compute longevity of variables
        longevity, last_real_usage = compute_vars_longevity(
            inputargs, operations)
        self.longevity = longevity
        self.last_real_usage = last_real_usage
        self.rm = gpr_reg_mgr_cls(self.longevity,
                                  frame_manager = self.fm,
                                  assembler = self.assembler)
        self.xrm = xmm_reg_mgr_cls(self.longevity, frame_manager = self.fm,
                                   assembler = self.assembler)
        return operations

    def prepare_loop(self, inputargs, operations, looptoken, allgcrefs):
        """Prepare allocation for a fresh loop (inputargs start in their
        standard frame locations)."""
        operations = self._prepare(inputargs, operations, allgcrefs)
        self._set_initial_bindings(inputargs, looptoken)
        # note: we need to make a copy of inputargs because possibly_free_vars
        # is also used on op args, which is a non-resizable list
        self.possibly_free_vars(list(inputargs))
        if WORD == 4:       # see redirect_call_assembler()
            self.min_bytes_before_label = 5
        else:
            self.min_bytes_before_label = 13
        return operations

    def prepare_bridge(self, inputargs, arglocs, operations, allgcrefs,
                       frame_info):
        """Prepare allocation for a bridge: inputargs start wherever the
        failing guard left them ('arglocs')."""
        operations = self._prepare(inputargs, operations, allgcrefs)
        self._update_bindings(arglocs, inputargs)
        self.min_bytes_before_label = 0
        return operations
  154. def ensure_next_label_is_at_least_at_position(self, at_least_position):
  155. self.min_bytes_before_label = max(self.min_bytes_before_label,
  156. at_least_position)
  157. def get_final_frame_depth(self):
  158. return self.fm.get_frame_depth()
  159. def possibly_free_var(self, var):
  160. if var.type == FLOAT:
  161. self.xrm.possibly_free_var(var)
  162. else:
  163. self.rm.possibly_free_var(var)
  164. def possibly_free_vars_for_op(self, op):
  165. for i in range(op.numargs()):
  166. var = op.getarg(i)
  167. if var is not None: # xxx kludgy
  168. self.possibly_free_var(var)
  169. if op.result:
  170. self.possibly_free_var(op.result)
  171. def possibly_free_vars(self, vars):
  172. for var in vars:
  173. if var is not None: # xxx kludgy
  174. self.possibly_free_var(var)
  175. def make_sure_var_in_reg(self, var, forbidden_vars=[],
  176. selected_reg=None, need_lower_byte=False):
  177. if var.type == FLOAT:
  178. if isinstance(var, ConstFloat):
  179. return FloatImmedLoc(var.getfloatstorage())
  180. return self.xrm.make_sure_var_in_reg(var, forbidden_vars,
  181. selected_reg, need_lower_byte)
  182. else:
  183. return self.rm.make_sure_var_in_reg(var, forbidden_vars,
  184. selected_reg, need_lower_byte)
  185. def force_allocate_reg(self, var, forbidden_vars=[], selected_reg=None,
  186. need_lower_byte=False):
  187. if var.type == FLOAT:
  188. return self.xrm.force_allocate_reg(var, forbidden_vars,
  189. selected_reg, need_lower_byte)
  190. else:
  191. return self.rm.force_allocate_reg(var, forbidden_vars,
  192. selected_reg, need_lower_byte)
  193. def force_spill_var(self, var):
  194. if var.type == FLOAT:
  195. return self.xrm.force_spill_var(var)
  196. else:
  197. return self.rm.force_spill_var(var)
  198. def load_xmm_aligned_16_bytes(self, var, forbidden_vars=[]):
  199. # Load 'var' in a register; but if it is a constant, we can return
  200. # a 16-bytes-aligned ConstFloatLoc.
  201. if isinstance(var, Const):
  202. return self.xrm.convert_to_imm_16bytes_align(var)
  203. else:
  204. return self.xrm.make_sure_var_in_reg(var, forbidden_vars)
    def _frame_bindings(self, locs, inputargs):
        """Return a dict mapping each input argument that is *not* in a
        register to its frame location."""
        bindings = {}
        i = 0
        for loc in locs:
            if loc is None:
                continue
            arg = inputargs[i]
            i += 1
            if not isinstance(loc, RegLoc):
                bindings[arg] = loc
        return bindings

    def _update_bindings(self, locs, inputargs):
        # XXX this should probably go to llsupport/regalloc.py
        # Rebuild the register/frame state of a bridge from the locations
        # 'locs' in which the failing guard left the input arguments.
        used = {}
        i = 0
        for loc in locs:
            if loc is None:    # xxx bit kludgy
                loc = ebp
            arg = inputargs[i]
            i += 1
            if isinstance(loc, RegLoc):
                if arg.type == FLOAT:
                    self.xrm.reg_bindings[arg] = loc
                    used[loc] = None
                else:
                    if loc is ebp:
                        # the argument lives directly in the frame register
                        self.rm.bindings_to_frame_reg[arg] = None
                    else:
                        self.rm.reg_bindings[arg] = loc
                        used[loc] = None
            else:
                self.fm.bind(arg, loc)
        # recompute the free-register lists from what was left unused
        self.rm.free_regs = []
        for reg in self.rm.all_regs:
            if reg not in used:
                self.rm.free_regs.append(reg)
        self.xrm.free_regs = []
        for reg in self.xrm.all_regs:
            if reg not in used:
                self.xrm.free_regs.append(reg)
        self.possibly_free_vars(list(inputargs))
        self.fm.finish_binding()
        self.rm._check_invariants()
        self.xrm._check_invariants()
  249. def perform(self, op, arglocs, result_loc):
  250. if not we_are_translated():
  251. self.assembler.dump('%s <- %s(%s)' % (result_loc, op, arglocs))
  252. self.assembler.regalloc_perform(op, arglocs, result_loc)
  253. def perform_llong(self, op, arglocs, result_loc):
  254. if not we_are_translated():
  255. self.assembler.dump('%s <- %s(%s)' % (result_loc, op, arglocs))
  256. self.assembler.regalloc_perform_llong(op, arglocs, result_loc)
  257. def perform_math(self, op, arglocs, result_loc):
  258. if not we_are_translated():
  259. self.assembler.dump('%s <- %s(%s)' % (result_loc, op, arglocs))
  260. self.assembler.regalloc_perform_math(op, arglocs, result_loc)
  261. def locs_for_fail(self, guard_op):
  262. return [self.loc(v) for v in guard_op.getfailargs()]
    def perform_with_guard(self, op, guard_op, arglocs, result_loc):
        """Emit 'op' fused with the guard that immediately follows it."""
        faillocs = self.locs_for_fail(guard_op)
        # advance past the guard, so that variables dying at the guard are
        # not considered free while emitting the fused pair
        self.rm.position += 1
        self.xrm.position += 1
        self.assembler.regalloc_perform_with_guard(op, guard_op, faillocs,
                                                   arglocs, result_loc,
                                                   self.fm.get_frame_depth())
        self.possibly_free_vars(guard_op.getfailargs())

    def perform_guard(self, guard_op, arglocs, result_loc):
        """Emit a stand-alone guard operation."""
        faillocs = self.locs_for_fail(guard_op)
        if not we_are_translated():
            if result_loc is not None:
                self.assembler.dump('%s <- %s(%s)' % (result_loc, guard_op,
                                                      arglocs))
            else:
                self.assembler.dump('%s(%s)' % (guard_op, arglocs))
        self.assembler.regalloc_perform_guard(guard_op, faillocs, arglocs,
                                              result_loc,
                                              self.fm.get_frame_depth())
        self.possibly_free_vars(guard_op.getfailargs())

    def perform_discard(self, op, arglocs):
        """Emit 'op', which produces no result."""
        if not we_are_translated():
            self.assembler.dump('%s(%s)' % (op, arglocs))
        self.assembler.regalloc_perform_discard(op, arglocs)
    def walk_operations(self, inputargs, operations):
        """Main driver: allocate locations and emit code for every
        operation of the trace, in order."""
        i = 0
        #self.operations = operations
        while i < len(operations):
            op = operations[i]
            self.assembler.mc.mark_op(op)
            self.rm.position = i
            self.xrm.position = i
            if op.has_no_side_effect() and op.result not in self.longevity:
                # dead pure operation: skip it entirely
                i += 1
                self.possibly_free_vars_for_op(op)
                continue
            if self.can_merge_with_next_guard(op, i, operations):
                # emit the operation fused with the guard that follows it
                oplist_with_guard[op.getopnum()](self, op, operations[i + 1])
                i += 1
            elif not we_are_translated() and op.getopnum() == -124:
                # testing-only FORCE_SPILL pseudo-operation
                self._consider_force_spill(op)
            else:
                oplist[op.getopnum()](self, op)
            self.possibly_free_vars_for_op(op)
            self.rm._check_invariants()
            self.xrm._check_invariants()
            i += 1
        # at the end of the trace, no variable may still live in a register
        assert not self.rm.reg_bindings
        assert not self.xrm.reg_bindings
        self.flush_loop()
        self.assembler.mc.mark_op(None)    # end of the loop
        for arg in inputargs:
            self.possibly_free_var(arg)

    def flush_loop(self):
        # rare case: if the loop is too short, or if we are just after
        # a GUARD_NOT_INVALIDATED, pad with NOPs.  Important!  This must
        # be called to ensure that there are enough bytes produced,
        # because GUARD_NOT_INVALIDATED or redirect_call_assembler()
        # will maybe overwrite them.
        mc = self.assembler.mc
        while mc.get_relative_pos() < self.min_bytes_before_label:
            mc.NOP()
  325. def loc(self, v):
  326. if v is None: # xxx kludgy
  327. return None
  328. if v.type == FLOAT:
  329. return self.xrm.loc(v)
  330. return self.rm.loc(v)
    def _consider_guard(self, op):
        # common case: guards that only need their single argument in a
        # register
        loc = self.rm.make_sure_var_in_reg(op.getarg(0))
        self.perform_guard(op, [loc], None)

    consider_guard_true = _consider_guard
    consider_guard_false = _consider_guard
    consider_guard_nonnull = _consider_guard
    consider_guard_isnull = _consider_guard

    def consider_finish(self, op):
        # the frame is in ebp, but we have to point where in the frame is
        # the potential argument to FINISH
        descr = op.getdescr()
        fail_descr = cast_instance_to_gcref(descr)
        # we know it does not move, but well
        rgc._make_sure_does_not_move(fail_descr)
        fail_descr = rffi.cast(lltype.Signed, fail_descr)
        if op.numargs() == 1:
            loc = self.make_sure_var_in_reg(op.getarg(0))
            locs = [loc, imm(fail_descr)]
        else:
            locs = [imm(fail_descr)]
        self.perform(op, locs, None)

    def consider_guard_no_exception(self, op):
        self.perform_guard(op, [], None)

    def consider_guard_not_invalidated(self, op):
        mc = self.assembler.mc
        n = mc.get_relative_pos()
        self.perform_guard(op, [], None)
        assert n == mc.get_relative_pos()    # this guard emits no code
        # ensure that the next label is at least 5 bytes farther than
        # the current position.  Otherwise, when invalidating the guard,
        # we would overwrite randomly the next label's position.
        self.ensure_next_label_is_at_least_at_position(n + 5)

    def consider_guard_exception(self, op):
        loc = self.rm.make_sure_var_in_reg(op.getarg(0))
        # temporary register to load the exception value into
        box = TempBox()
        args = op.getarglist()
        loc1 = self.rm.force_allocate_reg(box, args)
        if op.result in self.longevity:
            # this means, is it ever used
            resloc = self.rm.force_allocate_reg(op.result, args + [box])
        else:
            resloc = None
        self.perform_guard(op, [loc, loc1], resloc)
        self.rm.possibly_free_var(box)

    consider_guard_no_overflow = consider_guard_no_exception
    consider_guard_overflow = consider_guard_no_exception

    def consider_guard_value(self, op):
        x = self.make_sure_var_in_reg(op.getarg(0))
        y = self.loc(op.getarg(1))
        self.perform_guard(op, [x, y], None)

    def consider_guard_class(self, op):
        assert isinstance(op.getarg(0), Box)
        x = self.rm.make_sure_var_in_reg(op.getarg(0))
        y = self.loc(op.getarg(1))
        self.perform_guard(op, [x, y], None)

    consider_guard_nonnull_class = consider_guard_class
    def _consider_binop_part(self, op):
        # x86 two-operand form: force the result into the same register
        # as the first argument; the second may stay wherever it is.
        x = op.getarg(0)
        argloc = self.loc(op.getarg(1))
        args = op.getarglist()
        loc = self.rm.force_result_in_reg(op.result, x, args)
        return loc, argloc

    def _consider_binop(self, op):
        loc, argloc = self._consider_binop_part(op)
        self.perform(op, [loc, argloc], loc)

    def _consider_lea(self, op, loc):
        # ADD/SUB of a small constant can be emitted as LEA, which writes
        # a fresh result register without destroying the source
        argloc = self.loc(op.getarg(1))
        resloc = self.force_allocate_reg(op.result)
        self.perform(op, [loc, argloc], resloc)

    def consider_int_add(self, op):
        loc = self.loc(op.getarg(0))
        y = op.getarg(1)
        if (isinstance(loc, RegLoc) and
            isinstance(y, ConstInt) and rx86.fits_in_32bits(y.value)):
            self._consider_lea(op, loc)
        else:
            self._consider_binop(op)

    def consider_int_sub(self, op):
        loc = self.loc(op.getarg(0))
        y = op.getarg(1)
        # note: LEA uses the negated constant, hence fits_in_32bits(-y)
        if (isinstance(loc, RegLoc) and
            isinstance(y, ConstInt) and rx86.fits_in_32bits(-y.value)):
            self._consider_lea(op, loc)
        else:
            self._consider_binop(op)

    consider_int_mul = _consider_binop
    consider_int_and = _consider_binop
    consider_int_or = _consider_binop
    consider_int_xor = _consider_binop

    def _consider_binop_with_guard(self, op, guard_op):
        # same as _consider_binop, but fused with the overflow guard
        loc, argloc = self._consider_binop_part(op)
        self.perform_with_guard(op, guard_op, [loc, argloc], loc)

    consider_int_mul_ovf = _consider_binop_with_guard
    consider_int_sub_ovf = _consider_binop_with_guard
    consider_int_add_ovf = _consider_binop_with_guard

    def consider_int_neg(self, op):
        res = self.rm.force_result_in_reg(op.result, op.getarg(0))
        self.perform(op, [res], res)

    consider_int_invert = consider_int_neg

    def consider_int_lshift(self, op):
        # x86 requires a variable shift count to be in cl (i.e. ecx)
        if isinstance(op.getarg(1), Const):
            loc2 = self.rm.convert_to_imm(op.getarg(1))
        else:
            loc2 = self.rm.make_sure_var_in_reg(op.getarg(1), selected_reg=ecx)
        args = op.getarglist()
        loc1 = self.rm.force_result_in_reg(op.result, op.getarg(0), args)
        self.perform(op, [loc1, loc2], loc1)

    consider_int_rshift = consider_int_lshift
    consider_uint_rshift = consider_int_lshift
    def _consider_int_div_or_mod(self, op, resultreg, trashreg):
        # x86 IDIV wants the dividend in eax (sign-extended into edx) and
        # clobbers both eax and edx; the divisor is pinned to ecx here.
        l0 = self.rm.make_sure_var_in_reg(op.getarg(0), selected_reg=eax)
        l1 = self.rm.make_sure_var_in_reg(op.getarg(1), selected_reg=ecx)
        l2 = self.rm.force_allocate_reg(op.result, selected_reg=resultreg)
        # the register (eax or edx) not holding what we are looking for
        # will be just trash after that operation
        tmpvar = TempBox()
        self.rm.force_allocate_reg(tmpvar, selected_reg=trashreg)
        assert l0 is eax
        assert l1 is ecx
        assert l2 is resultreg
        self.rm.possibly_free_var(tmpvar)

    def consider_int_mod(self, op):
        # the remainder comes back in edx; eax is trashed
        self._consider_int_div_or_mod(op, edx, eax)
        self.perform(op, [eax, ecx], edx)

    def consider_int_floordiv(self, op):
        # the quotient comes back in eax; edx is trashed
        self._consider_int_div_or_mod(op, eax, edx)
        self.perform(op, [eax, ecx], eax)

    consider_uint_floordiv = consider_int_floordiv
    def _consider_compop(self, op, guard_op):
        """Integer comparison, possibly fused with the following guard."""
        vx = op.getarg(0)
        vy = op.getarg(1)
        arglocs = [self.loc(vx), self.loc(vy)]
        args = op.getarglist()
        if (vx in self.rm.reg_bindings or vy in self.rm.reg_bindings or
            isinstance(vx, Const) or isinstance(vy, Const)):
            pass
        else:
            # neither side is in a register nor constant: CMP needs at
            # least one register operand, so load the first one
            arglocs[0] = self.rm.make_sure_var_in_reg(vx)
        if guard_op is None:
            # materialize the flag with SETcc, which needs a byte register
            loc = self.rm.force_allocate_reg(op.result, args,
                                             need_lower_byte=True)
            self.perform(op, arglocs, loc)
        else:
            # fused with the guard: only CMP + Jcc are emitted
            self.perform_with_guard(op, guard_op, arglocs, None)

    consider_int_lt = _consider_compop
    consider_int_gt = _consider_compop
    consider_int_ge = _consider_compop
    consider_int_le = _consider_compop
    consider_int_ne = _consider_compop
    consider_int_eq = _consider_compop
    consider_uint_gt = _consider_compop
    consider_uint_lt = _consider_compop
    consider_uint_le = _consider_compop
    consider_uint_ge = _consider_compop
    consider_ptr_eq = consider_instance_ptr_eq = _consider_compop
    consider_ptr_ne = consider_instance_ptr_ne = _consider_compop
    def _consider_float_op(self, op):
        # SSE two-operand form: result shares a register with argument 0
        loc1 = self.xrm.loc(op.getarg(1))
        args = op.getarglist()
        loc0 = self.xrm.force_result_in_reg(op.result, op.getarg(0), args)
        self.perform(op, [loc0, loc1], loc0)

    consider_float_add = _consider_float_op
    consider_float_sub = _consider_float_op
    consider_float_mul = _consider_float_op
    consider_float_truediv = _consider_float_op

    def _consider_float_cmp(self, op, guard_op):
        """Float comparison, possibly fused with the following guard."""
        vx = op.getarg(0)
        vy = op.getarg(1)
        arglocs = [self.loc(vx), self.loc(vy)]
        if not (isinstance(arglocs[0], RegLoc) or
                isinstance(arglocs[1], RegLoc)):
            # the comparison needs at least one xmm-register operand
            if isinstance(vx, Const):
                arglocs[1] = self.xrm.make_sure_var_in_reg(vy)
            else:
                arglocs[0] = self.xrm.make_sure_var_in_reg(vx)
        if guard_op is None:
            # SETcc needs a byte register for the boolean result
            res = self.rm.force_allocate_reg(op.result, need_lower_byte=True)
            self.perform(op, arglocs, res)
        else:
            self.perform_with_guard(op, guard_op, arglocs, None)

    consider_float_lt = _consider_float_cmp
    consider_float_le = _consider_float_cmp
    consider_float_eq = _consider_float_cmp
    consider_float_ne = _consider_float_cmp
    consider_float_gt = _consider_float_cmp
    consider_float_ge = _consider_float_cmp

    def _consider_float_unary_op(self, op):
        loc0 = self.xrm.force_result_in_reg(op.result, op.getarg(0))
        self.perform(op, [loc0], loc0)

    consider_float_neg = _consider_float_unary_op
    consider_float_abs = _consider_float_unary_op

    def consider_cast_float_to_int(self, op):
        # source in an xmm register, integer result in a GPR
        loc0 = self.xrm.make_sure_var_in_reg(op.getarg(0))
        loc1 = self.rm.force_allocate_reg(op.result)
        self.perform(op, [loc0], loc1)

    def consider_cast_int_to_float(self, op):
        # source in a GPR, float result in an xmm register
        loc0 = self.rm.make_sure_var_in_reg(op.getarg(0))
        loc1 = self.xrm.force_allocate_reg(op.result)
        self.perform(op, [loc0], loc1)

    def consider_cast_float_to_singlefloat(self, op):
        loc0 = self.xrm.make_sure_var_in_reg(op.getarg(0))
        loc1 = self.rm.force_allocate_reg(op.result)
        # scratch xmm register for the conversion
        tmpxvar = TempBox()
        loctmp = self.xrm.force_allocate_reg(tmpxvar)    # may be equal to loc0
        self.xrm.possibly_free_var(tmpxvar)
        self.perform(op, [loc0, loctmp], loc1)

    consider_cast_singlefloat_to_float = consider_cast_int_to_float
    def consider_convert_float_bytes_to_longlong(self, op):
        if longlong.is_64_bit:
            # 64-bit: the result is a plain int, move xmm -> GPR
            loc0 = self.xrm.make_sure_var_in_reg(op.getarg(0))
            loc1 = self.rm.force_allocate_reg(op.result)
            self.perform(op, [loc0], loc1)
        else:
            # 32-bit: long longs live in xmm registers themselves
            arg0 = op.getarg(0)
            loc0 = self.xrm.loc(arg0)
            loc1 = self.xrm.force_allocate_reg(op.result, forbidden_vars=[arg0])
            self.perform(op, [loc0], loc1)

    def consider_convert_longlong_bytes_to_float(self, op):
        if longlong.is_64_bit:
            # 64-bit: the argument is a plain int, move GPR -> xmm
            loc0 = self.rm.make_sure_var_in_reg(op.getarg(0))
            loc1 = self.xrm.force_allocate_reg(op.result)
            self.perform(op, [loc0], loc1)
        else:
            # 32-bit: both source and result are in xmm registers
            arg0 = op.getarg(0)
            loc0 = self.xrm.make_sure_var_in_reg(arg0)
            loc1 = self.xrm.force_allocate_reg(op.result, forbidden_vars=[arg0])
            self.perform(op, [loc0], loc1)
    def _consider_llong_binop_xx(self, op):
        # must force both arguments into xmm registers, because we don't
        # know if they will be suitably aligned.  Exception: if the second
        # argument is a constant, we can ask it to be aligned to 16 bytes.
        # (args[0] is the function descriptor; real args start at index 1)
        args = [op.getarg(1), op.getarg(2)]
        loc1 = self.load_xmm_aligned_16_bytes(args[1])
        loc0 = self.xrm.force_result_in_reg(op.result, args[0], args)
        self.perform_llong(op, [loc0, loc1], loc0)

    def _consider_llong_eq_ne_xx(self, op):
        # must force both arguments into xmm registers, because we don't
        # know if they will be suitably aligned.  Exception: if they are
        # constants, we can ask them to be aligned to 16 bytes.
        args = [op.getarg(1), op.getarg(2)]
        loc1 = self.load_xmm_aligned_16_bytes(args[0])
        loc2 = self.load_xmm_aligned_16_bytes(args[1], args)
        # scratch xmm register for the comparison
        tmpxvar = TempBox()
        loc3 = self.xrm.force_allocate_reg(tmpxvar, args)
        self.xrm.possibly_free_var(tmpxvar)
        # the boolean result needs a byte register
        loc0 = self.rm.force_allocate_reg(op.result, need_lower_byte=True)
        self.perform_llong(op, [loc1, loc2, loc3], loc0)

    def _maybe_consider_llong_lt(self, op):
        # XXX just a special case for now: "x < 0".  Returns False if the
        # general call path should be used instead.
        box = op.getarg(2)
        if not isinstance(box, ConstFloat):
            return False
        if box.getlonglong() != 0:
            return False
        # "x < 0"
        box = op.getarg(1)
        assert isinstance(box, BoxFloat)
        loc1 = self.xrm.make_sure_var_in_reg(box)
        loc0 = self.rm.force_allocate_reg(op.result)
        self.perform_llong(op, [loc1], loc0)
        return True

    def _consider_llong_to_int(self, op):
        # accept an argument in a xmm register or in the stack
        loc1 = self.xrm.loc(op.getarg(1))
        loc0 = self.rm.force_allocate_reg(op.result)
        self.perform_llong(op, [loc1], loc0)

    def _loc_of_const_longlong(self, value64):
        # store the 64-bit constant in memory and return its location
        c = ConstFloat(value64)
        return self.xrm.convert_to_imm(c)

    def _consider_llong_from_int(self, op):
        assert IS_X86_32
        loc0 = self.xrm.force_allocate_reg(op.result)
        box = op.getarg(1)
        if isinstance(box, ConstInt):
            loc1 = self._loc_of_const_longlong(r_longlong(box.value))
            loc2 = None    # unused
        else:
            loc1 = self.rm.make_sure_var_in_reg(box)
            # scratch xmm register for the sign extension
            tmpxvar = TempBox()
            loc2 = self.xrm.force_allocate_reg(tmpxvar, [op.result])
            self.xrm.possibly_free_var(tmpxvar)
        self.perform_llong(op, [loc1, loc2], loc0)

    def _consider_llong_from_uint(self, op):
        assert IS_X86_32
        loc0 = self.xrm.force_allocate_reg(op.result)
        loc1 = self.rm.make_sure_var_in_reg(op.getarg(1))
        self.perform_llong(op, [loc1], loc0)

    def _consider_math_sqrt(self, op):
        # arg 0 is the function descriptor; the float operand is arg 1
        loc0 = self.xrm.force_result_in_reg(op.result, op.getarg(1))
        self.perform_math(op, [loc0], loc0)
    def _call(self, op, arglocs, force_store=[], guard_not_forced_op=None):
        # we need to save registers on the stack:
        #
        #  - at least the non-callee-saved registers
        #
        #  - we assume that any call can collect, and we
        #    save also the callee-saved registers that contain GC pointers
        #
        #  - for CALL_MAY_FORCE or CALL_ASSEMBLER, we have to save all regs
        #    anyway, in case we need to do cpu.force().  The issue is that
        #    grab_frame_values() would not be able to locate values in
        #    callee-saved registers.
        #
        save_all_regs = guard_not_forced_op is not None
        self.xrm.before_call(force_store, save_all_regs=save_all_regs)
        if not save_all_regs:
            gcrootmap = self.assembler.cpu.gc_ll_descr.gcrootmap
            # we save all the registers for shadowstack and asmgcc for now
            # --- for asmgcc too: we can't say "register x is a gc ref"
            # without distinguishing call sites, which we don't do any
            # more for now.
            if gcrootmap: # and gcrootmap.is_shadow_stack:
                save_all_regs = 2
        self.rm.before_call(force_store, save_all_regs=save_all_regs)
        if op.result is not None:
            if op.result.type == FLOAT:
                resloc = self.xrm.after_call(op.result)
            else:
                resloc = self.rm.after_call(op.result)
        else:
            resloc = None
        if guard_not_forced_op is not None:
            self.perform_with_guard(op, guard_not_forced_op, arglocs, resloc)
        else:
            self.perform(op, arglocs, resloc)

    def _consider_call(self, op, guard_not_forced_op=None):
        """Generic residual call: pass [size, signedness] + all arg
        locations down to _call()."""
        calldescr = op.getdescr()
        assert isinstance(calldescr, CallDescr)
        assert len(calldescr.arg_classes) == op.numargs() - 1
        size = calldescr.get_result_size()
        sign = calldescr.is_result_signed()
        if sign:
            sign_loc = imm1
        else:
            sign_loc = imm0
        self._call(op, [imm(size), sign_loc] +
                       [self.loc(op.getarg(i)) for i in range(op.numargs())],
                   guard_not_forced_op=guard_not_forced_op)
    def consider_call(self, op):
        # Dispatch known oopspec calls to specialized in-line code paths;
        # fall back to a generic residual call otherwise.
        effectinfo = op.getdescr().get_extra_info()
        oopspecindex = effectinfo.oopspecindex
        if oopspecindex != EffectInfo.OS_NONE:
            if IS_X86_32:
                # support for some of the llong operations,
                # which only exist on x86-32
                if oopspecindex in (EffectInfo.OS_LLONG_ADD,
                                    EffectInfo.OS_LLONG_SUB,
                                    EffectInfo.OS_LLONG_AND,
                                    EffectInfo.OS_LLONG_OR,
                                    EffectInfo.OS_LLONG_XOR):
                    return self._consider_llong_binop_xx(op)
                if oopspecindex == EffectInfo.OS_LLONG_TO_INT:
                    return self._consider_llong_to_int(op)
                if oopspecindex == EffectInfo.OS_LLONG_FROM_INT:
                    return self._consider_llong_from_int(op)
                if oopspecindex == EffectInfo.OS_LLONG_FROM_UINT:
                    return self._consider_llong_from_uint(op)
                if (oopspecindex == EffectInfo.OS_LLONG_EQ or
                    oopspecindex == EffectInfo.OS_LLONG_NE):
                    return self._consider_llong_eq_ne_xx(op)
                if oopspecindex == EffectInfo.OS_LLONG_LT:
                    # only handles the special case "x < 0" inline
                    if self._maybe_consider_llong_lt(op):
                        return
            if oopspecindex == EffectInfo.OS_MATH_SQRT:
                return self._consider_math_sqrt(op)
        self._consider_call(op)

    def consider_call_may_force(self, op, guard_op):
        assert guard_op is not None
        self._consider_call(op, guard_op)

    def consider_call_release_gil(self, op, guard_op):
        # We spill the arguments to the stack, because we need to do 3 calls:
        # call_release_gil(), the_real_c_function(), and call_reacquire_gil().
        # The arguments are used on the second call only.  XXX we assume
        # that the XMM arguments won't be modified by call_release_gil().
        for i in range(op.numargs()):
            loc = self.loc(op.getarg(i))
            if loc in self.rm.save_around_call_regs:
                self.rm.force_spill_var(op.getarg(i))
        assert guard_op is not None
        self._consider_call(op, guard_op)

    def consider_call_malloc_gc(self, op):
        self._consider_call(op)

    def consider_call_assembler(self, op, guard_op):
        # call into another compiled loop; always guarded by GUARD_NOT_FORCED
        locs = self.locs_for_call_assembler(op, guard_op)
        self._call(op, locs, guard_not_forced_op=guard_op)
    def consider_cond_call_gc_wb(self, op):
        # GC write barrier (plain or array/card-marking variant).
        # It produces no result.
        assert op.result is None
        args = op.getarglist()
        N = len(args)
        # we force all arguments in a reg (unless they are Consts),
        # because it will be needed anyway by the following setfield_gc
        # or setarrayitem_gc. It avoids loading it twice from the memory.
        arglocs = [self.rm.make_sure_var_in_reg(op.getarg(i), args)
                   for i in range(N)]
        self.perform_discard(op, arglocs)

    consider_cond_call_gc_wb_array = consider_cond_call_gc_wb
    def consider_call_malloc_nursery(self, op):
        # Fixed-size nursery allocation: the fast path bumps the nursery
        # pointer inline; the result is pinned to eax (expected by the
        # malloc slowpath as well).
        size_box = op.getarg(0)
        assert isinstance(size_box, ConstInt)
        size = size_box.getint()
        # looking at the result
        self.rm.force_allocate_reg(op.result, selected_reg=eax)
        #
        # We need edi as a temporary, but otherwise don't save any more
        # register.  See comments in _build_malloc_slowpath().
        tmp_box = TempBox()
        self.rm.force_allocate_reg(tmp_box, selected_reg=edi)
        # allocate the gcmap *before* releasing tmp_box, while eax/edi are
        # still recorded as in use (they are excluded from the map)
        gcmap = self.get_gcmap([eax, edi])
        self.rm.possibly_free_var(tmp_box)
        #
        gc_ll_descr = self.assembler.cpu.gc_ll_descr
        self.assembler.malloc_cond(
            gc_ll_descr.get_nursery_free_addr(),
            gc_ll_descr.get_nursery_top_addr(),
            size, gcmap)
    def consider_call_malloc_nursery_varsize_small(self, op):
        # Variable-size nursery allocation where the size is known to be
        # small; same register conventions as consider_call_malloc_nursery.
        size_box = op.getarg(0)
        assert isinstance(size_box, BoxInt) # we cannot have a const here!
        # looking at the result
        self.rm.force_allocate_reg(op.result, selected_reg=eax)
        #
        # We need edi as a temporary, but otherwise don't save any more
        # register.  See comments in _build_malloc_slowpath().
        tmp_box = TempBox()
        self.rm.force_allocate_reg(tmp_box, selected_reg=edi)
        # the size must live in a third register, different from the
        # result (eax) and the temporary (edi)
        sizeloc = self.rm.make_sure_var_in_reg(size_box, [op.result, tmp_box])
        # allocate the gcmap *before* releasing tmp_box (see
        # consider_call_malloc_nursery)
        gcmap = self.get_gcmap([eax, edi])
        self.rm.possibly_free_var(tmp_box)
        #
        gc_ll_descr = self.assembler.cpu.gc_ll_descr
        self.assembler.malloc_cond_varsize_small(
            gc_ll_descr.get_nursery_free_addr(),
            gc_ll_descr.get_nursery_top_addr(),
            sizeloc, gcmap)
    def get_gcmap(self, forbidden_regs=[], noregs=False):
        """Build the gcmap describing which slots currently hold GC refs.

        The gcmap is a bitmap with one bit per word-sized slot of the JIT
        frame (frame locations are offset by JITFRAME_FIXED_SIZE; the
        fixed part maps the general-purpose registers).

        'forbidden_regs' lists registers whose contents must not be
        recorded (the caller is about to clobber them).  'noregs'=True
        asserts that no live GC pointer is in any register at all.
        Note: the default list is never mutated, so the shared [] is safe.
        """
        frame_depth = self.fm.get_frame_depth()
        gcmap = allocate_gcmap(self.assembler, frame_depth, JITFRAME_FIXED_SIZE)
        # registers currently bound to live GC pointers
        for box, loc in self.rm.reg_bindings.iteritems():
            if loc in forbidden_regs:
                continue
            if box.type == REF and self.rm.is_still_alive(box):
                assert not noregs
                assert isinstance(loc, RegLoc)
                val = gpr_reg_mgr_cls.all_reg_indexes[loc.value]
                # set bit 'val' in the bitmap (WORD*8 bits per array entry)
                gcmap[val // WORD // 8] |= r_uint(1) << (val % (WORD * 8))
        # frame slots currently bound to live GC pointers
        for box, loc in self.fm.bindings.iteritems():
            if box.type == REF and self.rm.is_still_alive(box):
                assert isinstance(loc, FrameLoc)
                val = loc.position + JITFRAME_FIXED_SIZE
                gcmap[val // WORD // 8] |= r_uint(1) << (val % (WORD * 8))
        return gcmap
  782. def consider_setfield_gc(self, op):
  783. ofs, size, _ = unpack_fielddescr(op.getdescr())
  784. ofs_loc = imm(ofs)
  785. size_loc = imm(size)
  786. assert isinstance(size_loc, ImmedLoc)
  787. if size_loc.value == 1:
  788. need_lower_byte = True
  789. else:
  790. need_lower_byte = False
  791. args = op.getarglist()
  792. base_loc = self.rm.make_sure_var_in_reg(op.getarg(0), args)
  793. value_loc = self.make_sure_var_in_reg(op.getarg(1), args,
  794. need_lower_byte=need_lower_byte)
  795. self.perform_discard(op, [base_loc, ofs_loc, size_loc, value_loc])
  796. consider_setfield_raw = consider_setfield_gc
    def consider_setinteriorfield_gc(self, op):
        # setinteriorfield_gc(array, index, value): store a field of the
        # 'index'-th item of an array of structs.
        t = unpack_interiorfielddescr(op.getdescr())
        ofs, itemsize, fieldsize = imm(t[0]), imm(t[1]), imm(t[2])
        args = op.getarglist()
        # byte-sized fields must be stored from a low-byte-addressable reg
        if fieldsize.value == 1:
            need_lower_byte = True
        else:
            need_lower_byte = False
        box_base, box_index, box_value = args
        base_loc = self.rm.make_sure_var_in_reg(box_base, args)
        index_loc = self.rm.make_sure_var_in_reg(box_index, args)
        value_loc = self.make_sure_var_in_reg(box_value, args,
                                              need_lower_byte=need_lower_byte)
        # If 'index_loc' is not an immediate, then we need a 'temp_loc' that
        # is a register whose value will be destroyed. It's fine to destroy
        # the same register as 'index_loc', but not the other ones.
        if not isinstance(index_loc, ImmedLoc):
            # ...that is, except in a corner case where 'index_loc' would be
            # in the same register as 'value_loc'...
            tempvar = TempBox()
            temp_loc = self.rm.force_allocate_reg(tempvar, [box_base,
                                                            box_value])
            self.rm.possibly_free_var(tempvar)
        else:
            temp_loc = None
        # free the argument boxes now: the locations stay valid for this
        # operation, but the registers become reusable afterwards
        self.rm.possibly_free_var(box_index)
        self.rm.possibly_free_var(box_base)
        self.possibly_free_var(box_value)
        self.perform_discard(op, [base_loc, ofs, itemsize, fieldsize,
                                  index_loc, temp_loc, value_loc])

    consider_setinteriorfield_raw = consider_setinteriorfield_gc
  828. def consider_strsetitem(self, op):
  829. args = op.getarglist()
  830. base_loc = self.rm.make_sure_var_in_reg(op.getarg(0), args)
  831. ofs_loc = self.rm.make_sure_var_in_reg(op.getarg(1), args)
  832. value_loc = self.rm.make_sure_var_in_reg(op.getarg(2), args,
  833. need_lower_byte=True)
  834. self.perform_discard(op, [base_loc, ofs_loc, value_loc])
  835. consider_unicodesetitem = consider_strsetitem
  836. def consider_setarrayitem_gc(self, op):
  837. itemsize, ofs, _ = unpack_arraydescr(op.getdescr())
  838. args = op.getarglist()
  839. base_loc = self.rm.make_sure_var_in_reg(op.getarg(0), args)
  840. if itemsize == 1:
  841. need_lower_byte = True
  842. else:
  843. need_lower_byte = False
  844. value_loc = self.make_sure_var_in_reg(op.getarg(2), args,
  845. need_lower_byte=need_lower_byte)
  846. ofs_loc = self.rm.make_sure_var_in_reg(op.getarg(1), args)
  847. self.perform_discard(op, [base_loc, ofs_loc, value_loc,
  848. imm(itemsize), imm(ofs)])
  849. consider_setarrayitem_raw = consider_setarrayitem_gc
  850. consider_raw_store = consider_setarrayitem_gc
  851. def consider_getfield_gc(self, op):
  852. ofs, size, sign = unpack_fielddescr(op.getdescr())
  853. ofs_loc = imm(ofs)
  854. size_loc = imm(size)
  855. args = op.getarglist()
  856. base_loc = self.rm.make_sure_var_in_reg(op.getarg(0), args)
  857. result_loc = self.force_allocate_reg(op.result)
  858. if sign:
  859. sign_loc = imm1
  860. else:
  861. sign_loc = imm0
  862. self.perform(op, [base_loc, ofs_loc, size_loc, sign_loc], result_loc)
  863. consider_getfield_raw = consider_getfield_gc
  864. consider_getfield_raw_pure = consider_getfield_gc
  865. consider_getfield_gc_pure = consider_getfield_gc
  866. def consider_getarrayitem_gc(self, op):
  867. itemsize, ofs, sign = unpack_arraydescr(op.getdescr())
  868. args = op.getarglist()
  869. base_loc = self.rm.make_sure_var_in_reg(op.getarg(0), args)
  870. ofs_loc = self.rm.make_sure_var_in_reg(op.getarg(1), args)
  871. result_loc = self.force_allocate_reg(op.result)
  872. if sign:
  873. sign_loc = imm1
  874. else:
  875. sign_loc = imm0
  876. self.perform(op, [base_loc, ofs_loc, imm(itemsize), imm(ofs),
  877. sign_loc], result_loc)
  878. consider_getarrayitem_raw = consider_getarrayitem_gc
  879. consider_getarrayitem_gc_pure = consider_getarrayitem_gc
  880. consider_getarrayitem_raw_pure = consider_getarrayitem_gc
  881. consider_raw_load = consider_getarrayitem_gc
    def consider_getinteriorfield_gc(self, op):
        # getinteriorfield_gc(array, index) -> value: read a field of the
        # 'index'-th item of an array of structs.
        t = unpack_interiorfielddescr(op.getdescr())
        ofs, itemsize, fieldsize, sign = imm(t[0]), imm(t[1]), imm(t[2]), t[3]
        # 0/1 flag telling the assembler whether to sign-extend the load
        if sign:
            sign_loc = imm1
        else:
            sign_loc = imm0
        args = op.getarglist()
        base_loc = self.rm.make_sure_var_in_reg(op.getarg(0), args)
        index_loc = self.rm.make_sure_var_in_reg(op.getarg(1), args)
        # 'base' and 'index' are put in two registers (or one if 'index'
        # is an immediate). 'result' can be in the same register as
        # 'index' but must be in a different register than 'base'.
        result_loc = self.force_allocate_reg(op.result, [op.getarg(0)])
        assert isinstance(result_loc, RegLoc)
        # two cases: 1) if result_loc is a normal register, use it as temp_loc
        if not result_loc.is_xmm:
            temp_loc = result_loc
        else:
            # 2) if result_loc is an xmm register, we (likely) need another
            # temp_loc that is a normal register. It can be in the same
            # register as 'index' but not 'base'.
            tempvar = TempBox()
            temp_loc = self.rm.force_allocate_reg(tempvar, [op.getarg(0)])
            self.rm.possibly_free_var(tempvar)
        self.perform(op, [base_loc, ofs, itemsize, fieldsize,
                          index_loc, temp_loc, sign_loc], result_loc)
  909. def consider_int_is_true(self, op, guard_op):
  910. # doesn't need arg to be in a register
  911. argloc = self.loc(op.getarg(0))
  912. if guard_op is not None:
  913. self.perform_with_guard(op, guard_op, [argloc], None)
  914. else:
  915. resloc = self.rm.force_allocate_reg(op.result, need_lower_byte=True)
  916. self.perform(op, [argloc], resloc)
  917. consider_int_is_zero = consider_int_is_true
  918. def consider_same_as(self, op):
  919. argloc = self.loc(op.getarg(0))
  920. resloc = self.force_allocate_reg(op.result)
  921. self.perform(op, [argloc], resloc)
  922. consider_cast_ptr_to_int = consider_same_as
  923. consider_cast_int_to_ptr = consider_same_as
  924. def consider_int_force_ge_zero(self, op):
  925. argloc = self.make_sure_var_in_reg(op.getarg(0))
  926. resloc = self.force_allocate_reg(op.result, [op.getarg(0)])
  927. self.perform(op, [argloc], resloc)
  928. def consider_strlen(self, op):
  929. args = op.getarglist()
  930. base_loc = self.rm.make_sure_var_in_reg(op.getarg(0), args)
  931. result_loc = self.rm.force_allocate_reg(op.result)
  932. self.perform(op, [base_loc], result_loc)
  933. consider_unicodelen = consider_strlen
  934. def consider_arraylen_gc(self, op):
  935. arraydescr = op.getdescr()
  936. assert isinstance(arraydescr, ArrayDescr)
  937. ofs = arraydescr.lendescr.offset
  938. args = op.getarglist()
  939. base_loc = self.rm.make_sure_var_in_reg(op.getarg(0), args)
  940. result_loc = self.rm.force_allocate_reg(op.result)
  941. self.perform(op, [base_loc, imm(ofs)], result_loc)
  942. def consider_strgetitem(self, op):
  943. args = op.getarglist()
  944. base_loc = self.rm.make_sure_var_in_reg(op.getarg(0), args)
  945. ofs_loc = self.rm.make_sure_var_in_reg(op.getarg(1), args)
  946. result_loc = self.rm.force_allocate_reg(op.result)
  947. self.perform(op, [base_loc, ofs_loc], result_loc)
  948. consider_unicodegetitem = consider_strgetitem
    def consider_copystrcontent(self, op):
        # copystrcontent(src, dst, srcstart, dststart, length)
        self._consider_copystrcontent(op, is_unicode=False)
    def consider_copyunicodecontent(self, op):
        # copyunicodecontent(src, dst, srcstart, dststart, length)
        self._consider_copystrcontent(op, is_unicode=True)
    def _consider_copystrcontent(self, op, is_unicode):
        """Compile copy(str|unicode)content as a call to memcpy().

        op's arguments are (src, dst, srcstart, dststart, length); the
        source and destination item addresses are computed first, then
        the length is converted to bytes (for unicode), and finally
        memcpy() is called directly.
        """
        # compute the source address
        args = op.getarglist()
        base_loc = self.rm.make_sure_var_in_reg(args[0], args)
        ofs_loc = self.rm.make_sure_var_in_reg(args[2], args)
        assert args[0] is not args[1]    # forbidden case of aliasing
        srcaddr_box = TempBox()
        # keep the still-unread argument boxes (and the temp itself)
        # out of the register we are about to allocate
        forbidden_vars = [args[1], args[3], args[4], srcaddr_box]
        srcaddr_loc = self.rm.force_allocate_reg(srcaddr_box, forbidden_vars)
        self._gen_address_inside_string(base_loc, ofs_loc, srcaddr_loc,
                                        is_unicode=is_unicode)
        # compute the destination address
        base_loc = self.rm.make_sure_var_in_reg(args[1], forbidden_vars)
        ofs_loc = self.rm.make_sure_var_in_reg(args[3], forbidden_vars)
        forbidden_vars = [args[4], srcaddr_box]
        dstaddr_box = TempBox()
        dstaddr_loc = self.rm.force_allocate_reg(dstaddr_box, forbidden_vars)
        self._gen_address_inside_string(base_loc, ofs_loc, dstaddr_loc,
                                        is_unicode=is_unicode)
        # compute the length in bytes
        length_box = args[4]
        length_loc = self.loc(length_box)
        if is_unicode:
            # memcpy() wants bytes, so scale the item count by the
            # unicode item size (via LEA)
            forbidden_vars = [srcaddr_box, dstaddr_box]
            bytes_box = TempBox()
            bytes_loc = self.rm.force_allocate_reg(bytes_box, forbidden_vars)
            scale = self._get_unicode_item_scale()
            # LEA needs its operand in a register (or as an immediate)
            if not (isinstance(length_loc, ImmedLoc) or
                    isinstance(length_loc, RegLoc)):
                self.assembler.mov(length_loc, bytes_loc)
                length_loc = bytes_loc
            self.assembler.load_effective_addr(length_loc, 0, scale, bytes_loc)
            length_box = bytes_box
            length_loc = bytes_loc
        # call memcpy()
        self.rm.before_call()
        self.xrm.before_call()
        self.assembler._emit_call(imm(self.assembler.memcpy_addr),
                                  [dstaddr_loc, srcaddr_loc, length_loc],
                                  can_collect=False)
        self.rm.possibly_free_var(length_box)
        self.rm.possibly_free_var(dstaddr_box)
        self.rm.possibly_free_var(srcaddr_box)
    def _gen_address_inside_string(self, baseloc, ofsloc, resloc, is_unicode):
        # Emit  resloc = baseloc + ofs_items + (ofsloc << scale),
        # i.e. the address of item 'ofsloc' inside a str/unicode object,
        # where ofs_items is the offset of the character data.
        if is_unicode:
            ofs_items, _, _ = symbolic.get_array_token(rstr.UNICODE,
                                                  self.translate_support_code)
            scale = self._get_unicode_item_scale()
        else:
            ofs_items, itemsize, _ = symbolic.get_array_token(rstr.STR,
                                                  self.translate_support_code)
            assert itemsize == 1      # str items are single bytes
            scale = 0
        self.assembler.load_effective_addr(ofsloc, ofs_items, scale,
                                           resloc, baseloc)
  1008. def _get_unicode_item_scale(self):
  1009. _, itemsize, _ = symbolic.get_array_token(rstr.UNICODE,
  1010. self.translate_support_code)
  1011. if itemsize == 4:
  1012. return 2
  1013. elif itemsize == 2:
  1014. return 1
  1015. else:
  1016. raise AssertionError("bad unicode item size")
    def consider_read_timestamp(self, op):
        # read_timestamp: the CPU's timestamp instruction writes to
        # eax/edx, so eax is always reserved here (and edx below).
        tmpbox_high = TempBox()
        self.rm.force_allocate_reg(tmpbox_high, selected_reg=eax)
        if longlong.is_64_bit:
            # on 64-bit, use rax as temporary register and returns the
            # result in rdx
            result_loc = self.rm.force_allocate_reg(op.result,
                                                    selected_reg=edx)
            self.perform(op, [], result_loc)
        else:
            # on 32-bit, use both eax and edx as temporary registers,
            # use a temporary xmm register, and returns the result in
            # another xmm register.
            tmpbox_low = TempBox()
            self.rm.force_allocate_reg(tmpbox_low, selected_reg=edx)
            xmmtmpbox = TempBox()
            xmmtmploc = self.xrm.force_allocate_reg(xmmtmpbox)
            result_loc = self.xrm.force_allocate_reg(op.result)
            self.perform(op, [xmmtmploc], result_loc)
            self.xrm.possibly_free_var(xmmtmpbox)
            self.rm.possibly_free_var(tmpbox_low)
        self.rm.possibly_free_var(tmpbox_high)
  1039. def compute_hint_frame_locations(self, operations):
  1040. # optimization only: fill in the 'hint_frame_locations' dictionary
  1041. # of 'fm' based on the JUMP at the end of the loop, by looking
  1042. # at where we would like the boxes to be after the jump.
  1043. return # XXX disabled for now
  1044. op = operations[-1]
  1045. if op.getopnum() != rop.JUMP:
  1046. return
  1047. self.final_jump_op = op
  1048. descr = op.getdescr()
  1049. assert isinstance(descr, TargetToken)

Large files are truncated, but you can click here to view the full file