
/rpython/jit/backend/x86/callbuilder.py

https://bitbucket.org/pypy/pypy/

import sys
from rpython.rlib.clibffi import FFI_DEFAULT_ABI
from rpython.rlib.objectmodel import we_are_translated
from rpython.rlib.rarithmetic import intmask
from rpython.jit.metainterp.history import INT, FLOAT
from rpython.jit.backend.x86.arch import (WORD, IS_X86_64, IS_X86_32,
    PASS_ON_MY_FRAME, FRAME_FIXED_SIZE, THREADLOCAL_OFS)
from rpython.jit.backend.x86.regloc import (eax, ecx, edx, ebx, esp, ebp, esi,
    xmm0, xmm1, xmm2, xmm3, xmm4, xmm5, xmm6, xmm7, r8, r9, r10, r11, edi,
    r12, r13, r14, r15, X86_64_SCRATCH_REG, X86_64_XMM_SCRATCH_REG,
    RegLoc, RawEspLoc, RawEbpLoc, imm, ImmedLoc)
from rpython.jit.backend.x86.jump import remap_frame_layout
from rpython.jit.backend.llsupport.callbuilder import AbstractCallBuilder
from rpython.jit.backend.llsupport import llerrno
from rpython.rtyper.lltypesystem import llmemory, rffi


# darwin requires the stack to be 16 bytes aligned on calls.
# Same for gcc 4.5.0, better safe than sorry
CALL_ALIGN = 16 // WORD

stdcall_or_cdecl = sys.platform == "win32"
handle_lasterror = sys.platform == "win32"


def align_stack_words(words):
    return (words + CALL_ALIGN - 1) & ~(CALL_ALIGN-1)
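
# Example: on 32-bit (WORD == 4, CALL_ALIGN == 4), align_stack_words(5)
# returns 8, so the stack stays a multiple of 16 bytes; on 64-bit
# (WORD == 8, CALL_ALIGN == 2), align_stack_words(5) returns 6.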


def follow_jump(addr):
    # If the code at 'addr' starts with another JMP instruction, follow
    # it now (possibly several times).  'addr' is an absolute address here.
    while rffi.cast(rffi.CCHARP, addr)[0] == '\xE9':    # JMP <4 bytes>
        addr += 5
        addr += intmask(rffi.cast(rffi.INTP, addr - 4)[0])
    return addr
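
# (0xE9 is the opcode of the rel32 form of JMP: its target is the address
# of the next instruction, addr + 5, plus the signed 32-bit displacement
# stored in the last four bytes of the instruction, which is exactly what
# the loop body computes.)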


class CallBuilderX86(AbstractCallBuilder):

    # max number of words we have room in esp; if we need more for
    # arguments, we need to decrease esp temporarily
    stack_max = PASS_ON_MY_FRAME

    tlofs_reg = None
    saved_stack_position_reg = None
    result_value_saved_early = False

    def __init__(self, assembler, fnloc, arglocs,
                 resloc=eax, restype=INT, ressize=WORD):
        AbstractCallBuilder.__init__(self, assembler, fnloc, arglocs,
                                     resloc, restype, ressize)
        # Avoid tons of issues with a non-immediate fnloc by sticking it
        # as an extra argument if needed
        if isinstance(fnloc, ImmedLoc):
            self.fnloc_is_immediate = True
            self.fnloc = imm(follow_jump(fnloc.value))
        else:
            self.fnloc_is_immediate = False
            self.fnloc = None
            self.arglocs = arglocs + [fnloc]
        self.start_frame_size = self.mc._frame_size

    def select_call_release_gil_mode(self):
        AbstractCallBuilder.select_call_release_gil_mode(self)
        if self.asm._is_asmgcc():
            from rpython.memory.gctransform import asmgcroot
            self.stack_max = PASS_ON_MY_FRAME - asmgcroot.JIT_USE_WORDS
            assert self.stack_max >= 3

    def subtract_esp_aligned(self, count):
        if count > 0:
            align = align_stack_words(count)
            self.mc.SUB_ri(esp.value, align * WORD)

    def get_current_esp(self):
        return self.start_frame_size - self.mc._frame_size

    def restore_stack_pointer(self, target_esp=0):
        current_esp = self.get_current_esp()
        if current_esp != target_esp:
            self.mc.ADD_ri(esp.value, target_esp - current_esp)
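
    # Note: get_current_esp() yields 0 while esp is still at the value it
    # had in __init__, and a negative number once we pushed arguments or
    # subtracted from esp ('mc._frame_size' grows with the frame).  This
    # sign convention is assumed by the users below, e.g. the ADD in
    # restore_stack_pointer() and the '-self.get_current_esp()' in
    # call_releasegil_addr_and_move_real_arguments().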

    def load_result(self):
        """Overridden in CallBuilder32 and CallBuilder64"""
        if self.ressize == 0:
            return      # void result
        # use the code in load_from_mem to do the zero- or sign-extension
        if self.restype == FLOAT:
            srcloc = xmm0
        else:
            srcloc = eax
        if self.ressize >= WORD and self.resloc is srcloc:
            return      # no need for any MOV
        if self.ressize == 1 and isinstance(srcloc, RegLoc):
            srcloc = srcloc.lowest8bits()
        self.asm.load_from_mem(self.resloc, srcloc,
                               imm(self.ressize), imm(self.ressign))

    def push_gcmap(self):
        # we push *now* the gcmap, describing the status of GC registers
        # after the rearrangements done just before, ignoring the return
        # value eax, if necessary
        assert not self.is_call_release_gil
        current_esp = self.get_current_esp()
        self.change_extra_stack_depth = (current_esp != 0)
        if self.change_extra_stack_depth:
            self.asm.set_extra_stack_depth(self.mc, -current_esp)
        noregs = self.asm.cpu.gc_ll_descr.is_shadow_stack()
        gcmap = self.asm._regalloc.get_gcmap([eax], noregs=noregs)
        self.asm.push_gcmap(self.mc, gcmap, store=True)

    def pop_gcmap(self):
        ssreg = None
        gcrootmap = self.asm.cpu.gc_ll_descr.gcrootmap
        if gcrootmap:
            if gcrootmap.is_shadow_stack and self.is_call_release_gil:
                # in this mode, 'ebx' happens to contain the shadowstack
                # top at this point, so reuse it instead of loading it again
                ssreg = ebx
        self.asm._reload_frame_if_necessary(self.mc, shadowstack_reg=ssreg)
        if self.change_extra_stack_depth:
            self.asm.set_extra_stack_depth(self.mc, 0)
        self.asm.pop_gcmap(self.mc)

    def call_releasegil_addr_and_move_real_arguments(self, fastgil):
        from rpython.jit.backend.x86.assembler import heap
        assert self.is_call_release_gil
        #
        # Save this thread's shadowstack pointer into 'ebx',
        # for later comparison
        gcrootmap = self.asm.cpu.gc_ll_descr.gcrootmap
        if gcrootmap:
            if gcrootmap.is_shadow_stack:
                rst = gcrootmap.get_root_stack_top_addr()
                self.mc.MOV(ebx, heap(rst))
        #
        if not self.asm._is_asmgcc():
            # shadowstack: change 'rpy_fastgil' to 0 (it should be
            # non-zero right now).
            self.change_extra_stack_depth = False
            css_value = imm(0)
        else:
            from rpython.memory.gctransform import asmgcroot
            # build a 'css' structure on the stack: 2 words for the linkage,
            # and 5/7 words as described for asmgcroot.ASM_FRAMEDATA, for a
            # total size of JIT_USE_WORDS.  This structure is found at
            # [ESP+css].
            css = -self.get_current_esp() + (
                WORD * (PASS_ON_MY_FRAME - asmgcroot.JIT_USE_WORDS))
            assert css >= 2 * WORD
            # Save ebp
            index_of_ebp = css + WORD * (2+asmgcroot.INDEX_OF_EBP)
            self.mc.MOV_sr(index_of_ebp, ebp.value)   # MOV [css.ebp], EBP
            # Save the "return address": we pretend that it's css
            self.mc.LEA_rs(eax.value, css)            # LEA eax, [css]
            frame_ptr = css + WORD * (2+asmgcroot.FRAME_PTR)
            self.mc.MOV_sr(frame_ptr, eax.value)      # MOV [css.frame], eax
            # Set up jf_extra_stack_depth to pretend that the return address
            # was at css, and so our stack frame is supposedly shorter by
            # (PASS_ON_MY_FRAME-JIT_USE_WORDS+1) words
            delta = PASS_ON_MY_FRAME - asmgcroot.JIT_USE_WORDS + 1
            self.change_extra_stack_depth = True
            self.asm.set_extra_stack_depth(self.mc, -delta * WORD)
            css_value = eax
        #
        # <--here--> would come a memory fence, if the CPU needed one.
        self.mc.MOV(heap(fastgil), css_value)
        #
        if not we_are_translated():        # for testing: we should not access
            self.mc.ADD(ebp, imm(1))       # ebp any more
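
    # Note: the store to 'rpy_fastgil' just above is what actually releases
    # the GIL: 0 in shadowstack mode, or the address of the 'css' structure
    # in asmgcc mode.  The same css_value is compared against in
    # move_real_result_and_call_reacqgil_addr() below, to detect whether
    # another thread grabbed the GIL while it was released.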

    def get_tlofs_reg(self):
        """Load the THREADLOCAL_OFS from the stack into a callee-saved
        register.  Further calls just return the same register, by assuming
        it is indeed saved."""
        assert self.is_call_release_gil
        if self.tlofs_reg is None:
            # pick a register saved across calls
            if IS_X86_32:
                self.tlofs_reg = esi
            else:
                self.tlofs_reg = r12
            self.mc.MOV_rs(self.tlofs_reg.value,
                           THREADLOCAL_OFS - self.get_current_esp())
            if self.asm._is_asmgcc():
                self.mc.AND_ri(self.tlofs_reg.value, ~1)
        return self.tlofs_reg

    def save_stack_position(self):
        """Load the current 'esp' value into a callee-saved register.
        Further calls just return the same register, by assuming it is
        indeed saved."""
        assert IS_X86_32
        assert stdcall_or_cdecl and self.is_call_release_gil
        if self.saved_stack_position_reg is None:
            # pick a register saved across calls
            self.saved_stack_position_reg = edi
            self.mc.MOV(self.saved_stack_position_reg, esp)

    def write_real_errno(self, save_err):
        """This occurs just before emit_raw_call().
        """
        mc = self.mc

        if handle_lasterror and (save_err & rffi.RFFI_READSAVED_LASTERROR):
            # must call SetLastError().  There are no registers to save
            # because we are on 32-bit in this case: no register contains
            # the arguments to the main function we want to call afterwards.
            from rpython.rlib.rwin32 import _SetLastError
            adr = llmemory.cast_ptr_to_adr(_SetLastError)
            SetLastError_addr = self.asm.cpu.cast_adr_to_int(adr)
            assert isinstance(self, CallBuilder32)    # Windows 32-bit only
            #
            if save_err & rffi.RFFI_ALT_ERRNO:
                lasterror = llerrno.get_alt_lasterror_offset(self.asm.cpu)
            else:
                lasterror = llerrno.get_rpy_lasterror_offset(self.asm.cpu)
            tlofsreg = self.get_tlofs_reg()    # => esi, callee-saved
            self.save_stack_position()         # => edi, callee-saved
            mc.PUSH_m((tlofsreg.value, lasterror))
            mc.CALL(imm(follow_jump(SetLastError_addr)))
            # restore the stack position without assuming a particular
            # calling convention of _SetLastError()
            self.mc.stack_frame_size_delta(-WORD)
            self.mc.MOV(esp, self.saved_stack_position_reg)

        if save_err & rffi.RFFI_READSAVED_ERRNO:
            # Just before a call, read '*_errno' and write it into the
            # real 'errno'.  Most registers are free here, including the
            # callee-saved ones, except 'ebx' and except the ones used to
            # pass the arguments on x86-64.
            if save_err & rffi.RFFI_ALT_ERRNO:
                rpy_errno = llerrno.get_alt_errno_offset(self.asm.cpu)
            else:
                rpy_errno = llerrno.get_rpy_errno_offset(self.asm.cpu)
            p_errno = llerrno.get_p_errno_offset(self.asm.cpu)
            tlofsreg = self.get_tlofs_reg()    # => esi or r12, callee-saved
            if IS_X86_32:
                tmpreg = edx
            else:
                tmpreg = r11     # edx is used for 3rd argument
            mc.MOV_rm(tmpreg.value, (tlofsreg.value, p_errno))
            mc.MOV32_rm(eax.value, (tlofsreg.value, rpy_errno))
            mc.MOV32_mr((tmpreg.value, 0), eax.value)

        elif save_err & rffi.RFFI_ZERO_ERRNO_BEFORE:
            # Same, but write zero.
            p_errno = llerrno.get_p_errno_offset(self.asm.cpu)
            tlofsreg = self.get_tlofs_reg()    # => esi or r12, callee-saved
            mc.MOV_rm(eax.value, (tlofsreg.value, p_errno))
            mc.MOV32_mi((eax.value, 0), 0)
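
    # In C terms, the three MOVs of the RFFI_READSAVED_ERRNO case above do
    # roughly '*tl->p_errno = tl->rpy_errno': they copy the errno value
    # saved in the RPython thread-local structure into the real C-level
    # 'errno' just before the call.  (A sketch only; the "fields" are
    # really the offsets computed by llerrno.)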

    def read_real_errno(self, save_err):
        """This occurs after emit_raw_call() and after restore_stack_pointer().
        """
        mc = self.mc

        if save_err & rffi.RFFI_SAVE_ERRNO:
            # Just after a call, read the real 'errno' and save a copy of
            # it inside our thread-local '*_errno'.  Most registers are
            # free here, including the callee-saved ones, except 'ebx'.
            # The tlofs register might have been loaded earlier and is
            # callee-saved, so it does not need to be reloaded.
            if save_err & rffi.RFFI_ALT_ERRNO:
                rpy_errno = llerrno.get_alt_errno_offset(self.asm.cpu)
            else:
                rpy_errno = llerrno.get_rpy_errno_offset(self.asm.cpu)
            p_errno = llerrno.get_p_errno_offset(self.asm.cpu)
            tlofsreg = self.get_tlofs_reg()   # => esi or r12 (possibly reused)
            mc.MOV_rm(ecx.value, (tlofsreg.value, p_errno))
            mc.MOV32_rm(ecx.value, (ecx.value, 0))
            mc.MOV32_mr((tlofsreg.value, rpy_errno), ecx.value)

        if handle_lasterror and (save_err & (rffi.RFFI_SAVE_LASTERROR |
                                             rffi.RFFI_SAVE_WSALASTERROR)):
            if save_err & rffi.RFFI_SAVE_LASTERROR:
                from rpython.rlib.rwin32 import _GetLastError
                adr = llmemory.cast_ptr_to_adr(_GetLastError)
            else:
                from rpython.rlib._rsocket_rffi import _WSAGetLastError
                adr = llmemory.cast_ptr_to_adr(_WSAGetLastError)
            GetLastError_addr = self.asm.cpu.cast_adr_to_int(adr)
            assert isinstance(self, CallBuilder32)    # Windows 32-bit only
            #
            if save_err & rffi.RFFI_ALT_ERRNO:
                lasterror = llerrno.get_alt_lasterror_offset(self.asm.cpu)
            else:
                lasterror = llerrno.get_rpy_lasterror_offset(self.asm.cpu)
            self.save_result_value(save_edx=True)   # save eax/edx/xmm0
            self.result_value_saved_early = True
            mc.CALL(imm(follow_jump(GetLastError_addr)))
            #
            tlofsreg = self.get_tlofs_reg()    # => esi (possibly reused)
            mc.MOV32_mr((tlofsreg.value, lasterror), eax.value)
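
    # This is the mirror image of write_real_errno(): roughly
    # 'tl->rpy_errno = *tl->p_errno'.  The GetLastError() call needs the
    # result value saved first because it clobbers eax (and possibly edx),
    # hence 'result_value_saved_early'.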

    def move_real_result_and_call_reacqgil_addr(self, fastgil):
        from rpython.jit.backend.x86 import rx86
        #
        # check if we need to call the reacqgil() function or not
        # (to acquire the GIL, remove the asmgcc head from
        # the chained list, etc.)
        mc = self.mc
        restore_edx = False
        if not self.asm._is_asmgcc():
            css = 0
            css_value = imm(0)
            old_value = ecx
        else:
            from rpython.memory.gctransform import asmgcroot
            css = WORD * (PASS_ON_MY_FRAME - asmgcroot.JIT_USE_WORDS)
            if IS_X86_32:
                assert css >= 16
                if self.restype == 'L':    # long long result: eax/edx
                    if not self.result_value_saved_early:
                        mc.MOV_sr(12, edx.value)
                        restore_edx = True
                css_value = edx
                old_value = ecx
            elif IS_X86_64:
                css_value = edi
                old_value = esi
            mc.LEA_rs(css_value.value, css)
        #
        # Use XCHG as an atomic test-and-set-lock.  It also implicitly
        # does a memory barrier.
        mc.MOV(old_value, imm(1))
        if rx86.fits_in_32bits(fastgil):
            mc.XCHG_rj(old_value.value, fastgil)
        else:
            mc.MOV_ri(X86_64_SCRATCH_REG.value, fastgil)
            mc.XCHG_rm(old_value.value, (X86_64_SCRATCH_REG.value, 0))
        mc.CMP(old_value, css_value)
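        # After the XCHG, 'rpy_fastgil' contains 1 (taken) and 'old_value'
        # holds its previous contents.  If that is still the value written
        # by call_releasegil_addr_and_move_real_arguments() above
        # (css_value: 0 for shadowstack, the css address for asmgcc), no
        # other thread touched the GIL and we may take the fast path.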
        #
        gcrootmap = self.asm.cpu.gc_ll_descr.gcrootmap
        if bool(gcrootmap) and gcrootmap.is_shadow_stack:
            from rpython.jit.backend.x86.assembler import heap
            #
            # When doing a call_release_gil with shadowstack, there
            # is the risk that the 'rpy_fastgil' was free but the
            # current shadowstack can be the one of a different
            # thread.  So here we check if the shadowstack pointer
            # is still the same as before we released the GIL (saved
            # in 'ebx'), and if not, we fall back to 'reacqgil_addr'.
            mc.J_il8(rx86.Conditions['NE'], 0)
            jne_location = mc.get_relative_pos()
            # here, ecx (=old_value) is zero (so rpy_fastgil was in 'released'
            # state before the XCHG, but the XCHG acquired it by writing 1)
            rst = gcrootmap.get_root_stack_top_addr()
            mc = self.mc
            mc.CMP(ebx, heap(rst))
            mc.J_il8(rx86.Conditions['E'], 0)
            je_location = mc.get_relative_pos()
            # revert the rpy_fastgil acquired above, so that the
            # general 'reacqgil_addr' below can acquire it again...
            mc.MOV(heap(fastgil), ecx)
            # patch the JNE above
            offset = mc.get_relative_pos() - jne_location
            assert 0 < offset <= 127
            mc.overwrite(jne_location-1, chr(offset))
        else:
            mc.J_il8(rx86.Conditions['E'], 0)
            je_location = mc.get_relative_pos()
        #
        # Yes, we need to call the reacqgil() function
        if not self.result_value_saved_early:
            self.save_result_value(save_edx=False)
        if self.asm._is_asmgcc():
            if IS_X86_32:
                mc.MOV_sr(4, old_value.value)
                mc.MOV_sr(0, css_value.value)
            # on X86_64, they are already in the right registers
        mc.CALL(imm(follow_jump(self.asm.reacqgil_addr)))
        if not self.result_value_saved_early:
            self.restore_result_value(save_edx=False)
        #
        # patch the JE above
        offset = mc.get_relative_pos() - je_location
        assert 0 < offset <= 127
        mc.overwrite(je_location-1, chr(offset))
        #
        if restore_edx:
            mc.MOV_rs(edx.value, 12)   # restore this
        #
        if self.result_value_saved_early:
            self.restore_result_value(save_edx=True)
        #
        if not we_are_translated():    # for testing: now we can access
            mc.SUB(ebp, imm(1))        # ebp again
        #
        # Now that we have reacquired the GIL, we can reload a possibly
        # modified ebp
        if self.asm._is_asmgcc():
            # special-case: reload ebp from the css
            from rpython.memory.gctransform import asmgcroot
            index_of_ebp = css + WORD * (2+asmgcroot.INDEX_OF_EBP)
            mc.MOV_rs(ebp.value, index_of_ebp)   # MOV EBP, [css.ebp]
        #else:
        #   for shadowstack, done for us by _reload_frame_if_necessary()
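
    # Note on the jump patching above: J_il8 emits a two-byte short jump
    # whose displacement byte is initially 0; once the target position is
    # known, overwrite() fills in the real 8-bit displacement.  That is
    # why the asserts check that the forward distance fits in 127 bytes.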

    def save_result_value(self, save_edx):
        """Overridden in CallBuilder32 and CallBuilder64"""
        raise NotImplementedError

    def restore_result_value(self, save_edx):
        """Overridden in CallBuilder32 and CallBuilder64"""
        raise NotImplementedError


class CallBuilder32(CallBuilderX86):

    def prepare_arguments(self):
        arglocs = self.arglocs
        stack_depth = 0
        n = len(arglocs)
        for i in range(n):
            loc = arglocs[i]
            stack_depth += loc.get_width() // WORD
        self.subtract_esp_aligned(stack_depth - self.stack_max)
        #
        p = 0
        for i in range(n):
            loc = arglocs[i]
            if isinstance(loc, RegLoc):
                if loc.is_xmm:
                    self.mc.MOVSD_sx(p, loc.value)
                else:
                    self.mc.MOV_sr(p, loc.value)
            p += loc.get_width()
        p = 0
        for i in range(n):
            loc = arglocs[i]
            if not isinstance(loc, RegLoc):
                if loc.get_width() == 8:
                    self.mc.MOVSD(xmm0, loc)
                    self.mc.MOVSD_sx(p, xmm0.value)
                elif isinstance(loc, ImmedLoc):
                    self.mc.MOV_si(p, loc.value)
                else:
                    self.mc.MOV(eax, loc)
                    self.mc.MOV_sr(p, eax.value)
            p += loc.get_width()
        self.total_stack_used_by_arguments = p
        #
        if not self.fnloc_is_immediate:    # the last "argument" pushed above
            self.fnloc = RawEspLoc(p - WORD, INT)
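
    # The two loops above walk the arguments twice on purpose: arguments
    # already in registers are flushed to the outgoing stack area first,
    # because the second loop uses eax and xmm0 as scratch registers and
    # could otherwise clobber argument values still sitting there.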

    def emit_raw_call(self):
        if stdcall_or_cdecl and self.is_call_release_gil:
            # Dynamically accept both stdcall and cdecl functions.
            # We could try to detect from pyjitpl which calling
            # convention this particular function takes, which would
            # avoid these two extra MOVs... but later.
            self.save_stack_position()    # => edi (possibly reused)
            self.mc.CALL(self.fnloc)
            self.mc.MOV(esp, self.saved_stack_position_reg)
        else:
            self.mc.CALL(self.fnloc)
            if self.callconv != FFI_DEFAULT_ABI:
                # in the STDCALL ABI, the CALL above has an effect on
                # the stack depth.  Adjust 'mc._frame_size'.
                delta = self._fix_stdcall(self.callconv)
                self.mc.stack_frame_size_delta(-delta)

    def _fix_stdcall(self, callconv):
        from rpython.rlib.clibffi import FFI_STDCALL
        assert callconv == FFI_STDCALL
        return self.total_stack_used_by_arguments
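
    # Background for the above: under stdcall the callee pops its own
    # arguments, so after the CALL the stack pointer is higher by
    # 'total_stack_used_by_arguments'.  Restoring esp from the register
    # saved by save_stack_position(), or adjusting '_frame_size' by that
    # amount, makes both calling conventions look the same afterwards.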

    def load_result(self):
        resloc = self.resloc
        if resloc is not None and resloc.is_float():
            # a float or a long long return
            if self.restype == 'L':     # long long
                # move eax/edx -> xmm0
                self.mc.MOVD32_xr(resloc.value^1, edx.value)
                self.mc.MOVD32_xr(resloc.value, eax.value)
                self.mc.PUNPCKLDQ_xx(resloc.value, resloc.value^1)
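                # ('resloc.value^1' picks the neighboring xmm register,
                # used here as a scratch: PUNPCKLDQ interleaves the low
                # 32-bit words, leaving edx:eax as a single 64-bit value
                # in resloc.)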
            else:
                # float: we have to go via the stack
                self.mc.FSTPL_s(0)
                self.mc.MOVSD_xs(resloc.value, 0)
            #
        elif self.restype == 'S':
            # singlefloat return: must convert ST(0) to a 32-bit singlefloat
            # and load it into self.resloc.  mess mess mess
            self.mc.FSTPS_s(0)
            self.mc.MOV_rs(resloc.value, 0)
        else:
            CallBuilderX86.load_result(self)

    def save_result_value(self, save_edx):
        # Temporarily save the result value into [ESP+8].  We use "+8"
        # in order to leave the two initial words free, in case it's needed.
        # Also note that in this 32-bit case, a long long return value is
        # in eax/edx, but we already saved the value of edx in
        # move_real_result_and_call_reacqgil_addr().
        if self.ressize == 0:      # void return
            return
        if self.resloc.is_float():
            # a float or a long long return
            if self.restype == 'L':
                self.mc.MOV_sr(8, eax.value)      # long long
                if save_edx:
                    self.mc.MOV_sr(12, edx.value)
            else:
                self.mc.FSTPL_s(8)                # float return
        else:
            if self.restype == 'S':
                self.mc.FSTPS_s(8)
            else:
                assert self.restype == INT
                assert self.ressize <= WORD
                self.mc.MOV_sr(8, eax.value)

    def restore_result_value(self, save_edx):
        # Opposite of save_result_value()
        if self.ressize == 0:      # void return
            return
        if self.resloc.is_float():
            # a float or a long long return
            if self.restype == 'L':
                self.mc.MOV_rs(eax.value, 8)      # long long
                if save_edx:
                    self.mc.MOV_rs(edx.value, 12)
            else:
                self.mc.FLDL_s(8)                 # float return
        else:
            if self.restype == 'S':
                self.mc.FLDS_s(8)
            else:
                assert self.restype == INT
                assert self.ressize <= WORD
                self.mc.MOV_rs(eax.value, 8)


class CallBuilder64(CallBuilderX86):

    ARGUMENTS_GPR = [edi, esi, edx, ecx, r8, r9]
    ARGUMENTS_XMM = [xmm0, xmm1, xmm2, xmm3, xmm4, xmm5, xmm6, xmm7]
    _ALL_CALLEE_SAVE_GPR = [ebx, r12, r13, r14, r15]

    next_arg_gpr = 0
    next_arg_xmm = 0

    def _unused_gpr(self, hint):
        i = self.next_arg_gpr
        self.next_arg_gpr = i + 1
        try:
            res = self.ARGUMENTS_GPR[i]
        except IndexError:
            return None
        return res

    def _unused_xmm(self):
        i = self.next_arg_xmm
        self.next_arg_xmm = i + 1
        try:
            return self.ARGUMENTS_XMM[i]
        except IndexError:
            return None
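
    # The two helpers above hand out the next free argument register, or
    # None once they run out.  The register lists follow the System V
    # AMD64 calling convention: the first six integer/pointer arguments
    # go in rdi, rsi, rdx, rcx, r8, r9 and the first eight float
    # arguments in xmm0-xmm7; everything else is passed on the stack.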

    def prepare_arguments(self):
        src_locs = []
        dst_locs = []
        xmm_src_locs = []
        xmm_dst_locs = []
        singlefloats = None

        arglocs = self.arglocs
        argtypes = self.argtypes

        on_stack = 0
        for i in range(len(arglocs)):
            loc = arglocs[i]
            if loc.is_float():
                tgt = self._unused_xmm()
                if tgt is None:
                    tgt = RawEspLoc(on_stack * WORD, FLOAT)
                    on_stack += 1
                xmm_src_locs.append(loc)
                xmm_dst_locs.append(tgt)
            elif i < len(argtypes) and argtypes[i] == 'S':
                # Singlefloat argument
                if singlefloats is None:
                    singlefloats = []
                tgt = self._unused_xmm()
                if tgt is None:
                    tgt = RawEspLoc(on_stack * WORD, INT)
                    on_stack += 1
                singlefloats.append((loc, tgt))
            else:
                tgt = self._unused_gpr(hint=loc)
                if tgt is None:
                    tgt = RawEspLoc(on_stack * WORD, INT)
                    on_stack += 1
                src_locs.append(loc)
                dst_locs.append(tgt)

        if not self.fnloc_is_immediate:
            self.fnloc = dst_locs[-1]    # the last "argument" prepared above

        if not we_are_translated():  # assert that we got the right stack depth
            floats = 0
            for i in range(len(arglocs)):
                arg = arglocs[i]
                if arg.is_float() or (i < len(argtypes) and argtypes[i]=='S'):
                    floats += 1
            all_args = len(arglocs)
            stack_depth = (max(all_args - floats - len(self.ARGUMENTS_GPR), 0)
                           + max(floats - len(self.ARGUMENTS_XMM), 0))
            assert stack_depth == on_stack

        self.subtract_esp_aligned(on_stack - self.stack_max)

        # Handle register arguments: first remap the xmm arguments
        remap_frame_layout(self.asm, xmm_src_locs, xmm_dst_locs,
                           X86_64_XMM_SCRATCH_REG)

        # Load the singlefloat arguments from main regs or stack to xmm regs
        if singlefloats is not None:
            for src, dst in singlefloats:
                if isinstance(dst, RawEspLoc):
                    # XXX too much special logic
                    if isinstance(src, RawEbpLoc):
                        self.mc.MOV32(X86_64_SCRATCH_REG, src)
                        self.mc.MOV32(dst, X86_64_SCRATCH_REG)
                    else:
                        self.mc.MOV32(dst, src)
                    continue
                if isinstance(src, ImmedLoc):
                    self.mc.MOV(X86_64_SCRATCH_REG, src)
                    src = X86_64_SCRATCH_REG
                self.mc.MOVD32(dst, src)

        # Finally remap the arguments in the main regs
        remap_frame_layout(self.asm, src_locs, dst_locs, X86_64_SCRATCH_REG)
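
    # Worked example: for a call f(i0, i1, i2, i3, i4, i5, i6, f0) with
    # seven integer arguments and one double, i0-i5 land in edi, esi,
    # edx, ecx, r8 and r9, f0 lands in xmm0, and i6 goes to [ESP+0].
    # remap_frame_layout() performs the moves "in parallel", so a source
    # register is never overwritten before its value has been used.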

    def emit_raw_call(self):
        assert self.callconv == FFI_DEFAULT_ABI
        self.mc.CALL(self.fnloc)

    def load_result(self):
        if self.restype == 'S':
            # singlefloat return: use MOVD to load the target register
            # from the lower 32 bits of XMM0
            self.mc.MOVD32(self.resloc, xmm0)
        else:
            CallBuilderX86.load_result(self)

    def save_result_value(self, save_edx):
        # Temporarily save the result value into [ESP].
        if self.ressize == 0:      # void return
            return
        #
        if self.restype == FLOAT:    # and not 'S'
            self.mc.MOVSD_sx(0, xmm0.value)
            return
        #
        if self.restype == 'S':
            # singlefloat return: use MOVD to store the lower 32 bits
            # of XMM0 into [ESP]
            self.mc.MOVD32_sx(0, xmm0.value)
        else:
            assert self.restype == INT
            self.mc.MOV_sr(0, eax.value)

    def restore_result_value(self, save_edx):
        # Opposite of save_result_value()
        if self.ressize == 0:      # void return
            return
        #
        if self.restype == FLOAT:    # and not 'S'
            self.mc.MOVSD_xs(xmm0.value, 0)
            return
        #
        if self.restype == 'S':
            self.mc.MOVD32_xs(xmm0.value, 0)
        else:
            assert self.restype == INT
            self.mc.MOV_rs(eax.value, 0)


if IS_X86_32:
    CallBuilder = CallBuilder32
if IS_X86_64:
    CallBuilder = CallBuilder64
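
# A minimal sketch of the expected driving sequence (hypothetical call
# site; the real entry points are defined by AbstractCallBuilder in
# llsupport/callbuilder.py and used from assembler.py):
#
#     cb = CallBuilder(assembler, imm(target_addr), arglocs,
#                      resloc=eax, restype=INT, ressize=WORD)
#     cb.emit()   # roughly: prepare_arguments(), push_gcmap(),
#                 # emit_raw_call(), restore_stack_pointer(),
#                 # load_result(), pop_gcmap()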