/* Modules/_ctypes/libffi/src/x86/darwin64.S */

/* -----------------------------------------------------------------------
   darwin64.S - Copyright (c) 2006 Free Software Foundation, Inc.
	       Copyright (c) 2008 Red Hat, Inc.
   derived from unix64.S

   x86-64 Foreign Function Interface for Darwin.

   Permission is hereby granted, free of charge, to any person obtaining
   a copy of this software and associated documentation files (the
   ``Software''), to deal in the Software without restriction, including
   without limitation the rights to use, copy, modify, merge, publish,
   distribute, sublicense, and/or sell copies of the Software, and to
   permit persons to whom the Software is furnished to do so, subject to
   the following conditions:

   The above copyright notice and this permission notice shall be included
   in all copies or substantial portions of the Software.

   THE SOFTWARE IS PROVIDED ``AS IS'', WITHOUT WARRANTY OF ANY KIND, EXPRESS
   OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
   MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
   IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY CLAIM, DAMAGES OR
   OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
   ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
   OTHER DEALINGS IN THE SOFTWARE.
   ----------------------------------------------------------------------- */
  23. #ifdef __x86_64__
  24. #define LIBFFI_ASM
  25. #include <fficonfig.h>
  26. #include <ffi.h>
  27. .file "darwin64.S"
  28. .text
  29. /* ffi_call_unix64 (void *args, unsigned long bytes, unsigned flags,
  30. void *raddr, void (*fnaddr)(void));
  31. Bit o trickiness here -- ARGS+BYTES is the base of the stack frame
  32. for this function. This has been allocated by ffi_call. We also
  33. deallocate some of the stack that has been alloca'd. */
  34. .align 3
  35. .globl _ffi_call_unix64
  36. _ffi_call_unix64:
  37. LUW0:
  38. movq (%rsp), %r10 /* Load return address. */
  39. leaq (%rdi, %rsi), %rax /* Find local stack base. */
  40. movq %rdx, (%rax) /* Save flags. */
  41. movq %rcx, 8(%rax) /* Save raddr. */
  42. movq %rbp, 16(%rax) /* Save old frame pointer. */
  43. movq %r10, 24(%rax) /* Relocate return address. */
  44. movq %rax, %rbp /* Finalize local stack frame. */
  45. LUW1:
  46. movq %rdi, %r10 /* Save a copy of the register area. */
  47. movq %r8, %r11 /* Save a copy of the target fn. */
  48. movl %r9d, %eax /* Set number of SSE registers. */
  49. /* Load up all argument registers. */
  50. movq (%r10), %rdi
  51. movq 8(%r10), %rsi
  52. movq 16(%r10), %rdx
  53. movq 24(%r10), %rcx
  54. movq 32(%r10), %r8
  55. movq 40(%r10), %r9
  56. testl %eax, %eax
  57. jnz Lload_sse
  58. Lret_from_load_sse:
  59. /* Deallocate the reg arg area. */
  60. leaq 176(%r10), %rsp
  61. /* Call the user function. */
  62. call *%r11
  63. /* Deallocate stack arg area; local stack frame in redzone. */
  64. leaq 24(%rbp), %rsp
  65. movq 0(%rbp), %rcx /* Reload flags. */
  66. movq 8(%rbp), %rdi /* Reload raddr. */
  67. movq 16(%rbp), %rbp /* Reload old frame pointer. */
  68. LUW2:
  69. /* The first byte of the flags contains the FFI_TYPE. */
  70. movzbl %cl, %r10d
  71. leaq Lstore_table(%rip), %r11
  72. movslq (%r11, %r10, 4), %r10
  73. addq %r11, %r10
  74. jmp *%r10
  75. Lstore_table:
  76. .long Lst_void-Lstore_table /* FFI_TYPE_VOID */
  77. .long Lst_sint32-Lstore_table /* FFI_TYPE_INT */
  78. .long Lst_float-Lstore_table /* FFI_TYPE_FLOAT */
  79. .long Lst_double-Lstore_table /* FFI_TYPE_DOUBLE */
  80. .long Lst_ldouble-Lstore_table /* FFI_TYPE_LONGDOUBLE */
  81. .long Lst_uint8-Lstore_table /* FFI_TYPE_UINT8 */
  82. .long Lst_sint8-Lstore_table /* FFI_TYPE_SINT8 */
  83. .long Lst_uint16-Lstore_table /* FFI_TYPE_UINT16 */
  84. .long Lst_sint16-Lstore_table /* FFI_TYPE_SINT16 */
  85. .long Lst_uint32-Lstore_table /* FFI_TYPE_UINT32 */
  86. .long Lst_sint32-Lstore_table /* FFI_TYPE_SINT32 */
  87. .long Lst_int64-Lstore_table /* FFI_TYPE_UINT64 */
  88. .long Lst_int64-Lstore_table /* FFI_TYPE_SINT64 */
  89. .long Lst_struct-Lstore_table /* FFI_TYPE_STRUCT */
  90. .long Lst_int64-Lstore_table /* FFI_TYPE_POINTER */
  91. .text
  92. .align 3
  93. Lst_void:
  94. ret
  95. .align 3
  96. Lst_uint8:
  97. movzbq %al, %rax
  98. movq %rax, (%rdi)
  99. ret
  100. .align 3
  101. Lst_sint8:
  102. movsbq %al, %rax
  103. movq %rax, (%rdi)
  104. ret
  105. .align 3
  106. Lst_uint16:
  107. movzwq %ax, %rax
  108. movq %rax, (%rdi)
  109. .align 3
  110. Lst_sint16:
  111. movswq %ax, %rax
  112. movq %rax, (%rdi)
  113. ret
  114. .align 3
  115. Lst_uint32:
  116. movl %eax, %eax
  117. movq %rax, (%rdi)
  118. .align 3
  119. Lst_sint32:
  120. cltq
  121. movq %rax, (%rdi)
  122. ret
  123. .align 3
  124. Lst_int64:
  125. movq %rax, (%rdi)
  126. ret
  127. .align 3
  128. Lst_float:
  129. movss %xmm0, (%rdi)
  130. ret
  131. .align 3
  132. Lst_double:
  133. movsd %xmm0, (%rdi)
  134. ret
  135. Lst_ldouble:
  136. fstpt (%rdi)
  137. ret
  138. .align 3
  139. Lst_struct:
  140. leaq -20(%rsp), %rsi /* Scratch area in redzone. */
  141. /* We have to locate the values now, and since we don't want to
  142. write too much data into the user's return value, we spill the
  143. value to a 16 byte scratch area first. Bits 8, 9, and 10
  144. control where the values are located. Only one of the three
  145. bits will be set; see ffi_prep_cif_machdep for the pattern. */
  146. movd %xmm0, %r10
  147. movd %xmm1, %r11
  148. testl $0x100, %ecx
  149. cmovnz %rax, %rdx
  150. cmovnz %r10, %rax
  151. testl $0x200, %ecx
  152. cmovnz %r10, %rdx
  153. testl $0x400, %ecx
  154. cmovnz %r10, %rax
  155. cmovnz %r11, %rdx
  156. movq %rax, (%rsi)
  157. movq %rdx, 8(%rsi)
  158. /* Bits 12-31 contain the true size of the structure. Copy from
  159. the scratch area to the true destination. */
  160. shrl $12, %ecx
  161. rep movsb
  162. ret
  163. /* Many times we can avoid loading any SSE registers at all.
  164. It's not worth an indirect jump to load the exact set of
  165. SSE registers needed; zero or all is a good compromise. */
  166. .align 3
  167. LUW3:
  168. Lload_sse:
  169. movdqa 48(%r10), %xmm0
  170. movdqa 64(%r10), %xmm1
  171. movdqa 80(%r10), %xmm2
  172. movdqa 96(%r10), %xmm3
  173. movdqa 112(%r10), %xmm4
  174. movdqa 128(%r10), %xmm5
  175. movdqa 144(%r10), %xmm6
  176. movdqa 160(%r10), %xmm7
  177. jmp Lret_from_load_sse
  178. LUW4:
  179. .align 3
  180. .globl _ffi_closure_unix64
  181. _ffi_closure_unix64:
  182. LUW5:
  183. /* The carry flag is set by the trampoline iff SSE registers
  184. are used. Don't clobber it before the branch instruction. */
  185. leaq -200(%rsp), %rsp
  186. LUW6:
  187. movq %rdi, (%rsp)
  188. movq %rsi, 8(%rsp)
  189. movq %rdx, 16(%rsp)
  190. movq %rcx, 24(%rsp)
  191. movq %r8, 32(%rsp)
  192. movq %r9, 40(%rsp)
  193. jc Lsave_sse
  194. Lret_from_save_sse:
  195. movq %r10, %rdi
  196. leaq 176(%rsp), %rsi
  197. movq %rsp, %rdx
  198. leaq 208(%rsp), %rcx
  199. call _ffi_closure_unix64_inner
  200. /* Deallocate stack frame early; return value is now in redzone. */
  201. addq $200, %rsp
  202. LUW7:
  203. /* The first byte of the return value contains the FFI_TYPE. */
  204. movzbl %al, %r10d
  205. leaq Lload_table(%rip), %r11
  206. movslq (%r11, %r10, 4), %r10
  207. addq %r11, %r10
  208. jmp *%r10
  209. Lload_table:
  210. .long Lld_void-Lload_table /* FFI_TYPE_VOID */
  211. .long Lld_int32-Lload_table /* FFI_TYPE_INT */
  212. .long Lld_float-Lload_table /* FFI_TYPE_FLOAT */
  213. .long Lld_double-Lload_table /* FFI_TYPE_DOUBLE */
  214. .long Lld_ldouble-Lload_table /* FFI_TYPE_LONGDOUBLE */
  215. .long Lld_int8-Lload_table /* FFI_TYPE_UINT8 */
  216. .long Lld_int8-Lload_table /* FFI_TYPE_SINT8 */
  217. .long Lld_int16-Lload_table /* FFI_TYPE_UINT16 */
  218. .long Lld_int16-Lload_table /* FFI_TYPE_SINT16 */
  219. .long Lld_int32-Lload_table /* FFI_TYPE_UINT32 */
  220. .long Lld_int32-Lload_table /* FFI_TYPE_SINT32 */
  221. .long Lld_int64-Lload_table /* FFI_TYPE_UINT64 */
  222. .long Lld_int64-Lload_table /* FFI_TYPE_SINT64 */
  223. .long Lld_struct-Lload_table /* FFI_TYPE_STRUCT */
  224. .long Lld_int64-Lload_table /* FFI_TYPE_POINTER */
  225. .text
  226. .align 3
  227. Lld_void:
  228. ret
  229. .align 3
  230. Lld_int8:
  231. movzbl -24(%rsp), %eax
  232. ret
  233. .align 3
  234. Lld_int16:
  235. movzwl -24(%rsp), %eax
  236. ret
  237. .align 3
  238. Lld_int32:
  239. movl -24(%rsp), %eax
  240. ret
  241. .align 3
  242. Lld_int64:
  243. movq -24(%rsp), %rax
  244. ret
  245. .align 3
  246. Lld_float:
  247. movss -24(%rsp), %xmm0
  248. ret
  249. .align 3
  250. Lld_double:
  251. movsd -24(%rsp), %xmm0
  252. ret
  253. .align 3
  254. Lld_ldouble:
  255. fldt -24(%rsp)
  256. ret
  257. .align 3
  258. Lld_struct:
  259. /* There are four possibilities here, %rax/%rdx, %xmm0/%rax,
  260. %rax/%xmm0, %xmm0/%xmm1. We collapse two by always loading
  261. both rdx and xmm1 with the second word. For the remaining,
  262. bit 8 set means xmm0 gets the second word, and bit 9 means
  263. that rax gets the second word. */
  264. movq -24(%rsp), %rcx
  265. movq -16(%rsp), %rdx
  266. movq -16(%rsp), %xmm1
  267. testl $0x100, %eax
  268. cmovnz %rdx, %rcx
  269. movd %rcx, %xmm0
  270. testl $0x200, %eax
  271. movq -24(%rsp), %rax
  272. cmovnz %rdx, %rax
  273. ret
  274. /* See the comment above Lload_sse; the same logic applies here. */
  275. .align 3
  276. LUW8:
  277. Lsave_sse:
  278. movdqa %xmm0, 48(%rsp)
  279. movdqa %xmm1, 64(%rsp)
  280. movdqa %xmm2, 80(%rsp)
  281. movdqa %xmm3, 96(%rsp)
  282. movdqa %xmm4, 112(%rsp)
  283. movdqa %xmm5, 128(%rsp)
  284. movdqa %xmm6, 144(%rsp)
  285. movdqa %xmm7, 160(%rsp)
  286. jmp Lret_from_save_sse
  287. LUW9:
  288. .section __TEXT,__eh_frame,coalesced,no_toc+strip_static_syms+live_support
  289. EH_frame1:
  290. .set L$set$0,LECIE1-LSCIE1 /* CIE Length */
  291. .long L$set$0
  292. LSCIE1:
  293. .long 0x0 /* CIE Identifier Tag */
  294. .byte 0x1 /* CIE Version */
  295. .ascii "zR\0" /* CIE Augmentation */
  296. .byte 0x1 /* uleb128 0x1; CIE Code Alignment Factor */
  297. .byte 0x78 /* sleb128 -8; CIE Data Alignment Factor */
  298. .byte 0x10 /* CIE RA Column */
  299. .byte 0x1 /* uleb128 0x1; Augmentation size */
  300. .byte 0x10 /* FDE Encoding (pcrel sdata4) */
  301. .byte 0xc /* DW_CFA_def_cfa, %rsp offset 8 */
  302. .byte 0x7 /* uleb128 0x7 */
  303. .byte 0x8 /* uleb128 0x8 */
  304. .byte 0x90 /* DW_CFA_offset, column 0x10 */
  305. .byte 0x1
  306. .align 3
  307. LECIE1:
  308. .globl _ffi_call_unix64.eh
  309. _ffi_call_unix64.eh:
  310. LSFDE1:
  311. .set L$set$1,LEFDE1-LASFDE1 /* FDE Length */
  312. .long L$set$1
  313. LASFDE1:
  314. .long LASFDE1-EH_frame1 /* FDE CIE offset */
  315. .quad LUW0-. /* FDE initial location */
  316. .set L$set$2,LUW4-LUW0 /* FDE address range */
  317. .quad L$set$2
  318. .byte 0x0 /* Augmentation size */
  319. .byte 0x4 /* DW_CFA_advance_loc4 */
  320. .set L$set$3,LUW1-LUW0
  321. .long L$set$3
  322. /* New stack frame based off rbp. This is a itty bit of unwind
  323. trickery in that the CFA *has* changed. There is no easy way
  324. to describe it correctly on entry to the function. Fortunately,
  325. it doesn't matter too much since at all points we can correctly
  326. unwind back to ffi_call. Note that the location to which we
  327. moved the return address is (the new) CFA-8, so from the
  328. perspective of the unwind info, it hasn't moved. */
  329. .byte 0xc /* DW_CFA_def_cfa, %rbp offset 32 */
  330. .byte 0x6
  331. .byte 0x20
  332. .byte 0x80+6 /* DW_CFA_offset, %rbp offset 2*-8 */
  333. .byte 0x2
  334. .byte 0xa /* DW_CFA_remember_state */
  335. .byte 0x4 /* DW_CFA_advance_loc4 */
  336. .set L$set$4,LUW2-LUW1
  337. .long L$set$4
  338. .byte 0xc /* DW_CFA_def_cfa, %rsp offset 8 */
  339. .byte 0x7
  340. .byte 0x8
  341. .byte 0xc0+6 /* DW_CFA_restore, %rbp */
  342. .byte 0x4 /* DW_CFA_advance_loc4 */
  343. .set L$set$5,LUW3-LUW2
  344. .long L$set$5
  345. .byte 0xb /* DW_CFA_restore_state */
  346. .align 3
  347. LEFDE1:
  348. .globl _ffi_closure_unix64.eh
  349. _ffi_closure_unix64.eh:
  350. LSFDE3:
  351. .set L$set$6,LEFDE3-LASFDE3 /* FDE Length */
  352. .long L$set$6
  353. LASFDE3:
  354. .long LASFDE3-EH_frame1 /* FDE CIE offset */
  355. .quad LUW5-. /* FDE initial location */
  356. .set L$set$7,LUW9-LUW5 /* FDE address range */
  357. .quad L$set$7
  358. .byte 0x0 /* Augmentation size */
  359. .byte 0x4 /* DW_CFA_advance_loc4 */
  360. .set L$set$8,LUW6-LUW5
  361. .long L$set$8
  362. .byte 0xe /* DW_CFA_def_cfa_offset */
  363. .byte 208,1 /* uleb128 208 */
  364. .byte 0xa /* DW_CFA_remember_state */
  365. .byte 0x4 /* DW_CFA_advance_loc4 */
  366. .set L$set$9,LUW7-LUW6
  367. .long L$set$9
  368. .byte 0xe /* DW_CFA_def_cfa_offset */
  369. .byte 0x8
  370. .byte 0x4 /* DW_CFA_advance_loc4 */
  371. .set L$set$10,LUW8-LUW7
  372. .long L$set$10
  373. .byte 0xb /* DW_CFA_restore_state */
  374. .align 3
  375. LEFDE3:
  376. .subsections_via_symbols
  377. #endif /* __x86_64__ */