PageRenderTime 41ms CodeModel.GetById 13ms RepoModel.GetById 0ms app.codeStats 0ms

/rpython/jit/backend/arm/codebuilder.py

https://bitbucket.org/pypy/pypy/
Python | 465 lines | 387 code | 46 blank | 32 comment | 24 complexity | db4686d8c92e4fd0969ca8321dda347f MD5 | raw file
Possible License(s): AGPL-3.0, BSD-3-Clause, Apache-2.0
  1. from rpython.jit.backend.arm import conditions as cond
  2. from rpython.jit.backend.arm import registers as reg
  3. from rpython.jit.backend.arm.arch import WORD, PC_OFFSET
  4. from rpython.jit.backend.arm.instruction_builder import define_instructions
  5. from rpython.jit.backend.llsupport.asmmemmgr import BlockBuilderMixin
  6. from rpython.rlib.objectmodel import we_are_translated
  7. from rpython.rtyper.lltypesystem import lltype, rffi, llmemory
  8. from rpython.tool.udir import udir
  9. from rpython.jit.backend.detect_cpu import autodetect
# External binding for the C symbol "__clear_cache".  It is declared as
# taking two addresses and returning nothing; InstrBuilder.clear_cache()
# below passes it the start and end address of freshly copied machine code.
# NOTE(review): presumably this is the compiler-provided instruction-cache
# flush routine -- confirm against the toolchain documentation.
clear_cache = rffi.llexternal(
    "__clear_cache",
    [llmemory.Address, llmemory.Address],
    lltype.Void,
    _nowrapper=True,
    sandboxsafe=True)
  16. class AbstractARMBuilder(object):
  17. def __init__(self, arch_version=7):
  18. self.arch_version = arch_version
  19. def NOP(self):
  20. self.MOV_rr(0, 0)
  21. def PUSH(self, regs, cond=cond.AL):
  22. assert reg.sp.value not in regs
  23. instr = 0
  24. if len(regs) == 1:
  25. instr = cond << 28 | 0x52D0004 | (regs[0] & 0xF) << 12
  26. else:
  27. instr = self._encode_reg_list(cond << 28 | 0x92D << 16, regs)
  28. self.write32(instr)
  29. def STM(self, base, regs, write_back=False, cond=cond.AL):
  30. assert len(regs) > 0
  31. instr = (cond << 28
  32. | 0x11 << 23
  33. | (1 if write_back else 0) << 21
  34. | (base & 0xF) << 16)
  35. instr = self._encode_reg_list(instr, regs)
  36. self.write32(instr)
  37. def LDM(self, base, regs, write_back=False, cond=cond.AL):
  38. assert len(regs) > 0
  39. instr = (cond << 28
  40. | 0x11 << 23
  41. | (1 if write_back else 0) << 21
  42. | 1 << 20
  43. | (base & 0xF) << 16)
  44. instr = self._encode_reg_list(instr, regs)
  45. self.write32(instr)
  46. def VSTM(self, base, regs, write_back=False, cond=cond.AL):
  47. # encoding T1
  48. P = 0
  49. U = 1
  50. nregs = len(regs)
  51. assert nregs > 0 and nregs <= 16
  52. freg = regs[0]
  53. D = (freg & 0x10) >> 4
  54. Dd = (freg & 0xF)
  55. nregs *= 2
  56. instr = (cond << 28
  57. | 3 << 26
  58. | P << 24
  59. | U << 23
  60. | D << 22
  61. | (1 if write_back else 0) << 21
  62. | (base & 0xF) << 16
  63. | Dd << 12
  64. | 0xB << 8
  65. | nregs)
  66. self.write32(instr)
  67. def VLDM(self, base, regs, write_back=False, cond=cond.AL):
  68. # encoding T1
  69. P = 0
  70. U = 1
  71. nregs = len(regs)
  72. assert nregs > 0 and nregs <= 16
  73. freg = regs[0]
  74. D = (freg & 0x10) >> 4
  75. Dd = (freg & 0xF)
  76. nregs *= 2
  77. instr = (cond << 28
  78. | 3 << 26
  79. | P << 24
  80. | U << 23
  81. | D << 22
  82. | (1 if write_back else 0) << 21
  83. | 1 << 20
  84. | (base & 0xF) << 16
  85. | Dd << 12
  86. | 0xB << 8
  87. | nregs)
  88. self.write32(instr)
  89. def VPUSH(self, regs, cond=cond.AL):
  90. nregs = len(regs)
  91. assert nregs > 0 and nregs <= 16
  92. freg = regs[0]
  93. D = (freg & 0x10) >> 4
  94. Dd = (freg & 0xF)
  95. nregs *= 2
  96. instr = (cond << 28
  97. | 0xD2D << 16
  98. | D << 22
  99. | Dd << 12
  100. | 0xB << 8
  101. | nregs)
  102. self.write32(instr)
  103. def VPOP(self, regs, cond=cond.AL):
  104. nregs = len(regs)
  105. assert nregs > 0 and nregs <= 16
  106. freg = regs[0]
  107. D = (freg & 0x10) >> 4
  108. Dd = (freg & 0xF)
  109. nregs *= 2
  110. instr = (cond << 28
  111. | 0xCBD << 16
  112. | D << 22
  113. | Dd << 12
  114. | 0xB << 8
  115. | nregs)
  116. self.write32(instr)
  117. def VMOV_rc(self, rt, rt2, dm, cond=cond.AL):
  118. """This instruction copies two words from two ARM core registers into a
  119. doubleword extension register, or from a doubleword extension register
  120. to two ARM core registers.
  121. """
  122. op = 1
  123. instr = (cond << 28
  124. | 0xC << 24
  125. | 0x4 << 20
  126. | op << 20
  127. | (rt2 & 0xF) << 16
  128. | (rt & 0xF) << 12
  129. | 0xB << 8
  130. | 0x1 << 4
  131. | (dm & 0xF))
  132. self.write32(instr)
  133. # VMOV<c> <Dm>, <Rt>, <Rt2>
  134. def VMOV_cr(self, dm, rt, rt2, cond=cond.AL):
  135. """This instruction copies two words from two ARM core registers into a
  136. doubleword extension register, or from a doubleword extension register
  137. to two ARM core registers.
  138. """
  139. op = 0
  140. instr = (cond << 28
  141. | 0xC << 24
  142. | 0x4 << 20
  143. | op << 20
  144. | (rt2 & 0xF) << 16
  145. | (rt & 0xF) << 12
  146. | 0xB << 8
  147. | 0x1 << 4
  148. | (dm & 0xF))
  149. self.write32(instr)
  150. def VMOV_sc(self, dest, src):
  151. """move a single precision vfp register[src] to a core reg[dest]"""
  152. self._VMOV_32bit(src, dest, to_arm_register=1)
  153. def VMOV_cs(self, dest, src):
  154. """move a core register[src] to a single precision vfp
  155. register[dest]"""
  156. self._VMOV_32bit(dest, src, to_arm_register=0)
  157. def _VMOV_32bit(self, float_reg, core_reg, to_arm_register, cond=cond.AL):
  158. """This instruction transfers the contents of a single-precision VFP
  159. register to an ARM core register, or the contents of an ARM core
  160. register to a single-precision VFP register.
  161. """
  162. instr = (cond << 28
  163. | 0xE << 24
  164. | to_arm_register << 20
  165. | ((float_reg >> 1) & 0xF) << 16
  166. | core_reg << 12
  167. | 0xA << 8
  168. | (float_reg & 0x1) << 7
  169. | 1 << 4)
  170. self.write32(instr)
  171. def VMOV_cc(self, dd, dm, cond=cond.AL):
  172. sz = 1 # for 64-bit mode
  173. instr = (cond << 28
  174. | 0xEB << 20
  175. | (dd & 0xF) << 12
  176. | 0x5 << 9
  177. | (sz & 0x1) << 8
  178. | 0x1 << 6
  179. | (dm & 0xF))
  180. self.write32(instr)
  181. def VCVT_float_to_int(self, target, source, cond=cond.AL):
  182. opc2 = 0x5
  183. sz = 1
  184. self._VCVT(target, source, cond, opc2, sz)
  185. def VCVT_int_to_float(self, target, source, cond=cond.AL):
  186. self._VCVT(target, source, cond, 0, 1)
  187. def _VCVT(self, target, source, cond, opc2, sz):
  188. # A8.6.295
  189. to_integer = (opc2 >> 2) & 1
  190. if to_integer:
  191. D = target & 1
  192. target >>= 1
  193. M = (source >> 4) & 1
  194. else:
  195. M = source & 1
  196. source >>= 1
  197. D = (target >> 4) & 1
  198. op = 1
  199. instr = (cond << 28
  200. | 0xEB8 << 16
  201. | D << 22
  202. | opc2 << 16
  203. | (target & 0xF) << 12
  204. | 0x5 << 9
  205. | sz << 8
  206. | op << 7
  207. | 1 << 6
  208. | M << 5
  209. | (source & 0xF))
  210. self.write32(instr)
  211. def _VCVT_single_double(self, target, source, cond, sz):
  212. # double_to_single = (sz == '1');
  213. D = target & 1 if sz else (target >> 4) & 1
  214. M = (source >> 4) & 1 if sz else source & 1
  215. instr = (cond << 28
  216. | 0xEB7 << 16
  217. | 0xAC << 4
  218. | D << 22
  219. | (target & 0xF) << 12
  220. | sz << 8
  221. | M << 5
  222. | (source & 0xF))
  223. self.write32(instr)
  224. def VCVT_f64_f32(self, target, source, cond=cond.AL):
  225. self._VCVT_single_double(target, source, cond, 1)
  226. def VCVT_f32_f64(self, target, source, cond=cond.AL):
  227. self._VCVT_single_double(target, source, cond, 0)
  228. def POP(self, regs, cond=cond.AL):
  229. instr = self._encode_reg_list(cond << 28 | 0x8BD << 16, regs)
  230. self.write32(instr)
  231. def BKPT(self):
  232. """Unconditional breakpoint"""
  233. self.write32(cond.AL << 28 | 0x1200070)
  234. # corresponds to the instruction vmrs APSR_nzcv, fpscr
  235. def VMRS(self, cond=cond.AL):
  236. self.write32(cond << 28 | 0xEF1FA10)
  237. def B(self, target, c=cond.AL):
  238. self.gen_load_int(reg.ip.value, target, cond=c)
  239. self.BX(reg.ip.value, c=c)
  240. def BX(self, reg, c=cond.AL):
  241. self.write32(c << 28 | 0x12FFF1 << 4 | (reg & 0xF))
  242. def B_offs(self, target_ofs, c=cond.AL):
  243. pos = self.currpos()
  244. target_ofs = target_ofs - (pos + PC_OFFSET)
  245. assert target_ofs & 0x3 == 0
  246. self.write32(c << 28 | 0xA << 24 | (target_ofs >> 2) & 0xFFFFFF)
  247. def BL(self, addr, c=cond.AL):
  248. target = rffi.cast(rffi.INT, addr)
  249. self.gen_load_int(reg.ip.value, target, cond=c)
  250. self.BLX(reg.ip.value, c)
  251. def BLX(self, reg, c=cond.AL):
  252. self.write32(c << 28 | 0x12FFF3 << 4 | (reg & 0xF))
  253. def MOVT_ri(self, rd, imm16, c=cond.AL):
  254. """Move Top writes an immediate value to the top halfword of the
  255. destination register. It does not affect the contents of the bottom
  256. halfword."""
  257. self.write32(c << 28
  258. | 0x3 << 24
  259. | (1 << 22)
  260. | ((imm16 >> 12) & 0xF) << 16
  261. | (rd & 0xF) << 12
  262. | imm16 & 0xFFF)
  263. def MOVW_ri(self, rd, imm16, c=cond.AL):
  264. """Encoding A2 of MOV, that allow to load a 16 bit constant"""
  265. self.write32(c << 28
  266. | 0x3 << 24
  267. | ((imm16 >> 12) & 0xF) << 16
  268. | (rd & 0xF) << 12
  269. | imm16 & 0xFFF)
  270. def SXTB_rr(self, rd, rm, c=cond.AL):
  271. self.write32(c << 28
  272. | 0x06AF0070
  273. | (rd & 0xF) << 12
  274. | (rm & 0xF))
  275. def SXTH_rr(self, rd, rm, c=cond.AL):
  276. self.write32(c << 28
  277. | 0x06BF0070
  278. | (rd & 0xF) << 12
  279. | (rm & 0xF))
  280. def LDREX(self, rt, rn, c=cond.AL):
  281. self.write32(c << 28
  282. | 0x01900f9f
  283. | (rt & 0xF) << 12
  284. | (rn & 0xF) << 16)
  285. def STREX(self, rd, rt, rn, c=cond.AL):
  286. """rd must not be the same register as rt or rn"""
  287. self.write32(c << 28
  288. | 0x01800f90
  289. | (rt & 0xF)
  290. | (rd & 0xF) << 12
  291. | (rn & 0xF) << 16)
  292. def DMB(self):
  293. # ARMv7 only. I guess ARMv6 CPUs cannot be used in symmetric
  294. # multi-processing at all? That would make this instruction unneeded.
  295. # note: 'cond' is only permitted on Thumb here, but don't
  296. # write literally 0xf57ff05f, because it's larger than 31 bits
  297. c = cond.AL
  298. self.write32(c << 28
  299. | 0x157ff05f)
  300. FMDRR = VMOV_cr # uh, there are synonyms?
  301. FMRRD = VMOV_rc
  302. def _encode_reg_list(self, instr, regs):
  303. for reg in regs:
  304. instr |= 0x1 << reg
  305. return instr
  306. def _encode_imm(self, imm):
  307. u = 1
  308. if imm < 0:
  309. u = 0
  310. imm = -imm
  311. return u, imm
  312. def write32(self, word):
  313. self.writechar(chr(word & 0xFF))
  314. self.writechar(chr((word >> 8) & 0xFF))
  315. self.writechar(chr((word >> 16) & 0xFF))
  316. self.writechar(chr((word >> 24) & 0xFF))
  317. def writechar(self, char):
  318. raise NotImplementedError
  319. def currpos(self):
  320. raise NotImplementedError
  321. def gen_load_int(self, r, value, cond=cond.AL):
  322. if self.arch_version < 7:
  323. self.gen_load_int_v6(r, value, cond)
  324. else:
  325. self.gen_load_int_v7(r, value, cond)
  326. def gen_load_int_v7(self, r, value, cond=cond.AL):
  327. """r is the register number, value is the value to be loaded to the
  328. register"""
  329. bottom = value & 0xFFFF
  330. top = value >> 16
  331. self.MOVW_ri(r, bottom, cond)
  332. if top:
  333. self.MOVT_ri(r, top, cond)
  334. def gen_load_int_v6(self, r, value, cond=cond.AL):
  335. from rpython.jit.backend.arm.conditions import AL
  336. if cond != AL or 0 <= value <= 0xFFFF:
  337. self._load_by_shifting(r, value, cond)
  338. else:
  339. self.LDR_ri(r, reg.pc.value)
  340. self.MOV_rr(reg.pc.value, reg.pc.value)
  341. self.write32(value)
  342. def get_max_size_of_gen_load_int(self):
  343. return 4 if self.arch_version < 7 else 2
  344. ofs_shift = zip(range(8, 25, 8), range(12, 0, -4))
  345. def _load_by_shifting(self, r, value, c=cond.AL):
  346. # to be sure it is only called for the correct cases
  347. assert c != cond.AL or 0 <= value <= 0xFFFF
  348. self.MOV_ri(r, (value & 0xFF), cond=c)
  349. for offset, shift in self.ofs_shift:
  350. b = (value >> offset) & 0xFF
  351. if b == 0:
  352. continue
  353. t = b | (shift << 8)
  354. self.ORR_ri(r, r, imm=t, cond=c)
  355. class OverwritingBuilder(AbstractARMBuilder):
  356. def __init__(self, cb, start, size):
  357. AbstractARMBuilder.__init__(self, cb.arch_version)
  358. self.cb = cb
  359. self.index = start
  360. self.end = start + size
  361. def currpos(self):
  362. return self.index
  363. def writechar(self, char):
  364. assert self.index <= self.end
  365. self.cb.overwrite(self.index, char)
  366. self.index += 1
  367. class InstrBuilder(BlockBuilderMixin, AbstractARMBuilder):
  368. def __init__(self, arch_version=7):
  369. AbstractARMBuilder.__init__(self, arch_version)
  370. self.init_block_builder()
  371. #
  372. # ResOperation --> offset in the assembly.
  373. # ops_offset[None] represents the beginning of the code after the last op
  374. # (i.e., the tail of the loop)
  375. self.ops_offset = {}
  376. def mark_op(self, op):
  377. pos = self.get_relative_pos()
  378. self.ops_offset[op] = pos
  379. def _dump_trace(self, addr, name, formatter=-1):
  380. if not we_are_translated():
  381. if formatter != -1:
  382. name = name % formatter
  383. dir = udir.ensure('asm', dir=True)
  384. f = dir.join(name).open('wb')
  385. data = rffi.cast(rffi.CCHARP, addr)
  386. for i in range(self.currpos()):
  387. f.write(data[i])
  388. f.close()
  389. def clear_cache(self, addr):
  390. if we_are_translated():
  391. startaddr = rffi.cast(llmemory.Address, addr)
  392. endaddr = rffi.cast(llmemory.Address,
  393. addr + self.get_relative_pos())
  394. clear_cache(startaddr, endaddr)
  395. def copy_to_raw_memory(self, addr):
  396. self._copy_to_raw_memory(addr)
  397. self.clear_cache(addr)
  398. self._dump(addr, "jit-backend-dump", 'arm')
  399. def currpos(self):
  400. return self.get_relative_pos()
# Hand the builder class to instruction_builder.define_instructions().
# NOTE(review): presumably this attaches the generated emitters that the
# class calls but does not define itself (MOV_rr, MOV_ri, ORR_ri,
# LDR_ri, ...) -- confirm in instruction_builder.py.
define_instructions(AbstractARMBuilder)