
/hphp/runtime/vm/jit/vasm-arm.cpp

https://gitlab.com/iranjith4/hhvm

/*
   +----------------------------------------------------------------------+
   | HipHop for PHP                                                       |
   +----------------------------------------------------------------------+
   | Copyright (c) 2010-2013 Facebook, Inc. (http://www.facebook.com)     |
   +----------------------------------------------------------------------+
   | This source file is subject to version 3.01 of the PHP license,     |
   | that is bundled with this package in the file LICENSE, and is        |
   | available through the world-wide-web at the following url:           |
   | http://www.php.net/license/3_01.txt                                  |
   | If you did not receive a copy of the PHP license and are unable to   |
   | obtain it through the world-wide-web, please send a note to          |
   | license@php.net so we can mail you a copy immediately.               |
   +----------------------------------------------------------------------+
*/
#include "hphp/runtime/vm/jit/vasm-emit.h"

#include "hphp/runtime/vm/jit/abi-arm.h"
#include "hphp/runtime/vm/jit/ir-instruction.h"
#include "hphp/runtime/vm/jit/mc-generator.h"
#include "hphp/runtime/vm/jit/print.h"
#include "hphp/runtime/vm/jit/reg-algorithms.h"
#include "hphp/runtime/vm/jit/service-requests.h"
#include "hphp/runtime/vm/jit/smashable-instr-arm.h"
#include "hphp/runtime/vm/jit/timer.h"
#include "hphp/runtime/vm/jit/vasm-gen.h"
#include "hphp/runtime/vm/jit/vasm-instr.h"
#include "hphp/runtime/vm/jit/vasm-internal.h"
#include "hphp/runtime/vm/jit/vasm-lower.h"
#include "hphp/runtime/vm/jit/vasm-print.h"
#include "hphp/runtime/vm/jit/vasm-reg.h"
#include "hphp/runtime/vm/jit/vasm-unit.h"
#include "hphp/runtime/vm/jit/vasm.h"

#include "hphp/vixl/a64/macro-assembler-a64.h"

TRACE_SET_MOD(vasm);

namespace HPHP { namespace jit {
///////////////////////////////////////////////////////////////////////////////

using namespace arm;
using namespace vixl;

namespace arm { struct ImmFolder; }

namespace {
///////////////////////////////////////////////////////////////////////////////

const TCA kEndOfTargetChain = reinterpret_cast<TCA>(0xf00ffeeffaaff11f);
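
/*
 * Helpers converting vasm operands into their vixl equivalents: W() maps
 * 8- and 32-bit Vregs onto 32-bit vixl Registers, X() maps 64-bit Vregs onto
 * 64-bit Registers, D() maps onto SIMD/FP registers, M() turns a Vptr into a
 * MemOperand (base + displacement only, as the assert below enforces), and
 * C() translates x64-style condition codes into vixl conditions.
 */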
vixl::Register W(Vreg32 r) {
  PhysReg pr(r.asReg());
  return x2a(pr).W();
}

vixl::Register W(Vreg8 r) {
  PhysReg pr(r.asReg());
  return x2a(pr).W();
}

vixl::Register X(Vreg64 r) {
  PhysReg pr(r.asReg());
  return x2a(pr);
}

vixl::FPRegister D(Vreg r) {
  return x2simd(r);
}

// convert Vptr to MemOperand
vixl::MemOperand M(Vptr p) {
  assertx(p.base.isValid() && !p.index.isValid());
  return X(p.base)[p.disp];
}

vixl::Condition C(ConditionCode cc) {
  return arm::convertCC(cc);
}

///////////////////////////////////////////////////////////////////////////////

struct Vgen {
  explicit Vgen(Venv& env)
    : env(env)
    , text(env.text)
    , codeBlock(env.cb)
    , assem(*codeBlock)
    , a(&assem)
    , current(env.current)
    , next(env.next)
    , jmps(env.jmps)
    , jccs(env.jccs)
    , bccs(env.bccs)
    , catches(env.catches)
  {}

  static void patch(Venv& env);
  static void pad(CodeBlock& cb) {}

  /////////////////////////////////////////////////////////////////////////////

  template<class Inst> void emit(Inst& i) {
    always_assert_flog(false, "unimplemented instruction: {} in B{}\n",
                       vinst_names[Vinstr(i).op], size_t(current));
  }

  // intrinsics
  void emit(const copy& i);
  void emit(const copy2& i);
  void emit(const debugtrap& i) { a->Brk(0); }
  void emit(const hostcall& i) { a->HostCall(i.argc); }
  void emit(const ldimmq& i);
  void emit(const ldimml& i);
  void emit(const ldimmb& i);
  void emit(const ldimmqs& i) { not_implemented(); }
  void emit(const load& i);
  void emit(const store& i);

  // functions
  void emit(const callr& i) { a->Blr(X(i.target)); }
  void emit(const ret& i) { a->Ret(); }
  void emit(const callphp& i);

  // exceptions
  void emit(const nothrow& i);
  void emit(const syncpoint& i);
  void emit(const unwind& i);

  // instructions
  void emit(const addli& i) { a->Add(W(i.d), W(i.s1), i.s0.l(), SetFlags); }
  void emit(const addq& i) { a->Add(X(i.d), X(i.s1), X(i.s0), SetFlags); }
  void emit(const addqi& i) { a->Add(X(i.d), X(i.s1), i.s0.l(), SetFlags); }
  void emit(const andq& i) { a->And(X(i.d), X(i.s1), X(i.s0) /* flags? */); }
  void emit(const andqi& i) { a->And(X(i.d), X(i.s1), i.s0.l() /* flags? */); }
  void emit(const sar& i) { a->asrv(X(i.d), X(i.s0), X(i.s1)); }
  void emit(const brk& i) { a->Brk(i.code); }
  void emit(cbcc i);
  void emit(const cmpl& i) { a->Cmp(W(i.s1), W(i.s0)); }
  void emit(const cmpli& i) { a->Cmp(W(i.s1), i.s0.l()); }
  void emit(const cmpq& i) { a->Cmp(X(i.s1), X(i.s0)); }
  void emit(const cmpqi& i) { a->Cmp(X(i.s1), i.s0.l()); }
  void emit(const decq& i) { a->Sub(X(i.d), X(i.s), 1LL, SetFlags); }
  void emit(const incq& i) { a->Add(X(i.d), X(i.s), 1LL, SetFlags); }
  void emit(jcc i);
  void emit(jmp i);
  void emit(const lea& i);
  void emit(const loadl& i) { a->Ldr(W(i.d), M(i.s)); /* 0-extends? */ }
  void emit(const loadzbl& i) { a->Ldrb(W(i.d), M(i.s)); }
  void emit(const shl& i) { a->lslv(X(i.d), X(i.s0), X(i.s1)); }
  void emit(const movzbl& i) { a->Uxtb(W(i.d), W(i.s)); }
  void emit(const movzbq& i) { a->Uxtb(W(Vreg32(size_t(i.d))), W(i.s)); }
  void emit(const imul& i) { a->Mul(X(i.d), X(i.s0), X(i.s1)); }
  void emit(const neg& i) { a->Neg(X(i.d), X(i.s), vixl::SetFlags); }
  void emit(const not& i) { a->Mvn(X(i.d), X(i.s)); }
  void emit(const orq& i) { a->Orr(X(i.d), X(i.s1), X(i.s0) /* flags? */); }
  void emit(const orqi& i) { a->Orr(X(i.d), X(i.s1), i.s0.l() /* flags? */); }
  void emit(const storeb& i) { a->Strb(W(i.s), M(i.m)); }
  void emit(const storel& i) { a->Str(W(i.s), M(i.m)); }
  void emit(const setcc& i) { PhysReg r(i.d.asReg()); a->Cset(X(r), C(i.cc)); }
  void emit(const subli& i) { a->Sub(W(i.d), W(i.s1), i.s0.l(), SetFlags); }
  void emit(const subq& i) { a->Sub(X(i.d), X(i.s1), X(i.s0), SetFlags); }
  void emit(const subqi& i) { a->Sub(X(i.d), X(i.s1), i.s0.l(), SetFlags); }
  void emit(tbcc i);
  void emit(const testl& i) { a->Tst(W(i.s1), W(i.s0)); }
  void emit(const testli& i) { a->Tst(W(i.s1), i.s0.l()); }
  void emit(const ud2& i) { a->Brk(1); }
  void emit(const xorq& i) { a->Eor(X(i.d), X(i.s1), X(i.s0) /* flags? */); }
  void emit(const xorqi& i) { a->Eor(X(i.d), X(i.s1), i.s0.l() /* flags? */); }
  void emit(const conjure& i) { always_assert(false); }
  void emit(const conjureuse& i) { always_assert(false); }

  void emit_nop() { not_implemented(); }

private:
  CodeBlock& frozen() { return text.frozen().code; }

private:
  Venv& env;
  Vtext& text;
  CodeBlock* codeBlock;
  vixl::MacroAssembler assem;
  vixl::MacroAssembler* a;
  const Vlabel current;
  const Vlabel next;
  jit::vector<Venv::LabelPatch>& jmps;
  jit::vector<Venv::LabelPatch>& jccs;
  jit::vector<Venv::LabelPatch>& bccs;
  jit::vector<Venv::LabelPatch>& catches;
};

///////////////////////////////////////////////////////////////////////////////
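
/*
 * Patch the branch targets recorded during emission. jmps and jccs hold the
 * addresses of smashable jumps and conditional jumps, which are retargeted
 * with smashJmp/smashJcc; bccs hold raw compare-and-branch and test-and-branch
 * instructions (cbz/cbnz, tbz/tbnz) whose PC-relative immediates are rewritten
 * in place.
 */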
void Vgen::patch(Venv& env) {
  for (auto& p : env.jmps) {
    assertx(env.addrs[p.target]);
    smashJmp(p.instr, env.addrs[p.target]);
  }
  for (auto& p : env.jccs) {
    assertx(env.addrs[p.target]);
    smashJcc(p.instr, env.addrs[p.target]);
  }
  for (auto& p : env.bccs) {
    assertx(env.addrs[p.target]);
    auto link = (Instruction*) p.instr;
    link->SetImmPCOffsetTarget(Instruction::Cast(env.addrs[p.target]));
  }
}

///////////////////////////////////////////////////////////////////////////////

void Vgen::emit(const callphp& i) {
  emitSmashableCall(*codeBlock, env.meta, i.stub);
}

void Vgen::emit(const copy& i) {
  if (i.s.isGP() && i.d.isGP()) {
    a->Mov(X(i.d), X(i.s));
  } else if (i.s.isSIMD() && i.d.isGP()) {
    a->Fmov(X(i.d), D(i.s));
  } else if (i.s.isGP() && i.d.isSIMD()) {
    a->Fmov(D(i.d), X(i.s));
  } else {
    assertx(i.s.isSIMD() && i.d.isSIMD());
    a->Fmov(D(i.d), D(i.s));
  }
}

void Vgen::emit(const copy2& i) {
  MovePlan moves;
  Reg64 d0 = i.d0, d1 = i.d1, s0 = i.s0, s1 = i.s1;
  moves[d0] = s0;
  moves[d1] = s1;
  auto howTo = doRegMoves(moves, rAsm); // rAsm isn't used.
  for (auto& how : howTo) {
    if (how.m_kind == MoveInfo::Kind::Move) {
      a->Mov(X(how.m_dst), X(how.m_src));
    } else {
      // The two registers form a cycle: swap them with the XOR trick so no
      // scratch register is needed.
      auto const d = X(how.m_dst);
      auto const s = X(how.m_src);
      a->Eor(d, d, s);
      a->Eor(s, d, s);
      a->Eor(d, d, s);
    }
  }
}

void Vgen::emit(const ldimmq& i) {
  union { double dval; int64_t ival; };
  ival = i.s.q();
  if (i.d.isSIMD()) {
    // Assembler::fmov (which you'd think shouldn't be a macro instruction)
    // will emit a ldr from a literal pool if IsImmFP64 is false. vixl's
    // literal pools don't work well with our codegen pattern, so if that
    // would happen, emit the raw bits into a GPR first and then move them
    // unmodified into a SIMD.
    if (vixl::Assembler::IsImmFP64(dval)) {
      a->Fmov(D(i.d), dval);
    } else if (ival == 0) { // careful: dval == 0.0 is true for -0.0
      // 0.0 is not encodeable as an immediate to Fmov, but this works.
      a->Fmov(D(i.d), vixl::xzr);
    } else {
      a->Mov(rAsm, ival); // XXX avoid scratch register somehow.
      a->Fmov(D(i.d), rAsm);
    }
  } else {
    a->Mov(X(i.d), ival);
  }
}
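
// Materialize an integer bit pattern into a SIMD register. Zero can be moved
// straight from xzr; anything else is bounced through the scratch GPR rAsm,
// mirroring the non-encodeable path in ldimmq above.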
void emitSimdImmInt(vixl::MacroAssembler* a, int64_t val, Vreg d) {
  if (val == 0) {
    a->Fmov(D(d), vixl::xzr);
  } else {
    a->Mov(rAsm, val); // XXX avoid scratch register somehow.
    a->Fmov(D(d), rAsm);
  }
}

void Vgen::emit(const ldimml& i) {
  if (i.d.isSIMD()) {
    emitSimdImmInt(a, i.s.q(), i.d);
  } else {
    Vreg32 d = i.d;
    a->Mov(W(d), i.s.l());
  }
}

void Vgen::emit(const ldimmb& i) {
  if (i.d.isSIMD()) {
    emitSimdImmInt(a, i.s.q(), i.d);
  } else {
    Vreg8 d = i.d;
    a->Mov(W(d), i.s.b());
  }
}

void Vgen::emit(const load& i) {
  if (i.d.isGP()) {
    a->Ldr(X(i.d), M(i.s));
  } else {
    a->Ldr(D(i.d), M(i.s));
  }
}

void Vgen::emit(const store& i) {
  if (i.s.isGP()) {
    a->Str(X(i.s), M(i.d));
  } else {
    a->Str(D(i.s), M(i.d));
  }
}

///////////////////////////////////////////////////////////////////////////////

void Vgen::emit(const nothrow& i) {
  env.meta.catches.emplace_back(a->frontier(), nullptr);
}

void Vgen::emit(const syncpoint& i) {
  FTRACE(5, "IR recordSyncPoint: {} {} {}\n", a->frontier(),
         i.fix.pcOffset, i.fix.spOffset);
  env.meta.fixups.emplace_back(a->frontier(), i.fix);
}

void Vgen::emit(const unwind& i) {
  catches.push_back({a->frontier(), i.targets[1]});
  emit(jmp{i.targets[0]});
}

///////////////////////////////////////////////////////////////////////////////
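
/*
 * Branch emission. Jumps whose target block is the fall-through block are
 * elided (or the condition is inverted so the taken target comes first).
 * Everything else is emitted against the placeholder kEndOfTargetChain and
 * recorded in jmps/jccs/bccs so Vgen::patch() can retarget it once block
 * addresses are known: jmp/jcc use smashable sequences, while cbcc/tbcc emit
 * cbz/cbnz and tbz/tbnz with a zero offset that is later patched in place.
 */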
void Vgen::emit(jmp i) {
  if (next == i.target) return;
  jmps.push_back({a->frontier(), i.target});
  // B range is +/- 128MB but this uses BR
  emitSmashableJmp(*codeBlock, env.meta, kEndOfTargetChain);
}

void Vgen::emit(jcc i) {
  assertx(i.cc != CC_None);
  if (i.targets[1] != i.targets[0]) {
    if (next == i.targets[1]) {
      // the taken branch is the fall-through block, invert the branch.
      i = jcc{ccNegate(i.cc), i.sf, {i.targets[1], i.targets[0]}};
    }
    jccs.push_back({a->frontier(), i.targets[1]});
    // B.cond range is +/- 1MB but this uses BR
    emitSmashableJcc(*codeBlock, env.meta, kEndOfTargetChain, i.cc);
  }
  emit(jmp{i.targets[0]});
}

void Vgen::emit(const lea& i) {
  assertx(!i.s.index.isValid());
  assertx(i.s.scale == 1);
  a->Add(X(i.d), X(i.s.base), i.s.disp);
}

void Vgen::emit(cbcc i) {
  assertx(i.cc == vixl::ne || i.cc == vixl::eq);
  if (i.targets[1] != i.targets[0]) {
    if (next == i.targets[1]) {
      // the taken branch is the fall-through block, invert the branch.
      i = cbcc{i.cc == vixl::ne ? vixl::eq : vixl::ne, i.s,
               {i.targets[1], i.targets[0]}};
    }
    bccs.push_back({a->frontier(), i.targets[1]});
    // offset range +/- 1MB
    if (i.cc == vixl::ne) {
      a->cbnz(X(i.s), 0);
    } else {
      a->cbz(X(i.s), 0);
    }
  }
  emit(jmp{i.targets[0]});
}

void Vgen::emit(tbcc i) {
  assertx(i.cc == vixl::ne || i.cc == vixl::eq);
  if (i.targets[1] != i.targets[0]) {
    if (next == i.targets[1]) {
      // the taken branch is the fall-through block, invert the branch.
      i = tbcc{i.cc == vixl::ne ? vixl::eq : vixl::ne, i.bit, i.s,
               {i.targets[1], i.targets[0]}};
    }
    bccs.push_back({a->frontier(), i.targets[1]});
    // offset range +/- 32KB
    if (i.cc == vixl::ne) {
      a->tbnz(X(i.s), i.bit, 0);
    } else {
      a->tbz(X(i.s), i.bit, 0);
    }
  }
  emit(jmp{i.targets[0]});
}

///////////////////////////////////////////////////////////////////////////////

/*
 * Some vasm opcodes don't have equivalent single instructions on ARM, and the
 * equivalent instruction sequences require scratch registers. We have to
 * lower these to ARM-suitable vasm opcodes before register allocation.
 */
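
// For example, a memory-operand compare like cmpbim{imm, mem, sf} is rewritten
// below as loadzbl{mem, tmp} followed by cmpli{imm, tmp, sf}, where tmp is a
// fresh virtual register that register allocation will later assign.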
template<typename Inst>
void lower(Inst& i, Vout& v) {
  v << i;
}

void lower(cmpbim& i, Vout& v) {
  auto scratch = v.makeReg();
  v << loadzbl{i.s1, scratch};
  v << cmpli{i.s0, scratch, i.sf};
}

void lower(cmplim& i, Vout& v) {
  auto scratch = v.makeReg();
  v << loadl{i.s1, scratch};
  v << cmpli{i.s0, scratch, i.sf};
}

void lower(cmpqm& i, Vout& v) {
  auto scratch = v.makeReg();
  v << load{i.s1, scratch};
  v << cmpq{i.s0, scratch, i.sf};
}

void lower(testbim& i, Vout& v) {
  auto scratch = v.makeReg();
  v << loadzbl{i.s1, scratch};
  v << testli{i.s0, scratch, i.sf};
}

void lowerForARM(Vunit& unit) {
  assertx(check(unit));

  // block order doesn't matter, but only visit reachable blocks.
  auto blocks = sortBlocks(unit);

  for (auto b : blocks) {
    auto oldCode = std::move(unit.blocks[b].code);
    Vout v{unit, b};

    for (auto& inst : oldCode) {
      v.setOrigin(inst.origin);

      switch (inst.op) {
#define O(nm, imm, use, def) \
        case Vinstr::nm: \
          lower(inst.nm##_, v); \
          break;

        VASM_OPCODES
#undef O
      }
    }
  }

  assertx(check(unit));
  printUnit(kVasmARMFoldLevel, "after lowerForARM", unit);
}

///////////////////////////////////////////////////////////////////////////////
}
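
/*
 * ARM-specific vasm optimization pipeline: optimizeExits and simplify, then
 * immediate folding when the unit still has constants to place, generic
 * vlower, the ARM lowering pass above, dead-code elimination plus (optionally)
 * register allocation, and finally jump optimization for multi-block units.
 */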
void optimizeARM(Vunit& unit, const Abi& abi, bool regalloc) {
  Timer timer(Timer::vasm_optimize);

  optimizeExits(unit);
  simplify(unit);

  if (!unit.constToReg.empty()) {
    foldImms<arm::ImmFolder>(unit);
  }

  vlower(unit);
  lowerForARM(unit);

  if (unit.needsRegAlloc()) {
    removeDeadCode(unit);
    if (regalloc) allocateRegisters(unit, abi);
  }

  if (unit.blocks.size() > 1) {
    optimizeJmps(unit);
  }
}

void emitARM(const Vunit& unit, Vtext& text, CGMeta& fixups, AsmInfo* asmInfo) {
  vasm_emit<Vgen>(unit, text, fixups, asmInfo);
}

///////////////////////////////////////////////////////////////////////////////
}}