PageRenderTime 68ms CodeModel.GetById 20ms RepoModel.GetById 0ms app.codeStats 0ms

/compiler/arm/cgcpu.pas

https://github.com/slibre/freepascal
Pascal | 4152 lines | 3511 code | 343 blank | 298 comment | 422 complexity | 4ffa7e926bb091829f38cfbc8d1d4d75 MD5 | raw file
Possible License(s): LGPL-2.0, LGPL-2.1, LGPL-3.0
  1. {
  2. Copyright (c) 2003 by Florian Klaempfl
  3. Member of the Free Pascal development team
  4. This unit implements the code generator for the ARM
  5. This program is free software; you can redistribute it and/or modify
  6. it under the terms of the GNU General Public License as published by
  7. the Free Software Foundation; either version 2 of the License, or
  8. (at your option) any later version.
  9. This program is distributed in the hope that it will be useful,
  10. but WITHOUT ANY WARRANTY; without even the implied warranty of
  11. MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
  12. GNU General Public License for more details.
  13. You should have received a copy of the GNU General Public License
  14. along with this program; if not, write to the Free Software
  15. Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
  16. ****************************************************************************
  17. }
  18. unit cgcpu;
  19. {$i fpcdefs.inc}
  20. interface
  21. uses
  22. globtype,symtype,symdef,
  23. cgbase,cgutils,cgobj,
  24. aasmbase,aasmcpu,aasmtai,aasmdata,
  25. parabase,
  26. cpubase,cpuinfo,cg64f32,rgcpu;
  27. type
  { Common ARM code generator class, derived from tcg. It overrides the
    generic parameter, call, arithmetic, load/store, compare/jump and
    procedure entry/exit hooks and declares a few ARM specific helpers.
    tarmcgarm and Tthumb2cgarm (declared further down) derive from it. }
  28. tcgarm = class(tcg)
  29. { true, if the next arithmetic operation should modify the flags }
  30. cgsetflags : boolean;
  { passing of parameters }
  31. procedure a_load_const_cgpara(list : TAsmList;size : tcgsize;a : tcgint;const paraloc : TCGPara);override;
  32. procedure a_load_ref_cgpara(list : TAsmList;size : tcgsize;const r : treference;const paraloc : TCGPara);override;
  33. procedure a_loadaddr_ref_cgpara(list : TAsmList;const r : treference;const paraloc : TCGPara);override;
  { calls by name, through a register and through a memory reference }
  34. procedure a_call_name(list : TAsmList;const s : string; weak: boolean);override;
  35. procedure a_call_reg(list : TAsmList;reg: tregister);override;
  36. procedure a_call_ref(list : TAsmList;ref: treference);override;
  { integer arithmetic; the plain variants forward to the *_checkoverflow
    variants with setflags=false }
  37. procedure a_op_const_reg(list : TAsmList; Op: TOpCG; size: TCGSize; a: tcgint; reg: TRegister); override;
  38. procedure a_op_reg_reg(list : TAsmList; Op: TOpCG; size: TCGSize; src, dst: TRegister); override;
  39. procedure a_op_const_reg_reg(list: TAsmList; op: TOpCg;
  40. size: tcgsize; a: tcgint; src, dst: tregister); override;
  41. procedure a_op_reg_reg_reg(list: TAsmList; op: TOpCg;
  42. size: tcgsize; src1, src2, dst: tregister); override;
  43. procedure a_op_const_reg_reg_checkoverflow(list: TAsmList; op: TOpCg; size: tcgsize; a: tcgint; src, dst: tregister;setflags : boolean;var ovloc : tlocation);override;
  44. procedure a_op_reg_reg_reg_checkoverflow(list: TAsmList; op: TOpCg; size: tcgsize; src1, src2, dst: tregister;setflags : boolean;var ovloc : tlocation);override;
  45. { move instructions }
  46. procedure a_load_reg_ref(list : TAsmList; fromsize, tosize: tcgsize; reg : tregister;const ref : treference);override;
  47. procedure a_load_reg_reg(list : TAsmList; fromsize, tosize : tcgsize;reg1,reg2 : tregister);override;
  { not overrides: variants of the load/store routines which return a
    treference }
  48. function a_internal_load_reg_ref(list : TAsmList; fromsize, tosize: tcgsize; reg : tregister;const ref : treference):treference;
  49. function a_internal_load_ref_reg(list : TAsmList; fromsize, tosize : tcgsize;const Ref : treference;reg : tregister):treference;
  50. { fpu move instructions }
  51. procedure a_loadfpu_reg_reg(list: TAsmList; fromsize, tosize: tcgsize; reg1, reg2: tregister); override;
  52. procedure a_loadfpu_ref_reg(list: TAsmList; fromsize, tosize: tcgsize; const ref: treference; reg: tregister); override;
  53. procedure a_loadfpu_reg_ref(list: TAsmList; fromsize, tosize: tcgsize; reg: tregister; const ref: treference); override;
  54. procedure a_loadfpu_ref_cgpara(list : TAsmList;size : tcgsize;const ref : treference;const paraloc : TCGPara);override;
  55. { comparison operations }
  56. procedure a_cmp_const_reg_label(list : TAsmList;size : tcgsize;cmp_op : topcmp;a : tcgint;reg : tregister;
  57. l : tasmlabel);override;
  58. procedure a_cmp_reg_reg_label(list : TAsmList;size : tcgsize;cmp_op : topcmp;reg1,reg2 : tregister;l : tasmlabel); override;
  { jumps and flag handling }
  59. procedure a_jmp_name(list : TAsmList;const s : string); override;
  60. procedure a_jmp_always(list : TAsmList;l: tasmlabel); override;
  61. procedure a_jmp_flags(list : TAsmList;const f : TResFlags;l: tasmlabel); override;
  62. procedure g_flags2reg(list: TAsmList; size: TCgSize; const f: TResFlags; reg: TRegister); override;
  { procedure entry and exit code }
  63. procedure g_proc_entry(list : TAsmList;localsize : longint;nostackframe:boolean);override;
  64. procedure g_proc_exit(list : TAsmList;parasize : longint;nostackframe:boolean); override;
  65. procedure a_loadaddr_ref_reg(list : TAsmList;const ref : treference;r : tregister);override;
  { block copies; g_concatcopy_move and g_concatcopy_internal are helpers
    introduced by this class (no override) }
  66. procedure g_concatcopy(list : TAsmList;const source,dest : treference;len : tcgint);override;
  67. procedure g_concatcopy_unaligned(list : TAsmList;const source,dest : treference;len : tcgint);override;
  68. procedure g_concatcopy_move(list : TAsmList;const source,dest : treference;len : tcgint);
  69. procedure g_concatcopy_internal(list : TAsmList;const source,dest : treference;len : tcgint;aligned : boolean);
  70. procedure g_overflowcheck(list: TAsmList; const l: tlocation; def: tdef); override;
  71. procedure g_overflowCheck_loc(List:TAsmList;const Loc:TLocation;def:TDef;ovloc : tlocation);override;
  72. procedure g_save_registers(list : TAsmList);override;
  73. procedure g_restore_registers(list : TAsmList);override;
  74. procedure a_jmp_cond(list : TAsmList;cond : TOpCmp;l: tasmlabel);
  75. procedure fixref(list : TAsmList;var ref : treference);
  { virtual: Tthumb2cgarm provides its own implementation }
  76. function handle_load_store(list:TAsmList;op: tasmop;oppostfix : toppostfix;reg:tregister;ref: treference):treference; virtual;
  77. procedure g_intf_wrapper(list: TAsmList; procdef: tprocdef; const labelname: string; ioffset: longint);override;
  78. procedure g_adjust_self_value(list:TAsmList;procdef: tprocdef;ioffset: tcgint); override;
  79. procedure g_stackpointer_alloc(list : TAsmList;size : longint);override;
  { multimedia (mm) register moves and operations }
  80. procedure a_loadmm_reg_reg(list: TAsmList; fromsize, tosize : tcgsize;reg1, reg2: tregister;shuffle : pmmshuffle); override;
  81. procedure a_loadmm_ref_reg(list: TAsmList; fromsize, tosize : tcgsize;const ref: treference; reg: tregister;shuffle : pmmshuffle); override;
  82. procedure a_loadmm_reg_ref(list: TAsmList; fromsize, tosize : tcgsize;reg: tregister; const ref: treference;shuffle : pmmshuffle); override;
  83. procedure a_loadmm_intreg_reg(list: TAsmList; fromsize, tosize : tcgsize;intreg, mmreg: tregister; shuffle: pmmshuffle); override;
  84. procedure a_loadmm_reg_intreg(list: TAsmList; fromsize, tosize : tcgsize;mmreg, intreg: tregister; shuffle : pmmshuffle); override;
  85. procedure a_opmm_reg_reg(list: TAsmList; Op: TOpCG; size : tcgsize;src,dst: tregister;shuffle : pmmshuffle); override;
  86. { Transform unsupported methods into internal errors }
  87. procedure a_bit_scan_reg_reg(list: TAsmList; reverse: boolean; size: TCGSize; src, dst: TRegister); override;
  88. { try to generate an optimized 32 bit multiplication by a constant, returns true if code was generated }
  89. function try_optimized_mul32_const_reg_reg(list: TAsmList; a: tcgint; src, dst: tregister) : boolean;
  90. { clear out potential overflow bits from 8 or 16 bit operations }
  91. { the upper 24/16 bits of a register after an operation }
  92. procedure maybeadjustresult(list: TAsmList; op: TOpCg; size: tcgsize; dst: tregister);
  { returns the symbol to branch to for a call to s on Darwin
    (used by a_call_name when the target is system_arm_darwin) }
  93. function get_darwin_call_stub(const s: string; weak: boolean): tasmsymbol;
  94. end;
  { tcgarm descendant that adds the register allocator setup and its own
    constant/memory load routines. The implementations in this unit emit
    MOV/MVN/ORR/BIC/LDR sequences. }
  95. tarmcgarm = class(tcgarm)
  96. procedure init_register_allocators;override;
  97. procedure done_register_allocators;override;
  98. procedure a_load_const_reg(list : TAsmList; size: tcgsize; a : tcgint;reg : tregister);override;
  99. procedure a_load_ref_reg(list : TAsmList; fromsize, tosize : tcgsize;const Ref : treference;reg : tregister);override;
  100. end;
  { 64 bit operations for ARM, derived from tcg64f32. 64 bit values are
    passed as tregister64. }
  101. tcg64farm = class(tcg64f32)
  102. procedure a_op64_reg_reg(list : TAsmList;op:TOpCG;size : tcgsize;regsrc,regdst : tregister64);override;
  103. procedure a_op64_const_reg(list : TAsmList;op:TOpCG;size : tcgsize;value : int64;reg : tregister64);override;
  104. procedure a_op64_const_reg_reg(list: TAsmList;op:TOpCG;size : tcgsize;value : int64;regsrc,regdst : tregister64);override;
  105. procedure a_op64_reg_reg_reg(list: TAsmList;op:TOpCG;size : tcgsize;regsrc1,regsrc2,regdst : tregister64);override;
  { variants which additionally report the overflow location in ovloc }
  106. procedure a_op64_const_reg_reg_checkoverflow(list: TAsmList;op:TOpCG;size : tcgsize;value : int64;regsrc,regdst : tregister64;setflags : boolean;var ovloc : tlocation);override;
  107. procedure a_op64_reg_reg_reg_checkoverflow(list: TAsmList;op:TOpCG;size : tcgsize;regsrc1,regsrc2,regdst : tregister64;setflags : boolean;var ovloc : tlocation);override;
  { moves between a 64 bit integer register pair and a multimedia register }
  108. procedure a_loadmm_intreg64_reg(list: TAsmList; mmsize: tcgsize; intreg: tregister64; mmreg: tregister);override;
  109. procedure a_loadmm_reg_intreg64(list: TAsmList; mmsize: tcgsize; mmreg: tregister; intreg: tregister64);override;
  110. end;
  { tcgarm descendant for Thumb-2 (going by the class name). It replaces the
    register allocator setup, calls through a register, constant and memory
    loads, the overflow checking arithmetic, flag materialisation, procedure
    entry/exit, handle_load_store and the multimedia register moves. }
  111. Tthumb2cgarm = class(tcgarm)
  112. procedure init_register_allocators;override;
  113. procedure done_register_allocators;override;
  114. procedure a_call_reg(list : TAsmList;reg: tregister);override;
  115. procedure a_load_const_reg(list : TAsmList; size: tcgsize; a : tcgint;reg : tregister);override;
  116. procedure a_load_ref_reg(list : TAsmList; fromsize, tosize : tcgsize;const Ref : treference;reg : tregister);override;
  117. procedure a_op_const_reg_reg_checkoverflow(list: TAsmList; op: TOpCg; size: tcgsize; a: tcgint; src, dst: tregister;setflags : boolean;var ovloc : tlocation);override;
  118. procedure a_op_reg_reg_reg_checkoverflow(list: TAsmList; op: TOpCg; size: tcgsize; src1, src2, dst: tregister;setflags : boolean;var ovloc : tlocation);override;
  119. procedure g_flags2reg(list: TAsmList; size: TCgSize; const f: TResFlags; reg: TRegister); override;
  120. procedure g_proc_entry(list : TAsmList;localsize : longint;nostackframe:boolean);override;
  121. procedure g_proc_exit(list : TAsmList;parasize : longint;nostackframe:boolean); override;
  { overrides the virtual method introduced in tcgarm }
  122. function handle_load_store(list:TAsmList;op: tasmop;oppostfix : toppostfix;reg:tregister;ref: treference):treference; override;
  123. procedure a_loadmm_reg_reg(list: TAsmList; fromsize, tosize : tcgsize;reg1, reg2: tregister;shuffle : pmmshuffle); override;
  124. procedure a_loadmm_ref_reg(list: TAsmList; fromsize, tosize : tcgsize;const ref: treference; reg: tregister;shuffle : pmmshuffle); override;
  125. procedure a_loadmm_reg_ref(list: TAsmList; fromsize, tosize : tcgsize;reg: tregister; const ref: treference;shuffle : pmmshuffle); override;
  126. procedure a_loadmm_intreg_reg(list: TAsmList; fromsize, tosize : tcgsize;intreg, mmreg: tregister; shuffle: pmmshuffle); override;
  127. procedure a_loadmm_reg_intreg(list: TAsmList; fromsize, tosize : tcgsize;mmreg, intreg: tregister; shuffle : pmmshuffle); override;
  128. end;
  { 64 bit code generator used together with Tthumb2cgarm (going by the
    name); only a_op64_reg_reg differs from tcg64farm. }
  129. tthumb2cg64farm = class(tcg64farm)
  130. procedure a_op64_reg_reg(list : TAsmList;op:TOpCG;size : tcgsize;regsrc,regdst : tregister64);override;
  131. end;
  132. const
  { translation table from the generic compare operators (topcmp) to ARM
    condition codes; the entries follow the declaration order of topcmp }
  133. OpCmp2AsmCond : Array[topcmp] of TAsmCond = (C_NONE,C_EQ,C_GT,
  134. C_LT,C_GE,C_LE,C_NE,C_LS,C_CC,C_CS,C_HI);
  { NOTE(review): presumably the stack page size assumed on Windows targets
    (stack probing) - not used in the visible part of the unit, confirm }
  135. winstackpagesize = 4096;
  { returns the instruction postfix (PF_S/PF_D/PF_E) for a float type }
  136. function get_fpu_postfix(def : tdef) : toppostfix;
  { exported without parameters; its implementation is not in this part of
    the unit }
  137. procedure create_codegen;
  138. implementation
  139. uses
  140. globals,verbose,systems,cutils,
  141. aopt,aoptcpu,
  142. fmodule,
  143. symconst,symsym,symtable,
  144. tgobj,
  145. procinfo,cpupi,
  146. paramgr;
  { Returns the instruction postfix matching the precision of the float
    type def:
      s32real -> PF_S, s64real -> PF_D, s80real -> PF_E.
    Any other float type ends in internalerror(200401272), a def which is
    not a floatdef at all in internalerror(200401271). }
  function get_fpu_postfix(def : tdef) : toppostfix;
    begin
      if def.typ<>floatdef then
        internalerror(200401271)
      else
        case tfloatdef(def).floattype of
          s32real:
            result:=PF_S;
          s64real:
            result:=PF_D;
          s80real:
            result:=PF_E;
          else
            internalerror(200401272);
        end;
    end;
  { Creates the integer, FPU and multimedia register allocators.
    The order of the registers inside each list is the order handed to the
    allocator and is therefore kept exactly as it was. }
  procedure tarmcgarm.init_register_allocators;
    begin
      inherited init_register_allocators;

      { integer registers
        currently, we always save R14, so we can use it }
      if target_info.system=system_arm_darwin then
        { r7 is not available on Darwin, it's used as frame pointer (always,
          for backtrace support -- also in gcc/clang -> R11 can be used).
          r9 is volatile }
        rg[R_INTREGISTER]:=trgintcpu.create(R_INTREGISTER,R_SUBWHOLE,
            [RS_R0,RS_R1,RS_R2,RS_R3,RS_R9,RS_R12,RS_R4,RS_R5,RS_R6,RS_R8,
             RS_R10,RS_R11,RS_R14],first_int_imreg,[])
      else
        begin
          { R11 is only handed out when there is a current procedure whose
            frame pointer is some other register }
          if not(assigned(current_procinfo)) or
             (current_procinfo.framepointer=NR_R11) then
            rg[R_INTREGISTER]:=trgintcpu.create(R_INTREGISTER,R_SUBWHOLE,
                [RS_R0,RS_R1,RS_R2,RS_R3,RS_R12,RS_R4,RS_R5,RS_R6,RS_R7,RS_R8,
                 RS_R9,RS_R10,RS_R14],first_int_imreg,[])
          else
            rg[R_INTREGISTER]:=trgintcpu.create(R_INTREGISTER,R_SUBWHOLE,
                [RS_R0,RS_R1,RS_R2,RS_R3,RS_R12,RS_R4,RS_R5,RS_R6,RS_R7,RS_R8,
                 RS_R9,RS_R10,RS_R11,RS_R14],first_int_imreg,[]);
        end;

      { FPU registers F0..F7 }
      rg[R_FPUREGISTER]:=trgcpu.create(R_FPUREGISTER,R_SUBNONE,
          [RS_F0,RS_F1,RS_F2,RS_F3,RS_F4,RS_F5,RS_F6,RS_F7],first_fpu_imreg,[]);

      { The register allocator currently cannot deal with multiple
        non-overlapping subregs per register, so we can only use
        half the single precision registers for now (as sub registers of the
        double precision ones). }
      if current_settings.fputype<>fpu_vfpv3 then
        rg[R_MMREGISTER]:=trgcpu.create(R_MMREGISTER,R_SUBFD,
            [RS_D0,RS_D1,RS_D2,RS_D3,RS_D4,RS_D5,RS_D6,RS_D7,
             RS_D8,RS_D9,RS_D10,RS_D11,RS_D12,RS_D13,RS_D14,RS_D15],first_mm_imreg,[])
      else
        { vfpv3: D16..D31 are available as well and are listed before D8..D15 }
        rg[R_MMREGISTER]:=trgcpu.create(R_MMREGISTER,R_SUBFD,
            [RS_D0,RS_D1,RS_D2,RS_D3,RS_D4,RS_D5,RS_D6,RS_D7,
             RS_D16,RS_D17,RS_D18,RS_D19,RS_D20,RS_D21,RS_D22,RS_D23,
             RS_D24,RS_D25,RS_D26,RS_D27,RS_D28,RS_D29,RS_D30,RS_D31,
             RS_D8,RS_D9,RS_D10,RS_D11,RS_D12,RS_D13,RS_D14,RS_D15
            ],first_mm_imreg,[]);
    end;
  { Releases the three register allocators created by
    init_register_allocators and then runs the inherited clean-up. }
  procedure tarmcgarm.done_register_allocators;
    begin
      { integer, FPU and multimedia allocator, in the order of creation }
      rg[R_INTREGISTER].free;
      rg[R_FPUREGISTER].free;
      rg[R_MMREGISTER].free;

      inherited done_register_allocators;
    end;
  { Loads the constant a into the integer register reg.

    size must be one of the 8, 16 or 32 bit integer sizes, anything else
    ends in internalerror(2002090902).

    The cheapest matching form is used, tried in this order:
      1. mov reg,#a          a is a shifter constant
      2. mvn reg,#not(a)     not(a) is a shifter constant
      3. mov + orr           a can be split into two shifter constants
      4. mvn + bic           not(a) can be split into two shifter constants
      5. ldr reg,<label>     the value is stored as data and loaded PC relative }
  procedure tarmcgarm.a_load_const_reg(list : TAsmList; size: tcgsize; a : tcgint;reg : tregister);
    var
      rot : byte;
      part1, part2 : DWord;

    { stores a as 32 bit item behind a fresh label in the local data of the
      current procedure and loads it through a PC based reference }
    procedure load_from_local_data;
      var
        datalabel : tasmlabel;
        dataref : treference;
      begin
        reference_reset(dataref,4);
        current_asmdata.getjumplabel(datalabel);
        cg.a_label(current_procinfo.aktlocaldata,datalabel);
        dataref.symboldata:=current_procinfo.aktlocaldata.last;
        current_procinfo.aktlocaldata.concat(tai_const.Create_32bit(longint(a)));
        dataref.symbol:=datalabel;
        dataref.base:=NR_PC;
        list.concat(taicpu.op_reg_ref(A_LDR,reg,dataref));
      end;

    begin
      if not(size in [OS_8,OS_S8,OS_16,OS_S16,OS_32,OS_S32]) then
        internalerror(2002090902);

      if is_shifter_const(a,rot) then
        begin
          list.concat(taicpu.op_reg_const(A_MOV,reg,a));
        end
      else if is_shifter_const(not(a),rot) then
        begin
          list.concat(taicpu.op_reg_const(A_MVN,reg,not(a)));
        end
      else if split_into_shifter_const(a,part1,part2) then
        begin
          { loading of constants with mov and orr }
          list.concat(taicpu.op_reg_const(A_MOV,reg,part1));
          list.concat(taicpu.op_reg_reg_const(A_ORR,reg,reg,part2));
        end
      else if split_into_shifter_const(not(a),part1,part2) then
        begin
          { loading of constants with mvn and bic }
          list.concat(taicpu.op_reg_const(A_MVN,reg,part1));
          list.concat(taicpu.op_reg_reg_const(A_BIC,reg,reg,part2));
        end
      else
        load_from_local_data;
    end;
  { Loads a value of size fromsize from the memory reference Ref into the
    integer register reg.

    If the reference is only byte or halfword aligned (ref.alignment 1 or 2)
    and the access is wider than that alignment, the value is put together
    from several narrower loads which are combined with ORR and a shifted
    operand. In all other cases a single LDR with the matching postfix is
    emitted through handle_load_store.

    Sizes other than 8, 16 and 32 bit integers end in
    InternalError(200308297). }
  247. procedure tarmcgarm.a_load_ref_reg(list : TAsmList; fromsize, tosize : tcgsize;const Ref : treference;reg : tregister);
  248. var
  249. oppostfix:toppostfix;
  250. usedtmpref: treference;
  251. tmpreg,tmpreg2 : tregister;
  252. so : tshifterop;
  253. dir : integer;
  254. begin
  { if the source is at least as wide as the destination, only the
    destination size is read from memory }
  255. if (TCGSize2Size[FromSize] >= TCGSize2Size[ToSize]) then
  256. FromSize := ToSize;
  257. case FromSize of
  258. { select the load postfix from the size and signedness of the memory operand }
  259. OS_8:
  260. oppostfix:=PF_B;
  261. OS_S8:
  262. oppostfix:=PF_SB;
  263. OS_16:
  264. oppostfix:=PF_H;
  265. OS_S16:
  266. oppostfix:=PF_SH;
  267. OS_32,
  268. OS_S32:
  269. oppostfix:=PF_None;
  270. else
  271. InternalError(200308297);
  272. end;
  { under-aligned access: build the value from smaller loads }
  273. if (ref.alignment in [1,2]) and (ref.alignment<tcgsize2size[fromsize]) then
  274. begin
  { dir is the step between successive partial loads; the least
    significant part is always loaded first, so on big endian targets the
    walk starts at the highest address and goes downwards }
  275. if target_info.endian=endian_big then
  276. dir:=-1
  277. else
  278. dir:=1;
  279. case FromSize of
  280. OS_16,OS_S16:
  281. begin
  282. { only complicated references need an extra loadaddr }
  283. if assigned(ref.symbol) or
  284. (ref.index<>NR_NO) or
  285. (ref.offset<-4095) or
  286. (ref.offset>4094) or
  287. { sometimes the compiler reused registers }
  288. (reg=ref.index) or
  289. (reg=ref.base) then
  290. begin
  291. tmpreg2:=getintregister(list,OS_INT);
  292. a_loadaddr_ref_reg(list,ref,tmpreg2);
  293. reference_reset_base(usedtmpref,tmpreg2,0,ref.alignment);
  294. end
  295. else
  296. usedtmpref:=ref;
  297. if target_info.endian=endian_big then
  298. inc(usedtmpref.offset,1);
  299. shifterop_reset(so);so.shiftmode:=SM_LSL;so.shiftimm:=8;
  300. tmpreg:=getintregister(list,OS_INT);
  { low byte goes directly into reg }
  301. a_internal_load_ref_reg(list,OS_8,OS_8,usedtmpref,reg);
  302. inc(usedtmpref.offset,dir);
  { high byte: loaded as signed byte when the halfword is signed }
  303. if FromSize=OS_16 then
  304. a_internal_load_ref_reg(list,OS_8,OS_8,usedtmpref,tmpreg)
  305. else
  306. a_internal_load_ref_reg(list,OS_S8,OS_S8,usedtmpref,tmpreg);
  { reg:=reg or (tmpreg shl 8) }
  307. list.concat(taicpu.op_reg_reg_reg_shifterop(A_ORR,reg,reg,tmpreg,so));
  308. end;
  309. OS_32,OS_S32:
  310. begin
  311. tmpreg:=getintregister(list,OS_INT);
  312. { only complicated references need an extra loadaddr }
  313. if assigned(ref.symbol) or
  314. (ref.index<>NR_NO) or
  315. (ref.offset<-4095) or
  316. (ref.offset>4092) or
  317. { sometimes the compiler reused registers }
  318. (reg=ref.index) or
  319. (reg=ref.base) then
  320. begin
  321. tmpreg2:=getintregister(list,OS_INT);
  322. a_loadaddr_ref_reg(list,ref,tmpreg2);
  323. reference_reset_base(usedtmpref,tmpreg2,0,ref.alignment);
  324. end
  325. else
  326. usedtmpref:=ref;
  327. shifterop_reset(so);so.shiftmode:=SM_LSL;
  328. if ref.alignment=2 then
  329. begin
  { halfword aligned: two 16 bit loads, reg:=low or (high shl 16) }
  330. if target_info.endian=endian_big then
  331. inc(usedtmpref.offset,2);
  332. a_internal_load_ref_reg(list,OS_16,OS_16,usedtmpref,reg);
  333. inc(usedtmpref.offset,dir*2);
  334. a_internal_load_ref_reg(list,OS_16,OS_16,usedtmpref,tmpreg);
  335. so.shiftimm:=16;
  336. list.concat(taicpu.op_reg_reg_reg_shifterop(A_ORR,reg,reg,tmpreg,so));
  337. end
  338. else
  339. begin
  { byte aligned: four 8 bit loads; byte 0 lands in reg, bytes 1 and 3
    pass through tmpreg, byte 2 through tmpreg2, and they are merged with
    shifts of 8, 16 and 24 }
  340. tmpreg2:=getintregister(list,OS_INT);
  341. if target_info.endian=endian_big then
  342. inc(usedtmpref.offset,3);
  343. a_internal_load_ref_reg(list,OS_8,OS_8,usedtmpref,reg);
  344. inc(usedtmpref.offset,dir);
  345. a_internal_load_ref_reg(list,OS_8,OS_8,usedtmpref,tmpreg);
  346. inc(usedtmpref.offset,dir);
  347. a_internal_load_ref_reg(list,OS_8,OS_8,usedtmpref,tmpreg2);
  348. so.shiftimm:=8;
  349. list.concat(taicpu.op_reg_reg_reg_shifterop(A_ORR,reg,reg,tmpreg,so));
  { tmpreg has been consumed by the ORR above and is reused for byte 3 }
  350. inc(usedtmpref.offset,dir);
  351. a_internal_load_ref_reg(list,OS_8,OS_8,usedtmpref,tmpreg);
  352. so.shiftimm:=16;
  353. list.concat(taicpu.op_reg_reg_reg_shifterop(A_ORR,reg,reg,tmpreg2,so));
  354. so.shiftimm:=24;
  355. list.concat(taicpu.op_reg_reg_reg_shifterop(A_ORR,reg,reg,tmpreg,so));
  356. end;
  357. end
  358. else
  359. handle_load_store(list,A_LDR,oppostfix,reg,ref);
  360. end;
  361. end
  362. else
  363. handle_load_store(list,A_LDR,oppostfix,reg,ref);
  { NOTE(review): a signed byte loaded for a 16 bit destination is passed
    through a_load_reg_reg(OS_16,OS_32) afterwards, presumably to clear the
    bits above bit 15 which the sign extending load has set - confirm }
  364. if (fromsize=OS_S8) and (tosize = OS_16) then
  365. a_load_reg_reg(list,OS_16,OS_32,reg,reg);
  366. end;
  { Passes the constant a as parameter described by paraloc.
    paraloc must be a simple location (check_simple_location); it is
    allocated and the constant is then written either to the parameter
    register or to the parameter's memory slot. Any other location kind
    ends in internalerror(2002081101). }
  procedure tcgarm.a_load_const_cgpara(list : TAsmList;size : tcgsize;a : tcgint;const paraloc : TCGPara);
    var
      slot : treference;
    begin
      paraloc.check_simple_location;
      paramanager.allocparaloc(list,paraloc.location);
      case paraloc.location^.loc of
        LOC_REFERENCE:
          begin
            { the register stored in reference.index serves as base register }
            reference_reset(slot,paraloc.alignment);
            slot.offset:=paraloc.location^.reference.offset;
            slot.base:=paraloc.location^.reference.index;
            a_load_const_ref(list,size,a,slot);
          end;
        LOC_REGISTER,
        LOC_CREGISTER:
          a_load_const_reg(list,size,a,paraloc.location^.register);
        else
          internalerror(2002081101);
      end;
    end;
  { Passes the value stored at reference r as parameter described by
    paraloc. The parameter may be split over several locations; they are
    processed in list order and the source reference advances by the size
    of every location handled.

    Internal errors:
      2005010710  a memory location of a size other than OS_32 is followed
                  by another location
      2002072801  FPU register location of a size other than OS_F32/OS_F64
      2002081103  unsupported location kind }
  procedure tcgarm.a_load_ref_cgpara(list : TAsmList;size : tcgsize;const r : treference;const paraloc : TCGPara);
    var
      srcref, dstref : treference;
      curloc : pcgparalocation;
      remaining : aint;
    begin
      srcref:=r;
      remaining:=paraloc.intsize;
      curloc:=paraloc.location;
      while assigned(curloc) do
        begin
          paramanager.allocparaloc(list,curloc);
          case curloc^.loc of
            LOC_REGISTER,
            LOC_CREGISTER:
              a_load_ref_reg(list,curloc^.size,curloc^.size,srcref,curloc^.register);
            LOC_FPUREGISTER,
            LOC_CFPUREGISTER:
              begin
                if curloc^.size in [OS_F32,OS_F64] then
                  a_loadfpu_ref_reg(list,curloc^.size,curloc^.size,srcref,curloc^.register)
                else
                  internalerror(2002072801);
              end;
            LOC_REFERENCE:
              begin
                reference_reset_base(dstref,curloc^.reference.index,curloc^.reference.offset,paraloc.alignment);
                { doubles in softemu mode have a strange order of registers and references }
                if curloc^.size<>OS_32 then
                  begin
                    { everything that is left is copied at once, so this
                      has to be the last location }
                    g_concatcopy(list,srcref,dstref,remaining);
                    if assigned(curloc^.next) then
                      internalerror(2005010710);
                  end
                else
                  g_concatcopy(list,srcref,dstref,4);
              end;
            LOC_VOID:
              begin
                { nothing to do }
              end;
            else
              internalerror(2002081103);
          end;
          { advance to the part of the source belonging to the next location }
          inc(srcref.offset,tcgsize2size[curloc^.size]);
          dec(remaining,tcgsize2size[curloc^.size]);
          curloc:=curloc^.next;
        end;
    end;
  { Passes the address of reference r as parameter described by paraloc.
    paraloc must be a simple location. For a register location the address
    is computed straight into that register; for a memory location it is
    computed into a temporary register and stored from there. Other
    location kinds end in internalerror(2002080701). }
  procedure tcgarm.a_loadaddr_ref_cgpara(list : TAsmList;const r : treference;const paraloc : TCGPara);
    var
      slot : treference;
      addrreg : tregister;
    begin
      paraloc.check_simple_location;
      paramanager.allocparaloc(list,paraloc.location);
      case paraloc.location^.loc of
        LOC_REFERENCE:
          begin
            reference_reset(slot,paraloc.alignment);
            slot.offset:=paraloc.location^.reference.offset;
            slot.base:=paraloc.location^.reference.index;
            { address -> scratch register -> parameter slot }
            addrreg:=getintregister(list,OS_ADDR);
            a_loadaddr_ref_reg(list,r,addrreg);
            a_load_reg_ref(list,OS_ADDR,OS_ADDR,addrreg,slot);
          end;
        LOC_REGISTER,
        LOC_CREGISTER:
          a_loadaddr_ref_reg(list,r,paraloc.location^.register);
        else
          internalerror(2002080701);
      end;
    end;
  { Emits a call to the routine with assembler name s.
    BLX is used if the selected cpu type has CPUARM_HAS_BLX_LABEL,
    otherwise BL. On Darwin the branch goes to the symbol returned by
    get_darwin_call_stub; elsewhere it goes to s itself, as weak reference
    if weak is set. The current procedure is marked with pi_do_call. }
  procedure tcgarm.a_call_name(list : TAsmList;const s : string; weak: boolean);
    var
      callop : tasmop;
      target : tasmsymbol;
    begin
      { check not really correct: should only be used for non-Thumb cpus }
      if not(CPUARM_HAS_BLX_LABEL in cpu_capabilities[current_settings.cputype]) then
        callop:=A_BL
      else
        callop:=A_BLX;

      if target_info.system=system_arm_darwin then
        target:=get_darwin_call_stub(s,weak)
      else if weak then
        target:=current_asmdata.WeakRefAsmSymbol(s)
      else
        target:=current_asmdata.RefAsmSymbol(s);
      list.concat(taicpu.op_sym(callop,target));
  {
          the compiler does not properly set this flag anymore in pass 1, and
          for now we only need it after pass 2 (I hope) (JM)
            if not(pi_do_call in current_procinfo.flags) then
              internalerror(2003060703);
  }
      include(current_procinfo.flags,pi_do_call);
    end;
  { Emits a call to the address held in reg.
    With CPUARM_HAS_BLX a single "blx reg" is generated; without it the
    sequence "mov r14,pc; mov pc,reg" is used.
    The current procedure is marked with pi_do_call. }
  procedure tcgarm.a_call_reg(list : TAsmList;reg: tregister);
    begin
      { check not really correct: should only be used for non-Thumb cpus }
      if CPUARM_HAS_BLX in cpu_capabilities[current_settings.cputype] then
        list.concat(taicpu.op_reg(A_BLX, reg))
      else
        begin
          list.concat(taicpu.op_reg_reg(A_MOV,NR_R14,NR_PC));
          list.concat(taicpu.op_reg_reg(A_MOV,NR_PC,reg));
        end;
  {
          the compiler does not properly set this flag anymore in pass 1, and
          for now we only need it after pass 2 (I hope) (JM)
            if not(pi_do_call in current_procinfo.flags) then
              internalerror(2003060703);
  }
      include(current_procinfo.flags,pi_do_call);
    end;
  { Emits a call through a memory reference: the target address is loaded
    from ref into R12, which is allocated for the duration of the call, and
    the call itself is done by a_call_reg. }
  procedure tcgarm.a_call_ref(list : TAsmList;ref: treference);
    begin
      a_reg_alloc(list,NR_R12);
      { fetch the address and branch to it }
      a_load_ref_reg(list,OS_ADDR,OS_ADDR,ref,NR_R12);
      a_call_reg(list,NR_R12);
      a_reg_dealloc(list,NR_R12);

      include(current_procinfo.flags,pi_do_call);
    end;
  { reg := reg <Op> a
    Two operand form, expressed through the three operand routine with reg
    as both source and destination. }
  procedure tcgarm.a_op_const_reg(list : TAsmList; Op: TOpCG; size: TCGSize; a: tcgint; reg: TRegister);
    begin
      a_op_const_reg_reg(list,
                         op,size,a,
                         reg,   { source }
                         reg);  { destination }
    end;
  { dst := dst <Op> src, respectively dst := <Op> src for the unary
    operations.
      OP_NEG  rsb dst,src,#0
      OP_NOT  mvn dst,src; for 8 and 16 bit sizes the operand is first
              shifted to the top of the register and the result shifted
              back (arithmetically for the signed sizes), which leaves the
              bits above the operand size defined
      other   forwarded to a_op_reg_reg_reg
    NOTE(review): the forwarded call passes OS_32 instead of size - confirm
    that this is intended. }
  procedure tcgarm.a_op_reg_reg(list : TAsmList; Op: TOpCG; size: TCGSize; src, dst: TRegister);
    var
      shift : tshifterop;
    begin
      case op of
        OP_NEG:
          begin
            list.concat(taicpu.op_reg_reg_const(A_RSB,dst,src,0));
          end;
        OP_NOT:
          begin
            if not(size in [OS_8, OS_16, OS_S8, OS_S16]) then
              list.concat(taicpu.op_reg_reg(A_MVN,dst,src))
            else
              begin
                shifterop_reset(shift);
                shift.shiftmode:=SM_LSL;
                { 8 bit operands are moved up by 24 bits, 16 bit ones by 16 }
                if size in [OS_8, OS_S8] then
                  shift.shiftimm:=24
                else
                  shift.shiftimm:=16;
                list.concat(taicpu.op_reg_reg_shifterop(A_MVN,dst,src,shift));
                {Using a shift here allows this to be folded into another instruction}
                if size in [OS_S8, OS_S16] then
                  shift.shiftmode:=SM_ASR
                else
                  shift.shiftmode:=SM_LSR;
                list.concat(taicpu.op_reg_reg_shifterop(A_MOV,dst,dst,shift));
              end;
          end;
        else
          a_op_reg_reg_reg(list,op,OS_32,src,dst,dst);
      end;
    end;
  541. const
  { ARM opcode for each generic operation (TOpCG), indexed in the
    declaration order of TOpCG. A_NONE marks operations without a direct
    entry in this table; the shift and rotate operations are emitted as MOV
    with a shifter operand by a_op_const_reg_reg_checkoverflow. }
  542. op_reg_reg_opcg2asmop: array[TOpCG] of tasmop =
  543. (A_NONE,A_MOV,A_ADD,A_AND,A_NONE,A_NONE,A_MUL,A_MUL,A_NONE,A_NONE,A_ORR,
  544. A_NONE,A_NONE,A_NONE,A_SUB,A_EOR,A_NONE,A_NONE);
  { dst := src <op> a
    Same as a_op_const_reg_reg_checkoverflow with setflags=false; the
    overflow location it reports is discarded. }
  procedure tcgarm.a_op_const_reg_reg(list: TAsmList; op: TOpCg;
    size: tcgsize; a: tcgint; src, dst: tregister);
    var
      unused_ovloc : tlocation;
    begin
      a_op_const_reg_reg_checkoverflow(list,op,size,a,src,dst,
                                       false,unused_ovloc);
    end;
  { Three register operation on src1, src2 and dst.
    Same as a_op_reg_reg_reg_checkoverflow with setflags=false; the
    overflow location it reports is discarded. }
  procedure tcgarm.a_op_reg_reg_reg(list: TAsmList; op: TOpCg;
    size: tcgsize; src1, src2, dst: tregister);
    var
      unused_ovloc : tlocation;
    begin
      a_op_reg_reg_reg_checkoverflow(list,op,size,src1,src2,dst,
                                     false,unused_ovloc);
    end;
  { Maps a generic shift/rotate operation to the shift mode of an ARM
    shifter operand. Operations which are not shifts or rotates end in
    internalerror(2012070501). }
  function opshift2shiftmode(op: TOpCg): tshiftmode;
    begin
      case op of
        OP_SHL:
          Result:=SM_LSL;
        OP_SHR:
          Result:=SM_LSR;
        OP_SAR:
          Result:=SM_ASR;
        { a rotate left is also mapped to SM_ROR;
          a_op_const_reg_reg_checkoverflow then uses 32-a as rotate amount }
        OP_ROL,
        OP_ROR:
          Result:=SM_ROR;
        else
          internalerror(2012070501);
      end;
    end;
  { Tries to replace the multiplication dst:=src*a by a short sequence of
    MOV/ADD/SUB/RSB instructions with shifted operands.

    Returns true if such a sequence (or a plain move, constant load or
    negate for a=1, a=0 and a=-1) has been added to list, false if nothing
    was generated and the caller has to emit a real multiplication.

    NOTE(review): the multi instruction sequences read src after dst has
    been written, so src and dst presumably must be different registers -
    verify against the callers. }
  570. function tcgarm.try_optimized_mul32_const_reg_reg(list: TAsmList; a: tcgint; src, dst: tregister) : boolean;
  571. var
  572. multiplier : dword;
  573. power : longint;
  574. shifterop : tshifterop;
  575. bitsset : byte;
  576. negative : boolean;
  577. first : boolean;
  578. b,
  579. cycles : byte;
  580. maxeffort : byte;
  581. begin
  582. result:=true;
  583. cycles:=0;
  584. negative:=a<0;
  585. shifterop.rs:=NR_NO;
  586. shifterop.shiftmode:=SM_LSL;
  { a negative factor costs one more instruction: the final rsb dst,dst,#0 }
  587. if negative then
  588. inc(cycles);
  { work on the absolute value; bit 0 is not counted, see "add up" below }
  589. multiplier:=dword(abs(a));
  590. bitsset:=popcnt(multiplier and $fffffffe);
  591. { heuristics to estimate how many instructions are reasonable to replace the mul,
  592. this is currently based on XScale timings }
  593. { in the simplest case, we need a mov to load the constant and a mul to carry out the
  594. actual multiplication, this requires min. 1+4 cycles
  595. because the first shift imm. might cause a stall and because we need more instructions
  596. when replacing the mul we generate max. 3 instructions to replace this mul }
  597. maxeffort:=3;
  598. { if the constant is not a shifter op, we need either some mov/mvn/bic/or sequence or
  599. a ldr, so generating one more operation to replace this is beneficial }
  600. if not(is_shifter_const(dword(a),b)) and not(is_shifter_const(not(dword(a)),b)) then
  601. inc(maxeffort);
  602. { if the upper 5 bits are all set or clear, mul is one cycle faster }
  603. if ((dword(a) and $f8000000)=0) or ((dword(a) and $f8000000)=$f8000000) then
  604. dec(maxeffort);
  605. { if the upper 17 bits are all set or clear, mul is another cycle faster }
  606. if ((dword(a) and $ffff8000)=0) or ((dword(a) and $ffff8000)=$ffff8000) then
  607. dec(maxeffort);
  608. { most simple cases }
  609. if a=1 then
  610. a_load_reg_reg(list,OS_32,OS_32,src,dst)
  611. else if a=0 then
  612. a_load_const_reg(list,OS_32,0,dst)
  613. else if a=-1 then
  614. a_op_reg_reg(list,OP_NEG,OS_32,src,dst)
  615. { add up ?
  616. basically, one add is needed for each bit being set in the constant factor
  617. however, the least significant bit is for free, it can be hidden in the initial
  618. instruction
  619. }
  { taken only if the add sequence fits into the budget and does not need
    more bits than the "subtract from the next power of two" variant }
  620. else if (bitsset+cycles<=maxeffort) and
  621. (bitsset<=popcnt(dword(nextpowerof2(multiplier,power)-multiplier) and $fffffffe)) then
  622. begin
  623. first:=true;
  { every iteration handles the highest bit still set in multiplier }
  624. while multiplier<>0 do
  625. begin
  626. shifterop.shiftimm:=BsrDWord(multiplier);
  { bit 0 can only still be set in the first iteration (it is cleared
    right away): dst:=src+(src shl n) covers bit 0 and the highest bit }
  627. if odd(multiplier) then
  628. begin
  629. list.concat(taicpu.op_reg_reg_reg_shifterop(A_ADD,dst,src,src,shifterop));
  630. dec(multiplier);
  631. end
  632. else
  633. if first then
  634. list.concat(taicpu.op_reg_reg_shifterop(A_MOV,dst,src,shifterop))
  635. else
  636. list.concat(taicpu.op_reg_reg_reg_shifterop(A_ADD,dst,dst,src,shifterop));
  637. first:=false;
  { the highest bit is done }
  638. dec(multiplier,1 shl shifterop.shiftimm);
  639. end;
  640. if negative then
  641. list.concat(taicpu.op_reg_reg_const(A_RSB,dst,dst,0));
  642. end
  643. { subtract from the next greater power of two? }
  644. else if popcnt(dword(nextpowerof2(multiplier,power)-multiplier) and $fffffffe)+cycles+1<=maxeffort then
  645. begin
  646. first:=true;
  647. while multiplier<>0 do
  648. begin
  { first iteration: switch to the difference to the power of two (power
    was set by nextpowerof2 above) and start with src shl power;
    NOTE(review): 1 shl power presumably never reaches 32 here - confirm }
  649. if first then
  650. begin
  651. multiplier:=(1 shl power)-multiplier;
  652. shifterop.shiftimm:=power;
  653. end
  654. else
  655. shifterop.shiftimm:=BsrDWord(multiplier);
  { odd difference: rsb dst,src,src shl n gives (src shl n)-src and thereby
    covers bit 0 of the difference }
  656. if odd(multiplier) then
  657. begin
  658. list.concat(taicpu.op_reg_reg_reg_shifterop(A_RSB,dst,src,src,shifterop));
  659. dec(multiplier);
  660. end
  661. else
  662. if first then
  663. list.concat(taicpu.op_reg_reg_shifterop(A_MOV,dst,src,shifterop))
  664. else
  665. begin
  { remaining bits of the difference are subtracted one by one }
  666. list.concat(taicpu.op_reg_reg_reg_shifterop(A_SUB,dst,dst,src,shifterop));
  667. dec(multiplier,1 shl shifterop.shiftimm);
  668. end;
  669. first:=false;
  670. end;
  671. if negative then
  672. list.concat(taicpu.op_reg_reg_const(A_RSB,dst,dst,0));
  673. end
  674. else
  { no replacement within the budget }
  675. result:=false;
  676. end;
  { Emits code for "dst := src <op> a" where a is a constant.

    list     - instruction list the generated code is appended to
    op       - generic operation to perform
    size     - operand size
    a        - constant operand
    src, dst - source and destination registers
    setflags - true if the instruction has to update the condition flags
               (the cgsetflags field requests the same)
    ovloc    - receives LOC_FLAGS plus the overflow condition for unsigned
               add/sub when flags were requested, otherwise LOC_VOID }
  procedure tcgarm.a_op_const_reg_reg_checkoverflow(list: TAsmList; op: TOpCg; size: tcgsize; a: tcgint; src, dst: tregister;setflags : boolean;var ovloc : tlocation);
    var
      rot       : byte;
      hreg      : tregister;
      shiftop   : tshifterop;
      log2val   : longint;
      part1,
      part2     : DWord;
      wantflags,
      ismul     : boolean;
    begin
      ovloc.loc:=LOC_VOID;
      wantflags:=cgsetflags or setflags;

      { add/sub of a constant whose negation is a shifter constant is turned
        into the opposite operation; not done if the caller needs the flags }
      if {$ifopt R+}(a<>-2147483648) and{$endif} not setflags and is_shifter_const(-a,rot) then
        begin
          if op in [OP_ADD,OP_SUB] then
            begin
              if op=OP_ADD then
                op:=OP_SUB
              else
                op:=OP_ADD;
              a:=aint(dword(-a));
            end;
        end;

      ismul:=op in [OP_MUL,OP_IMUL];

      if is_shifter_const(a,rot) and not(ismul) then
        begin
          { the constant fits into the immediate field of the instruction }
          case op of
            OP_NEG,OP_NOT:
              internalerror(200308281);
            OP_SHL,
            OP_SHR,
            OP_ROL,
            OP_ROR,
            OP_SAR:
              begin
                if a>32 then
                  internalerror(200308294);
                if a=0 then
                  { shift by zero: plain register copy }
                  list.concat(taicpu.op_reg_reg(A_MOV,dst,src))
                else
                  begin
                    shifterop_reset(shiftop);
                    shiftop.shiftmode:=opshift2shiftmode(op);
                    { a left rotate is expressed through the complementary amount }
                    if op=OP_ROL then
                      shiftop.shiftimm:=32-a
                    else
                      shiftop.shiftimm:=a;
                    list.concat(taicpu.op_reg_reg_shifterop(A_MOV,dst,src,shiftop));
                  end;
              end;
            else
              begin
                if wantflags then
                  a_reg_alloc(list,NR_DEFAULTFLAGS);
                list.concat(setoppostfix(
                  taicpu.op_reg_reg_const(op_reg_reg_opcg2asmop[op],dst,src,a),
                  toppostfix(ord(wantflags)*ord(PF_S))));
                if wantflags and (size in [OS_8,OS_16,OS_32]) then
                  begin
                    ovloc.loc:=LOC_FLAGS;
                    case op of
                      OP_ADD:
                        ovloc.resflags:=F_CS;
                      OP_SUB:
                        ovloc.resflags:=F_CC;
                    end;
                  end;
              end;
          end;
        end
      else
        begin
          { there could be added some more sophisticated optimizations }
          if (op in [OP_MUL,OP_IMUL,OP_DIV,OP_IDIV]) and (a=1) then
            a_load_reg_reg(list,size,size,src,dst)
          else if ismul and (a=0) then
            a_load_const_reg(list,size,0,dst)
          else if (op in [OP_IMUL,OP_IDIV]) and (a=-1) then
            a_op_reg_reg(list,OP_NEG,size,src,dst)
          { done here rather than in the peephole optimizer because it
            saves a register }
          else if ismul and ispowerof2(a,log2val) and not(wantflags) then
            a_op_const_reg_reg(list,OP_SHL,size,log2val,src,dst)
          { for example : b=a*5 -> b=a*4+a with add instruction and shl }
          else if ismul and ispowerof2(a-1,log2val) and not(wantflags) then
            begin
              if log2val>32 then
                internalerror(200308296);
              shifterop_reset(shiftop);
              shiftop.shiftmode:=SM_LSL;
              shiftop.shiftimm:=log2val;
              list.concat(taicpu.op_reg_reg_reg_shifterop(A_ADD,dst,src,src,shiftop));
            end
          { for example : b=a*7 -> b=a*8-a with rsb instruction and shl }
          else if ismul and ispowerof2(a+1,log2val) and not(wantflags) then
            begin
              if log2val>32 then
                internalerror(201205181);
              shifterop_reset(shiftop);
              shiftop.shiftmode:=SM_LSL;
              shiftop.shiftimm:=log2val;
              list.concat(taicpu.op_reg_reg_reg_shifterop(A_RSB,dst,src,src,shiftop));
            end
          else if ismul and not(wantflags) and try_optimized_mul32_const_reg_reg(list,a,src,dst) then
            begin
              { the helper already emitted the code }
            end
          { x := y and 0 just clears the register; a mov x,#0 may allow
            easier optimizations down the line }
          else if (op=OP_AND) and (dword(a)=0) then
            list.concat(taicpu.op_reg_const(A_MOV,dst,0))
          { x := y and $FFFFFFFF just copies the register }
          else if (op=OP_AND) and (not(dword(a))=0) then
            list.concat(taicpu.op_reg_reg(A_MOV,dst,src))
          { BIC clears the given bits while AND keeps them, so the inverted
            mask widens the range of usable shifter constants }
          else if (op=OP_AND) and is_shifter_const(not(dword(a)),rot) then
            list.concat(taicpu.op_reg_reg_const(A_BIC,dst,src,not(dword(a))))
          else if (op=OP_AND) and split_into_shifter_const(not(dword(a)),part1,part2) then
            begin
              list.concat(taicpu.op_reg_reg_const(A_BIC,dst,src,part1));
              list.concat(taicpu.op_reg_reg_const(A_BIC,dst,dst,part2));
            end
          { apply the constant in two encodable halves }
          else if (op in [OP_ADD,OP_SUB,OP_OR]) and
                  not(wantflags) and
                  split_into_shifter_const(a,part1,part2) then
            begin
              list.concat(taicpu.op_reg_reg_const(op_reg_reg_opcg2asmop[op],dst,src,part1));
              list.concat(taicpu.op_reg_reg_const(op_reg_reg_opcg2asmop[op],dst,dst,part2));
            end
          else
            begin
              { general case: materialise the constant in a scratch register }
              hreg:=getintregister(list,size);
              a_load_const_reg(list,size,a,hreg);
              a_op_reg_reg_reg_checkoverflow(list,op,size,hreg,src,dst,setflags,ovloc);
            end;
        end;
      maybeadjustresult(list,op,size,dst);
    end;
  { Emits code for "dst := src2 <op> src1" on registers.

    list       - instruction list the generated code is appended to
    op         - generic operation; OP_NEG, OP_NOT, OP_DIV and OP_IDIV are
                 rejected with an internal error
    size       - operand size
    src1, src2 - source registers (src1 is the shift count for shifts)
    dst        - destination register
    setflags   - true if the condition flags must be updated (the cgsetflags
                 field requests the same)
    ovloc      - receives LOC_FLAGS/F_NE for flag-checked multiplications,
                 otherwise LOC_VOID }
  procedure tcgarm.a_op_reg_reg_reg_checkoverflow(list: TAsmList; op: TOpCg; size: tcgsize; src1, src2, dst: tregister;setflags : boolean;var ovloc : tlocation);
    var
      so : tshifterop;
      tmpreg,overflowreg : tregister;
      asmop : tasmop;
    begin
      ovloc.loc:=LOC_VOID;
      case op of
        OP_NEG,OP_NOT,
        OP_DIV,OP_IDIV:
          internalerror(200308281);
        OP_SHL,
        OP_SHR,
        OP_SAR,
        OP_ROR:
          begin
            if (op = OP_ROR) and not(size in [OS_32,OS_S32]) then
              internalerror(2008072801);
            { mov dst, src2, <shift> src1 }
            shifterop_reset(so);
            so.rs:=src1;
            so.shiftmode:=opshift2shiftmode(op);
            list.concat(taicpu.op_reg_reg_shifterop(A_MOV,dst,src2,so));
          end;
        OP_ROL:
          begin
            if not(size in [OS_32,OS_S32]) then
              internalerror(2008072801);
            { simulate ROL by ror'ing 32-value }
            tmpreg:=getintregister(list,OS_32);
            list.concat(taicpu.op_reg_reg_const(A_RSB,tmpreg,src1, 32));
            shifterop_reset(so);
            so.rs:=tmpreg;
            so.shiftmode:=SM_ROR;
            list.concat(taicpu.op_reg_reg_shifterop(A_MOV,dst,src2,so));
          end;
        OP_IMUL,
        OP_MUL:
          begin
            if cgsetflags or setflags then
              begin
                { long multiply; the high word lands in overflowreg and is
                  compared below to detect overflow }
                overflowreg:=getintregister(list,size);
                if op=OP_IMUL then
                  asmop:=A_SMULL
                else
                  asmop:=A_UMULL;
                { the arm doesn't allow that rd and rm are the same }
                if dst=src2 then
                  begin
                    if dst<>src1 then
                      list.concat(taicpu.op_reg_reg_reg_reg(asmop,dst,overflowreg,src1,src2))
                    else
                      begin
                        { dst=src1=src2: copy the operand into a fresh
                          register so rm differs from rd. The copy has to
                          go into tmpreg, otherwise tmpreg is used without
                          ever being written. }
                        tmpreg:=getintregister(list,size);
                        a_load_reg_reg(list,size,size,src2,tmpreg);
                        list.concat(taicpu.op_reg_reg_reg_reg(asmop,dst,overflowreg,tmpreg,src1));
                      end;
                  end
                else
                  list.concat(taicpu.op_reg_reg_reg_reg(asmop,dst,overflowreg,src2,src1));
                a_reg_alloc(list,NR_DEFAULTFLAGS);
                if op=OP_IMUL then
                  begin
                    { signed: the high word must equal the sign extension
                      of the low word (dst asr 31) }
                    shifterop_reset(so);
                    so.shiftmode:=SM_ASR;
                    so.shiftimm:=31;
                    list.concat(taicpu.op_reg_reg_shifterop(A_CMP,overflowreg,dst,so));
                  end
                else
                  { unsigned: the high word must be zero }
                  list.concat(taicpu.op_reg_const(A_CMP,overflowreg,0));
                ovloc.loc:=LOC_FLAGS;
                ovloc.resflags:=F_NE;
              end
            else
              begin
                { the arm doesn't allow that rd and rm are the same }
                if dst=src2 then
                  begin
                    if dst<>src1 then
                      list.concat(taicpu.op_reg_reg_reg(A_MUL,dst,src1,src2))
                    else
                      begin
                        { dst=src1=src2: see the flag-setting variant above }
                        tmpreg:=getintregister(list,size);
                        a_load_reg_reg(list,size,size,src2,tmpreg);
                        list.concat(taicpu.op_reg_reg_reg(A_MUL,dst,tmpreg,src1));
                      end;
                  end
                else
                  list.concat(taicpu.op_reg_reg_reg(A_MUL,dst,src2,src1));
              end;
          end;
        else
          begin
            { plain three operand data processing instruction }
            if cgsetflags or setflags then
              a_reg_alloc(list,NR_DEFAULTFLAGS);
            list.concat(setoppostfix(
              taicpu.op_reg_reg_reg(op_reg_reg_opcg2asmop[op],dst,src2,src1),toppostfix(ord(cgsetflags or setflags)*ord(PF_S))));
          end;
      end;
      maybeadjustresult(list,op,size,dst);
    end;
  { Emits the load/store instruction "op<oppostfix> reg, ref", first
    rewriting ref into a form the instruction can encode.

    list      - instruction list the generated code is appended to
    op        - load/store opcode
    oppostfix - size/sign postfix of the instruction
    reg       - register that is loaded or stored
    ref       - memory reference (passed by value, adjusted locally)

    Returns the reference that was actually used in the emitted instruction. }
  function tcgarm.handle_load_store(list:TAsmList;op: tasmop;oppostfix : toppostfix;reg:tregister;ref: treference):treference;
    var
      tmpreg : tregister;
    begin
      tmpreg:=NR_NO;

      { Be sure to have a base register }
      if (ref.base=NR_NO) then
        begin
          if ref.shiftmode<>SM_None then
            internalerror(200308294);
          ref.base:=ref.index;
          ref.index:=NR_NO;
        end;

      { absolute symbols can't be handled directly, we've to store the symbol reference
        in the text segment and access it pc relative

        For now, we assume that references where base or index equals to PC are already
        relative, all other references are assumed to be absolute and thus they need
        to be handled extra.

        A proper solution would be to change refoptions to a set and store the information
        if the symbol is absolute or relative there.
      }
      if (assigned(ref.symbol) and
          not(is_pc(ref.base)) and
          not(is_pc(ref.index))
         ) or
         { [#xxx] isn't a valid address operand }
         ((ref.base=NR_NO) and (ref.index=NR_NO)) or
         (ref.offset<-4095) or
         (ref.offset>4095) or
         ((oppostfix in [PF_SB,PF_H,PF_SH]) and
          ((ref.offset<-255) or
           (ref.offset>255)
          )
         ) or
         ((op in [A_LDF,A_STF,A_FLDS,A_FLDD,A_FSTS,A_FSTD]) and
          ((ref.offset<-1020) or
           (ref.offset>1020) or
           ((abs(ref.offset) mod 4)<>0)
          )
         ) then
        begin
          fixref(list,ref);
        end;

      { fold if there is base, index and offset, however, don't fold
        for vfp memory instructions because we later fold the index }
      if not(op in [A_FLDS,A_FLDD,A_FSTS,A_FSTD]) and
         (ref.base<>NR_NO) and (ref.index<>NR_NO) and (ref.offset<>0) then
        begin
          { no scratch register has been allocated up to this point, so
            always take a fresh one for base+offset }
          tmpreg:=getintregister(list,OS_ADDR);
          a_op_const_reg_reg(list,OP_ADD,OS_ADDR,ref.offset,ref.base,tmpreg);
          ref.base:=tmpreg;
          ref.offset:=0;
        end;

      { floating point operations have only limited references
        we expect here, that a base is already set }
      if (op in [A_LDF,A_STF,A_FLDS,A_FLDD,A_FSTS,A_FSTD]) and (ref.index<>NR_NO) then
        begin
          if ref.shiftmode<>SM_none then
            internalerror(200309121);
          if tmpreg<>NR_NO then
            begin
              if ref.base=tmpreg then
                begin
                  { fold the index into the scratch base; a negative
                    signindex means the index is subtracted }
                  if ref.signindex<0 then
                    list.concat(taicpu.op_reg_reg_reg(A_SUB,tmpreg,tmpreg,ref.index))
                  else
                    list.concat(taicpu.op_reg_reg_reg(A_ADD,tmpreg,tmpreg,ref.index));
                  ref.index:=NR_NO;
                end
              else
                begin
                  if ref.index<>tmpreg then
                    internalerror(200403161);
                  if ref.signindex<0 then
                    list.concat(taicpu.op_reg_reg_reg(A_SUB,tmpreg,ref.base,tmpreg))
                  else
                    list.concat(taicpu.op_reg_reg_reg(A_ADD,tmpreg,ref.base,tmpreg));
                  ref.base:=tmpreg;
                  ref.index:=NR_NO;
                end;
            end
          else
            begin
              tmpreg:=getintregister(list,OS_ADDR);
              { honour the sign of the index here as well, consistent with
                the two branches above }
              if ref.signindex<0 then
                list.concat(taicpu.op_reg_reg_reg(A_SUB,tmpreg,ref.base,ref.index))
              else
                list.concat(taicpu.op_reg_reg_reg(A_ADD,tmpreg,ref.base,ref.index));
              ref.base:=tmpreg;
              ref.index:=NR_NO;
            end;
        end;

      list.concat(setoppostfix(taicpu.op_reg_ref(op,reg,ref),oppostfix));
      Result := ref;
    end;
  { Stores register reg to the memory location ref.

    list     - instruction list the generated code is appended to
    fromsize - size of the value in reg (clamped to tosize when not smaller)
    tosize   - size of the memory location; selects the store postfix
    reg      - register holding the value
    ref      - destination reference

    If ref is only 1- or 2-byte aligned and narrower than tosize, a 16 or
    32 bit value is written piecewise, shifting the value right between
    the partial stores. }
  procedure tcgarm.a_load_reg_ref(list : TAsmList; fromsize, tosize: tcgsize; reg : tregister;const ref : treference);
    var
      storepostfix : toppostfix;
      partref      : treference;
      shiftedreg   : tregister;
      lsrop        : tshifterop;
      step         : integer;
      bigendian    : boolean;

    { moves "fromreg lsr <lsrop.shiftimm>" into shiftedreg, advances partref
      by delta bytes and stores the next part from shiftedreg }
    procedure store_next_part(fromreg : tregister; partsize : tcgsize; delta : integer);
      begin
        list.concat(taicpu.op_reg_reg_shifterop(A_MOV,shiftedreg,fromreg,lsrop));
        inc(partref.offset,delta);
        a_internal_load_reg_ref(list,partsize,partsize,shiftedreg,partref);
      end;

    begin
      if (TCGSize2Size[FromSize] >= TCGSize2Size[ToSize]) then
        FromSize := ToSize;

      case ToSize of
        OS_8,OS_S8:
          storepostfix:=PF_B;
        OS_16,OS_S16:
          storepostfix:=PF_H;
        { OS_F32: vfp value stored in integer register }
        OS_32,OS_S32,OS_F32:
          storepostfix:=PF_None;
        else
          InternalError(200308299);
      end;

      { sufficiently aligned destination: one ordinary store }
      if not((ref.alignment in [1,2]) and (ref.alignment<tcgsize2size[tosize])) then
        begin
          handle_load_store(list,A_STR,storepostfix,reg,ref);
          exit;
        end;

      { the least significant part is written first; on big endian targets
        it lives at the highest address, so walk downwards there }
      bigendian:=target_info.endian=endian_big;
      if bigendian then
        step:=-1
      else
        step:=1;

      case FromSize of
        OS_16,OS_S16:
          begin
            shiftedreg:=getintregister(list,OS_INT);
            shifterop_reset(lsrop);
            lsrop.shiftmode:=SM_LSR;
            lsrop.shiftimm:=8;
            partref:=ref;
            if bigendian then
              inc(partref.offset,1);
            partref:=a_internal_load_reg_ref(list,OS_8,OS_8,reg,partref);
            store_next_part(reg,OS_8,step);
          end;
        OS_32,OS_S32:
          begin
            shiftedreg:=getintregister(list,OS_INT);
            shifterop_reset(lsrop);
            lsrop.shiftmode:=SM_LSR;
            partref:=ref;
            if ref.alignment=2 then
              begin
                { two halfword stores }
                lsrop.shiftimm:=16;
                if bigendian then
                  inc(partref.offset,2);
                partref:=a_internal_load_reg_ref(list,OS_16,OS_16,reg,partref);
                store_next_part(reg,OS_16,step*2);
              end
            else
              begin
                { four byte stores }
                lsrop.shiftimm:=8;
                if bigendian then
                  inc(partref.offset,3);
                partref:=a_internal_load_reg_ref(list,OS_8,OS_8,reg,partref);
                store_next_part(reg,OS_8,step);
                store_next_part(shiftedreg,OS_8,step);
                store_next_part(shiftedreg,OS_8,step);
              end;
          end;
        else
          handle_load_store(list,A_STR,storepostfix,reg,ref);
      end;
    end;
  { Emits a plain store of reg to ref with the postfix selected by tosize
    and returns the reference handle_load_store actually used.
    fromsize is not consulted. }
  function tcgarm.a_internal_load_reg_ref(list : TAsmList; fromsize, tosize: tcgsize; reg : tregister;const ref : treference):treference;
    var
      storepostfix : toppostfix;
    begin
      if tosize in [OS_8,OS_S8] then
        storepostfix:=PF_B
      else if tosize in [OS_16,OS_S16] then
        storepostfix:=PF_H
      else if tosize in [OS_32,OS_S32] then
        storepostfix:=PF_None
      else
        InternalError(2003082910);
      result:=handle_load_store(list,A_STR,storepostfix,reg,ref);
    end;
  { Emits a plain load of ref into reg; the postfix (including the signed
    variants) is selected by fromsize. Returns the reference
    handle_load_store actually used. tosize is not consulted. }
  function tcgarm.a_internal_load_ref_reg(list : TAsmList; fromsize, tosize : tcgsize;const Ref : treference;reg : tregister):treference;
    var
      loadpostfix : toppostfix;
    begin
      if fromsize=OS_8 then
        loadpostfix:=PF_B
      else if fromsize=OS_S8 then
        loadpostfix:=PF_SB
      else if fromsize=OS_16 then
        loadpostfix:=PF_H
      else if fromsize=OS_S16 then
        loadpostfix:=PF_SH
      else if fromsize in [OS_32,OS_S32] then
        loadpostfix:=PF_None
      else
        InternalError(200308291);
      result:=handle_load_store(list,A_LDR,loadpostfix,reg,ref);
    end;
  { Copies reg1 to reg2, converting from fromsize to tosize.

    For differing sizes the value is zero or sign extended from the smaller
    of the two sizes: with shift pairs on CPUs before ARMv6, with the
    extend instructions otherwise. When no conversion code was emitted and
    the registers differ, a plain MOV is generated and reported to the
    register allocator. }
  procedure tcgarm.a_load_reg_reg(list : TAsmList; fromsize, tosize : tcgsize;reg1,reg2 : tregister);
    var
      shiftop : tshifterop;

    { emits "mov reg2, srcreg, <mode> #amount" }
    procedure shift_into_dest(mode : tshiftmode; amount : byte; srcreg : tregister);
      begin
        shiftop.shiftmode:=mode;
        shiftop.shiftimm:=amount;
        list.concat(taicpu.op_reg_reg_shifterop(A_MOV,reg2,srcreg,shiftop));
      end;

    var
      movinstr  : taicpu;
      converted,
      pre_v6    : boolean;
    begin
      if (fromsize=OS_NO) or (tosize=OS_NO) or
         (tcgsize2size[fromsize]>32) or (tcgsize2size[tosize]>32) then
        internalerror(2002090901);

      converted:=false;
      if tosize<>fromsize then
        begin
          shifterop_reset(shiftop);
          { a narrowing move behaves like an extension from the target size }
          if tcgsize2size[tosize]<=tcgsize2size[fromsize] then
            fromsize:=tosize;
          pre_v6:=current_settings.cputype<cpu_armv6;
          converted:=true;
          case fromsize of
            OS_8:
              begin
                list.concat(taicpu.op_reg_reg_const(A_AND,reg2,reg1,$ff));
              end;
            OS_S8:
              begin
                if pre_v6 then
                  begin
                    shift_into_dest(SM_LSL,24,reg1);
                    if tosize=OS_16 then
                      begin
                        { sign extend to 16 bit, keep the upper half clear }
                        shift_into_dest(SM_ASR,8,reg2);
                        shift_into_dest(SM_LSR,16,reg2);
                      end
                    else
                      shift_into_dest(SM_ASR,24,reg2);
                  end
                else if tosize=OS_16 then
                  begin
                    shiftop.shiftmode:=SM_ROR;
                    shiftop.shiftimm:=16;
                    list.concat(taicpu.op_reg_reg_shifterop(A_SXTB16,reg2,reg1,shiftop));
                    shift_into_dest(SM_LSR,16,reg2);
                  end
                else
                  list.concat(taicpu.op_reg_reg(A_SXTB,reg2,reg1));
              end;
            OS_16:
              begin
                if pre_v6 then
                  begin
                    shift_into_dest(SM_LSL,16,reg1);
                    shift_into_dest(SM_LSR,16,reg2);
                  end
                else
                  list.concat(taicpu.op_reg_reg(A_UXTH,reg2,reg1));
              end;
            OS_S16:
              begin
                if pre_v6 then
                  begin
                    shift_into_dest(SM_LSL,16,reg1);
                    shift_into_dest(SM_ASR,16,reg2);
                  end
                else
                  list.concat(taicpu.op_reg_reg(A_SXTH,reg2,reg1));
              end;
            else
              { nothing to extend, fall through to the plain move }
              converted:=false;
          end;
        end;

      if (reg1<>reg2) and not(converted) then
        begin
          { same size, only a register mov required }
          movinstr:=taicpu.op_reg_reg(A_MOV,reg2,reg1);
          list.Concat(movinstr);
          { Notify the register allocator that we have written a move
            instruction so it can try to eliminate it. }
          add_move_instruction(movinstr);
        end;
    end;
  { Loads the floating point value at ref into the parameter location(s)
    described by paraloc, walking the chain of locations and advancing the
    source offset by the size of each one. }
  procedure tcgarm.a_loadfpu_ref_cgpara(list : TAsmList;size : tcgsize;const ref : treference;const paraloc : TCGPara);
    var
      srcref,
      dstref : treference;
      ploc   : pcgparalocation;
    begin
      srcref:=ref;
      ploc:=paraloc.location;
      while assigned(ploc) do
        begin
          if ploc^.loc in [LOC_FPUREGISTER,LOC_CFPUREGISTER] then
            begin
              paramanager.allocparaloc(list,paraloc.location);
              a_loadfpu_ref_reg(list,size,size,ref,ploc^.register);
            end
          else if ploc^.loc=LOC_REGISTER then
            begin
              { float passed in integer register(s) }
              if ploc^.size in [OS_32,OS_F32] then
                begin
                  paramanager.allocparaloc(list,paraloc.location);
                  a_load_ref_reg(list,OS_32,OS_32,srcref,ploc^.register);
                end
              else if ploc^.size in [OS_64,OS_F64] then
                cg64.a_load64_ref_cgpara(list,srcref,paraloc)
              else
                a_load_ref_reg(list,ploc^.size,ploc^.size,srcref,ploc^.register);
            end
          else if ploc^.loc=LOC_REFERENCE then
            begin
              reference_reset_base(dstref,ploc^.reference.index,ploc^.reference.offset,paraloc.alignment);
              { concatcopy should choose the best way to copy the data }
              g_concatcopy(list,srcref,dstref,tcgsize2size[ploc^.size]);
            end
          else
            internalerror(200408241);

          inc(srcref.offset,tcgsize2size[ploc^.size]);
          ploc:=ploc^.next;
        end;
    end;
  { Emits an MVF from reg1 to reg2; the precision postfix is taken from
    tosize, fromsize is not consulted. }
  procedure tcgarm.a_loadfpu_reg_reg(list: TAsmList; fromsize,tosize: tcgsize; reg1, reg2: tregister);
    var
      mvf : taicpu;
    begin
      mvf:=taicpu.op_reg_reg(A_MVF,reg2,reg1);
      list.concat(setoppostfix(mvf,cgsize2fpuoppostfix[tosize]));
    end;
  { Loads a floating point value from ref into reg with LDF; the precision
    postfix comes from fromsize. If tosize differs, a register-to-register
    move with the target precision follows. }
  procedure tcgarm.a_loadfpu_ref_reg(list: TAsmList; fromsize,tosize: tcgsize; const ref: treference; reg: tregister);
    var
      precision : toppostfix;
    begin
      if fromsize in [OS_32,OS_F32] then
        precision:=PF_S
      else if fromsize in [OS_64,OS_F64] then
        precision:=PF_D
      else if fromsize=OS_F80 then
        precision:=PF_E
      else
        InternalError(200309021);

      handle_load_store(list,A_LDF,precision,reg,ref);

      if tosize<>fromsize then
        a_loadfpu_reg_reg(list,fromsize,tosize,reg,reg);
    end;
  { Stores the floating point register reg to ref with STF; the precision
    postfix comes from tosize, fromsize is not consulted. }
  procedure tcgarm.a_loadfpu_reg_ref(list: TAsmList; fromsize, tosize: tcgsize; reg: tregister; const ref: treference);
    var
      precision : toppostfix;
    begin
      if tosize=OS_F32 then
        precision:=PF_S
      else if tosize=OS_F64 then
        precision:=PF_D
      else if tosize=OS_F80 then
        precision:=PF_E
      else
        InternalError(200309022);
      handle_load_store(list,A_STF,precision,reg,ref);
    end;
  1313. { comparison operations }
  { Compares reg with the constant a and jumps to l if cmp_op holds.
    The flags register is marked allocated around the compare/jump pair. }
  procedure tcgarm.a_cmp_const_reg_label(list : TAsmList;size : tcgsize;cmp_op : topcmp;a : tcgint;reg : tregister;
    l : tasmlabel);
    var
      constreg : tregister;
      rot      : byte;
      cmpinstr : taicpu;
    begin
      a_reg_alloc(list,NR_DEFAULTFLAGS);

      if is_shifter_const(a,rot) then
        cmpinstr:=taicpu.op_reg_const(A_CMP,reg,a)
      { CMN reg,0 and CMN reg,$80000000 are different from CMP reg,$ffffffff
        and CMP reg,$7fffffff regarding the flags according to the ARM manual }
      else if not((a=$7fffffff) or (a=-1)) and is_shifter_const(-a,rot) then
        cmpinstr:=taicpu.op_reg_const(A_CMN,reg,-a)
      else
        begin
          { constant not encodable: compare against a scratch register }
          constreg:=getintregister(list,size);
          a_load_const_reg(list,size,a,constreg);
          cmpinstr:=taicpu.op_reg_reg(A_CMP,reg,constreg);
        end;
      list.concat(cmpinstr);

      a_jmp_cond(list,cmp_op,l);
      a_reg_dealloc(list,NR_DEFAULTFLAGS);
    end;
  { Emits a bit scan of src into dst.

    reverse=true : CLZ, then "31 - result", masked to the low 8 bits
    reverse=false: RBIT followed by CLZ; a result of 32 is replaced by $ff }
  procedure tcgarm.a_bit_scan_reg_reg(list: TAsmList; reverse: boolean; size: TCGSize; src, dst: TRegister);
    var
      fixup : taicpu;
    begin
      if not reverse then
        begin
          { it is decided during the compilation of the system unit if this
            code is used or not so no additional check for rbit is needed }
          list.Concat(taicpu.op_reg_reg(A_RBIT,dst,src));
          list.Concat(taicpu.op_reg_reg(A_CLZ,dst,dst));
          a_reg_alloc(list,NR_DEFAULTFLAGS);
          list.Concat(taicpu.op_reg_const(A_CMP,dst,32));
          { thumb-2 needs an IT instruction in front of the conditional mov }
          if current_settings.cputype in cpu_thumb2 then
            list.Concat(taicpu.op_cond(A_IT, C_EQ));
          fixup:=taicpu.op_reg_const(A_MOV,dst,$ff);
          list.Concat(setcondition(fixup,C_EQ));
          a_reg_dealloc(list,NR_DEFAULTFLAGS);
        end
      else
        begin
          list.Concat(taicpu.op_reg_reg(A_CLZ,dst,src));
          list.Concat(taicpu.op_reg_reg_const(A_RSB,dst,dst,31));
          list.Concat(taicpu.op_reg_reg_const(A_AND,dst,dst,255));
        end;
    end;
  { Emits "cmp reg2, reg1" followed by a conditional jump to l for cmp_op;
    the flags register is marked allocated around the pair. }
  procedure tcgarm.a_cmp_reg_reg_label(list : TAsmList;size : tcgsize;cmp_op : topcmp;reg1,reg2 : tregister;l : tasmlabel);
    var
      cmpinstr : taicpu;
    begin
      a_reg_alloc(list,NR_DEFAULTFLAGS);
      cmpinstr:=taicpu.op_reg_reg(A_CMP,reg2,reg1);
      list.concat(cmpinstr);
      a_jmp_cond(list,cmp_op,l);
      a_reg_dealloc(list,NR_DEFAULTFLAGS);
    end;
  { Emits an unconditional branch to the assembler symbol named s. }
  procedure tcgarm.a_jmp_name(list : TAsmList;const s : string);
    var
      branch : taicpu;
    begin
      branch:=taicpu.op_sym(A_B,current_asmdata.RefAsmSymbol(s));
      { mark the instruction as a jump }
      branch.is_jmp:=true;
      list.concat(branch);
    end;
  { Emits an unconditional branch to label l. }
  procedure tcgarm.a_jmp_always(list : TAsmList;l: tasmlabel);
    var
      branch : taicpu;
    begin
      branch:=taicpu.op_sym(A_B,l);
      { mark the instruction as a jump }
      branch.is_jmp:=true;
      list.concat(branch);
    end;
  { Emits a branch to label l that is taken when the flag state f holds. }
  procedure tcgarm.a_jmp_flags(list : TAsmList;const f : TResFlags;l: tasmlabel);
    var
      branch : taicpu;
    begin
      branch:=taicpu.op_sym(A_B,l);
      branch:=setcondition(branch,flags_to_cond(f));
      { mark the instruction as a jump }
      branch.is_jmp:=true;
      list.concat(branch);
    end;
  { Materialises the flag state f in reg using two conditional moves:
    reg:=1 under the condition of f, reg:=0 under the inverse condition. }
  procedure tcgarm.g_flags2reg(list: TAsmList; size: TCgSize; const f: TResFlags; reg: TRegister);
    var
      settrue,
      setfalse : taicpu;
    begin
      settrue:=setcondition(taicpu.op_reg_const(A_MOV,reg,1),flags_to_cond(f));
      list.concat(settrue);
      setfalse:=setcondition(taicpu.op_reg_const(A_MOV,reg,0),inverse_cond(flags_to_cond(f)));
      list.concat(setfalse);
    end;
  { Generates the procedure prologue.

    list         - instruction list the generated code is appended to
    localsize    - size of the local stack area in bytes (rounded up to a
                   multiple of 4 here and padded further for alignment)
    nostackframe - when true no code is generated at all

    Steps: determine the non-volatile FPU/VFP registers used, push the
    integer registers (with a separate layout for Darwin), reserve the local
    stack area, and finally store the floating point registers. }
  procedure tcgarm.g_proc_entry(list : TAsmList;localsize : longint;nostackframe:boolean);
    var
      ref : treference;
      shift : byte;
      firstfloatreg,lastfloatreg,
      r : byte;
      mmregs,
      regs, saveregs : tcpuregisterset;
      r7offset,
      stackmisalignment : pint;
      postfix: toppostfix;
      imm1, imm2: DWord;
    begin
      LocalSize:=align(LocalSize,4);
      { call instruction does not put anything on the stack }
      stackmisalignment:=0;
      if not(nostackframe) then
        begin
          firstfloatreg:=RS_NO;
          mmregs:=[];
          case current_settings.fputype of
            fpu_fpa,
            fpu_fpa10,
            fpu_fpa11:
              begin
                { save floating point registers? }
                regs:=rg[R_FPUREGISTER].used_in_proc-paramanager.get_volatile_registers_fpu(pocall_stdcall);
                for r:=RS_F0 to RS_F7 do
                  if r in regs then
                    begin
                      if firstfloatreg=RS_NO then
                        firstfloatreg:=r;
                      lastfloatreg:=r;
                      { 12 bytes are accounted per saved register }
                      inc(stackmisalignment,12);
                    end;
              end;
            fpu_vfpv2,
            fpu_vfpv3,
            fpu_vfpv3_d16:
              begin
                mmregs:=rg[R_MMREGISTER].used_in_proc-paramanager.get_volatile_registers_mm(pocall_stdcall);
              end;
          end;

          a_reg_alloc(list,NR_STACK_POINTER_REG);
          if current_procinfo.framepointer<>NR_STACK_POINTER_REG then
            a_reg_alloc(list,NR_FRAME_POINTER_REG);

          { save int registers }
          reference_reset(ref,4);
          ref.index:=NR_STACK_POINTER_REG;
          ref.addressmode:=AM_PREINDEXED;
          regs:=rg[R_INTREGISTER].used_in_proc-paramanager.get_volatile_registers_int(pocall_stdcall);
          if not(target_info.system in systems_darwin) then
            begin
              a_reg_alloc(list,NR_STACK_POINTER_REG);
              if current_procinfo.framepointer<>NR_STACK_POINTER_REG then
                begin
                  { keep a copy of the entry stack pointer in r12 }
                  a_reg_alloc(list,NR_R12);
                  list.concat(taicpu.op_reg_reg(A_MOV,NR_R12,NR_STACK_POINTER_REG));
                end;
              { the (old) ARM APCS requires saving both the stack pointer (to
                crawl the stack) and the PC (to identify the function this
                stack frame belongs to) -> also save R12 (= copy of R13 on entry)
                and R15 -- still needs updating for EABI and Darwin, they don't
                need that }
              if current_procinfo.framepointer<>NR_STACK_POINTER_REG then
                regs:=regs+[RS_FRAME_POINTER_REG,RS_R12,RS_R14,RS_R15]
              else
                if (regs<>[]) or (pi_do_call in current_procinfo.flags) then
                  include(regs,RS_R14);
              if regs<>[] then
                begin
                  for r:=RS_R0 to RS_R15 do
                    if r in regs then
                      inc(stackmisalignment,4);
                  list.concat(setoppostfix(taicpu.op_ref_regset(A_STM,ref,R_INTREGISTER,R_SUBWHOLE,regs),PF_FD));
                end;

              if current_procinfo.framepointer<>NR_STACK_POINTER_REG then
                begin
                  { the framepointer now points to the saved R15, so the saved
                    framepointer is at R11-12 (for get_caller_frame) }
                  list.concat(taicpu.op_reg_reg_const(A_SUB,NR_FRAME_POINTER_REG,NR_R12,4));
                  a_reg_dealloc(list,NR_R12);
                end;
            end
          else
            begin
              { always save r14 if we use r7 as the framepointer, because
                the parameter offsets are hardcoded in advance and always
                assume that r14 sits on the stack right behind the saved r7
              }
              if current_procinfo.framepointer=NR_FRAME_POINTER_REG then
                include(regs,RS_FRAME_POINTER_REG);
              if (regs<>[]) or (pi_do_call in current_procinfo.flags) then
                include(regs,RS_R14);
              if regs<>[] then
                begin
                  { on Darwin, you first have to save [r4-r7,lr], and then
                    [r8,r10,r11] and make r7 point to the previously saved
                    r7 so that you can perform a stack crawl based on it
                    ([r7] is previous stack frame, [r7+4] is return address
                  }
                  include(regs,RS_FRAME_POINTER_REG);
                  saveregs:=regs-[RS_R8,RS_R10,RS_R11];
                  r7offset:=0;
                  for r:=RS_R0 to RS_R15 do
                    if r in saveregs then
                      begin
                        inc(stackmisalignment,4);
                        { registers below the frame pointer register end up
                          below the saved r7 on the stack }
                        if r<RS_FRAME_POINTER_REG then
                          inc(r7offset,4);
                      end;
                  { save the registers }
                  list.concat(setoppostfix(taicpu.op_ref_regset(A_STM,ref,R_INTREGISTER,R_SUBWHOLE,saveregs),PF_FD));
                  { make r7 point to the saved r7 (regardless of whether this
                    frame uses the framepointer, for backtrace purposes) }
                  if r7offset<>0 then
                    list.concat(taicpu.op_reg_reg_const(A_ADD,NR_FRAME_POINTER_REG,NR_R13,r7offset))
                  else
                    list.concat(taicpu.op_reg_reg(A_MOV,NR_R7,NR_R13));
                  { now save the rest (if any) }
                  saveregs:=regs-saveregs;
                  if saveregs<>[] then
                    begin
                      for r:=RS_R8 to RS_R11 do
                        if r in saveregs then
                          inc(stackmisalignment,4);
                      list.concat(setoppostfix(taicpu.op_ref_regset(A_STM,ref,R_INTREGISTER,R_SUBWHOLE,saveregs),PF_FD));
                    end;
                end;
            end;

          { reserve the local area, padded so that the stack pointer stays
            aligned to localalignmax }
          stackmisalignment:=stackmisalignment mod current_settings.alignment.localalignmax;
          if (LocalSize<>0) or
             ((stackmisalignment<>0) and
              ((pi_do_call in current_procinfo.flags) or
               (po_assembler in current_procinfo.procdef.procoptions))) then
            begin
              localsize:=align(localsize+stackmisalignment,current_settings.alignment.localalignmax)-stackmisalignment;
              if is_shifter_const(localsize,shift) then
                begin
                  a_reg_dealloc(list,NR_R12);
                  list.concat(taicpu.op_reg_reg_const(A_SUB,NR_STACK_POINTER_REG,NR_STACK_POINTER_REG,LocalSize));
                end
              else if split_into_shifter_const(localsize, imm1, imm2) then
                begin
                  a_reg_dealloc(list,NR_R12);
                  list.concat(taicpu.op_reg_reg_const(A_SUB,NR_STACK_POINTER_REG,NR_STACK_POINTER_REG,imm1));
                  list.concat(taicpu.op_reg_reg_const(A_SUB,NR_STACK_POINTER_REG,NR_STACK_POINTER_REG,imm2));
                end
              else
                begin
                  { size not encodable as immediate(s): go through r12 }
                  if current_procinfo.framepointer=NR_STACK_POINTER_REG then
                    a_reg_alloc(list,NR_R12);
                  a_load_const_reg(list,OS_ADDR,LocalSize,NR_R12);
                  list.concat(taicpu.op_reg_reg_reg(A_SUB,NR_STACK_POINTER_REG,NR_STACK_POINTER_REG,NR_R12));
                  a_reg_dealloc(list,NR_R12);
                end;
            end;

          { store the floating point / vfp registers }
          if (mmregs<>[]) or
             (firstfloatreg<>RS_NO) then
            begin
              reference_reset(ref,4);
              if (tg.direction*tarmprocinfo(current_procinfo).floatregstart>=1023) or
                 (current_settings.fputype in [fpu_vfpv2,fpu_vfpv3,fpu_vfpv3_d16]) then
                begin
                  { r12 := framepointer + floatregstart, computed as a
                    subtraction of -floatregstart. The value used as the
                    immediate is -floatregstart, so that is the value whose
                    encodability has to be tested. }
                  if not is_shifter_const(-tarmprocinfo(current_procinfo).floatregstart,shift) then
                    begin
                      a_reg_alloc(list,NR_R12);
                      a_load_const_reg(list,OS_ADDR,-tarmprocinfo(current_procinfo).floatregstart,NR_R12);
                      list.concat(taicpu.op_reg_reg_reg(A_SUB,NR_R12,current_procinfo.framepointer,NR_R12));
                      a_reg_dealloc(list,NR_R12);
                    end
                  else
                    list.concat(taicpu.op_reg_reg_const(A_SUB,NR_R12,current_procinfo.framepointer,-tarmprocinfo(current_procinfo).floatregstart));
                  ref.base:=NR_R12;
                end
              else
                begin
                  ref.base:=current_procinfo.framepointer;
                  ref.offset:=tarmprocinfo(current_procinfo).floatregstart;
                end;

              case current_settings.fputype of
                fpu_fpa,
                fpu_fpa10,
                fpu_fpa11:
                  begin
                    list.concat(taicpu.op_reg_const_ref(A_SFM,newreg(R_FPUREGISTER,firstfloatreg,R_SUBWHOLE),
                      lastfloatreg-firstfloatreg+1,ref));
                  end;
                fpu_vfpv2,
                fpu_vfpv3,
                fpu_vfpv3_d16:
                  begin
                    { the address register is passed in the index field here }
                    ref.index:=ref.base;
                    ref.base:=NR_NO;
                    { FSTMX is deprecated on ARMv6 and later }
                    if (current_settings.cputype<cpu_armv6) then
                      postfix:=PF_IAX
                    else
                      postfix:=PF_IAD;
                    list.concat(setoppostfix(taicpu.op_ref_regset(A_FSTM,ref,R_MMREGISTER,R_SUBFD,mmregs),postfix));
                  end;
              end;
            end;
        end;
    end;
{ Emits the epilogue of the current procedure into list.

  With nostackframe set only a plain return is generated (MOV PC,R14, or
  BX R14 when the CPU has BX). Otherwise the saved FPA/VFP registers are
  reloaded, the local stack area is released (stack pointer based frames
  and Darwin) and the saved integer registers are popped with an LDM; in
  that case R15 is part of the register set, so the LDM also returns.

  parasize is not referenced by this implementation. }
procedure tcgarm.g_proc_exit(list : TAsmList;parasize : longint;nostackframe:boolean);
  var
    ref : treference;
    LocalSize : longint;
    firstfloatreg,lastfloatreg,
    r,
    shift : byte;
    mmregs,
    saveregs,
    regs : tcpuregisterset;
    stackmisalignment: pint;
    mmpostfix: toppostfix;
    imm1, imm2: DWord;
  begin
    if not(nostackframe) then
      begin
        stackmisalignment:=0;
        firstfloatreg:=RS_NO;
        mmregs:=[];
        { step 1: find out which non-volatile float registers were used }
        case current_settings.fputype of
          fpu_fpa,
          fpu_fpa10,
          fpu_fpa11:
            begin
              { restore floating point registers? }
              regs:=rg[R_FPUREGISTER].used_in_proc-paramanager.get_volatile_registers_fpu(pocall_stdcall);
              { remember the first and the last used register; the LFM
                below reloads the whole range in between }
              for r:=RS_F0 to RS_F7 do
                if r in regs then
                  begin
                    if firstfloatreg=RS_NO then
                      firstfloatreg:=r;
                    lastfloatreg:=r;
                    { floating point register space is already included in
                      localsize below by calc_stackframe_size
                    inc(stackmisalignment,12);
                    }
                  end;
            end;
          fpu_vfpv2,
          fpu_vfpv3,
          fpu_vfpv3_d16:
            begin;
              { restore vfp registers? }
              mmregs:=rg[R_MMREGISTER].used_in_proc-paramanager.get_volatile_registers_mm(pocall_stdcall);
            end;
        end;

        { step 2: reload them from the save area at floatregstart }
        if (firstfloatreg<>RS_NO) or
           (mmregs<>[]) then
          begin
            reference_reset(ref,4);
            { for large offsets, and always for VFP, the address of the save
              area is computed into R12 first }
            if (tg.direction*tarmprocinfo(current_procinfo).floatregstart>=1023) or
               (current_settings.fputype in [fpu_vfpv2,fpu_vfpv3,fpu_vfpv3_d16]) then
              begin
                { NOTE(review): the shifter constant test looks at floatregstart
                  while the value actually encoded/loaded is -floatregstart --
                  confirm that this is intended }
                if not is_shifter_const(tarmprocinfo(current_procinfo).floatregstart,shift) then
                  begin
                    a_reg_alloc(list,NR_R12);
                    a_load_const_reg(list,OS_ADDR,-tarmprocinfo(current_procinfo).floatregstart,NR_R12);
                    list.concat(taicpu.op_reg_reg_reg(A_SUB,NR_R12,current_procinfo.framepointer,NR_R12));
                    a_reg_dealloc(list,NR_R12);
                  end
                else
                  list.concat(taicpu.op_reg_reg_const(A_SUB,NR_R12,current_procinfo.framepointer,-tarmprocinfo(current_procinfo).floatregstart));
                ref.base:=NR_R12;
              end
            else
              begin
                { small offset: address the save area directly via the frame pointer }
                ref.base:=current_procinfo.framepointer;
                ref.offset:=tarmprocinfo(current_procinfo).floatregstart;
              end;
            case current_settings.fputype of
              fpu_fpa,
              fpu_fpa10,
              fpu_fpa11:
                begin
                  { one LFM reloads lastfloatreg-firstfloatreg+1 registers }
                  list.concat(taicpu.op_reg_const_ref(A_LFM,newreg(R_FPUREGISTER,firstfloatreg,R_SUBWHOLE),
                    lastfloatreg-firstfloatreg+1,ref));
                end;
              fpu_vfpv2,
              fpu_vfpv3,
              fpu_vfpv3_d16:
                begin
                  { the address register is passed in the index field for FLDM }
                  ref.index:=ref.base;
                  ref.base:=NR_NO;
                  { FLDMX is deprecated on ARMv6 and later }
                  if (current_settings.cputype<cpu_armv6) then
                    mmpostfix:=PF_IAX
                  else
                    mmpostfix:=PF_IAD;
                  list.concat(setoppostfix(taicpu.op_ref_regset(A_FLDM,ref,R_MMREGISTER,R_SUBFD,mmregs),mmpostfix));
                end;
            end;
          end;

        { step 3: determine the integer registers to pop }
        regs:=rg[R_INTREGISTER].used_in_proc-paramanager.get_volatile_registers_int(pocall_stdcall) ;
        if (pi_do_call in current_procinfo.flags) or
           (regs<>[]) or
           ((target_info.system in systems_darwin) and
            (current_procinfo.framepointer<>NR_STACK_POINTER_REG)) then
          begin
            { R14 is replaced by R15 in the set so that the LDM returns as well }
            exclude(regs,RS_R14);
            include(regs,RS_R15);
            if (target_info.system in systems_darwin) then
              include(regs,RS_FRAME_POINTER_REG);
          end;
        if not(target_info.system in systems_darwin) then
          begin
            { restore saved stack pointer to SP (R13) and saved lr to PC (R15).
              The saved PC came after that but is discarded, since we restore
              the stack pointer }
            if (current_procinfo.framepointer<>NR_STACK_POINTER_REG) then
              regs:=regs+[RS_FRAME_POINTER_REG,RS_R13,RS_R15];
          end
        else
          begin
            { restore R8-R11 already if necessary (they've been stored
              before the others) }
            saveregs:=regs*[RS_R8,RS_R10,RS_R11];
            if saveregs<>[] then
              begin
                { this reference is used by the LDM of saveregs further below }
                reference_reset(ref,4);
                ref.index:=NR_STACK_POINTER_REG;
                ref.addressmode:=AM_PREINDEXED;
                for r:=RS_R8 to RS_R11 do
                  if r in saveregs then
                    inc(stackmisalignment,4);
                regs:=regs-saveregs;
              end;
          end;
        { every register in regs accounts for 4 bytes; only the remainder
          modulo the maximum local alignment is kept }
        for r:=RS_R0 to RS_R15 do
          if r in regs then
            inc(stackmisalignment,4);
        stackmisalignment:=stackmisalignment mod current_settings.alignment.localalignmax;

        { step 4: release the locals and pop the registers }
        if (current_procinfo.framepointer=NR_STACK_POINTER_REG) or
           (target_info.system in systems_darwin) then
          begin
            LocalSize:=current_procinfo.calc_stackframe_size;
            if (LocalSize<>0) or
               ((stackmisalignment<>0) and
                ((pi_do_call in current_procinfo.flags) or
                 (po_assembler in current_procinfo.procdef.procoptions))) then
              begin
                localsize:=align(localsize+stackmisalignment,current_settings.alignment.localalignmax)-stackmisalignment;
                { add LocalSize to SP: one ADD if it is a shifter constant, two
                  ADDs if it can be split into two such constants, otherwise
                  the value is loaded into R12 first }
                if is_shifter_const(LocalSize,shift) then
                  list.concat(taicpu.op_reg_reg_const(A_ADD,NR_STACK_POINTER_REG,NR_STACK_POINTER_REG,LocalSize))
                else if split_into_shifter_const(localsize, imm1, imm2) then
                  begin
                    list.concat(taicpu.op_reg_reg_const(A_ADD,NR_STACK_POINTER_REG,NR_STACK_POINTER_REG,imm1));
                    list.concat(taicpu.op_reg_reg_const(A_ADD,NR_STACK_POINTER_REG,NR_STACK_POINTER_REG,imm2));
                  end
                else
                  begin
                    a_reg_alloc(list,NR_R12);
                    a_load_const_reg(list,OS_ADDR,LocalSize,NR_R12);
                    list.concat(taicpu.op_reg_reg_reg(A_ADD,NR_STACK_POINTER_REG,NR_STACK_POINTER_REG,NR_R12));
                    a_reg_dealloc(list,NR_R12);
                  end;
              end;

            { saveregs is only assigned on Darwin; the first operand of the
              "and" keeps it from being read on other targets }
            if (target_info.system in systems_darwin) and
               (saveregs<>[]) then
              list.concat(setoppostfix(taicpu.op_ref_regset(A_LDM,ref,R_INTREGISTER,R_SUBWHOLE,saveregs),PF_FD));

            if regs=[] then
              begin
                { nothing to pop: return via the link register }
                if not(CPUARM_HAS_BX in cpu_capabilities[current_settings.cputype]) then
                  list.concat(taicpu.op_reg_reg(A_MOV,NR_PC,NR_R14))
                else
                  list.concat(taicpu.op_reg(A_BX,NR_R14))
              end
            else
              begin
                { pop the saved registers from the stack }
                reference_reset(ref,4);
                ref.index:=NR_STACK_POINTER_REG;
                ref.addressmode:=AM_PREINDEXED;
                list.concat(setoppostfix(taicpu.op_ref_regset(A_LDM,ref,R_INTREGISTER,R_SUBWHOLE,regs),PF_FD));
              end;
          end
        else
          begin
            { restore int registers and return }
            reference_reset(ref,4);
            ref.index:=NR_FRAME_POINTER_REG;
            list.concat(setoppostfix(taicpu.op_ref_regset(A_LDM,ref,R_INTREGISTER,R_SUBWHOLE,regs),PF_EA));
          end;
      end
    { no stack frame at all: just return via the link register }
    else if not(CPUARM_HAS_BX in cpu_capabilities[current_settings.cputype]) then
      list.concat(taicpu.op_reg_reg(A_MOV,NR_PC,NR_R14))
    else
      list.concat(taicpu.op_reg(A_BX,NR_R14))
  end;
  1786. procedure tcgarm.a_loadaddr_ref_reg(list : TAsmList;const ref : treference;r : tregister);
  1787. var
  1788. b : byte;
  1789. tmpref : treference;
  1790. instr : taicpu;
  1791. begin
  1792. if ref.addressmode<>AM_OFFSET then
  1793. internalerror(200309071);
  1794. tmpref:=ref;
  1795. { Be sure to have a base register }
  1796. if (tmpref.base=NR_NO) then
  1797. begin
  1798. if tmpref.shiftmode<>SM_None then
  1799. internalerror(200308294);
  1800. if tmpref.signindex<0 then
  1801. internalerror(200312023);
  1802. tmpref.base:=tmpref.index;
  1803. tmpref.index:=NR_NO;
  1804. end;
  1805. if assigned(tmpref.symbol) or
  1806. not((is_shifter_const(tmpref.offset,b)) or
  1807. (is_shifter_const(-tmpref.offset,b))
  1808. ) then
  1809. fixref(list,tmpref);
  1810. { expect a base here if there is an index }
  1811. if (tmpref.base=NR_NO) and (tmpref.index<>NR_NO) then
  1812. internalerror(200312022);
  1813. if tmpref.index<>NR_NO then
  1814. begin
  1815. if tmpref.shiftmode<>SM_None then
  1816. internalerror(200312021);
  1817. if tmpref.signindex<0 then
  1818. a_op_reg_reg_reg(list,OP_SUB,OS_ADDR,tmpref.base,tmpref.index,r)
  1819. else
  1820. a_op_reg_reg_reg(list,OP_ADD,OS_ADDR,tmpref.base,tmpref.index,r);
  1821. if tmpref.offset<>0 then
  1822. a_op_const_reg_reg(list,OP_ADD,OS_ADDR,tmpref.offset,r,r);
  1823. end
  1824. else
  1825. begin
  1826. if tmpref.base=NR_NO then
  1827. a_load_const_reg(list,OS_ADDR,tmpref.offset,r)
  1828. else
  1829. if tmpref.offset<>0 then
  1830. a_op_const_reg_reg(list,OP_ADD,OS_ADDR,tmpref.offset,tmpref.base,r)
  1831. else
  1832. begin
  1833. instr:=taicpu.op_reg_reg(A_MOV,r,tmpref.base);
  1834. list.concat(instr);
  1835. add_move_instruction(instr);
  1836. end;
  1837. end;
  1838. end;
{ Rewrites ref so that it carries neither a symbol nor an immediate offset
  any more.

  The symbol+offset (or the plain offset) is emitted as an entry into the
  local data of the current procedure (aktlocaldata) and loaded pc relative
  into a new register. On arm-darwin, symbols with external/weak external/
  private extern/common binding are loaded through g_indirect_sym_load
  instead. The register is then merged into ref as base or index, and
  ref.offset/ref.symbol are cleared. }
procedure tcgarm.fixref(list : TAsmList;var ref : treference);
  var
    tmpreg : tregister;
    tmpref : treference;
    l : tasmlabel;
    indirection_done : boolean;
  begin
    { absolute symbols can't be handled directly, we've to store the symbol reference
      in the text segment and access it pc relative

      For now, we assume that references where base or index equals to PC are already
      relative, all other references are assumed to be absolute and thus they need
      to be handled extra.

      A proper solution would be to change refoptions to a set and store the information
      if the symbol is absolute or relative there.
    }

    { create consts entry }
    reference_reset(tmpref,4);
    current_asmdata.getjumplabel(l);
    cg.a_label(current_procinfo.aktlocaldata,l);
    { remember the label just appended to the local data list }
    tmpref.symboldata:=current_procinfo.aktlocaldata.last;

    indirection_done:=false;
    if assigned(ref.symbol) then
      begin
        if (target_info.system=system_arm_darwin) and
           (ref.symbol.bind in [AB_EXTERNAL,AB_WEAK_EXTERNAL,AB_PRIVATE_EXTERN,AB_COMMON]) then
          begin
            { load the symbol address indirectly; the label emitted above
              gets no constant entry in this case }
            tmpreg:=g_indirect_sym_load(list,ref.symbol.name,asmsym2indsymflags(ref.symbol));
            if ref.offset<>0 then
              a_op_const_reg(list,OP_ADD,OS_ADDR,ref.offset,tmpreg);
            indirection_done:=true;
          end
        else
          current_procinfo.aktlocaldata.concat(tai_const.create_sym_offset(ref.symbol,ref.offset))
      end
    else
      { no symbol: only the offset is stored as a 32 bit constant }
      current_procinfo.aktlocaldata.concat(tai_const.Create_32bit(ref.offset));

    { load consts entry }
    if not indirection_done then
      begin
        tmpreg:=getintregister(list,OS_INT);
        tmpref.symbol:=l;
        tmpref.base:=NR_PC;
        list.concat(taicpu.op_reg_ref(A_LDR,tmpreg,tmpref));
      end;

    { This routine can be called with PC as base/index in case the offset
      was too large to encode in a load/store. In that case, the entire
      absolute expression has been re-encoded in a new constpool entry, and
      we have to remove the use of PC from the original reference (the code
      above made everything relative to the value loaded from the new
      constpool entry) }
    if is_pc(ref.base) then
      ref.base:=NR_NO;
    if is_pc(ref.index) then
      ref.index:=NR_NO;

    { merge tmpreg into the remaining reference }
    if (ref.base<>NR_NO) then
      begin
        if ref.index<>NR_NO then
          begin
            { base and index are both in use: fold the base into tmpreg and
              use the sum as the new base }
            list.concat(taicpu.op_reg_reg_reg(A_ADD,tmpreg,ref.base,tmpreg));
            ref.base:=tmpreg;
          end
        else
          { NOTE(review): ref.base was already cleared above when is_pc(ref.base)
            held, so this test presumably always succeeds here -- confirm
            against the definition of is_pc }
          if ref.base<>NR_PC then
            begin
              { free index slot: tmpreg becomes a plain, unshifted, positive index }
              ref.index:=tmpreg;
              ref.shiftimm:=0;
              ref.signindex:=1;
              ref.shiftmode:=SM_None;
            end
          else
            ref.base:=tmpreg;
      end
    else
      ref.base:=tmpreg;
    { symbol and offset are now represented by tmpreg }
    ref.offset:=0;
    ref.symbol:=nil;
  end;
  1916. procedure tcgarm.g_concatcopy_move(list : TAsmList;const source,dest : treference;len : tcgint);
  1917. var
  1918. paraloc1,paraloc2,paraloc3 : TCGPara;
  1919. pd : tprocdef;
  1920. begin
  1921. pd:=search_system_proc('MOVE');
  1922. paraloc1.init;
  1923. paraloc2.init;
  1924. paraloc3.init;
  1925. paramanager.getintparaloc(pd,1,paraloc1);
  1926. paramanager.getintparaloc(pd,2,paraloc2);
  1927. paramanager.getintparaloc(pd,3,paraloc3);
  1928. a_load_const_cgpara(list,OS_SINT,len,paraloc3);
  1929. a_loadaddr_ref_cgpara(list,dest,paraloc2);
  1930. a_loadaddr_ref_cgpara(list,source,paraloc1);
  1931. paramanager.freecgpara(list,paraloc3);
  1932. paramanager.freecgpara(list,paraloc2);
  1933. paramanager.freecgpara(list,paraloc1);
  1934. alloccpuregisters(list,R_INTREGISTER,paramanager.get_volatile_registers_int(pocall_default));
  1935. alloccpuregisters(list,R_FPUREGISTER,paramanager.get_volatile_registers_fpu(pocall_default));
  1936. a_call_name(list,'FPC_MOVE',false);
  1937. dealloccpuregisters(list,R_FPUREGISTER,paramanager.get_volatile_registers_fpu(pocall_default));
  1938. dealloccpuregisters(list,R_INTREGISTER,paramanager.get_volatile_registers_int(pocall_default));
  1939. paraloc3.done;
  1940. paraloc2.done;
  1941. paraloc1.done;
  1942. end;
{ Generates inline code copying len bytes from source to dest.

  aligned tells whether word/halfword accesses may be used. Depending on
  len and aligned one of these strategies is chosen:
    - aligned and len=4: a single 32 bit load/store pair
    - aligned and len<=helpsize: all words are loaded into temporary
      registers and then stored, followed by a halfword/byte tail
    - not aligned and len<=4: byte by byte copy
    - everything else: a copy loop generated by genloop
  Nothing is generated for len=0. }
procedure tcgarm.g_concatcopy_internal(list : TAsmList;const source,dest : treference;len : tcgint;aligned : boolean);
  const
    maxtmpreg=10;{roozbeh: can be reduced to 8 or lower if it might conflict with reserved ones; also +2 is used because of the regs required for referencing}
  var
    srcref,dstref,usedtmpref,usedtmpref2:treference;
    srcreg,destreg,countreg,r,tmpreg:tregister;
    helpsize:aint;
    copysize:byte;
    cgsize:Tcgsize;
    tmpregisters:array[1..maxtmpreg] of tregister;
    tmpregi,tmpregi2:byte;

  { Emits a counted loop copying count div size units of size bytes each via
    srcref/dstref with post-indexed addressing, followed by straight-line
    code for the remaining count mod size bytes. Uses countreg, srcreg,
    destreg and r of the enclosing procedure.

    will never be called with count<=4 }
  procedure genloop(count : aword;size : byte);
    const
      { maps the unit size in bytes to the matching operand size }
      size2opsize : array[1..4] of tcgsize = (OS_8,OS_16,OS_NO,OS_32);
    var
      l : tasmlabel;
    begin
      current_asmdata.getjumplabel(l);
      if count<size then size:=1;
      a_load_const_reg(list,OS_INT,count div size,countreg);
      cg.a_label(list,l);
      { post-indexed accesses advance both pointers by size per iteration }
      srcref.addressmode:=AM_POSTINDEXED;
      dstref.addressmode:=AM_POSTINDEXED;
      srcref.offset:=size;
      dstref.offset:=size;
      r:=getintregister(list,size2opsize[size]);
      a_load_ref_reg(list,size2opsize[size],size2opsize[size],srcref,r);
      { the flags are set by the SUBS and consumed by the conditional jump;
        the store is placed between the two }
      a_reg_alloc(list,NR_DEFAULTFLAGS);
      list.concat(setoppostfix(taicpu.op_reg_reg_const(A_SUB,countreg,countreg,1),PF_S));
      a_load_reg_ref(list,size2opsize[size],size2opsize[size],r,dstref);
      a_jmp_flags(list,F_NE,l);
      a_reg_dealloc(list,NR_DEFAULTFLAGS);
      { copy the remaining 1..3 bytes, bytewise unless aligned }
      srcref.offset:=1;
      dstref.offset:=1;
      case count mod size of
        1:
          begin
            a_load_ref_reg(list,OS_8,OS_8,srcref,r);
            a_load_reg_ref(list,OS_8,OS_8,r,dstref);
          end;
        2:
          if aligned then
            begin
              a_load_ref_reg(list,OS_16,OS_16,srcref,r);
              a_load_reg_ref(list,OS_16,OS_16,r,dstref);
            end
          else
            begin
              a_load_ref_reg(list,OS_8,OS_8,srcref,r);
              a_load_reg_ref(list,OS_8,OS_8,r,dstref);
              a_load_ref_reg(list,OS_8,OS_8,srcref,r);
              a_load_reg_ref(list,OS_8,OS_8,r,dstref);
            end;
        3:
          if aligned then
            begin
              { a halfword (advancing by 2) followed by a byte }
              srcref.offset:=2;
              dstref.offset:=2;
              a_load_ref_reg(list,OS_16,OS_16,srcref,r);
              a_load_reg_ref(list,OS_16,OS_16,r,dstref);
              a_load_ref_reg(list,OS_8,OS_8,srcref,r);
              a_load_reg_ref(list,OS_8,OS_8,r,dstref);
            end
          else
            begin
              a_load_ref_reg(list,OS_8,OS_8,srcref,r);
              a_load_reg_ref(list,OS_8,OS_8,r,dstref);
              a_load_ref_reg(list,OS_8,OS_8,srcref,r);
              a_load_reg_ref(list,OS_8,OS_8,r,dstref);
              a_load_ref_reg(list,OS_8,OS_8,srcref,r);
              a_load_reg_ref(list,OS_8,OS_8,r,dstref);
            end;
      end;
      { keep the registers alive }
      list.concat(taicpu.op_reg_reg(A_MOV,countreg,countreg));
      list.concat(taicpu.op_reg_reg(A_MOV,srcreg,srcreg));
      list.concat(taicpu.op_reg_reg(A_MOV,destreg,destreg));
    end;

  begin
    if len=0 then
      exit;
    { helpsize is the largest length that is still copied without a loop
      when the data is aligned }
    helpsize:=12+maxtmpreg*4;//52 with maxtmpreg=10
    dstref:=dest;
    srcref:=source;
    if cs_opt_size in current_settings.optimizerswitches then
      helpsize:=8;
    if aligned and (len=4) then
      begin
        { a single word }
        tmpreg:=getintregister(list,OS_32);
        a_load_ref_reg(list,OS_32,OS_32,source,tmpreg);
        a_load_reg_ref(list,OS_32,OS_32,tmpreg,dest);
      end
    else if (len<=helpsize) and aligned then
      begin
        tmpregi:=0;
        srcreg:=getintregister(list,OS_ADDR);

        { explicit pc relative addressing, could be
          e.g. a floating point constant }
        if source.base=NR_PC then
          begin
            { ... then we don't need a loadaddr }
            srcref:=source;
          end
        else
          begin
            a_loadaddr_ref_reg(list,source,srcreg);
            reference_reset_base(srcref,srcreg,0,source.alignment);
          end;

        { first load as many words as possible (at most maxtmpreg) ... }
        while (len div 4 <> 0) and (tmpregi<maxtmpreg) do
          begin
            inc(tmpregi);
            tmpregisters[tmpregi]:=getintregister(list,OS_32);
            a_load_ref_reg(list,OS_32,OS_32,srcref,tmpregisters[tmpregi]);
            inc(srcref.offset,4);
            dec(len,4);
          end;

        destreg:=getintregister(list,OS_ADDR);
        a_loadaddr_ref_reg(list,dest,destreg);
        reference_reset_base(dstref,destreg,0,dest.alignment);
        { ... then store them in the same order }
        tmpregi2:=1;
        while (tmpregi2<=tmpregi) do
          begin
            a_load_reg_ref(list,OS_32,OS_32,tmpregisters[tmpregi2],dstref);
            inc(dstref.offset,4);
            inc(tmpregi2);
          end;

        { copy what is left using the largest unit (4, 2 or 1 bytes) that
          still fits }
        copysize:=4;
        cgsize:=OS_32;
        while len<>0 do
          begin
            if len<2 then
              begin
                copysize:=1;
                cgsize:=OS_8;
              end
            else if len<4 then
              begin
                copysize:=2;
                cgsize:=OS_16;
              end;
            dec(len,copysize);
            r:=getintregister(list,cgsize);
            a_load_ref_reg(list,cgsize,cgsize,srcref,r);
            a_load_reg_ref(list,cgsize,cgsize,r,dstref);
            inc(srcref.offset,copysize);
            inc(dstref.offset,copysize);
          end;{end of while}
      end
    else
      begin
        cgsize:=OS_32;
        if (len<=4) then{len<=4 and not aligned}
          begin
            { byte by byte; the references returned by the internal
              load/store of the first byte are reused for the following
              bytes with incremented offsets }
            r:=getintregister(list,cgsize);
            usedtmpref:=a_internal_load_ref_reg(list,OS_8,OS_8,srcref,r);
            if Len=1 then
              a_load_reg_ref(list,OS_8,OS_8,r,dstref)
            else
              begin
                tmpreg:=getintregister(list,cgsize);
                usedtmpref2:=a_internal_load_reg_ref(list,OS_8,OS_8,r,dstref);
                inc(usedtmpref.offset,1);
                a_load_ref_reg(list,OS_8,OS_8,usedtmpref,tmpreg);
                inc(usedtmpref2.offset,1);
                a_load_reg_ref(list,OS_8,OS_8,tmpreg,usedtmpref2);
                if len>2 then
                  begin
                    inc(usedtmpref.offset,1);
                    a_load_ref_reg(list,OS_8,OS_8,usedtmpref,tmpreg);
                    inc(usedtmpref2.offset,1);
                    a_load_reg_ref(list,OS_8,OS_8,tmpreg,usedtmpref2);
                    if len>3 then
                      begin
                        inc(usedtmpref.offset,1);
                        a_load_ref_reg(list,OS_8,OS_8,usedtmpref,tmpreg);
                        inc(usedtmpref2.offset,1);
                        a_load_reg_ref(list,OS_8,OS_8,tmpreg,usedtmpref2);
                      end;
                  end;
              end;
          end{end of if len<=4}
        else
          begin{unaligned & 4<len<helpsize **or** aligned/unaligned & len>helpsize}
            { load both addresses into registers and copy with a loop }
            destreg:=getintregister(list,OS_ADDR);
            a_loadaddr_ref_reg(list,dest,destreg);
            reference_reset_base(dstref,destreg,0,dest.alignment);

            srcreg:=getintregister(list,OS_ADDR);
            a_loadaddr_ref_reg(list,source,srcreg);
            reference_reset_base(srcref,srcreg,0,source.alignment);

            countreg:=getintregister(list,OS_32);

//            if cs_opt_size in current_settings.optimizerswitches  then
            { roozbeh : it seems loading 1 byte is faster because of caching/fetching(?) }
            {if aligned then
              genloop(len,4)
            else}
            genloop(len,1);
          end;
      end;
  end;
  2143. procedure tcgarm.g_concatcopy_unaligned(list : TAsmList;const source,dest : treference;len : tcgint);
  2144. begin
  2145. g_concatcopy_internal(list,source,dest,len,false);
  2146. end;
  2147. procedure tcgarm.g_concatcopy(list : TAsmList;const source,dest : treference;len : tcgint);
  2148. begin
  2149. if (source.alignment in [1..3]) or
  2150. (dest.alignment in [1..3]) then
  2151. g_concatcopy_internal(list,source,dest,len,false)
  2152. else
  2153. g_concatcopy_internal(list,source,dest,len,true);
  2154. end;
  2155. procedure tcgarm.g_overflowCheck(list : TAsmList;const l : tlocation;def : tdef);
  2156. var
  2157. ovloc : tlocation;
  2158. begin
  2159. ovloc.loc:=LOC_VOID;
  2160. g_overflowCheck_loc(list,l,def,ovloc);
  2161. end;
  2162. procedure tcgarm.g_overflowCheck_loc(List:TAsmList;const Loc:TLocation;def:TDef;ovloc : tlocation);
  2163. var
  2164. hl : tasmlabel;
  2165. ai:TAiCpu;
  2166. hflags : tresflags;
  2167. begin
  2168. if not(cs_check_overflow in current_settings.localswitches) then
  2169. exit;
  2170. current_asmdata.getjumplabel(hl);
  2171. case ovloc.loc of
  2172. LOC_VOID:
  2173. begin
  2174. ai:=taicpu.op_sym(A_B,hl);
  2175. ai.is_jmp:=true;
  2176. if not((def.typ=pointerdef) or
  2177. ((def.typ=orddef) and
  2178. (torddef(def).ordtype in [u64bit,u16bit,u32bit,u8bit,uchar,
  2179. pasbool8,pasbool16,pasbool32,pasbool64]))) then
  2180. ai.SetCondition(C_VC)
  2181. else
  2182. if TAiCpu(List.Last).opcode in [A_RSB,A_RSC,A_SBC,A_SUB] then
  2183. ai.SetCondition(C_CS)
  2184. else
  2185. ai.SetCondition(C_CC);
  2186. list.concat(ai);
  2187. end;
  2188. LOC_FLAGS:
  2189. begin
  2190. hflags:=ovloc.resflags;
  2191. inverse_flags(hflags);
  2192. cg.a_jmp_flags(list,hflags,hl);
  2193. cg.a_reg_dealloc(list,NR_DEFAULTFLAGS);
  2194. end;
  2195. else
  2196. internalerror(200409281);
  2197. end;
  2198. a_call_name(list,'FPC_OVERFLOW',false);
  2199. a_label(list,hl);
  2200. end;
{ Intentionally generates no code. }
procedure tcgarm.g_save_registers(list : TAsmList);
  begin
    { this work is done in g_proc_entry }
  end;
{ Intentionally generates no code. }
procedure tcgarm.g_restore_registers(list : TAsmList);
  begin
    { this work is done in g_proc_exit }
  end;
  2209. procedure tcgarm.a_jmp_cond(list : TAsmList;cond : TOpCmp;l: tasmlabel);
  2210. var
  2211. ai : taicpu;
  2212. begin
  2213. ai:=Taicpu.Op_sym(A_B,l);
  2214. ai.SetCondition(OpCmp2AsmCond[cond]);
  2215. ai.is_jmp:=true;
  2216. list.concat(ai);
  2217. end;
  2218. procedure tcgarm.g_adjust_self_value(list:TAsmList;procdef: tprocdef;ioffset: tcgint);
  2219. var
  2220. hsym : tsym;
  2221. href : treference;
  2222. paraloc : Pcgparalocation;
  2223. shift : byte;
  2224. begin
  2225. { calculate the parameter info for the procdef }
  2226. procdef.init_paraloc_info(callerside);
  2227. hsym:=tsym(procdef.parast.Find('self'));
  2228. if not(assigned(hsym) and
  2229. (hsym.typ=paravarsym)) then
  2230. internalerror(200305251);
  2231. paraloc:=tparavarsym(hsym).paraloc[callerside].location;
  2232. while paraloc<>nil do
  2233. with paraloc^ do
  2234. begin
  2235. case loc of
  2236. LOC_REGISTER:
  2237. begin
  2238. if is_shifter_const(ioffset,shift) then
  2239. a_op_const_reg(list,OP_SUB,size,ioffset,register)
  2240. else
  2241. begin
  2242. a_load_const_reg(list,OS_ADDR,ioffset,NR_R12);
  2243. a_op_reg_reg(list,OP_SUB,size,NR_R12,register);
  2244. end;
  2245. end;
  2246. LOC_REFERENCE:
  2247. begin
  2248. { offset in the wrapper needs to be adjusted for the stored
  2249. return address }
  2250. reference_reset_base(href,reference.index,reference.offset+sizeof(aint),sizeof(pint));
  2251. if is_shifter_const(ioffset,shift) then
  2252. a_op_const_ref(list,OP_SUB,size,ioffset,href)
  2253. else
  2254. begin
  2255. a_load_const_reg(list,OS_ADDR,ioffset,NR_R12);
  2256. a_op_reg_ref(list,OP_SUB,size,NR_R12,href);
  2257. end;
  2258. end
  2259. else
  2260. internalerror(200309189);
  2261. end;
  2262. paraloc:=next;
  2263. end;
  2264. end;
{ Not implemented by this code generator: calling it always raises
  internal error 200807237. list and size are not used. }
procedure tcgarm.g_stackpointer_alloc(list: TAsmList; size: longint);
  begin
    internalerror(200807237);
  end;
  2269. function get_scalar_mm_op(fromsize,tosize : tcgsize) : tasmop;
  2270. const
  2271. convertop : array[OS_F32..OS_F128,OS_F32..OS_F128] of tasmop = (
  2272. (A_FCPYS,A_FCVTSD,A_NONE,A_NONE,A_NONE),
  2273. (A_FCVTDS,A_FCPYD,A_NONE,A_NONE,A_NONE),
  2274. (A_NONE,A_NONE,A_NONE,A_NONE,A_NONE),
  2275. (A_NONE,A_NONE,A_NONE,A_NONE,A_NONE),
  2276. (A_NONE,A_NONE,A_NONE,A_NONE,A_NONE));
  2277. begin
  2278. result:=convertop[fromsize,tosize];
  2279. if result=A_NONE then
  2280. internalerror(200312205);
  2281. end;
  2282. procedure tcgarm.a_loadmm_reg_reg(list: tasmlist; fromsize,tosize: tcgsize; reg1,reg2: tregister; shuffle: pmmshuffle);
  2283. var
  2284. instr: taicpu;
  2285. begin
  2286. if shuffle=nil then
  2287. begin
  2288. if fromsize=tosize then
  2289. { needs correct size in case of spilling }
  2290. case fromsize of
  2291. OS_F32:
  2292. instr:=taicpu.op_reg_reg(A_FCPYS,reg2,reg1);
  2293. OS_F64:
  2294. instr:=taicpu.op_reg_reg(A_FCPYD,reg2,reg1);
  2295. else
  2296. internalerror(2009112405);
  2297. end
  2298. else
  2299. internalerror(2009112406);
  2300. end
  2301. else if shufflescalar(shuffle) then
  2302. instr:=taicpu.op_reg_reg(get_scalar_mm_op(tosize,fromsize),reg2,reg1)
  2303. else
  2304. internalerror(2009112407);
  2305. list.concat(instr);
  2306. case instr.opcode of
  2307. A_FCPYS,
  2308. A_FCPYD:
  2309. add_move_instruction(instr);
  2310. end;
  2311. end;
  2312. procedure tcgarm.a_loadmm_ref_reg(list: tasmlist; fromsize,tosize: tcgsize; const ref: treference; reg: tregister; shuffle: pmmshuffle);
  2313. var
  2314. intreg,
  2315. tmpmmreg : tregister;
  2316. reg64 : tregister64;
  2317. op : tasmop;
  2318. begin
  2319. if assigned(shuffle) and
  2320. not(shufflescalar(shuffle)) then
  2321. internalerror(2009112413);
  2322. case fromsize of
  2323. OS_32,OS_S32:
  2324. begin
  2325. fromsize:=OS_F32;
  2326. { since we are loading an integer, no conversion may be required }
  2327. if (fromsize<>tosize) then
  2328. internalerror(2009112801);
  2329. end;
  2330. OS_64,OS_S64:
  2331. begin
  2332. fromsize:=OS_F64;
  2333. { since we are loading an integer, no conversion may be required }
  2334. if (fromsize<>tosize) then
  2335. internalerror(2009112901);
  2336. end;
  2337. end;
  2338. if (fromsize<>tosize) then
  2339. tmpmmreg:=getmmregister(list,fromsize)
  2340. else
  2341. tmpmmreg:=reg;
  2342. if (ref.alignment in [1,2]) then
  2343. begin
  2344. case fromsize of
  2345. OS_F32:
  2346. begin
  2347. intreg:=getintregister(list,OS_32);
  2348. a_load_ref_reg(list,OS_32,OS_32,ref,intreg);
  2349. a_loadmm_intreg_reg(list,OS_32,OS_F32,intreg,tmpmmreg,mms_movescalar);
  2350. end;
  2351. OS_F64:
  2352. begin
  2353. reg64.reglo:=getintregister(list,OS_32);
  2354. reg64.reghi:=getintregister(list,OS_32);
  2355. cg64.a_load64_ref_reg(list,ref,reg64);
  2356. cg64.a_loadmm_intreg64_reg(list,OS_F64,reg64,tmpmmreg);
  2357. end;
  2358. else
  2359. internalerror(2009112412);
  2360. end;
  2361. end
  2362. else
  2363. begin
  2364. case fromsize of
  2365. OS_F32:
  2366. op:=A_FLDS;
  2367. OS_F64:
  2368. op:=A_FLDD;
  2369. else
  2370. internalerror(2009112415);
  2371. end;
  2372. handle_load_store(list,op,PF_None,tmpmmreg,ref);
  2373. end;
  2374. if (tmpmmreg<>reg) then
  2375. a_loadmm_reg_reg(list,fromsize,tosize,tmpmmreg,reg,shuffle);
  2376. end;
  2377. procedure tcgarm.a_loadmm_reg_ref(list: tasmlist; fromsize,tosize: tcgsize; reg: tregister; const ref: treference; shuffle: pmmshuffle);
  2378. var
  2379. intreg,
  2380. tmpmmreg : tregister;
  2381. reg64 : tregister64;
  2382. op : tasmop;
  2383. begin
  2384. if assigned(shuffle) and
  2385. not(shufflescalar(shuffle)) then
  2386. internalerror(2009112416);
  2387. case tosize of
  2388. OS_32,OS_S32:
  2389. begin
  2390. tosize:=OS_F32;
  2391. { since we are loading an integer, no conversion may be required }
  2392. if (fromsize<>tosize) then
  2393. internalerror(2009112801);
  2394. end;
  2395. OS_64,OS_S64:
  2396. begin
  2397. tosize:=OS_F64;
  2398. { since we are loading an integer, no conversion may be required }
  2399. if (fromsize<>tosize) then
  2400. internalerror(2009112901);
  2401. end;
  2402. end;
  2403. if (fromsize<>tosize) then
  2404. begin
  2405. tmpmmreg:=getmmregister(list,tosize);
  2406. a_loadmm_reg_reg(list,fromsize,tosize,reg,tmpmmreg,shuffle);
  2407. end
  2408. else
  2409. tmpmmreg:=reg;
  2410. if (ref.alignment in [1,2]) then
  2411. begin
  2412. case tosize of
  2413. OS_F32:
  2414. begin
  2415. intreg:=getintregister(list,OS_32);
  2416. a_loadmm_reg_intreg(list,OS_F32,OS_32,tmpmmreg,intreg,shuffle);
  2417. a_load_reg_ref(list,OS_32,OS_32,intreg,ref);
  2418. end;
  2419. OS_F64:
  2420. begin
  2421. reg64.reglo:=getintregister(list,OS_32);
  2422. reg64.reghi:=getintregister(list,OS_32);
  2423. cg64.a_loadmm_reg_intreg64(list,OS_F64,tmpmmreg,reg64);
  2424. cg64.a_load64_reg_ref(list,reg64,ref);
  2425. end;
  2426. else
  2427. internalerror(2009112417);
  2428. end;
  2429. end
  2430. else
  2431. begin
  2432. case fromsize of
  2433. OS_F32:
  2434. op:=A_FSTS;
  2435. OS_F64:
  2436. op:=A_FSTD;
  2437. else
  2438. internalerror(2009112418);
  2439. end;
  2440. handle_load_store(list,op,PF_None,tmpmmreg,ref);
  2441. end;
  2442. end;
  2443. procedure tcgarm.a_loadmm_intreg_reg(list: TAsmList; fromsize, tosize : tcgsize; intreg, mmreg: tregister; shuffle: pmmshuffle);
  2444. begin
  2445. { this code can only be used to transfer raw data, not to perform
  2446. conversions }
  2447. if (tosize<>OS_F32) then
  2448. internalerror(2009112419);
  2449. if not(fromsize in [OS_32,OS_S32]) then
  2450. internalerror(2009112420);
  2451. if assigned(shuffle) and
  2452. not shufflescalar(shuffle) then
  2453. internalerror(2009112516);
  2454. list.concat(taicpu.op_reg_reg(A_FMSR,mmreg,intreg));
  2455. end;
  2456. procedure tcgarm.a_loadmm_reg_intreg(list: TAsmList; fromsize, tosize : tcgsize; mmreg, intreg: tregister;shuffle : pmmshuffle);
  2457. begin
  2458. { this code can only be used to transfer raw data, not to perform
  2459. conversions }
  2460. if (fromsize<>OS_F32) then
  2461. internalerror(2009112430);
  2462. if not(tosize in [OS_32,OS_S32]) then
  2463. internalerror(2009112420);
  2464. if assigned(shuffle) and
  2465. not shufflescalar(shuffle) then
  2466. internalerror(2009112514);
  2467. list.concat(taicpu.op_reg_reg(A_FMRS,intreg,mmreg));
  2468. end;
  { Register/register operation on mm registers.  The vfp has no xor or any
    other logical operation, but this routine is used to initialise global
    mm regvars, so the only supported form is "reg xor reg", which is
    emitted as a load of zero.  Every other operation is an internal error. }
  procedure tcgarm.a_opmm_reg_reg(list: tasmlist; op: topcg; size: tcgsize; src, dst: tregister; shuffle: pmmshuffle);
    var
      zeroreg: tregister;
    begin
      if op<>OP_XOR then
        begin
          internalerror(2009112906);
          exit;
        end;
      { only the clearing idiom is handled: same register on both sides,
        register size equal to the operation size, and no shuffle }
      if not((src=dst) and
             (reg_cgsize(src)=size) and
             not assigned(shuffle)) then
        internalerror(2009112907);
      { materialise 0 in an integer register and transfer it to the mm reg }
      zeroreg:=getintregister(list,OS_32);
      a_load_const_reg(list,OS_32,0,zeroreg);
      if size=OS_F32 then
        list.concat(taicpu.op_reg_reg(A_FMSR,dst,zeroreg))
      else if size=OS_F64 then
        { both halves of the double come from the same zero register }
        list.concat(taicpu.op_reg_reg_reg(A_FMDRR,dst,zeroreg,zeroreg))
      else
        internalerror(2009112908);
    end;
  { Emits the interface wrapper labelname for the method procdef: the self
    value is adjusted by ioffset (via g_adjust_self_value) and control is
    then transferred to the real method, either through its VMT slot
    (virtual methods) or with a direct branch to its mangled name. }
  procedure tcgarm.g_intf_wrapper(list: TAsmList; procdef: tprocdef; const labelname: string; ioffset: longint);

    { r12 := pointer stored at the address held in r0 }
    procedure fetch_vmt_into_r12;
      var
        selfref : treference;
      begin
        reference_reset_base(selfref,NR_R0,0,sizeof(pint));
        cg.a_load_ref_reg(list,OS_ADDR,OS_ADDR,selfref,NR_R12);
      end;

    { r12 := method address read from the VMT slot of procdef (r12 holds
      the VMT on entry), then pc := r12 }
    procedure jump_through_vmt_slot;
      var
        slotref : treference;
      begin
        if procdef.extnumber=$ffff then
          Internalerror(200006139);
        { load the method address from vmtoffs(r12) }
        reference_reset_base(slotref,NR_R12,tobjectdef(procdef.struct).vmtmethodoffset(procdef.extnumber),sizeof(pint));
        cg.a_load_ref_reg(list,OS_ADDR,OS_ADDR,slotref,NR_R12);
        list.concat(taicpu.op_reg_reg(A_MOV,NR_PC,NR_R12));
      end;

    var
      export_label : boolean;
    begin
      { sanity checks on the kind of routine a wrapper can be built for }
      if not(procdef.proctypeoption in [potype_function,potype_procedure]) then
        Internalerror(200006137);
      if not assigned(procdef.struct) or
         (procdef.procoptions*[po_classmethod, po_staticmethod,
           po_methodpointer, po_interrupt, po_iocheck]<>[]) then
        Internalerror(200006138);
      if procdef.owner.symtabletype<>ObjectSymtable then
        Internalerror(200109191);

      { the wrapper label is global when not compiling a unit, when
        smartlinking, or when the owning definition lives in a global
        symtable }
      export_label:=(not current_module.is_unit) or
                    create_smartlink or
                    (procdef.owner.defowner.owner.symtabletype=globalsymtable);

      if export_label then
        list.concat(Tai_symbol.Createname_global(labelname,AT_FUNCTION,0))
      else
        list.concat(Tai_symbol.Createname(labelname,AT_FUNCTION,0));

      { the wrapper might need aktlocaldata for the additional data to
        load the constant }
      current_procinfo:=cprocinfo.create(nil);

      { set param1 interface to self }
      g_adjust_self_value(list,procdef,ioffset);

      if (po_virtualmethod in procdef.procoptions) and
         not is_objectpascal_helper(procdef.struct) then
        begin
          { case 4: dispatch through the VMT }
          fetch_vmt_into_r12;
          jump_through_vmt_slot;
        end
      else
        { case 0: direct branch to the method }
        list.concat(taicpu.op_sym(A_B,current_asmdata.RefAsmSymbol(procdef.mangledname)));

      { append the local data collected for the wrapper and drop the
        temporary procinfo again }
      list.concatlist(current_procinfo.aktlocaldata);
      current_procinfo.Free;
      current_procinfo:=nil;

      list.concat(Tai_symbol_end.Createname(labelname));
    end;
  { After an operation that can produce bits beyond an 8 or 16 bit result
    (mul, shl, add, sub, neg), re-load dst into itself as a 32 -> size
    move; for every other op/size combination nothing is emitted. }
  procedure tcgarm.maybeadjustresult(list: TAsmList; op: TOpCg; size: tcgsize; dst: tregister);
    const
      widening_ops = [OP_MUL,OP_SHL,OP_ADD,OP_SUB,OP_NEG];
      narrow_sizes = [OS_8,OS_S8,OS_16,OS_S16];
    begin
      if not(op in widening_ops) then
        exit;
      if not(size in narrow_sizes) then
        exit;
      a_load_reg_reg(list,OS_32,size,dst,dst);
    end;
  { Returns the symbol of the Darwin call stub "L<s>$stub" for the routine
    named s.  If the symbol is not known yet, the stub and its lazy pointer
    are emitted into the al_imports list first.  Only non-PIC code is
    supported; with cs_create_pic set an internal error is raised.
    weak: when true, s is additionally registered as a weak symbol. }
  function tcgarm.get_darwin_call_stub(const s: string; weak: boolean): tasmsymbol;
    var
      stubname: string;
      ptrsym: tasmsymbol;
      href: treference;
      stublist: TAsmList;
    begin
      stubname:='L'+s+'$stub';
      result:=current_asmdata.getasmsymbol(stubname);
      { already known: reuse it }
      if assigned(result) then
        exit;

      { create the imports list on first use }
      if current_asmdata.asmlists[al_imports]=nil then
        current_asmdata.asmlists[al_imports]:=TAsmList.create;
      stublist:=current_asmdata.asmlists[al_imports];

      new_section(stublist,sec_stub,'',4);
      result:=current_asmdata.RefAsmSymbol(stubname);
      stublist.concat(Tai_symbol.Create(result,0));
      { register as a weak symbol if necessary }
      if weak then
        current_asmdata.weakrefasmsymbol(s);
      stublist.concat(tai_directive.create(asd_indirect_symbol,s));

      if cs_create_pic in current_settings.moduleswitches then
        internalerror(2008100401)
      else
        begin
          { r12 := address of the "$slp" literal; pc := [r12] }
          ptrsym:=current_asmdata.RefAsmSymbol('L'+s+'$slp');
          reference_reset_symbol(href,ptrsym,0,sizeof(pint));
          href.refaddr:=addr_full;
          stublist.concat(taicpu.op_reg_ref(A_LDR,NR_R12,href));
          reference_reset_base(href,NR_R12,0,sizeof(pint));
          stublist.concat(taicpu.op_reg_ref(A_LDR,NR_R15,href));
          { the "$slp" literal itself holds the address of the lazy pointer }
          stublist.concat(Tai_symbol.Create(ptrsym,0));
          ptrsym:=current_asmdata.RefAsmSymbol('L'+s+'$lazy_ptr');
          stublist.concat(tai_const.create_sym(ptrsym));
        end;

      { lazy pointer, initially referring to dyld_stub_binding_helper }
      new_section(stublist,sec_data_lazy,'',sizeof(pint));
      stublist.concat(Tai_symbol.Create(ptrsym,0));
      stublist.concat(tai_directive.create(asd_indirect_symbol,s));
      stublist.concat(tai_const.createname('dyld_stub_binding_helper',0));
    end;
  { 64-bit register/register operation regdst := regdst <op> regsrc.
    NEG and NOT are unary and handled here directly (regdst := op regsrc);
    every other operation is forwarded to the three-operand variant. }
  procedure tcg64farm.a_op64_reg_reg(list : TAsmList;op:TOpCG;size : tcgsize;regsrc,regdst : tregister64);
    begin
      if op=OP_NEG then
        begin
          { 0 - src over both halves: RSBS on the low word sets the flags
            that RSC on the high word consumes }
          cg.a_reg_alloc(list,NR_DEFAULTFLAGS);
          list.concat(setoppostfix(taicpu.op_reg_reg_const(A_RSB,regdst.reglo,regsrc.reglo,0),PF_S));
          list.concat(taicpu.op_reg_reg_const(A_RSC,regdst.reghi,regsrc.reghi,0));
          cg.a_reg_dealloc(list,NR_DEFAULTFLAGS);
        end
      else if op=OP_NOT then
        begin
          { complement each 32-bit half on its own }
          cg.a_op_reg_reg(list,OP_NOT,OS_INT,regsrc.reglo,regdst.reglo);
          cg.a_op_reg_reg(list,OP_NOT,OS_INT,regsrc.reghi,regdst.reghi);
        end
      else
        a_op64_reg_reg_reg(list,op,size,regsrc,regdst,regdst);
    end;
  { 64-bit operation reg := reg <op> value; simply the three-operand form
    with reg used as both source and destination. }
  procedure tcg64farm.a_op64_const_reg(list : TAsmList;op:TOpCG;size : tcgsize;value : int64;reg : tregister64);
    begin
      a_op64_const_reg_reg(list,op,size,value,reg,reg);
    end;
  { 64-bit operation regdst := regsrc <op> value without an overflow result:
    calls the checkoverflow variant with setflags=false and discards the
    overflow location it reports. }
  procedure tcg64farm.a_op64_const_reg_reg(list: TAsmList;op:TOpCG;size : tcgsize;value : int64;regsrc,regdst : tregister64);
    var
      unused_ovloc : tlocation;
    begin
      a_op64_const_reg_reg_checkoverflow(list,op,size,value,regsrc,regdst,false,unused_ovloc);
    end;
  { 64-bit three-register operation without an overflow result: calls the
    checkoverflow variant with setflags=false and discards the overflow
    location it reports. }
  procedure tcg64farm.a_op64_reg_reg_reg(list: TAsmList;op:TOpCG;size : tcgsize;regsrc1,regsrc2,regdst : tregister64);
    var
      unused_ovloc : tlocation;
    begin
      a_op64_reg_reg_reg_checkoverflow(list,op,size,regsrc1,regsrc2,regdst,false,unused_ovloc);
    end;
  { Transfers the raw 64 bits held in the integer register pair intreg
    (reglo, reghi) into the mm register mmreg with one FMDRR.  Only
    mmsize=OS_F64 is accepted; no value conversion takes place. }
  procedure tcg64farm.a_loadmm_intreg64_reg(list: TAsmList; mmsize: tcgsize; intreg: tregister64; mmreg: tregister);
    begin
      { raw data transfer only, so the mm side has to be a double }
      if not(mmsize=OS_F64) then
        internalerror(2009112405);
      list.concat(taicpu.op_reg_reg_reg(A_FMDRR,mmreg,intreg.reglo,intreg.reghi));
    end;
  { Transfers the raw 64 bits held in the mm register mmreg into the integer
    register pair intreg (reglo, reghi) with one FMRRD.  Only
    mmsize=OS_F64 is accepted; no value conversion takes place. }
  procedure tcg64farm.a_loadmm_reg_intreg64(list: TAsmList; mmsize: tcgsize; mmreg: tregister; intreg: tregister64);
    begin
      { raw data transfer only, so the mm side has to be a double }
      if not(mmsize=OS_F64) then
        internalerror(2009112406);
      list.concat(taicpu.op_reg_reg_reg(A_FMRRD,intreg.reglo,intreg.reghi,mmreg));
    end;
  { Emits the 64-bit operation regdst := regsrc <op> value on 32-bit
    register pairs.

    op       : OP_ADD, OP_SUB, OP_AND, OP_OR or OP_XOR; OP_NEG/OP_NOT and
               anything else raise an internal error.
    setflags : request that the final instruction also sets the flags; the
               same is done when tcgarm(cg).cgsetflags is set.
    ovloc    : set to LOC_VOID, except for a flag-setting ADD/SUB with
               size=OS_64, where it becomes LOC_FLAGS with F_CS (add) or
               F_CC (sub).

    When the flags are not requested, ADD/SUB still use them internally to
    pass the carry from the low to the high word; in that case the flags
    register is released again before returning. }
  procedure tcg64farm.a_op64_const_reg_reg_checkoverflow(list: TAsmList;op:TOpCG;size : tcgsize;value : int64;regsrc,regdst : tregister64;setflags : boolean;var ovloc : tlocation);
    var
      tmpreg : tregister;
      b : byte;
    begin
      ovloc.loc:=LOC_VOID;
      case op of
        OP_NEG,
        OP_NOT :
          internalerror(200306017);
      end;
      if (setflags or tcgarm(cg).cgsetflags) and (op in [OP_ADD,OP_SUB]) then
        begin
          { flag-setting variant: both instructions carry the S postfix and
            the flags stay allocated for the caller }
          case op of
            OP_ADD:
              begin
                { low word: immediate form if the constant can be encoded,
                  otherwise via a temporary register }
                if is_shifter_const(lo(value),b) then
                  begin
                    cg.a_reg_alloc(list,NR_DEFAULTFLAGS);
                    list.concat(setoppostfix(taicpu.op_reg_reg_const(A_ADD,regdst.reglo,regsrc.reglo,lo(value)),PF_S))
                  end
                else
                  begin
                    tmpreg:=cg.getintregister(list,OS_32);
                    cg.a_load_const_reg(list,OS_32,lo(value),tmpreg);
                    cg.a_reg_alloc(list,NR_DEFAULTFLAGS);
                    list.concat(setoppostfix(taicpu.op_reg_reg_reg(A_ADD,regdst.reglo,regsrc.reglo,tmpreg),PF_S));
                  end;

                { high word: add with carry }
                if is_shifter_const(hi(value),b) then
                  list.concat(setoppostfix(taicpu.op_reg_reg_const(A_ADC,regdst.reghi,regsrc.reghi,hi(value)),PF_S))
                else
                  begin
                    tmpreg:=cg.getintregister(list,OS_32);
                    cg.a_load_const_reg(list,OS_32,hi(value),tmpreg);
                    list.concat(setoppostfix(taicpu.op_reg_reg_reg(A_ADC,regdst.reghi,regsrc.reghi,tmpreg),PF_S));
                  end;
              end;
            OP_SUB:
              begin
                if is_shifter_const(lo(value),b) then
                  begin
                    cg.a_reg_alloc(list,NR_DEFAULTFLAGS);
                    list.concat(setoppostfix(taicpu.op_reg_reg_const(A_SUB,regdst.reglo,regsrc.reglo,lo(value)),PF_S))
                  end
                else
                  begin
                    tmpreg:=cg.getintregister(list,OS_32);
                    cg.a_load_const_reg(list,OS_32,lo(value),tmpreg);
                    cg.a_reg_alloc(list,NR_DEFAULTFLAGS);
                    list.concat(setoppostfix(taicpu.op_reg_reg_reg(A_SUB,regdst.reglo,regsrc.reglo,tmpreg),PF_S));
                  end;

                { high word: subtract with carry }
                if is_shifter_const(hi(value),b) then
                  list.concat(setoppostfix(taicpu.op_reg_reg_const(A_SBC,regdst.reghi,regsrc.reghi,aint(hi(value))),PF_S))
                else
                  begin
                    tmpreg:=cg.getintregister(list,OS_32);
                    cg.a_load_const_reg(list,OS_32,hi(value),tmpreg);
                    list.concat(setoppostfix(taicpu.op_reg_reg_reg(A_SBC,regdst.reghi,regsrc.reghi,tmpreg),PF_S));
                  end;
              end;
            else
              internalerror(200502131);
          end;
          if size=OS_64 then
            begin
              { the ARM has a weird opinion on how flags for SUB/ADD are
                handled: the overflow condition differs between the two }
              ovloc.loc:=LOC_FLAGS;
              case op of
                OP_ADD:
                  ovloc.resflags:=F_CS;
                OP_SUB:
                  ovloc.resflags:=F_CC;
              end;
            end;
        end
      else
        begin
          case op of
            OP_AND,OP_OR,OP_XOR:
              begin
                { logical operations work on each half independently }
                cg.a_op_const_reg_reg(list,op,OS_32,aint(lo(value)),regsrc.reglo,regdst.reglo);
                cg.a_op_const_reg_reg(list,op,OS_32,aint(hi(value)),regsrc.reghi,regdst.reghi);
              end;
            OP_ADD:
              begin
                if is_shifter_const(aint(lo(value)),b) then
                  begin
                    cg.a_reg_alloc(list,NR_DEFAULTFLAGS);
                    list.concat(setoppostfix(taicpu.op_reg_reg_const(A_ADD,regdst.reglo,regsrc.reglo,aint(lo(value))),PF_S))
                  end
                else
                  begin
                    tmpreg:=cg.getintregister(list,OS_32);
                    cg.a_load_const_reg(list,OS_32,aint(lo(value)),tmpreg);
                    cg.a_reg_alloc(list,NR_DEFAULTFLAGS);
                    list.concat(setoppostfix(taicpu.op_reg_reg_reg(A_ADD,regdst.reglo,regsrc.reglo,tmpreg),PF_S));
                  end;

                if is_shifter_const(aint(hi(value)),b) then
                  list.concat(taicpu.op_reg_reg_const(A_ADC,regdst.reghi,regsrc.reghi,aint(hi(value))))
                else
                  begin
                    tmpreg:=cg.getintregister(list,OS_32);
                    cg.a_load_const_reg(list,OS_32,aint(hi(value)),tmpreg);
                    list.concat(taicpu.op_reg_reg_reg(A_ADC,regdst.reghi,regsrc.reghi,tmpreg));
                  end;
                { the carry has been consumed and no flag result is
                  reported, so release the flags again }
                cg.a_reg_dealloc(list,NR_DEFAULTFLAGS);
              end;
            OP_SUB:
              begin
                if is_shifter_const(aint(lo(value)),b) then
                  begin
                    cg.a_reg_alloc(list,NR_DEFAULTFLAGS);
                    list.concat(setoppostfix(taicpu.op_reg_reg_const(A_SUB,regdst.reglo,regsrc.reglo,aint(lo(value))),PF_S))
                  end
                else
                  begin
                    tmpreg:=cg.getintregister(list,OS_32);
                    cg.a_load_const_reg(list,OS_32,aint(lo(value)),tmpreg);
                    cg.a_reg_alloc(list,NR_DEFAULTFLAGS);
                    list.concat(setoppostfix(taicpu.op_reg_reg_reg(A_SUB,regdst.reglo,regsrc.reglo,tmpreg),PF_S));
                  end;

                if is_shifter_const(aint(hi(value)),b) then
                  list.concat(taicpu.op_reg_reg_const(A_SBC,regdst.reghi,regsrc.reghi,aint(hi(value))))
                else
                  begin
                    tmpreg:=cg.getintregister(list,OS_32);
                    cg.a_load_const_reg(list,OS_32,hi(value),tmpreg);
                    list.concat(taicpu.op_reg_reg_reg(A_SBC,regdst.reghi,regsrc.reghi,tmpreg));
                  end;
                { the carry has been consumed and no flag result is
                  reported, so release the flags again }
                cg.a_reg_dealloc(list,NR_DEFAULTFLAGS);
              end;
            else
              internalerror(2003083101);
          end;
        end;
    end;
  { Emits the 64-bit operation regdst := regsrc2 <op> regsrc1 on 32-bit
    register pairs.

    op       : OP_ADD, OP_SUB, OP_AND, OP_OR or OP_XOR; OP_NEG/OP_NOT and
               anything else raise an internal error.
    setflags : request that the final instruction also sets the flags; the
               same is done when tcgarm(cg).cgsetflags is set.
    ovloc    : set to LOC_VOID, except for a flag-setting ADD/SUB with
               size=OS_64, where it becomes LOC_FLAGS with F_CS (add) or
               F_CC (sub). }
  procedure tcg64farm.a_op64_reg_reg_reg_checkoverflow(list: TAsmList;op:TOpCG;size : tcgsize;regsrc1,regsrc2,regdst : tregister64;setflags : boolean;var ovloc : tlocation);
    var
      keepflags : boolean;
    begin
      ovloc.loc:=LOC_VOID;
      if op in [OP_NEG,OP_NOT] then
        internalerror(200306017);

      keepflags:=(setflags or tcgarm(cg).cgsetflags) and (op in [OP_ADD,OP_SUB]);
      if keepflags then
        begin
          { flag-setting variant: both halves use the S postfix and the
            flags stay allocated for the caller }
          cg.a_reg_alloc(list,NR_DEFAULTFLAGS);
          if op=OP_ADD then
            begin
              list.concat(setoppostfix(taicpu.op_reg_reg_reg(A_ADD,regdst.reglo,regsrc1.reglo,regsrc2.reglo),PF_S));
              list.concat(setoppostfix(taicpu.op_reg_reg_reg(A_ADC,regdst.reghi,regsrc1.reghi,regsrc2.reghi),PF_S));
            end
          else
            begin
              { op is OP_SUB here: regsrc2 - regsrc1 }
              list.concat(setoppostfix(taicpu.op_reg_reg_reg(A_SUB,regdst.reglo,regsrc2.reglo,regsrc1.reglo),PF_S));
              list.concat(setoppostfix(taicpu.op_reg_reg_reg(A_SBC,regdst.reghi,regsrc2.reghi,regsrc1.reghi),PF_S));
            end;
          if size=OS_64 then
            begin
              { the overflow condition differs between add and sub on ARM }
              ovloc.loc:=LOC_FLAGS;
              if op=OP_ADD then
                ovloc.resflags:=F_CS
              else
                ovloc.resflags:=F_CC;
            end;
        end
      else
        case op of
          OP_AND,OP_OR,OP_XOR:
            begin
              { logical operations work on each half independently }
              cg.a_op_reg_reg_reg(list,op,OS_32,regsrc1.reglo,regsrc2.reglo,regdst.reglo);
              cg.a_op_reg_reg_reg(list,op,OS_32,regsrc1.reghi,regsrc2.reghi,regdst.reghi);
            end;
          OP_ADD:
            begin
              { the flags only carry from the low to the high word and are
                released right afterwards }
              cg.a_reg_alloc(list,NR_DEFAULTFLAGS);
              list.concat(setoppostfix(taicpu.op_reg_reg_reg(A_ADD,regdst.reglo,regsrc1.reglo,regsrc2.reglo),PF_S));
              list.concat(taicpu.op_reg_reg_reg(A_ADC,regdst.reghi,regsrc1.reghi,regsrc2.reghi));
              cg.a_reg_dealloc(list,NR_DEFAULTFLAGS);
            end;
          OP_SUB:
            begin
              cg.a_reg_alloc(list,NR_DEFAULTFLAGS);
              list.concat(setoppostfix(taicpu.op_reg_reg_reg(A_SUB,regdst.reglo,regsrc2.reglo,regsrc1.reglo),PF_S));
              list.concat(taicpu.op_reg_reg_reg(A_SBC,regdst.reghi,regsrc2.reghi,regsrc1.reghi));
              cg.a_reg_dealloc(list,NR_DEFAULTFLAGS);
            end;
          else
            internalerror(2003083101);
        end;
    end;
  { Creates the register allocators used for Thumb-2 code: one each for
    the integer, fpu and mm register classes, after running the inherited
    initialisation. }
  procedure Tthumb2cgarm.init_register_allocators;
    begin
      inherited init_register_allocators;
      { currently, we save R14 always, so we can use it }
      if target_info.system=system_arm_darwin then
        { r9 is not available on Darwin according to the llvm code generator }
        rg[R_INTREGISTER]:=trgintcputhumb2.create(R_INTREGISTER,R_SUBWHOLE,
            [RS_R0,RS_R1,RS_R2,RS_R3,RS_R4,RS_R5,RS_R6,RS_R7,RS_R8,
             RS_R10,RS_R12,RS_R14],first_int_imreg,[])
      else
        rg[R_INTREGISTER]:=trgintcputhumb2.create(R_INTREGISTER,R_SUBWHOLE,
            [RS_R0,RS_R1,RS_R2,RS_R3,RS_R4,RS_R5,RS_R6,RS_R7,RS_R8,
             RS_R9,RS_R10,RS_R12,RS_R14],first_int_imreg,[]);

      rg[R_FPUREGISTER]:=trgcpu.create(R_FPUREGISTER,R_SUBNONE,
          [RS_F0,RS_F1,RS_F2,RS_F3,RS_F4,RS_F5,RS_F6,RS_F7],first_fpu_imreg,[]);

      if current_settings.fputype<>fpu_fpv4_s16 then
        { NOTE(review): this set mixes RS_R2..RS_R4 with the RS_S*
          constants - confirm that this is intended }
        rg[R_MMREGISTER]:=trgcpu.create(R_MMREGISTER,R_SUBNONE,
            [RS_S0,RS_S1,RS_R2,RS_R3,RS_R4,RS_S31],first_mm_imreg,[])
      else
        { fpv4_s16: d0..d15 are handed to the allocator }
        rg[R_MMREGISTER]:=trgcpu.create(R_MMREGISTER,R_SUBFD,
            [RS_D0,RS_D1,RS_D2,RS_D3,RS_D4,RS_D5,RS_D6,RS_D7,
             RS_D8,RS_D9,RS_D10,RS_D11,RS_D12,RS_D13,RS_D14,RS_D15
            ],first_mm_imreg,[]);
    end;
  { Frees the integer, fpu and mm register allocators and then runs the
    inherited clean-up. }
  procedure Tthumb2cgarm.done_register_allocators;
    begin
      { release the three allocators in the order int, fpu, mm }
      rg[R_INTREGISTER].Free;
      rg[R_FPUREGISTER].Free;
      rg[R_MMREGISTER].Free;
      inherited done_register_allocators;
    end;
  { Emits an indirect call through reg (BLX reg) and records in the current
    procedure info that the procedure performs a call. }
  procedure Tthumb2cgarm.a_call_reg(list : TAsmList;reg: tregister);
    begin
      list.concat(taicpu.op_reg(A_BLX, reg));
      { The check below is disabled: the compiler does not properly set this
        flag anymore in pass 1, and for now we only need it after pass 2
        (I hope) (JM)

          if not(pi_do_call in current_procinfo.flags) then
            internalerror(2003060703);

        so the flag is simply set here instead. }
      include(current_procinfo.flags,pi_do_call);
    end;
  { Loads the constant a into reg.  Only 8, 16 and 32 bit integer sizes are
    accepted (internal error otherwise).  The cheapest available form is
    chosen in this order:
      1. MOV  reg,#a        if a is a valid Thumb immediate
      2. MVN  reg,#not(a)   if the complement is a valid Thumb immediate
      3. MOVW reg,#a        if a fits in the low 16 bits
      4. LDR  reg,<label>   pc-relative load of a 32-bit literal that is
                            appended to the local data of the current
                            procedure }
  procedure Tthumb2cgarm.a_load_const_reg(list : TAsmList; size: tcgsize; a : tcgint;reg : tregister);
    var
      l : tasmlabel;
      hr : treference;
    begin
      if not(size in [OS_8,OS_S8,OS_16,OS_S16,OS_32,OS_S32]) then
        internalerror(2002090902);
      if is_thumb_imm(a) then
        list.concat(taicpu.op_reg_const(A_MOV,reg,a))
      else if is_thumb_imm(not(a)) then
        list.concat(taicpu.op_reg_const(A_MVN,reg,not(a)))
      else if (a and $FFFF)=a then
        list.concat(taicpu.op_reg_const(A_MOVW,reg,a))
      else
        begin
          { place the value behind a fresh label in the local data and
            remember the label entry in the reference }
          reference_reset(hr,4);
          current_asmdata.getjumplabel(l);
          cg.a_label(current_procinfo.aktlocaldata,l);
          hr.symboldata:=current_procinfo.aktlocaldata.last;
          current_procinfo.aktlocaldata.concat(tai_const.Create_32bit(longint(a)));
          { pc-relative reference to that label }
          hr.symbol:=l;
          hr.base:=NR_PC;
          list.concat(taicpu.op_reg_ref(A_LDR,reg,hr));
        end;
    end;
  { Loads a value of size fromsize from the memory reference Ref into reg.
    If fromsize is not smaller than tosize, tosize is used as the load size.
    References whose alignment (1 or 2) is smaller than the load size are
    read piecewise (bytes or halfwords) and assembled with ORR/LSL, taking
    the target endianness into account; everything else is a single load
    through handle_load_store. }
  procedure Tthumb2cgarm.a_load_ref_reg(list : TAsmList; fromsize, tosize : tcgsize;const Ref : treference;reg : tregister);
    var
      ldrpostfix : toppostfix;
      partref : treference;
      partreg,addrreg : tregister;
      shiftop : tshifterop;
      step : integer;
    begin
      { never load more than the destination size asks for }
      if TCGSize2Size[FromSize]>=TCGSize2Size[ToSize] then
        FromSize:=ToSize;

      { postfix of the load instruction for each supported size }
      case FromSize of
        OS_8:
          ldrpostfix:=PF_B;
        OS_S8:
          ldrpostfix:=PF_SB;
        OS_16:
          ldrpostfix:=PF_H;
        OS_S16:
          ldrpostfix:=PF_SH;
        OS_32,
        OS_S32:
          ldrpostfix:=PF_None;
        else
          InternalError(200308297);
      end;

      if not((ref.alignment in [1,2]) and (ref.alignment<tcgsize2size[fromsize])) then
        { sufficiently aligned: one plain load }
        handle_load_store(list,A_LDR,ldrpostfix,reg,ref)
      else
        begin
          { direction in which the offset moves from the least significant
            part towards the most significant one }
          if target_info.endian=endian_big then
            step:=-1
          else
            step:=1;
          case FromSize of
            OS_16,OS_S16:
              begin
                { only complicated references need an extra loadaddr }
                if assigned(ref.symbol) or
                   (ref.index<>NR_NO) or
                   (ref.offset<-255) or
                   (ref.offset>4094) or
                   { sometimes the compiler reused registers }
                   (reg=ref.index) or
                   (reg=ref.base) then
                  begin
                    addrreg:=getintregister(list,OS_INT);
                    a_loadaddr_ref_reg(list,ref,addrreg);
                    reference_reset_base(partref,addrreg,0,ref.alignment);
                  end
                else
                  partref:=ref;

                { on big endian the low byte is the one at offset+1 }
                if target_info.endian=endian_big then
                  inc(partref.offset,1);
                shifterop_reset(shiftop);
                shiftop.shiftmode:=SM_LSL;
                shiftop.shiftimm:=8;
                partreg:=getintregister(list,OS_INT);
                { low byte first, then the high byte (sign extended for
                  OS_S16) shifted into place }
                a_internal_load_ref_reg(list,OS_8,OS_8,partref,reg);
                inc(partref.offset,step);
                if FromSize<>OS_16 then
                  a_internal_load_ref_reg(list,OS_S8,OS_S8,partref,partreg)
                else
                  a_internal_load_ref_reg(list,OS_8,OS_8,partref,partreg);
                list.concat(taicpu.op_reg_reg_reg_shifterop(A_ORR,reg,reg,partreg,shiftop));
              end;
            OS_32,OS_S32:
              begin
                partreg:=getintregister(list,OS_INT);

                { only complicated references need an extra loadaddr }
                if assigned(ref.symbol) or
                   (ref.index<>NR_NO) or
                   (ref.offset<-255) or
                   (ref.offset>4092) or
                   { sometimes the compiler reused registers }
                   (reg=ref.index) or
                   (reg=ref.base) then
                  begin
                    addrreg:=getintregister(list,OS_INT);
                    a_loadaddr_ref_reg(list,ref,addrreg);
                    reference_reset_base(partref,addrreg,0,ref.alignment);
                  end
                else
                  partref:=ref;

                shifterop_reset(shiftop);
                shiftop.shiftmode:=SM_LSL;
                if ref.alignment=2 then
                  begin
                    { two halfwords: low one first, high one shifted by 16 }
                    if target_info.endian=endian_big then
                      inc(partref.offset,2);
                    a_internal_load_ref_reg(list,OS_16,OS_16,partref,reg);
                    inc(partref.offset,step*2);
                    a_internal_load_ref_reg(list,OS_16,OS_16,partref,partreg);
                    shiftop.shiftimm:=16;
                    list.concat(taicpu.op_reg_reg_reg_shifterop(A_ORR,reg,reg,partreg,shiftop));
                  end
                else
                  begin
                    { four single bytes, merged at bit 0, 8, 16 and 24 }
                    if target_info.endian=endian_big then
                      inc(partref.offset,3);
                    a_internal_load_ref_reg(list,OS_8,OS_8,partref,reg);
                    inc(partref.offset,step);
                    a_internal_load_ref_reg(list,OS_8,OS_8,partref,partreg);
                    shiftop.shiftimm:=8;
                    list.concat(taicpu.op_reg_reg_reg_shifterop(A_ORR,reg,reg,partreg,shiftop));
                    inc(partref.offset,step);
                    a_internal_load_ref_reg(list,OS_8,OS_8,partref,partreg);
                    shiftop.shiftimm:=16;
                    list.concat(taicpu.op_reg_reg_reg_shifterop(A_ORR,reg,reg,partreg,shiftop));
                    inc(partref.offset,step);
                    a_internal_load_ref_reg(list,OS_8,OS_8,partref,partreg);
                    shiftop.shiftimm:=24;
                    list.concat(taicpu.op_reg_reg_reg_shifterop(A_ORR,reg,reg,partreg,shiftop));
                  end;
              end
            else
              handle_load_store(list,A_LDR,ldrpostfix,reg,ref);
          end;
        end;

      { a signed byte loaded for an unsigned 16 bit destination gets an
        additional 16 -> 32 bit register move }
      if (fromsize=OS_S8) and (tosize=OS_16) then
        a_load_reg_reg(list,OS_16,OS_32,reg,reg);
    end;
  { Emits dst := src <op> a for Thumb-2.

    op       : operation; OP_NEG, OP_NOT, OP_DIV and OP_IDIV with an
               encodable constant raise an internal error.
    a        : constant operand.  For ADD/SUB whose negated constant is a
               shifter constant, the operation is flipped (add <-> sub) and
               the negated constant is used instead.
    setflags : request that the instruction also sets the flags; the same is
               done when cgsetflags is set.
    ovloc    : LOC_VOID by default.  On the direct ADD/SUB path with flags
               requested and an unsigned size it becomes LOC_FLAGS with F_CS
               (add) or F_CC (sub); the register fallback lets
               a_op_reg_reg_reg_checkoverflow fill it in.

    Constants that cannot be encoded are handled by a number of special
    cases (multiplication by 0, 1, -1, powers of two and their neighbours,
    AND with all-zero/all-one or Thumb immediates) before falling back to
    loading the constant into a temporary register.  The result is finally
    passed through maybeadjustresult. }
  procedure Tthumb2cgarm.a_op_const_reg_reg_checkoverflow(list: TAsmList; op: TOpCg; size: tcgsize; a: tcgint; src, dst: tregister;setflags : boolean;var ovloc : tlocation);
    var
      shift : byte;
      tmpreg : tregister;
      so : tshifterop;
      l1 : longint;
    begin
      ovloc.loc:=LOC_VOID;
      { prefer the flipped operation when only -a can be encoded }
      if {$ifopt R+}(a<>-2147483648) and{$endif} is_shifter_const(-a,shift) then
        case op of
          OP_ADD:
            begin
              op:=OP_SUB;
              a:=aint(dword(-a));
            end;
          OP_SUB:
            begin
              op:=OP_ADD;
              a:=aint(dword(-a));
            end
        end;

      if is_shifter_const(a,shift) and not(op in [OP_IMUL,OP_MUL]) then
        case op of
          OP_NEG,OP_NOT,
          OP_DIV,OP_IDIV:
            internalerror(200308281);
          OP_SHL:
            begin
              if a>32 then
                internalerror(200308294);
              if a<>0 then
                begin
                  shifterop_reset(so);
                  so.shiftmode:=SM_LSL;
                  so.shiftimm:=a;
                  list.concat(taicpu.op_reg_reg_shifterop(A_MOV,dst,src,so));
                end
              else
                list.concat(taicpu.op_reg_reg(A_MOV,dst,src));
            end;
          OP_ROL:
            begin
              if a>32 then
                internalerror(200308294);
              if a<>0 then
                begin
                  { rotate left by a = rotate right by 32-a }
                  shifterop_reset(so);
                  so.shiftmode:=SM_ROR;
                  so.shiftimm:=32-a;
                  list.concat(taicpu.op_reg_reg_shifterop(A_MOV,dst,src,so));
                end
              else
                list.concat(taicpu.op_reg_reg(A_MOV,dst,src));
            end;
          OP_ROR:
            begin
              if a>32 then
                internalerror(200308294);
              if a<>0 then
                begin
                  shifterop_reset(so);
                  so.shiftmode:=SM_ROR;
                  so.shiftimm:=a;
                  list.concat(taicpu.op_reg_reg_shifterop(A_MOV,dst,src,so));
                end
              else
                list.concat(taicpu.op_reg_reg(A_MOV,dst,src));
            end;
          OP_SHR:
            begin
              if a>32 then
                internalerror(200308292);
              shifterop_reset(so);
              if a<>0 then
                begin
                  so.shiftmode:=SM_LSR;
                  so.shiftimm:=a;
                  list.concat(taicpu.op_reg_reg_shifterop(A_MOV,dst,src,so));
                end
              else
                list.concat(taicpu.op_reg_reg(A_MOV,dst,src));
            end;
          OP_SAR:
            begin
              if a>32 then
                internalerror(200308295);
              if a<>0 then
                begin
                  shifterop_reset(so);
                  so.shiftmode:=SM_ASR;
                  so.shiftimm:=a;
                  list.concat(taicpu.op_reg_reg_shifterop(A_MOV,dst,src,so));
                end
              else
                list.concat(taicpu.op_reg_reg(A_MOV,dst,src));
            end;
          else
            { ADD/SUB constants outside 0..4095 go through a temporary
              register, everything else uses the immediate form }
            if (op in [OP_SUB, OP_ADD]) and
               ((a < 0) or
                (a > 4095)) then
              begin
                tmpreg:=getintregister(list,size);
                a_load_const_reg(list, size, a, tmpreg);
                if cgsetflags or setflags then
                  a_reg_alloc(list,NR_DEFAULTFLAGS);
                list.concat(setoppostfix(
                  taicpu.op_reg_reg_reg(op_reg_reg_opcg2asmop[op],dst,src,tmpreg),toppostfix(ord(cgsetflags or setflags)*ord(PF_S))));
              end
            else
              begin
                if cgsetflags or setflags then
                  a_reg_alloc(list,NR_DEFAULTFLAGS);
                list.concat(setoppostfix(
                  taicpu.op_reg_reg_const(op_reg_reg_opcg2asmop[op],dst,src,a),toppostfix(ord(cgsetflags or setflags)*ord(PF_S))));
              end;
            if (cgsetflags or setflags) and (size in [OS_8,OS_16,OS_32]) then
              begin
                ovloc.loc:=LOC_FLAGS;
                case op of
                  OP_ADD:
                    ovloc.resflags:=F_CS;
                  OP_SUB:
                    ovloc.resflags:=F_CC;
                end;
              end;
        end
      else
        begin
          { there could be added some more sophisticated optimizations }
          if (op in [OP_MUL,OP_IMUL]) and (a=1) then
            a_load_reg_reg(list,size,size,src,dst)
          else if (op in [OP_MUL,OP_IMUL]) and (a=0) then
            a_load_const_reg(list,size,0,dst)
          else if (op in [OP_IMUL]) and (a=-1) then
            a_op_reg_reg(list,OP_NEG,size,src,dst)
          { we do this here instead in the peephole optimizer because
            it saves us a register }
          else if (op in [OP_MUL,OP_IMUL]) and ispowerof2(a,l1) and not(cgsetflags or setflags) then
            a_op_const_reg_reg(list,OP_SHL,size,l1,src,dst)
          { for example : b=a*5 -> b=a*4+a with add instruction and shl }
          else if (op in [OP_MUL,OP_IMUL]) and ispowerof2(a-1,l1) and not(cgsetflags or setflags) then
            begin
              if l1>32 then{roozbeh does this ever happen?}
                internalerror(200308296);
              shifterop_reset(so);
              so.shiftmode:=SM_LSL;
              so.shiftimm:=l1;
              list.concat(taicpu.op_reg_reg_reg_shifterop(A_ADD,dst,src,src,so));
            end
          { for example : b=a*7 -> b=a*8-a with rsb instruction and shl }
          else if (op in [OP_MUL,OP_IMUL]) and ispowerof2(a+1,l1) and not(cgsetflags or setflags) then
            begin
              if l1>32 then{does this ever happen?}
                internalerror(201205181);
              shifterop_reset(so);
              so.shiftmode:=SM_LSL;
              so.shiftimm:=l1;
              list.concat(taicpu.op_reg_reg_reg_shifterop(A_RSB,dst,src,src,so));
            end
          else if (op in [OP_MUL,OP_IMUL]) and not(cgsetflags or setflags) and try_optimized_mul32_const_reg_reg(list,a,src,dst) then
            begin
              { nothing to do on success }
            end
          { x := y and 0; just clears a register, this sometimes gets generated on 64bit ops.
            Just using mov x, #0 might allow some easier optimizations down the line. }
          else if (op = OP_AND) and (dword(a)=0) then
            list.concat(taicpu.op_reg_const(A_MOV,dst,0))
          { x := y AND $FFFFFFFF just copies the register, so use mov for better optimizations }
          else if (op = OP_AND) and (not(dword(a))=0) then
            list.concat(taicpu.op_reg_reg(A_MOV,dst,src))
          { the ARM-mode BIC shortcut is disabled here:
            else if (op = OP_AND) and is_shifter_const(not(dword(a)),shift) then
              list.concat(taicpu.op_reg_reg_const(A_BIC,dst,src,not(dword(a)))) }
          { AND with a constant that is a valid Thumb immediate: emit the AND
            itself (a MOV cannot combine src with the constant) }
          else if (op = OP_AND) and is_thumb_imm(a) then
            list.concat(taicpu.op_reg_reg_const(A_AND,dst,src,dword(a)))
          { BIC clears the specified bits, while AND keeps them, so BIC with
            the complemented constant covers more immediates }
          else if (op = OP_AND) and is_thumb_imm(not(dword(a))) then
            list.concat(taicpu.op_reg_reg_const(A_BIC,dst,src,not(dword(a))))
          else
            begin
              { generic fallback: constant into a register, then reg/reg op }
              tmpreg:=getintregister(list,size);
              a_load_const_reg(list,size,a,tmpreg);
              a_op_reg_reg_reg_checkoverflow(list,op,size,tmpreg,src,dst,setflags,ovloc);
            end;
        end;
      maybeadjustresult(list,op,size,dst);
    end;
  { Opcode used by Tthumb2cgarm.a_op_reg_reg_reg_checkoverflow for each
    TOpCG value (indexed by the operation); A_NONE marks operations that
    have no direct three-register instruction in this table. }
  const
    op_reg_reg_opcg2asmopThumb2: array[TOpCG] of tasmop =
      (A_NONE,A_MOV,A_ADD,A_AND,A_UDIV,A_SDIV,A_MUL,A_MUL,A_NONE,A_MVN,A_ORR,
       A_ASR,A_LSL,A_LSR,A_SUB,A_EOR,A_NONE,A_ROR);

  { Emits dst := src2 <op> src1 for Thumb-2.

    op       : operation; OP_NEG and OP_NOT raise an internal error, OP_ROL
               and OP_ROR are only supported for 32 bit sizes.
    setflags : request overflow information; the same is done when
               cgsetflags is set.
    ovloc    : LOC_VOID by default.  For MUL/IMUL with flags requested it
               becomes LOC_FLAGS with F_NE, based on a compare of the high
               word of the 64 bit product.

    The source registers are not modified.  The result is finally passed
    through maybeadjustresult. }
  procedure Tthumb2cgarm.a_op_reg_reg_reg_checkoverflow(list: TAsmList; op: TOpCg; size: tcgsize; src1, src2, dst: tregister;setflags : boolean;var ovloc : tlocation);
    var
      so : tshifterop;
      tmpreg,overflowreg : tregister;
      asmop : tasmop;
    begin
      ovloc.loc:=LOC_VOID;
      case op of
        OP_NEG,OP_NOT:
          internalerror(200308281);
        OP_ROL:
          begin
            if not(size in [OS_32,OS_S32]) then
              internalerror(2008072801);
            { simulate ROL by ror'ing 32-value; the adjusted count is kept
              in a temporary so that src1 keeps its value }
            tmpreg:=getintregister(list,OS_32);
            list.concat(taicpu.op_reg_const(A_MOV,tmpreg,32));
            list.concat(taicpu.op_reg_reg_reg(A_SUB,tmpreg,tmpreg,src1));
            list.concat(taicpu.op_reg_reg_reg(A_ROR, dst, src2, tmpreg));
          end;
        OP_ROR:
          begin
            if not(size in [OS_32,OS_S32]) then
              internalerror(2008072802);
            list.concat(taicpu.op_reg_reg_reg(A_ROR, dst, src2, src1));
          end;
        OP_IMUL,
        OP_MUL:
          begin
            if cgsetflags or setflags then
              begin
                { long multiply: low word in dst, high word in overflowreg }
                overflowreg:=getintregister(list,size);
                if op=OP_IMUL then
                  asmop:=A_SMULL
                else
                  asmop:=A_UMULL;
                { the arm doesn't allow that rd and rm are the same }
                if dst=src2 then
                  begin
                    if dst<>src1 then
                      list.concat(taicpu.op_reg_reg_reg_reg(asmop,dst,overflowreg,src1,src2))
                    else
                      begin
                        { dst, src1 and src2 are all the same register:
                          multiply by a copy held in a temporary }
                        tmpreg:=getintregister(list,size);
                        a_load_reg_reg(list,size,size,src2,tmpreg);
                        list.concat(taicpu.op_reg_reg_reg_reg(asmop,dst,overflowreg,tmpreg,src1));
                      end;
                  end
                else
                  list.concat(taicpu.op_reg_reg_reg_reg(asmop,dst,overflowreg,src2,src1));
                a_reg_alloc(list,NR_DEFAULTFLAGS);
                if op=OP_IMUL then
                  begin
                    { signed: the high word has to equal the sign extension
                      of the low word }
                    shifterop_reset(so);
                    so.shiftmode:=SM_ASR;
                    so.shiftimm:=31;
                    list.concat(taicpu.op_reg_reg_shifterop(A_CMP,overflowreg,dst,so));
                  end
                else
                  { unsigned: the high word has to be zero }
                  list.concat(taicpu.op_reg_const(A_CMP,overflowreg,0));

                ovloc.loc:=LOC_FLAGS;
                ovloc.resflags:=F_NE;
              end
            else
              begin
                { the arm doesn't allow that rd and rm are the same }
                if dst=src2 then
                  begin
                    if dst<>src1 then
                      list.concat(taicpu.op_reg_reg_reg(A_MUL,dst,src1,src2))
                    else
                      begin
                        { dst, src1 and src2 are all the same register:
                          multiply by a copy held in a temporary }
                        tmpreg:=getintregister(list,size);
                        a_load_reg_reg(list,size,size,src2,tmpreg);
                        list.concat(taicpu.op_reg_reg_reg(A_MUL,dst,tmpreg,src1));
                      end;
                  end
                else
                  list.concat(taicpu.op_reg_reg_reg(A_MUL,dst,src2,src1));
              end;
          end;
        else
          begin
            { plain three-register instruction taken from the table above,
              with the S postfix when flags are requested }
            if cgsetflags or setflags then
              a_reg_alloc(list,NR_DEFAULTFLAGS);
            list.concat(setoppostfix(
              taicpu.op_reg_reg_reg(op_reg_reg_opcg2asmopThumb2[op],dst,src2,src1),toppostfix(ord(cgsetflags or setflags)*ord(PF_S))));
          end;
      end;
      maybeadjustresult(list,op,size,dst);
    end;
  { Turns the condition described by the result flags f into a 0/1 value
    in reg.

    Three instructions are appended to list: an A_ITE instruction carrying
    the condition derived from f, a MOV reg,1 executed under that
    condition, and a MOV reg,0 executed under the inverse condition.

    size is accepted for interface compatibility and is not consulted. }
  procedure Tthumb2cgarm.g_flags2reg(list: TAsmList; size: TCgSize; const f: TResFlags; reg: TRegister);
    begin
      list.concat(taicpu.op_cond(A_ITE, flags_to_cond(f)));
      { "then" part: condition holds -> reg := 1 }
      list.concat(setcondition(taicpu.op_reg_const(A_MOV,reg,1),flags_to_cond(f)));
      { "else" part: condition does not hold -> reg := 0 }
      list.concat(setcondition(taicpu.op_reg_const(A_MOV,reg,0),inverse_cond(flags_to_cond(f))));
    end;
  { Appends the prologue of the current procedure to list.

    localsize    - requested size of the local area. It is first rounded
                   up to a multiple of 4 and later corrected so that the
                   local area plus the bytes counted in stackmisalignment
                   respect current_settings.alignment.localalignmax.
    nostackframe - when true, no code at all is generated.

    Sequence of the generated code:
      1. determine the first and last used non-volatile FPU register,
      2. with a separate frame pointer: copy the stack pointer to R12,
      3. store the used non-volatile integer registers (plus R14 and,
         if needed, the frame pointer) with one STM,
      4. with a separate frame pointer: frame pointer := R12-4,
      5. subtract the local size from the stack pointer,
      6. store the FPU register range with SFM at floatregstart. }
  procedure Tthumb2cgarm.g_proc_entry(list : TAsmList;localsize : longint;nostackframe:boolean);
    var
      ref : treference;
      shift : byte;
      firstfloatreg,lastfloatreg,
      r : byte;
      regs : tcpuregisterset;
      stackmisalignment: pint;
    begin
      LocalSize:=align(LocalSize,4);
      { call instruction does not put anything on the stack }
      stackmisalignment:=0;
      if not(nostackframe) then
        begin
          firstfloatreg:=RS_NO;
          { lastfloatreg is only read when firstfloatreg<>RS_NO, but give it
            a defined value so it is never seen as uninitialised }
          lastfloatreg:=RS_NO;
          { save floating point registers? }
          for r:=RS_F0 to RS_F7 do
            if r in rg[R_FPUREGISTER].used_in_proc-paramanager.get_volatile_registers_fpu(pocall_stdcall) then
              begin
                if firstfloatreg=RS_NO then
                  firstfloatreg:=r;
                lastfloatreg:=r;
                { every saved FPU register is counted with 12 bytes }
                inc(stackmisalignment,12);
              end;

          a_reg_alloc(list,NR_STACK_POINTER_REG);
          if current_procinfo.framepointer<>NR_STACK_POINTER_REG then
            begin
              a_reg_alloc(list,NR_FRAME_POINTER_REG);
              a_reg_alloc(list,NR_R12);

              { keep the entry value of the stack pointer in R12; it is
                needed below to set up the frame pointer }
              list.concat(taicpu.op_reg_reg(A_MOV,NR_R12,NR_STACK_POINTER_REG));
            end;

          { save int registers }
          reference_reset(ref,4);
          ref.index:=NR_STACK_POINTER_REG;
          ref.addressmode:=AM_PREINDEXED;
          regs:=rg[R_INTREGISTER].used_in_proc-paramanager.get_volatile_registers_int(pocall_stdcall);
          if current_procinfo.framepointer<>NR_STACK_POINTER_REG then
            regs:=regs+[RS_FRAME_POINTER_REG,RS_R14]
          else if (regs<>[]) or (pi_do_call in current_procinfo.flags) then
            { R14 is saved as soon as anything is pushed or the procedure
              performs a call }
            include(regs,RS_R14);

          if regs<>[] then
            begin
              { 4 bytes of stack per pushed integer register }
              for r:=RS_R0 to RS_R15 do
                if (r in regs) then
                  inc(stackmisalignment,4);
              list.concat(setoppostfix(taicpu.op_ref_regset(A_STM,ref,R_INTREGISTER,R_SUBWHOLE,regs),PF_FD));
            end;

          if current_procinfo.framepointer<>NR_STACK_POINTER_REG then
            begin
              { the framepointer now points to the saved R15, so the saved
                framepointer is at R11-12 (for get_caller_frame) }
              list.concat(taicpu.op_reg_reg_const(A_SUB,NR_FRAME_POINTER_REG,NR_R12,4));
              a_reg_dealloc(list,NR_R12);
            end;

          { only the remainder relative to the maximum local alignment
            matters from here on }
          stackmisalignment:=stackmisalignment mod current_settings.alignment.localalignmax;
          if (LocalSize<>0) or
             ((stackmisalignment<>0) and
              ((pi_do_call in current_procinfo.flags) or
               (po_assembler in current_procinfo.procdef.procoptions))) then
            begin
              localsize:=align(localsize+stackmisalignment,current_settings.alignment.localalignmax)-stackmisalignment;
              if not(is_shifter_const(localsize,shift)) then
                begin
                  { size cannot be used as an immediate: go through R12 }
                  if current_procinfo.framepointer=NR_STACK_POINTER_REG then
                    a_reg_alloc(list,NR_R12);
                  a_load_const_reg(list,OS_ADDR,LocalSize,NR_R12);
                  list.concat(taicpu.op_reg_reg_reg(A_SUB,NR_STACK_POINTER_REG,NR_STACK_POINTER_REG,NR_R12));
                  a_reg_dealloc(list,NR_R12);
                end
              else
                begin
                  a_reg_dealloc(list,NR_R12);
                  list.concat(taicpu.op_reg_reg_const(A_SUB,NR_STACK_POINTER_REG,NR_STACK_POINTER_REG,LocalSize));
                end;
            end;

          if firstfloatreg<>RS_NO then
            begin
              reference_reset(ref,4);
              if tg.direction*tarmprocinfo(current_procinfo).floatregstart>=1023 then
                begin
                  { offset too large for the SFM reference: compute the
                    address in R12 as framepointer-(-floatregstart) }
                  a_load_const_reg(list,OS_ADDR,-tarmprocinfo(current_procinfo).floatregstart,NR_R12);
                  list.concat(taicpu.op_reg_reg_reg(A_SUB,NR_R12,current_procinfo.framepointer,NR_R12));
                  ref.base:=NR_R12;
                end
              else
                begin
                  ref.base:=current_procinfo.framepointer;
                  ref.offset:=tarmprocinfo(current_procinfo).floatregstart;
                end;
              { one SFM stores the whole range first..last }
              list.concat(taicpu.op_reg_const_ref(A_SFM,newreg(R_FPUREGISTER,firstfloatreg,R_SUBWHOLE),
                lastfloatreg-firstfloatreg+1,ref));
            end;
        end;
    end;
  { Appends the epilogue of the current procedure to list.

    parasize     - not consulted by this implementation.
    nostackframe - when true, only MOV PC,R14 is generated.

    With a stack frame the generated code
      1. reloads the used non-volatile FPU register range with LFM,
      2. adds the local size (taken from
         current_procinfo.calc_stackframe_size and alignment-corrected)
         back to the stack pointer,
      3. returns either with MOV R15,R14 (nothing was saved) or with an
         LDM that reloads the saved integer registers together with R15. }
  procedure Tthumb2cgarm.g_proc_exit(list : TAsmList;parasize : longint;nostackframe:boolean);
    var
      ref : treference;
      firstfloatreg,lastfloatreg,
      r : byte;
      shift : byte;
      regs : tcpuregisterset;
      LocalSize : longint;
      stackmisalignment: pint;
    begin
      if not(nostackframe) then
        begin
          stackmisalignment:=0;
          { restore floating point register }
          firstfloatreg:=RS_NO;
          { lastfloatreg is only read when firstfloatreg<>RS_NO, but give it
            a defined value so it is never seen as uninitialised }
          lastfloatreg:=RS_NO;
          { which floating point registers have to be restored? }
          for r:=RS_F0 to RS_F7 do
            if r in rg[R_FPUREGISTER].used_in_proc-paramanager.get_volatile_registers_fpu(pocall_stdcall) then
              begin
                if firstfloatreg=RS_NO then
                  firstfloatreg:=r;
                lastfloatreg:=r;
                { floating point register space is already included in
                  localsize below by calc_stackframe_size
                  inc(stackmisalignment,12);

                  NOTE(review): g_proc_entry does add 12 per saved FPU
                  register to its misalignment count - confirm that both
                  routines end up with the same stack adjustment.
                }
              end;

          if firstfloatreg<>RS_NO then
            begin
              reference_reset(ref,4);
              if tg.direction*tarmprocinfo(current_procinfo).floatregstart>=1023 then
                begin
                  { offset too large for the LFM reference: compute the
                    address in R12 as framepointer-(-floatregstart) }
                  a_load_const_reg(list,OS_ADDR,-tarmprocinfo(current_procinfo).floatregstart,NR_R12);
                  list.concat(taicpu.op_reg_reg_reg(A_SUB,NR_R12,current_procinfo.framepointer,NR_R12));
                  ref.base:=NR_R12;
                end
              else
                begin
                  ref.base:=current_procinfo.framepointer;
                  ref.offset:=tarmprocinfo(current_procinfo).floatregstart;
                end;
              { one LFM reloads the whole range first..last }
              list.concat(taicpu.op_reg_const_ref(A_LFM,newreg(R_FPUREGISTER,firstfloatreg,R_SUBWHOLE),
                lastfloatreg-firstfloatreg+1,ref));
            end;

          regs:=rg[R_INTREGISTER].used_in_proc-paramanager.get_volatile_registers_int(pocall_stdcall);
          if (pi_do_call in current_procinfo.flags) or (regs<>[]) then
            begin
              { load R15 instead of R14, so the LDM below also returns }
              exclude(regs,RS_R14);
              include(regs,RS_R15);
            end;
          if (current_procinfo.framepointer<>NR_STACK_POINTER_REG) then
            regs:=regs+[RS_FRAME_POINTER_REG,RS_R15];

          { 4 bytes of stack per register that is going to be popped }
          for r:=RS_R0 to RS_R15 do
            if (r in regs) then
              inc(stackmisalignment,4);

          stackmisalignment:=stackmisalignment mod current_settings.alignment.localalignmax;

          LocalSize:=current_procinfo.calc_stackframe_size;
          if (LocalSize<>0) or
             ((stackmisalignment<>0) and
              ((pi_do_call in current_procinfo.flags) or
               (po_assembler in current_procinfo.procdef.procoptions))) then
            begin
              localsize:=align(localsize+stackmisalignment,current_settings.alignment.localalignmax)-stackmisalignment;
              if not(is_shifter_const(LocalSize,shift)) then
                begin
                  { size cannot be used as an immediate: go through R12 }
                  a_reg_alloc(list,NR_R12);
                  a_load_const_reg(list,OS_ADDR,LocalSize,NR_R12);
                  list.concat(taicpu.op_reg_reg(A_ADD,NR_STACK_POINTER_REG,NR_R12));
                  a_reg_dealloc(list,NR_R12);
                end
              else
                begin
                  a_reg_dealloc(list,NR_R12);
                  list.concat(taicpu.op_reg_const(A_ADD,NR_STACK_POINTER_REG,LocalSize));
                end;
            end;

          if regs=[] then
            { nothing was saved: plain return through the link register }
            list.concat(taicpu.op_reg_reg(A_MOV,NR_R15,NR_R14))
          else
            begin
              reference_reset(ref,4);
              ref.index:=NR_STACK_POINTER_REG;
              ref.addressmode:=AM_PREINDEXED;
              list.concat(setoppostfix(taicpu.op_ref_regset(A_LDM,ref,R_INTREGISTER,R_SUBWHOLE,regs),PF_FD));
            end;
        end
      else
        list.concat(taicpu.op_reg_reg(A_MOV,NR_PC,NR_R14));
    end;
  { Emits the load/store instruction op (with postfix oppostfix) for
    register reg and memory reference ref, after rewriting the reference
    into a form this code generator accepts.

    ref is a by-value parameter: all rewriting happens on the local copy,
    which is also returned as the function result so that the caller can
    see the reference that was finally used.

    Extra instructions (constant/symbol loads, ADD/SUB/MOV on a temporary
    register) are appended to list in front of the actual load/store.
    Symbol addresses are placed in current_procinfo.aktlocaldata and
    fetched pc-relative.

    Raises internalerror 200308294 for an index-only reference that has a
    shift, 200309121 for a float load/store with a shifted index and
    200403161 if the temporary register is neither base nor index in the
    float fix-up. }
  function Tthumb2cgarm.handle_load_store(list:TAsmList;op: tasmop;oppostfix : toppostfix;reg:tregister;ref: treference):treference;
    var
      tmpreg : tregister;
      tmpref : treference;
      l : tasmlabel;
    begin
      tmpreg:=NR_NO;

      { Be sure to have a base register }
      if (ref.base=NR_NO) then
        begin
          if ref.shiftmode<>SM_None then
            internalerror(200308294);
          ref.base:=ref.index;
          ref.index:=NR_NO;
        end;

      { absolute symbols can't be handled directly, we've to store the symbol reference
        in the text segment and access it pc relative

        For now, we assume that references where base or index equals to PC are already
        relative, all other references are assumed to be absolute and thus they need
        to be handled extra.

        A proper solution would be to change refoptions to a set and store the information
        if the symbol is absolute or relative there.
      }
      if (assigned(ref.symbol) and
          not(is_pc(ref.base)) and
          not(is_pc(ref.index))
         ) or
         { [#xxx] isn't a valid address operand }
         ((ref.base=NR_NO) and (ref.index=NR_NO)) or
         //(ref.offset<-4095) or
         (ref.offset<-255) or
         (ref.offset>4095) or
         { narrower offset range for the SB/H/SH postfixes }
         ((oppostfix in [PF_SB,PF_H,PF_SH]) and
          ((ref.offset<-255) or
           (ref.offset>255)
          )
         ) or
         { float loads/stores: offset limited to +/-1020, no symbols }
         ((op in [A_LDF,A_STF,A_FLDS,A_FLDD,A_FSTS,A_FSTD]) and
          ((ref.offset<-1020) or
           (ref.offset>1020) or
           { the usual pc relative symbol handling assumes possible offsets of +/- 4095 }
           assigned(ref.symbol)
          )
         ) then
        begin
          reference_reset(tmpref,4);

          { load symbol }
          tmpreg:=getintregister(list,OS_INT);
          if assigned(ref.symbol) then
            begin
              { emit "label: symbol+offset" into the local data of the
                procedure and load that entry relative to R15 }
              current_asmdata.getjumplabel(l);
              cg.a_label(current_procinfo.aktlocaldata,l);
              tmpref.symboldata:=current_procinfo.aktlocaldata.last;

              current_procinfo.aktlocaldata.concat(tai_const.create_sym_offset(ref.symbol,ref.offset));

              { load consts entry }
              tmpref.symbol:=l;
              tmpref.base:=NR_R15;
              list.concat(taicpu.op_reg_ref(A_LDR,tmpreg,tmpref));

              { in case of LDF/STF, we got rid of the NR_R15 }
              if is_pc(ref.base) then
                ref.base:=NR_NO;
              if is_pc(ref.index) then
                ref.index:=NR_NO;
            end
          else
            { no symbol: only the offset is out of range, load it as a constant }
            a_load_const_reg(list,OS_ADDR,ref.offset,tmpreg);

          { merge the temporary register into the reference }
          if (ref.base<>NR_NO) then
            begin
              if ref.index<>NR_NO then
                begin
                  { base and index are both taken: fold the base into tmpreg }
                  list.concat(taicpu.op_reg_reg_reg(A_ADD,tmpreg,ref.base,tmpreg));
                  ref.base:=tmpreg;
                end
              else
                begin
                  { free index slot: use tmpreg as unshifted, positive index }
                  ref.index:=tmpreg;
                  ref.shiftimm:=0;
                  ref.signindex:=1;
                  ref.shiftmode:=SM_None;
                end;
            end
          else
            ref.base:=tmpreg;
          { offset and symbol are now contained in tmpreg }
          ref.offset:=0;
          ref.symbol:=nil;
        end;

      { base+index+offset cannot be encoded: add the offset to a register }
      if (ref.base<>NR_NO) and (ref.index<>NR_NO) and (ref.offset<>0) then
        begin
          if tmpreg<>NR_NO then
            a_op_const_reg_reg(list,OP_ADD,OS_ADDR,ref.offset,tmpreg,tmpreg)
          else
            begin
              tmpreg:=getintregister(list,OS_ADDR);
              a_op_const_reg_reg(list,OP_ADD,OS_ADDR,ref.offset,ref.base,tmpreg);
              ref.base:=tmpreg;
            end;
          ref.offset:=0;
        end;

      { Hack? Thumb2 doesn't allow PC indexed addressing modes(although it does in the specification) }
      if (ref.base=NR_R15) and (ref.index<>NR_NO) and (ref.shiftmode <> sm_none) then
        begin
          tmpreg:=getintregister(list,OS_ADDR);

          list.concat(taicpu.op_reg_reg(A_MOV, tmpreg, NR_R15));

          ref.base := tmpreg;
        end;

      { floating point operations have only limited references
        we expect here, that a base is already set }
      if (op in [A_LDF,A_STF,A_FLDS,A_FLDD,A_FSTS,A_FSTD]) and (ref.index<>NR_NO) then
        begin
          if ref.shiftmode<>SM_none then
            internalerror(200309121);
          if tmpreg<>NR_NO then
            begin
              if ref.base=tmpreg then
                begin
                  { tmpreg is the base: add/subtract the index into it }
                  if ref.signindex<0 then
                    list.concat(taicpu.op_reg_reg_reg(A_SUB,tmpreg,tmpreg,ref.index))
                  else
                    list.concat(taicpu.op_reg_reg_reg(A_ADD,tmpreg,tmpreg,ref.index));
                  ref.index:=NR_NO;
                end
              else
                begin
                  { otherwise tmpreg has to be the index }
                  if ref.index<>tmpreg then
                    internalerror(200403161);
                  if ref.signindex<0 then
                    list.concat(taicpu.op_reg_reg_reg(A_SUB,tmpreg,ref.base,tmpreg))
                  else
                    list.concat(taicpu.op_reg_reg_reg(A_ADD,tmpreg,ref.base,tmpreg));
                  ref.base:=tmpreg;
                  ref.index:=NR_NO;
                end;
            end
          else
            begin
              { NOTE(review): this path always uses ADD and does not look
                at ref.signindex, unlike the two paths above - confirm
                that a negative index cannot reach this point }
              tmpreg:=getintregister(list,OS_ADDR);
              list.concat(taicpu.op_reg_reg_reg(A_ADD,tmpreg,ref.base,ref.index));
              ref.base:=tmpreg;
              ref.index:=NR_NO;
            end;
        end;

      { finally the load/store itself, using the rewritten reference }
      list.concat(setoppostfix(taicpu.op_reg_ref(op,reg,ref),oppostfix));
      Result := ref;
    end;
  { Register to register move between multimedia (VFP) registers.

    Only the OS_F32 -> OS_F32 case generates code: a VMOV.F32 reg2,reg1
    that is also registered through add_move_instruction.

    Every other size combination, including OS_F64 -> OS_F64 and
    OS_F32 -> OS_F64 for which the original source only contains
    commented-out VMOV/VCVT attempts, generates no code at all.
    shuffle is not consulted. }
  procedure Tthumb2cgarm.a_loadmm_reg_reg(list: TAsmList; fromsize, tosize: tcgsize; reg1, reg2: tregister; shuffle: pmmshuffle);
    var
      movinstr: taicpu;
    begin
      { nothing is generated unless both sides are single precision }
      if (fromsize<>OS_F32) or (tosize<>OS_F32) then
        exit;

      movinstr:=setoppostfix(taicpu.op_reg_reg(A_VMOV,reg2,reg1),PF_F32);
      list.Concat(movinstr);
      add_move_instruction(movinstr);
    end;
  { Loads the multimedia (VFP) register reg from memory reference ref
    using VLDR.

    The reference is first reduced to a plain base(+offset) form on a
    local copy:
      - base+index (optionally shifted) is added up into a fresh integer
        register,
      - a symbolic reference is resolved with a_loadaddr_ref_reg.
    The postfix is PF_F32 when fromsize is OS_F32 and PF_F64 otherwise.
    tosize and shuffle are not consulted. }
  procedure Tthumb2cgarm.a_loadmm_ref_reg(list: TAsmList; fromsize, tosize: tcgsize; const ref: treference; reg: tregister; shuffle: pmmshuffle);
    var
      memref: treference;
      addrreg: TRegister;
      shiftop: tshifterop;
      postfix: toppostfix;
    begin
      memref:=ref;

      { fold base+index into one register }
      if (memref.base<>NR_NO) and (memref.index<>NR_NO) then
        begin
          addrreg:=getintregister(list,OS_INT);
          if memref.shiftmode=SM_None then
            a_op_reg_reg_reg(list,OP_ADD,OS_INT,memref.index,memref.base,addrreg)
          else
            begin
              shiftop.rs:=memref.index;
              shiftop.shiftimm:=memref.shiftimm;
              shiftop.shiftmode:=memref.shiftmode;
              list.concat(taicpu.op_reg_reg_shifterop(A_ADD,addrreg,memref.base,shiftop));
            end;
          reference_reset_base(memref,addrreg,memref.offset,0);
        end;

      { symbolic address: materialise it in a register }
      if assigned(memref.symbol) then
        begin
          addrreg:=getintregister(list,OS_INT);
          a_loadaddr_ref_reg(list,memref,addrreg);
          reference_reset_base(memref,addrreg,0,0);
        end;

      if fromsize=OS_F32 then
        postfix:=PF_F32
      else
        postfix:=PF_F64;
      list.Concat(setoppostfix(taicpu.op_reg_ref(A_VLDR,reg,memref),postfix));
    end;
  { Stores the multimedia (VFP) register reg to memory reference ref
    using VSTR.

    The reference is first reduced to a plain base(+offset) form on a
    local copy:
      - base+index (optionally shifted) is added up into a fresh integer
        register,
      - a symbolic reference is resolved with a_loadaddr_ref_reg.
    The postfix is PF_32 when fromsize is OS_F32 and PF_64 otherwise.
    tosize and shuffle are not consulted. }
  procedure Tthumb2cgarm.a_loadmm_reg_ref(list: TAsmList; fromsize, tosize: tcgsize; reg: tregister; const ref: treference; shuffle: pmmshuffle);
    var
      memref: treference;
      shiftop: tshifterop;
      addrreg: TRegister;
      postfix: toppostfix;
    begin
      memref:=ref;

      { fold base+index into one register }
      if (memref.base<>NR_NO) and (memref.index<>NR_NO) then
        begin
          addrreg:=getintregister(list,OS_INT);
          if memref.shiftmode=SM_None then
            a_op_reg_reg_reg(list,OP_ADD,OS_INT,memref.index,memref.base,addrreg)
          else
            begin
              shiftop.rs:=memref.index;
              shiftop.shiftimm:=memref.shiftimm;
              shiftop.shiftmode:=memref.shiftmode;
              list.concat(taicpu.op_reg_reg_shifterop(A_ADD,addrreg,memref.base,shiftop));
            end;
          reference_reset_base(memref,addrreg,memref.offset,0);
        end;

      { symbolic address: materialise it in a register }
      if assigned(memref.symbol) then
        begin
          addrreg:=getintregister(list,OS_INT);
          a_loadaddr_ref_reg(list,memref,addrreg);
          reference_reset_base(memref,addrreg,0,0);
        end;

      if fromsize=OS_F32 then
        postfix:=PF_32
      else
        postfix:=PF_64;
      list.Concat(setoppostfix(taicpu.op_reg_ref(A_VSTR,reg,memref),postfix));
    end;
  { Moves the integer register intreg into the multimedia register mmreg
    with a VMOV.

    Only tosize=OS_F32 is supported; any other target size raises
    internalerror 2012100813. fromsize is not consulted, and the check on
    shuffle is disabled in the original source. }
  procedure Tthumb2cgarm.a_loadmm_intreg_reg(list: TAsmList; fromsize, tosize: tcgsize; intreg, mmreg: tregister; shuffle: pmmshuffle);
    begin
      if tosize<>OS_F32 then
        internalerror(2012100813)
      else
        list.Concat(taicpu.op_reg_reg(A_VMOV,mmreg,intreg));
    end;
  { Moves the multimedia register mmreg into the integer register intreg
    with a VMOV.

    Only fromsize=OS_F32 is supported; any other source size raises
    internalerror 2012100814. tosize is not consulted, and the check on
    shuffle is disabled in the original source. }
  procedure Tthumb2cgarm.a_loadmm_reg_intreg(list: TAsmList; fromsize, tosize: tcgsize; mmreg, intreg: tregister; shuffle: pmmshuffle);
    begin
      if fromsize<>OS_F32 then
        internalerror(2012100814)
      else
        list.Concat(taicpu.op_reg_reg(A_VMOV,intreg,mmreg));
    end;
  { 64 bit register/register operation.

    Only OP_NEG is handled here; every other operation is forwarded
    unchanged to the inherited implementation.

    The negation is generated as
      RSBS dst.lo, src.lo, #0
      MOV  tmp, #0
      SBC  dst.hi, tmp, src.hi
    with the default flags allocated around the sequence, because the SBC
    consumes the carry produced by the RSBS. }
  procedure tthumb2cg64farm.a_op64_reg_reg(list : TAsmList;op:TOpCG;size : tcgsize;regsrc,regdst : tregister64);
    var
      zeroreg: tregister;
    begin
      if op=OP_NEG then
        begin
          cg.a_reg_alloc(list,NR_DEFAULTFLAGS);
          { low half: 0 - lo, setting the flags }
          list.concat(setoppostfix(taicpu.op_reg_reg_const(A_RSB,regdst.reglo,regsrc.reglo,0),PF_S));
          { high half: 0 - hi - borrow; the zero lives in a scratch register }
          zeroreg:=cg.getintregister(list,OS_32);
          list.concat(taicpu.op_reg_const(A_MOV,zeroreg,0));
          list.concat(taicpu.op_reg_reg_reg(A_SBC,regdst.reghi,zeroreg,regsrc.reghi));
          cg.a_reg_dealloc(list,NR_DEFAULTFLAGS);
        end
      else
        inherited a_op64_reg_reg(list, op, size, regsrc, regdst);
    end;
  { Instantiates the code generator objects for the selected CPU.

    For a CPU type contained in cpu_thumb2 the Thumb-2 variants of the
    32 bit code generator, the 64 bit code generator and the assembler
    optimizer class are installed; for every other CPU type the regular
    ARM ones are used. The results are stored in cg, cg64 and
    casmoptimizer. }
  procedure create_codegen;
    begin
      if not(current_settings.cputype in cpu_thumb2) then
        begin
          { classic ARM instruction set }
          cg:=tarmcgarm.create;
          cg64:=tcg64farm.create;
          casmoptimizer:=TCpuAsmOptimizer;
        end
      else
        begin
          { Thumb-2 instruction set }
          cg:=tthumb2cgarm.create;
          cg64:=tthumb2cg64farm.create;
          casmoptimizer:=TCpuThumb2AsmOptimizer;
        end;
    end;
  3795. end.