
/gdb-linaro-dev/sim/ppc/altivec.igen

https://bitbucket.org/codefirex/toolchain_gdb


# Altivec instruction set, for PSIM, the PowerPC simulator.
# Copyright 2003-2013 Free Software Foundation, Inc.
# Contributed by Red Hat Inc; developed under contract from Motorola.
# Written by matthew green <mrg@redhat.com>.
# This file is part of GDB.
# This program is free software; you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation; either version 3 of the License, or
# (at your option) any later version.
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU General Public License for more details.
# You should have received a copy of the GNU General Public License
# along with this program. If not, see <http://www.gnu.org/licenses/>.
#
# Motorola AltiVec instructions.
#
:cache:av:::VS:VS:
:cache:av::vreg *:vS:VS:(cpu_registers(processor)->altivec.vr + VS)
:cache:av::unsigned32:VS_BITMASK:VS:(1 << VS)
:cache:av:::VA:VA:
:cache:av::vreg *:vA:VA:(cpu_registers(processor)->altivec.vr + VA)
:cache:av::unsigned32:VA_BITMASK:VA:(1 << VA)
:cache:av:::VB:VB:
:cache:av::vreg *:vB:VB:(cpu_registers(processor)->altivec.vr + VB)
:cache:av::unsigned32:VB_BITMASK:VB:(1 << VB)
:cache:av:::VC:VC:
:cache:av::vreg *:vC:VC:(cpu_registers(processor)->altivec.vr + VC)
:cache:av::unsigned32:VC_BITMASK:VC:(1 << VC)

# Flags for model.h
::model-macro:::
  #define PPC_INSN_INT_VR(OUT_MASK, IN_MASK, OUT_VMASK, IN_VMASK) \
  do { \
    if (CURRENT_MODEL_ISSUE > 0) \
      ppc_insn_int_vr(MY_INDEX, cpu_model(processor), OUT_MASK, IN_MASK, OUT_VMASK, IN_VMASK); \
  } while (0)

  #define PPC_INSN_VR(OUT_VMASK, IN_VMASK) \
  do { \
    if (CURRENT_MODEL_ISSUE > 0) \
      ppc_insn_vr(MY_INDEX, cpu_model(processor), OUT_VMASK, IN_VMASK); \
  } while (0)

  #define PPC_INSN_VR_CR(OUT_VMASK, IN_VMASK, CR_MASK) \
  do { \
    if (CURRENT_MODEL_ISSUE > 0) \
      ppc_insn_vr_cr(MY_INDEX, cpu_model(processor), OUT_VMASK, IN_VMASK, CR_MASK); \
  } while (0)

  #define PPC_INSN_VR_VSCR(OUT_VMASK, IN_VMASK) \
  do { \
    if (CURRENT_MODEL_ISSUE > 0) \
      ppc_insn_vr_vscr(MY_INDEX, cpu_model(processor), OUT_VMASK, IN_VMASK); \
  } while (0)

  #define PPC_INSN_FROM_VSCR(VR_MASK) \
  do { \
    if (CURRENT_MODEL_ISSUE > 0) \
      ppc_insn_from_vscr(MY_INDEX, cpu_model(processor), VR_MASK); \
  } while (0)

  #define PPC_INSN_TO_VSCR(VR_MASK) \
  do { \
    if (CURRENT_MODEL_ISSUE > 0) \
      ppc_insn_to_vscr(MY_INDEX, cpu_model(processor), VR_MASK); \
  } while (0)
# Trace waiting for AltiVec registers to become available
void::model-static::model_trace_altivec_busy_p:model_data *model_ptr, unsigned32 vr_busy
  int i;
  if (vr_busy) {
    vr_busy &= model_ptr->vr_busy;
    for (i = 0; i < 32; i++) {
      if (((1 << i) & vr_busy) != 0) {
        TRACE(trace_model, ("Waiting for register v%d.\n", i));
      }
    }
  }
  if (model_ptr->vscr_busy)
    TRACE(trace_model, ("Waiting for VSCR\n"));

# Trace making AltiVec registers busy
void::model-static::model_trace_altivec_make_busy:model_data *model_ptr, unsigned32 vr_mask, unsigned32 cr_mask
  int i;
  if (vr_mask) {
    for (i = 0; i < 32; i++) {
      if (((1 << i) & vr_mask) != 0) {
        TRACE(trace_model, ("Register v%d is now busy.\n", i));
      }
    }
  }
  if (cr_mask) {
    for (i = 0; i < 8; i++) {
      if (((1 << i) & cr_mask) != 0) {
        TRACE(trace_model, ("Register cr%d is now busy.\n", i));
      }
    }
  }
# Schedule an AltiVec instruction that takes integer input registers and produces output registers
void::model-function::ppc_insn_int_vr:itable_index index, model_data *model_ptr, const unsigned32 out_mask, const unsigned32 in_mask, const unsigned32 out_vmask, const unsigned32 in_vmask
  const unsigned32 int_mask = out_mask | in_mask;
  const unsigned32 vr_mask = out_vmask | in_vmask;
  model_busy *busy_ptr;
  if ((model_ptr->int_busy & int_mask) != 0 || (model_ptr->vr_busy & vr_mask)) {
    model_new_cycle(model_ptr); /* don't count first dependency as a stall */
    while ((model_ptr->int_busy & int_mask) != 0 || (model_ptr->vr_busy & vr_mask)) {
      if (WITH_TRACE && ppc_trace[trace_model]) {
        model_trace_busy_p(model_ptr, int_mask, 0, 0, PPC_NO_SPR);
        model_trace_altivec_busy_p(model_ptr, vr_mask);
      }
      model_ptr->nr_stalls_data++;
      model_new_cycle(model_ptr);
    }
  }
  busy_ptr = model_wait_for_unit(index, model_ptr, &model_ptr->timing[index]);
  model_ptr->int_busy |= out_mask;
  busy_ptr->int_busy |= out_mask;
  model_ptr->vr_busy |= out_vmask;
  busy_ptr->vr_busy |= out_vmask;
  if (out_mask)
    busy_ptr->nr_writebacks = (PPC_ONE_BIT_SET_P(out_mask)) ? 1 : 2;
  if (out_vmask)
    busy_ptr->nr_writebacks += (PPC_ONE_BIT_SET_P(out_vmask)) ? 1 : 2;
  if (WITH_TRACE && ppc_trace[trace_model]) {
    model_trace_make_busy(model_ptr, out_mask, 0, 0);
    model_trace_altivec_make_busy(model_ptr, vr_mask, 0);
  }
# Schedule an AltiVec instruction that takes vector input registers and produces vector output registers
void::model-function::ppc_insn_vr:itable_index index, model_data *model_ptr, const unsigned32 out_vmask, const unsigned32 in_vmask
  const unsigned32 vr_mask = out_vmask | in_vmask;
  model_busy *busy_ptr;
  if (model_ptr->vr_busy & vr_mask) {
    model_new_cycle(model_ptr); /* don't count first dependency as a stall */
    while (model_ptr->vr_busy & vr_mask) {
      if (WITH_TRACE && ppc_trace[trace_model]) {
        model_trace_altivec_busy_p(model_ptr, vr_mask);
      }
      model_ptr->nr_stalls_data++;
      model_new_cycle(model_ptr);
    }
  }
  busy_ptr = model_wait_for_unit(index, model_ptr, &model_ptr->timing[index]);
  model_ptr->vr_busy |= out_vmask;
  busy_ptr->vr_busy |= out_vmask;
  if (out_vmask)
    busy_ptr->nr_writebacks = (PPC_ONE_BIT_SET_P(out_vmask)) ? 1 : 2;
  if (WITH_TRACE && ppc_trace[trace_model]) {
    model_trace_altivec_make_busy(model_ptr, vr_mask, 0);
  }

# Schedule an AltiVec instruction that takes vector input registers and produces vector output registers, touches CR
void::model-function::ppc_insn_vr_cr:itable_index index, model_data *model_ptr, const unsigned32 out_vmask, const unsigned32 in_vmask, const unsigned32 cr_mask
  const unsigned32 vr_mask = out_vmask | in_vmask;
  model_busy *busy_ptr;
  if ((model_ptr->vr_busy & vr_mask) || (model_ptr->cr_fpscr_busy & cr_mask)) {
    model_new_cycle(model_ptr); /* don't count first dependency as a stall */
    while ((model_ptr->vr_busy & vr_mask) || (model_ptr->cr_fpscr_busy & cr_mask)) {
      if (WITH_TRACE && ppc_trace[trace_model]) {
        model_trace_busy_p(model_ptr, 0, 0, cr_mask, PPC_NO_SPR);
        model_trace_altivec_busy_p(model_ptr, vr_mask);
      }
      model_ptr->nr_stalls_data++;
      model_new_cycle(model_ptr);
    }
  }
  busy_ptr = model_wait_for_unit(index, model_ptr, &model_ptr->timing[index]);
  model_ptr->cr_fpscr_busy |= cr_mask;
  busy_ptr->cr_fpscr_busy |= cr_mask;
  model_ptr->vr_busy |= out_vmask;
  busy_ptr->vr_busy |= out_vmask;
  if (out_vmask)
    busy_ptr->nr_writebacks = (PPC_ONE_BIT_SET_P(out_vmask)) ? 1 : 2;
  if (cr_mask)
    busy_ptr->nr_writebacks++;
  if (WITH_TRACE && ppc_trace[trace_model])
    model_trace_altivec_make_busy(model_ptr, vr_mask, cr_mask);
# Schedule an AltiVec instruction that takes vector input registers and produces vector output registers, touches VSCR
void::model-function::ppc_insn_vr_vscr:itable_index index, model_data *model_ptr, const unsigned32 out_vmask, const unsigned32 in_vmask
  const unsigned32 vr_mask = out_vmask | in_vmask;
  model_busy *busy_ptr;
  if ((model_ptr->vr_busy & vr_mask) != 0 || model_ptr->vscr_busy != 0) {
    model_new_cycle(model_ptr); /* don't count first dependency as a stall */
    while ((model_ptr->vr_busy & vr_mask) != 0 || model_ptr->vscr_busy != 0) {
      if (WITH_TRACE && ppc_trace[trace_model])
        model_trace_altivec_busy_p(model_ptr, vr_mask);
      model_ptr->nr_stalls_data++;
      model_new_cycle(model_ptr);
    }
  }
  busy_ptr = model_wait_for_unit(index, model_ptr, &model_ptr->timing[index]);
  model_ptr->vr_busy |= out_vmask;
  busy_ptr->vr_busy |= out_vmask;
  model_ptr->vscr_busy = 1;
  busy_ptr->vscr_busy = 1;
  if (out_vmask)
    busy_ptr->nr_writebacks = 1 + ((PPC_ONE_BIT_SET_P(out_vmask)) ? 1 : 2);
  if (WITH_TRACE && ppc_trace[trace_model])
    model_trace_altivec_make_busy(model_ptr, vr_mask, 0);
# Schedule an MFVSCR instruction that takes the VSCR as input and produces an AltiVec output register
void::model-function::ppc_insn_from_vscr:itable_index index, model_data *model_ptr, const unsigned32 vr_mask
  model_busy *busy_ptr;
  while ((model_ptr->vr_busy & vr_mask) != 0 || model_ptr->vscr_busy != 0) {
    if (WITH_TRACE && ppc_trace[trace_model])
      model_trace_altivec_busy_p(model_ptr, vr_mask);
    model_ptr->nr_stalls_data++;
    model_new_cycle(model_ptr);
  }
  busy_ptr = model_wait_for_unit(index, model_ptr, &model_ptr->timing[index]);
  model_ptr->cr_fpscr_busy |= vr_mask;
  busy_ptr->cr_fpscr_busy |= vr_mask;
  if (vr_mask)
    busy_ptr->nr_writebacks = 1;
  model_ptr->vr_busy |= vr_mask;
  if (WITH_TRACE && ppc_trace[trace_model])
    model_trace_altivec_make_busy(model_ptr, vr_mask, 0);

# Schedule an MTVSCR instruction that takes one AltiVec input register and produces a VSCR output
void::model-function::ppc_insn_to_vscr:itable_index index, model_data *model_ptr, const unsigned32 vr_mask
  model_busy *busy_ptr;
  while ((model_ptr->vr_busy & vr_mask) != 0 || model_ptr->vscr_busy != 0) {
    if (WITH_TRACE && ppc_trace[trace_model])
      model_trace_altivec_busy_p(model_ptr, vr_mask);
    model_ptr->nr_stalls_data++;
    model_new_cycle(model_ptr);
  }
  busy_ptr = model_wait_for_unit(index, model_ptr, &model_ptr->timing[index]);
  busy_ptr->vscr_busy = 1;
  model_ptr->vscr_busy = 1;
  busy_ptr->nr_writebacks = 1;
  TRACE(trace_model, ("Making VSCR busy.\n"));
# The following are the AltiVec saturate operations.
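# Each helper clamps its wider argument to the range of the narrower result
# type, returning the clamped value and setting *sat to 1 when clamping
# occurred and to 0 otherwise.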
signed8::model-function::altivec_signed_saturate_8:signed16 val, int *sat
  signed8 rv;
  if (val > 127) {
    rv = 127;
    *sat = 1;
  } else if (val < -128) {
    rv = -128;
    *sat = 1;
  } else {
    rv = val;
    *sat = 0;
  }
  return rv;

signed16::model-function::altivec_signed_saturate_16:signed32 val, int *sat
  signed16 rv;
  if (val > 32767) {
    rv = 32767;
    *sat = 1;
  } else if (val < -32768) {
    rv = -32768;
    *sat = 1;
  } else {
    rv = val;
    *sat = 0;
  }
  return rv;

signed32::model-function::altivec_signed_saturate_32:signed64 val, int *sat
  signed32 rv;
  if (val > 2147483647) {
    rv = 2147483647;
    *sat = 1;
  } else if (val < -2147483648LL) {
    rv = -2147483648LL;
    *sat = 1;
  } else {
    rv = val;
    *sat = 0;
  }
  return rv;

unsigned8::model-function::altivec_unsigned_saturate_8:signed16 val, int *sat
  unsigned8 rv;
  if (val > 255) {
    rv = 255;
    *sat = 1;
  } else if (val < 0) {
    rv = 0;
    *sat = 1;
  } else {
    rv = val;
    *sat = 0;
  }
  return rv;

unsigned16::model-function::altivec_unsigned_saturate_16:signed32 val, int *sat
  unsigned16 rv;
  if (val > 65535) {
    rv = 65535;
    *sat = 1;
  } else if (val < 0) {
    rv = 0;
    *sat = 1;
  } else {
    rv = val;
    *sat = 0;
  }
  return rv;

unsigned32::model-function::altivec_unsigned_saturate_32:signed64 val, int *sat
  unsigned32 rv;
  if (val > 4294967295LL) {
    rv = 4294967295LL;
    *sat = 1;
  } else if (val < 0) {
    rv = 0;
    *sat = 1;
  } else {
    rv = val;
    *sat = 0;
  }
  return rv;

#
# Load instructions, 6-14 ... 6-22.
#
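# The element loads select the destination element from the low four bits of
# the effective address; AV_BINDEX and AV_HINDEX map element numbers onto the
# layout of the vreg union for the target byte order.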
0.31,6.VS,11.RA,16.RB,21.7,31.0:X:av:lvebx %VD, %RA, %RB:Load Vector Element Byte Indexed
  unsigned_word b;
  unsigned_word EA;
  unsigned_word eb;
  if (RA_is_0) b = 0;
  else b = *rA;
  EA = b + *rB;
  eb = EA & 0xf;
  (*vS).b[AV_BINDEX(eb)] = MEM(unsigned, EA, 1);
  PPC_INSN_INT_VR(0, RA_BITMASK | RB_BITMASK, VS_BITMASK, 0);

0.31,6.VS,11.RA,16.RB,21.39,31.0:X:av:lvehx %VD, %RA, %RB:Load Vector Element Half Word Indexed
  unsigned_word b;
  unsigned_word EA;
  unsigned_word eb;
  if (RA_is_0) b = 0;
  else b = *rA;
  EA = (b + *rB) & ~1;
  eb = EA & 0xf;
  (*vS).h[AV_HINDEX(eb/2)] = MEM(unsigned, EA, 2);
  PPC_INSN_INT_VR(0, RA_BITMASK | RB_BITMASK, VS_BITMASK, 0);

0.31,6.VS,11.RA,16.RB,21.71,31.0:X:av:lvewx %VD, %RA, %RB:Load Vector Element Word Indexed
  unsigned_word b;
  unsigned_word EA;
  unsigned_word eb;
  if (RA_is_0) b = 0;
  else b = *rA;
  EA = (b + *rB) & ~3;
  eb = EA & 0xf;
  (*vS).w[eb/4] = MEM(unsigned, EA, 4);
  PPC_INSN_INT_VR(0, RA_BITMASK | RB_BITMASK, VS_BITMASK, 0);

0.31,6.VS,11.RA,16.RB,21.6,31.0:X:av:lvsl %VD, %RA, %RB:Load Vector for Shift Left
  unsigned_word b;
  unsigned_word addr;
  int i, j;
  if (RA_is_0) b = 0;
  else b = *rA;
  addr = b + *rB;
  j = addr & 0xf;
  for (i = 0; i < 16; i++)
    if (CURRENT_TARGET_BYTE_ORDER == BIG_ENDIAN)
      (*vS).b[AV_BINDEX(i)] = j++;
    else
      (*vS).b[AV_BINDEX(15 - i)] = j++;
  PPC_INSN_INT_VR(0, RA_BITMASK | RB_BITMASK, VS_BITMASK, 0);

0.31,6.VS,11.RA,16.RB,21.38,31.0:X:av:lvsr %VD, %RA, %RB:Load Vector for Shift Right
  unsigned_word b;
  unsigned_word addr;
  int i, j;
  if (RA_is_0) b = 0;
  else b = *rA;
  addr = b + *rB;
  j = 0x10 - (addr & 0xf);
  for (i = 0; i < 16; i++)
    if (CURRENT_TARGET_BYTE_ORDER == BIG_ENDIAN)
      (*vS).b[AV_BINDEX(i)] = j++;
    else
      (*vS).b[AV_BINDEX(15 - i)] = j++;
  PPC_INSN_INT_VR(0, RA_BITMASK | RB_BITMASK, VS_BITMASK, 0);

0.31,6.VS,11.RA,16.RB,21.103,31.0:X:av:lvx %VD, %RA, %RB:Load Vector Indexed
  unsigned_word b;
  unsigned_word EA;
  if (RA_is_0) b = 0;
  else b = *rA;
  EA = (b + *rB) & ~0xf;
  if (CURRENT_TARGET_BYTE_ORDER == BIG_ENDIAN) {
    (*vS).w[0] = MEM(unsigned, EA + 0, 4);
    (*vS).w[1] = MEM(unsigned, EA + 4, 4);
    (*vS).w[2] = MEM(unsigned, EA + 8, 4);
    (*vS).w[3] = MEM(unsigned, EA + 12, 4);
  } else {
    (*vS).w[0] = MEM(unsigned, EA + 12, 4);
    (*vS).w[1] = MEM(unsigned, EA + 8, 4);
    (*vS).w[2] = MEM(unsigned, EA + 4, 4);
    (*vS).w[3] = MEM(unsigned, EA + 0, 4);
  }
  PPC_INSN_INT_VR(0, RA_BITMASK | RB_BITMASK, VS_BITMASK, 0);

0.31,6.VS,11.RA,16.RB,21.359,31.0:X:av:lvxl %VD, %RA, %RB:Load Vector Indexed LRU
  unsigned_word b;
  unsigned_word EA;
  if (RA_is_0) b = 0;
  else b = *rA;
  EA = (b + *rB) & ~0xf;
  if (CURRENT_TARGET_BYTE_ORDER == BIG_ENDIAN) {
    (*vS).w[0] = MEM(unsigned, EA + 0, 4);
    (*vS).w[1] = MEM(unsigned, EA + 4, 4);
    (*vS).w[2] = MEM(unsigned, EA + 8, 4);
    (*vS).w[3] = MEM(unsigned, EA + 12, 4);
  } else {
    (*vS).w[0] = MEM(unsigned, EA + 12, 4);
    (*vS).w[1] = MEM(unsigned, EA + 8, 4);
    (*vS).w[2] = MEM(unsigned, EA + 4, 4);
    (*vS).w[3] = MEM(unsigned, EA + 0, 4);
  }
  PPC_INSN_INT_VR(0, RA_BITMASK | RB_BITMASK, VS_BITMASK, 0);

#
# Move to/from VSCR instructions, 6-23 & 6-24.
#
0.4,6.VS,11.0,16.0,21.1540:VX:av:mfvscr %VS:Move from Vector Status and Control Register
  (*vS).w[0] = 0;
  (*vS).w[1] = 0;
  (*vS).w[2] = 0;
  (*vS).w[3] = VSCR;
  PPC_INSN_FROM_VSCR(VS_BITMASK);

0.4,6.0,11.0,16.VB,21.1604:VX:av:mtvscr %VB:Move to Vector Status and Control Register
  VSCR = (*vB).w[3];
  PPC_INSN_TO_VSCR(VB_BITMASK);

#
# Store instructions, 6-25 ... 6-29.
#
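# The stores mirror the loads above; here the endian adjustment is written
# out inline rather than going through AV_BINDEX/AV_HINDEX.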
0.31,6.VS,11.RA,16.RB,21.135,31.0:X:av:stvebx %VD, %RA, %RB:Store Vector Element Byte Indexed
  unsigned_word b;
  unsigned_word EA;
  unsigned_word eb;
  if (RA_is_0) b = 0;
  else b = *rA;
  EA = b + *rB;
  eb = EA & 0xf;
  if (CURRENT_TARGET_BYTE_ORDER == BIG_ENDIAN)
    STORE(EA, 1, (*vS).b[eb]);
  else
    STORE(EA, 1, (*vS).b[15-eb]);
  PPC_INSN_INT_VR(0, RA_BITMASK | RB_BITMASK, VS_BITMASK, 0);

0.31,6.VS,11.RA,16.RB,21.167,31.0:X:av:stvehx %VD, %RA, %RB:Store Vector Element Half Word Indexed
  unsigned_word b;
  unsigned_word EA;
  unsigned_word eb;
  if (RA_is_0) b = 0;
  else b = *rA;
  EA = (b + *rB) & ~1;
  eb = EA & 0xf;
  if (CURRENT_TARGET_BYTE_ORDER == BIG_ENDIAN)
    STORE(EA, 2, (*vS).h[eb/2]);
  else
    STORE(EA, 2, (*vS).h[7 - (eb/2)]);
  PPC_INSN_INT_VR(0, RA_BITMASK | RB_BITMASK, VS_BITMASK, 0);
0.31,6.VS,11.RA,16.RB,21.199,31.0:X:av:stvewx %VD, %RA, %RB:Store Vector Element Word Indexed
  unsigned_word b;
  unsigned_word EA;
  unsigned_word eb;
  if (RA_is_0) b = 0;
  else b = *rA;
  EA = (b + *rB) & ~3;
  eb = EA & 0xf;
  if (CURRENT_TARGET_BYTE_ORDER == BIG_ENDIAN)
    STORE(EA, 4, (*vS).w[eb/4]);
  else
    STORE(EA, 4, (*vS).w[3-(eb/4)]);
  PPC_INSN_INT_VR(0, RA_BITMASK | RB_BITMASK, VS_BITMASK, 0);

0.31,6.VS,11.RA,16.RB,21.231,31.0:X:av:stvx %VD, %RA, %RB:Store Vector Indexed
  unsigned_word b;
  unsigned_word EA;
  if (RA_is_0) b = 0;
  else b = *rA;
  EA = (b + *rB) & ~0xf;
  if (CURRENT_TARGET_BYTE_ORDER == BIG_ENDIAN) {
    STORE(EA + 0, 4, (*vS).w[0]);
    STORE(EA + 4, 4, (*vS).w[1]);
    STORE(EA + 8, 4, (*vS).w[2]);
    STORE(EA + 12, 4, (*vS).w[3]);
  } else {
    STORE(EA + 12, 4, (*vS).w[0]);
    STORE(EA + 8, 4, (*vS).w[1]);
    STORE(EA + 4, 4, (*vS).w[2]);
    STORE(EA + 0, 4, (*vS).w[3]);
  }
  PPC_INSN_INT_VR(0, RA_BITMASK | RB_BITMASK, VS_BITMASK, 0);

0.31,6.VS,11.RA,16.RB,21.487,31.0:X:av:stvxl %VD, %RA, %RB:Store Vector Indexed LRU
  unsigned_word b;
  unsigned_word EA;
  if (RA_is_0) b = 0;
  else b = *rA;
  EA = (b + *rB) & ~0xf;
  if (CURRENT_TARGET_BYTE_ORDER == BIG_ENDIAN) {
    STORE(EA + 0, 4, (*vS).w[0]);
    STORE(EA + 4, 4, (*vS).w[1]);
    STORE(EA + 8, 4, (*vS).w[2]);
    STORE(EA + 12, 4, (*vS).w[3]);
  } else {
    STORE(EA + 12, 4, (*vS).w[0]);
    STORE(EA + 8, 4, (*vS).w[1]);
    STORE(EA + 4, 4, (*vS).w[2]);
    STORE(EA + 0, 4, (*vS).w[3]);
  }
  PPC_INSN_INT_VR(0, RA_BITMASK | RB_BITMASK, VS_BITMASK, 0);

#
# Vector Add instructions, 6-30 ... 6-40.
#
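# The modulo forms wrap silently.  The saturate forms clamp through the
# altivec_*_saturate_* helpers above and record any overflow in VSCR[SAT]
# via ALTIVEC_SET_SAT.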
0.4,6.VS,11.VA,16.VB,21.384:VX:av:vaddcuw %VD, %VA, %VB:Vector Add Carryout Unsigned Word
  unsigned64 temp;
  int i;
  for (i = 0; i < 4; i++) {
    temp = (unsigned64)(*vA).w[i] + (unsigned64)(*vB).w[i];
    (*vS).w[i] = temp >> 32;
  }
  PPC_INSN_VR(VS_BITMASK, VA_BITMASK | VB_BITMASK);

0.4,6.VS,11.VA,16.VB,21.10:VX:av:vaddfp %VD, %VA, %VB:Vector Add Floating Point
  int i;
  unsigned32 f;
  sim_fpu a, b, d;
  for (i = 0; i < 4; i++) {
    sim_fpu_32to (&a, (*vA).w[i]);
    sim_fpu_32to (&b, (*vB).w[i]);
    sim_fpu_add (&d, &a, &b);
    sim_fpu_to32 (&f, &d);
    (*vS).w[i] = f;
  }
  PPC_INSN_VR(VS_BITMASK, VA_BITMASK | VB_BITMASK);
0.4,6.VS,11.VA,16.VB,21.768:VX:av:vaddsbs %VD, %VA, %VB:Vector Add Signed Byte Saturate
  int i, sat, tempsat;
  signed16 temp;
  sat = 0;
  for (i = 0; i < 16; i++) {
    temp = (signed16)(signed8)(*vA).b[i] + (signed16)(signed8)(*vB).b[i];
    (*vS).b[i] = altivec_signed_saturate_8(temp, &tempsat);
    sat |= tempsat;
  }
  ALTIVEC_SET_SAT(sat);
  PPC_INSN_VR_VSCR(VS_BITMASK, VA_BITMASK | VB_BITMASK);

0.4,6.VS,11.VA,16.VB,21.832:VX:av:vaddshs %VD, %VA, %VB:Vector Add Signed Half Word Saturate
  int i, sat, tempsat;
  signed32 temp, a, b;
  sat = 0;
  for (i = 0; i < 8; i++) {
    a = (signed32)(signed16)(*vA).h[i];
    b = (signed32)(signed16)(*vB).h[i];
    temp = a + b;
    (*vS).h[i] = altivec_signed_saturate_16(temp, &tempsat);
    sat |= tempsat;
  }
  ALTIVEC_SET_SAT(sat);
  PPC_INSN_VR_VSCR(VS_BITMASK, VA_BITMASK | VB_BITMASK);

0.4,6.VS,11.VA,16.VB,21.896:VX:av:vaddsws %VD, %VA, %VB:Vector Add Signed Word Saturate
  int i, sat, tempsat;
  signed64 temp;
  sat = 0;
  for (i = 0; i < 4; i++) {
    temp = (signed64)(signed32)(*vA).w[i] + (signed64)(signed32)(*vB).w[i];
    (*vS).w[i] = altivec_signed_saturate_32(temp, &tempsat);
    sat |= tempsat;
  }
  ALTIVEC_SET_SAT(sat);
  PPC_INSN_VR_VSCR(VS_BITMASK, VA_BITMASK | VB_BITMASK);
0.4,6.VS,11.VA,16.VB,21.0:VX:av:vaddubm %VD, %VA, %VB:Vector Add Unsigned Byte Modulo
  int i;
  for (i = 0; i < 16; i++)
    (*vS).b[i] = ((*vA).b[i] + (*vB).b[i]) & 0xff;
  PPC_INSN_VR(VS_BITMASK, VA_BITMASK | VB_BITMASK);

0.4,6.VS,11.VA,16.VB,21.512:VX:av:vaddubs %VD, %VA, %VB:Vector Add Unsigned Byte Saturate
  int i, sat, tempsat;
  signed16 temp;
  sat = 0;
  for (i = 0; i < 16; i++) {
    temp = (signed16)(unsigned8)(*vA).b[i] + (signed16)(unsigned8)(*vB).b[i];
    (*vS).b[i] = altivec_unsigned_saturate_8(temp, &tempsat);
    sat |= tempsat;
  }
  ALTIVEC_SET_SAT(sat);
  PPC_INSN_VR_VSCR(VS_BITMASK, VA_BITMASK | VB_BITMASK);

0.4,6.VS,11.VA,16.VB,21.64:VX:av:vadduhm %VD, %VA, %VB:Vector Add Unsigned Half Word Modulo
  int i;
  for (i = 0; i < 8; i++)
    (*vS).h[i] = ((*vA).h[i] + (*vB).h[i]) & 0xffff;
  PPC_INSN_VR(VS_BITMASK, VA_BITMASK | VB_BITMASK);
0.4,6.VS,11.VA,16.VB,21.576:VX:av:vadduhs %VD, %VA, %VB:Vector Add Unsigned Half Word Saturate
  int i, sat, tempsat;
  signed32 temp;
  sat = 0;
  for (i = 0; i < 8; i++) {
    temp = (signed32)(unsigned16)(*vA).h[i] + (signed32)(unsigned16)(*vB).h[i];
    (*vS).h[i] = altivec_unsigned_saturate_16(temp, &tempsat);
    sat |= tempsat;
  }
  ALTIVEC_SET_SAT(sat);
  PPC_INSN_VR_VSCR(VS_BITMASK, VA_BITMASK | VB_BITMASK);

0.4,6.VS,11.VA,16.VB,21.128:VX:av:vadduwm %VD, %VA, %VB:Vector Add Unsigned Word Modulo
  int i;
  for (i = 0; i < 4; i++)
    (*vS).w[i] = (*vA).w[i] + (*vB).w[i];
  PPC_INSN_VR(VS_BITMASK, VA_BITMASK | VB_BITMASK);

0.4,6.VS,11.VA,16.VB,21.640:VX:av:vadduws %VD, %VA, %VB:Vector Add Unsigned Word Saturate
  int i, sat, tempsat;
  signed64 temp;
  sat = 0;
  for (i = 0; i < 4; i++) {
    temp = (signed64)(unsigned32)(*vA).w[i] + (signed64)(unsigned32)(*vB).w[i];
    (*vS).w[i] = altivec_unsigned_saturate_32(temp, &tempsat);
    sat |= tempsat;
  }
  ALTIVEC_SET_SAT(sat);
  PPC_INSN_VR_VSCR(VS_BITMASK, VA_BITMASK | VB_BITMASK);

#
# Vector AND instructions, 6-41, 6-42
#
0.4,6.VS,11.VA,16.VB,21.1028:VX:av:vand %VD, %VA, %VB:Vector Logical AND
  int i;
  for (i = 0; i < 4; i++)
    (*vS).w[i] = (*vA).w[i] & (*vB).w[i];
  PPC_INSN_VR(VS_BITMASK, VA_BITMASK | VB_BITMASK);

0.4,6.VS,11.VA,16.VB,21.1092:VX:av:vandc %VD, %VA, %VB:Vector Logical AND with Complement
  int i;
  for (i = 0; i < 4; i++)
    (*vS).w[i] = (*vA).w[i] & ~((*vB).w[i]);
  PPC_INSN_VR(VS_BITMASK, VA_BITMASK | VB_BITMASK);

#
# Vector Average instructions, 6-43, 6-48
#
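# Each average is computed as (a + b + 1) >> 1 in a type wide enough to hold
# the intermediate sum without overflow.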
0.4,6.VS,11.VA,16.VB,21.1282:VX:av:vavgsb %VD, %VA, %VB:Vector Average Signed Byte
  int i;
  signed16 temp, a, b;
  for (i = 0; i < 16; i++) {
    a = (signed16)(signed8)(*vA).b[i];
    b = (signed16)(signed8)(*vB).b[i];
    temp = a + b + 1;
    (*vS).b[i] = (temp >> 1) & 0xff;
  }
  PPC_INSN_VR(VS_BITMASK, VA_BITMASK | VB_BITMASK);

0.4,6.VS,11.VA,16.VB,21.1346:VX:av:vavgsh %VD, %VA, %VB:Vector Average Signed Half Word
  int i;
  signed32 temp, a, b;
  for (i = 0; i < 8; i++) {
    a = (signed32)(signed16)(*vA).h[i];
    b = (signed32)(signed16)(*vB).h[i];
    temp = a + b + 1;
    (*vS).h[i] = (temp >> 1) & 0xffff;
  }
  PPC_INSN_VR(VS_BITMASK, VA_BITMASK | VB_BITMASK);

0.4,6.VS,11.VA,16.VB,21.1410:VX:av:vavgsw %VD, %VA, %VB:Vector Average Signed Word
  int i;
  signed64 temp, a, b;
  for (i = 0; i < 4; i++) {
    a = (signed64)(signed32)(*vA).w[i];
    b = (signed64)(signed32)(*vB).w[i];
    temp = a + b + 1;
    (*vS).w[i] = (temp >> 1) & 0xffffffff;
  }
  PPC_INSN_VR(VS_BITMASK, VA_BITMASK | VB_BITMASK);

0.4,6.VS,11.VA,16.VB,21.1026:VX:av:vavgub %VD, %VA, %VB:Vector Average Unsigned Byte
  int i;
  unsigned16 temp, a, b;
  for (i = 0; i < 16; i++) {
    a = (*vA).b[i];
    b = (*vB).b[i];
    temp = a + b + 1;
    (*vS).b[i] = (temp >> 1) & 0xff;
  }
  PPC_INSN_VR(VS_BITMASK, VA_BITMASK | VB_BITMASK);

0.4,6.VS,11.VA,16.VB,21.1090:VX:av:vavguh %VD, %VA, %VB:Vector Average Unsigned Half Word
  int i;
  unsigned32 temp, a, b;
  for (i = 0; i < 8; i++) {
    a = (*vA).h[i];
    b = (*vB).h[i];
    temp = a + b + 1;
    (*vS).h[i] = (temp >> 1) & 0xffff;
  }
  PPC_INSN_VR(VS_BITMASK, VA_BITMASK | VB_BITMASK);

0.4,6.VS,11.VA,16.VB,21.1154:VX:av:vavguw %VD, %VA, %VB:Vector Average Unsigned Word
  int i;
  unsigned64 temp, a, b;
  for (i = 0; i < 4; i++) {
    a = (*vA).w[i];
    b = (*vB).w[i];
    temp = a + b + 1;
    (*vS).w[i] = (temp >> 1) & 0xffffffff;
  }
  PPC_INSN_VR(VS_BITMASK, VA_BITMASK | VB_BITMASK);

#
# Vector Fixed Point Convert instructions, 6-49, 6-50
#
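# Note: the scale factor below is built from (2 << UIMM), which is
# 2**(UIMM+1); the architected conversions scale by 2**UIMM, so this
# looks like an off-by-one in the simulator.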
0.4,6.VS,11.UIMM,16.VB,21.842:VX:av:vcfsx %VD, %VB, %UIMM:Vector Convert From Signed Fixed-Point Word
  int i;
  unsigned32 f;
  sim_fpu b, div, d;
  for (i = 0; i < 4; i++) {
    sim_fpu_32to (&b, (*vB).w[i]);
    sim_fpu_u32to (&div, 2 << UIMM, sim_fpu_round_default);
    sim_fpu_div (&d, &b, &div);
    sim_fpu_to32 (&f, &d);
    (*vS).w[i] = f;
  }
  PPC_INSN_VR(VS_BITMASK, VB_BITMASK);

0.4,6.VS,11.UIMM,16.VB,21.778:VX:av:vcfux %VD, %VB, %UIMM:Vector Convert From Unsigned Fixed-Point Word
  int i;
  unsigned32 f;
  sim_fpu b, d, div;
  for (i = 0; i < 4; i++) {
    sim_fpu_32to (&b, (*vB).w[i]);
    sim_fpu_u32to (&div, 2 << UIMM, sim_fpu_round_default);
    sim_fpu_div (&d, &b, &div);
    sim_fpu_to32u (&f, &d, sim_fpu_round_default);
    (*vS).w[i] = f;
  }
  PPC_INSN_VR(VS_BITMASK, VB_BITMASK);

#
# Vector Compare instructions, 6-51 ... 6-64
#
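# Each compare writes an all-ones or all-zero mask into the corresponding
# result element; when the record bit (RC) is set, ALTIVEC_SET_CR6
# summarizes the element results in CR field 6.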
0.4,6.VS,11.VA,16.VB,21.RC,22.966:VXR:av:vcmpbfpx %VD, %VA, %VB:Vector Compare Bounds Floating Point
  int i, le, ge;
  sim_fpu a, b, d;
  for (i = 0; i < 4; i++) {
    sim_fpu_32to (&a, (*vA).w[i]);
    sim_fpu_32to (&b, (*vB).w[i]);
    le = sim_fpu_is_le(&a, &b);
    ge = sim_fpu_is_ge(&a, &b);
    (*vS).w[i] = (le ? 0 : 1 << 31) | (ge ? 0 : 1 << 30);
  }
  if (RC)
    ALTIVEC_SET_CR6(vS, 0);
  PPC_INSN_VR_CR(VS_BITMASK, VA_BITMASK | VB_BITMASK, RC ? 0x000000f0 : 0);
0.4,6.VS,11.VA,16.VB,21.RC,22.198:VXR:av:vcmpeqfpx %VD, %VA, %VB:Vector Compare Equal-to Floating Point
  int i;
  sim_fpu a, b;
  for (i = 0; i < 4; i++) {
    sim_fpu_32to (&a, (*vA).w[i]);
    sim_fpu_32to (&b, (*vB).w[i]);
    if (sim_fpu_is_eq(&a, &b))
      (*vS).w[i] = 0xffffffff;
    else
      (*vS).w[i] = 0;
  }
  if (RC)
    ALTIVEC_SET_CR6(vS, 1);
  PPC_INSN_VR_CR(VS_BITMASK, VA_BITMASK | VB_BITMASK, RC ? 0x000000f0 : 0);
0.4,6.VS,11.VA,16.VB,21.RC,22.6:VXR:av:vcmpequbx %VD, %VA, %VB:Vector Compare Equal-to Unsigned Byte
  int i;
  for (i = 0; i < 16; i++)
    if ((*vA).b[i] == (*vB).b[i])
      (*vS).b[i] = 0xff;
    else
      (*vS).b[i] = 0;
  if (RC)
    ALTIVEC_SET_CR6(vS, 1);
  PPC_INSN_VR_CR(VS_BITMASK, VA_BITMASK | VB_BITMASK, RC ? 0x000000f0 : 0);

0.4,6.VS,11.VA,16.VB,21.RC,22.70:VXR:av:vcmpequhx %VD, %VA, %VB:Vector Compare Equal-to Unsigned Half Word
  int i;
  for (i = 0; i < 8; i++)
    if ((*vA).h[i] == (*vB).h[i])
      (*vS).h[i] = 0xffff;
    else
      (*vS).h[i] = 0;
  if (RC)
    ALTIVEC_SET_CR6(vS, 1);
  PPC_INSN_VR_CR(VS_BITMASK, VA_BITMASK | VB_BITMASK, RC ? 0x000000f0 : 0);

0.4,6.VS,11.VA,16.VB,21.RC,22.134:VXR:av:vcmpequwx %VD, %VA, %VB:Vector Compare Equal-to Unsigned Word
  int i;
  for (i = 0; i < 4; i++)
    if ((*vA).w[i] == (*vB).w[i])
      (*vS).w[i] = 0xffffffff;
    else
      (*vS).w[i] = 0;
  if (RC)
    ALTIVEC_SET_CR6(vS, 1);
  PPC_INSN_VR_CR(VS_BITMASK, VA_BITMASK | VB_BITMASK, RC ? 0x000000f0 : 0);

0.4,6.VS,11.VA,16.VB,21.RC,22.454:VXR:av:vcmpgefpx %VD, %VA, %VB:Vector Compare Greater-Than-or-Equal-to Floating Point
  int i;
  sim_fpu a, b;
  for (i = 0; i < 4; i++) {
    sim_fpu_32to (&a, (*vA).w[i]);
    sim_fpu_32to (&b, (*vB).w[i]);
    if (sim_fpu_is_ge(&a, &b))
      (*vS).w[i] = 0xffffffff;
    else
      (*vS).w[i] = 0;
  }
  if (RC)
    ALTIVEC_SET_CR6(vS, 1);
  PPC_INSN_VR_CR(VS_BITMASK, VA_BITMASK | VB_BITMASK, RC ? 0x000000f0 : 0);

0.4,6.VS,11.VA,16.VB,21.RC,22.710:VXR:av:vcmpgtfpx %VD, %VA, %VB:Vector Compare Greater-Than Floating Point
  int i;
  sim_fpu a, b;
  for (i = 0; i < 4; i++) {
    sim_fpu_32to (&a, (*vA).w[i]);
    sim_fpu_32to (&b, (*vB).w[i]);
    if (sim_fpu_is_gt(&a, &b))
      (*vS).w[i] = 0xffffffff;
    else
      (*vS).w[i] = 0;
  }
  if (RC)
    ALTIVEC_SET_CR6(vS, 1);
  PPC_INSN_VR_CR(VS_BITMASK, VA_BITMASK | VB_BITMASK, RC ? 0x000000f0 : 0);

0.4,6.VS,11.VA,16.VB,21.RC,22.774:VXR:av:vcmpgtsbx %VD, %VA, %VB:Vector Compare Greater-Than Signed Byte
  int i;
  signed8 a, b;
  for (i = 0; i < 16; i++) {
    a = (*vA).b[i];
    b = (*vB).b[i];
    if (a > b)
      (*vS).b[i] = 0xff;
    else
      (*vS).b[i] = 0;
  }
  if (RC)
    ALTIVEC_SET_CR6(vS, 1);
  PPC_INSN_VR_CR(VS_BITMASK, VA_BITMASK | VB_BITMASK, RC ? 0x000000f0 : 0);

0.4,6.VS,11.VA,16.VB,21.RC,22.838:VXR:av:vcmpgtshx %VD, %VA, %VB:Vector Compare Greater-Than Signed Half Word
  int i;
  signed16 a, b;
  for (i = 0; i < 8; i++) {
    a = (*vA).h[i];
    b = (*vB).h[i];
    if (a > b)
      (*vS).h[i] = 0xffff;
    else
      (*vS).h[i] = 0;
  }
  if (RC)
    ALTIVEC_SET_CR6(vS, 1);
  PPC_INSN_VR_CR(VS_BITMASK, VA_BITMASK | VB_BITMASK, RC ? 0x000000f0 : 0);

0.4,6.VS,11.VA,16.VB,21.RC,22.902:VXR:av:vcmpgtswx %VD, %VA, %VB:Vector Compare Greater-Than Signed Word
  int i;
  signed32 a, b;
  for (i = 0; i < 4; i++) {
    a = (*vA).w[i];
    b = (*vB).w[i];
    if (a > b)
      (*vS).w[i] = 0xffffffff;
    else
      (*vS).w[i] = 0;
  }
  if (RC)
    ALTIVEC_SET_CR6(vS, 1);
  PPC_INSN_VR_CR(VS_BITMASK, VA_BITMASK | VB_BITMASK, RC ? 0x000000f0 : 0);

0.4,6.VS,11.VA,16.VB,21.RC,22.518:VXR:av:vcmpgtubx %VD, %VA, %VB:Vector Compare Greater-Than Unsigned Byte
  int i;
  unsigned8 a, b;
  for (i = 0; i < 16; i++) {
    a = (*vA).b[i];
    b = (*vB).b[i];
    if (a > b)
      (*vS).b[i] = 0xff;
    else
      (*vS).b[i] = 0;
  }
  if (RC)
    ALTIVEC_SET_CR6(vS, 1);
  PPC_INSN_VR_CR(VS_BITMASK, VA_BITMASK | VB_BITMASK, RC ? 0x000000f0 : 0);

0.4,6.VS,11.VA,16.VB,21.RC,22.582:VXR:av:vcmpgtuhx %VD, %VA, %VB:Vector Compare Greater-Than Unsigned Half Word
  int i;
  unsigned16 a, b;
  for (i = 0; i < 8; i++) {
    a = (*vA).h[i];
    b = (*vB).h[i];
    if (a > b)
      (*vS).h[i] = 0xffff;
    else
      (*vS).h[i] = 0;
  }
  if (RC)
    ALTIVEC_SET_CR6(vS, 1);
  PPC_INSN_VR_CR(VS_BITMASK, VA_BITMASK | VB_BITMASK, RC ? 0x000000f0 : 0);

0.4,6.VS,11.VA,16.VB,21.RC,22.646:VXR:av:vcmpgtuwx %VD, %VA, %VB:Vector Compare Greater-Than Unsigned Word
  int i;
  unsigned32 a, b;
  for (i = 0; i < 4; i++) {
    a = (*vA).w[i];
    b = (*vB).w[i];
    if (a > b)
      (*vS).w[i] = 0xffffffff;
    else
      (*vS).w[i] = 0;
  }
  if (RC)
    ALTIVEC_SET_CR6(vS, 1);
  PPC_INSN_VR_CR(VS_BITMASK, VA_BITMASK | VB_BITMASK, RC ? 0x000000f0 : 0);

#
# Vector Convert instructions, 6-65, 6-66.
#
0.4,6.VS,11.UIMM,16.VB,21.970:VX:av:vctsxs %VD, %VB, %UIMM:Vector Convert to Signed Fixed-Point Word Saturate
  int i, sat, tempsat;
  signed64 temp;
  sim_fpu a, b, m;
  sat = 0;
  for (i = 0; i < 4; i++) {
    sim_fpu_32to (&b, (*vB).w[i]);
    sim_fpu_u32to (&m, 2 << UIMM, sim_fpu_round_default);
    sim_fpu_mul (&a, &b, &m);
    sim_fpu_to64i (&temp, &a, sim_fpu_round_default);
    (*vS).w[i] = altivec_signed_saturate_32(temp, &tempsat);
    sat |= tempsat;
  }
  ALTIVEC_SET_SAT(sat);
  PPC_INSN_VR_VSCR(VS_BITMASK, VB_BITMASK);

0.4,6.VS,11.UIMM,16.VB,21.906:VX:av:vctuxs %VD, %VB, %UIMM:Vector Convert to Unsigned Fixed-Point Word Saturate
  int i, sat, tempsat;
  signed64 temp;
  sim_fpu a, b, m;
  sat = 0;
  for (i = 0; i < 4; i++) {
    sim_fpu_32to (&b, (*vB).w[i]);
    sim_fpu_u32to (&m, 2 << UIMM, sim_fpu_round_default);
    sim_fpu_mul (&a, &b, &m);
    sim_fpu_to64u (&temp, &a, sim_fpu_round_default);
    (*vS).w[i] = altivec_unsigned_saturate_32(temp, &tempsat);
    sat |= tempsat;
  }
  ALTIVEC_SET_SAT(sat);
  PPC_INSN_VR_VSCR(VS_BITMASK, VB_BITMASK);

#
# Vector Estimate instructions, 6-67 ... 6-70.
#
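# Both estimates are placeholder implementations (note the HACK comments
# below): vexptefp computes "2 ^ bi" with C's XOR operator rather than a
# real power of two, and vlogefp approximates log2 by counting integer
# halvings and adding the count back to the input.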
0.4,6.VS,11.0,16.VB,21.394:VX:av:vexptefp %VD, %VB:Vector 2 Raised to the Exponent Estimate Floating Point
  int i;
  unsigned32 f;
  signed32 bi;
  sim_fpu b, d;
  for (i = 0; i < 4; i++) {
    /*HACK!*/
    sim_fpu_32to (&b, (*vB).w[i]);
    sim_fpu_to32i (&bi, &b, sim_fpu_round_default);
    bi = 2 ^ bi;
    sim_fpu_32to (&d, bi);
    sim_fpu_to32 (&f, &d);
    (*vS).w[i] = f;
  }
  PPC_INSN_VR_VSCR(VS_BITMASK, VB_BITMASK);

0.4,6.VS,11.0,16.VB,21.458:VX:av:vlogefp %VD, %VB:Vector Log2 Estimate Floating Point
  int i;
  unsigned32 c, u, f;
  sim_fpu b, cfpu, d;
  for (i = 0; i < 4; i++) {
    /*HACK!*/
    sim_fpu_32to (&b, (*vB).w[i]);
    sim_fpu_to32u (&u, &b, sim_fpu_round_default);
    for (c = 0; (u /= 2) > 1; c++)
      ;
    sim_fpu_32to (&cfpu, c);
    sim_fpu_add (&d, &b, &cfpu);
    sim_fpu_to32 (&f, &d);
    (*vS).w[i] = f;
  }
  PPC_INSN_VR_VSCR(VS_BITMASK, VB_BITMASK);

#
# Vector Multiply Add instruction, 6-71
#
0.4,6.VS,11.VA,16.VB,21.VC,26.46:VAX:av:vmaddfp %VD, %VA, %VB, %VC:Vector Multiply Add Floating Point
  int i;
  unsigned32 f;
  sim_fpu a, b, c, d, e;
  for (i = 0; i < 4; i++) {
    sim_fpu_32to (&a, (*vA).w[i]);
    sim_fpu_32to (&b, (*vB).w[i]);
    sim_fpu_32to (&c, (*vC).w[i]);
    sim_fpu_mul (&e, &a, &c);
    sim_fpu_add (&d, &e, &b);
    sim_fpu_to32 (&f, &d);
    (*vS).w[i] = f;
  }
  PPC_INSN_VR(VS_BITMASK, VA_BITMASK | VB_BITMASK | VC_BITMASK);

#
# Vector Maximum instructions, 6-72 ... 6-78.
#
0.4,6.VS,11.VA,16.VB,21.1034:VX:av:vmaxfp %VD, %VA, %VB:Vector Maximum Floating Point
  int i;
  unsigned32 f;
  sim_fpu a, b, d;
  for (i = 0; i < 4; i++) {
    sim_fpu_32to (&a, (*vA).w[i]);
    sim_fpu_32to (&b, (*vB).w[i]);
    sim_fpu_max (&d, &a, &b);
    sim_fpu_to32 (&f, &d);
    (*vS).w[i] = f;
  }
  PPC_INSN_VR(VS_BITMASK, VA_BITMASK | VB_BITMASK);

0.4,6.VS,11.VA,16.VB,21.258:VX:av:vmaxsb %VD, %VA, %VB:Vector Maximum Signed Byte
  int i;
  signed8 a, b;
  for (i = 0; i < 16; i++) {
    a = (*vA).b[i];
    b = (*vB).b[i];
    if (a > b)
      (*vS).b[i] = a;
    else
      (*vS).b[i] = b;
  }
  PPC_INSN_VR(VS_BITMASK, VA_BITMASK | VB_BITMASK);

0.4,6.VS,11.VA,16.VB,21.322:VX:av:vmaxsh %VD, %VA, %VB:Vector Maximum Signed Half Word
  int i;
  signed16 a, b;
  for (i = 0; i < 8; i++) {
    a = (*vA).h[i];
    b = (*vB).h[i];
    if (a > b)
      (*vS).h[i] = a;
    else
      (*vS).h[i] = b;
  }
  PPC_INSN_VR(VS_BITMASK, VA_BITMASK | VB_BITMASK);

0.4,6.VS,11.VA,16.VB,21.386:VX:av:vmaxsw %VD, %VA, %VB:Vector Maximum Signed Word
  int i;
  signed32 a, b;
  for (i = 0; i < 4; i++) {
    a = (*vA).w[i];
    b = (*vB).w[i];
    if (a > b)
      (*vS).w[i] = a;
    else
      (*vS).w[i] = b;
  }
  PPC_INSN_VR(VS_BITMASK, VA_BITMASK | VB_BITMASK);
0.4,6.VS,11.VA,16.VB,21.2:VX:av:vmaxub %VD, %VA, %VB:Vector Maximum Unsigned Byte
  int i;
  unsigned8 a, b;
  for (i = 0; i < 16; i++) {
    a = (*vA).b[i];
    b = (*vB).b[i];
    if (a > b)
      (*vS).b[i] = a;
    else
      (*vS).b[i] = b;
  }
  PPC_INSN_VR(VS_BITMASK, VA_BITMASK | VB_BITMASK);

0.4,6.VS,11.VA,16.VB,21.66:VX:av:vmaxuh %VD, %VA, %VB:Vector Maximum Unsigned Half Word
  int i;
  unsigned16 a, b;
  for (i = 0; i < 8; i++) {
    a = (*vA).h[i];
    b = (*vB).h[i];
    if (a > b)
      (*vS).h[i] = a;
    else
      (*vS).h[i] = b;
  }
  PPC_INSN_VR(VS_BITMASK, VA_BITMASK | VB_BITMASK);

0.4,6.VS,11.VA,16.VB,21.130:VX:av:vmaxuw %VD, %VA, %VB:Vector Maximum Unsigned Word
  int i;
  unsigned32 a, b;
  for (i = 0; i < 4; i++) {
    a = (*vA).w[i];
    b = (*vB).w[i];
    if (a > b)
      (*vS).w[i] = a;
    else
      (*vS).w[i] = b;
  }
  PPC_INSN_VR(VS_BITMASK, VA_BITMASK | VB_BITMASK);

#
# Vector Multiply High instructions, 6-79, 6-80.
#
0.4,6.VS,11.VA,16.VB,21.VC,26.32:VAX:av:vmhaddshs %VD, %VA, %VB, %VC:Vector Multiply High and Add Signed Half Word Saturate
  int i, sat, tempsat;
  signed16 a, b;
  signed32 prod, temp, c;
  sat = 0;
  for (i = 0; i < 8; i++) {
    a = (*vA).h[i];
    b = (*vB).h[i];
    c = (signed32)(signed16)(*vC).h[i];
    prod = (signed32)a * (signed32)b;
    temp = (prod >> 15) + c;
    (*vS).h[i] = altivec_signed_saturate_16(temp, &tempsat);
    sat |= tempsat;
  }
  ALTIVEC_SET_SAT(sat);
  PPC_INSN_VR_VSCR(VS_BITMASK, VA_BITMASK | VB_BITMASK | VC_BITMASK);

0.4,6.VS,11.VA,16.VB,21.VC,26.33:VAX:av:vmhraddshs %VD, %VA, %VB, %VC:Vector Multiply High Round and Add Signed Half Word Saturate
  int i, sat, tempsat;
  signed16 a, b;
  signed32 prod, temp, c;
  sat = 0;
  for (i = 0; i < 8; i++) {
    a = (*vA).h[i];
    b = (*vB).h[i];
    c = (signed32)(signed16)(*vC).h[i];
    prod = (signed32)a * (signed32)b;
    prod += 0x4000;
    temp = (prod >> 15) + c;
    (*vS).h[i] = altivec_signed_saturate_16(temp, &tempsat);
    sat |= tempsat;
  }
  ALTIVEC_SET_SAT(sat);
  PPC_INSN_VR_VSCR(VS_BITMASK, VA_BITMASK | VB_BITMASK | VC_BITMASK);

#
# Vector Minimum instructions, 6-81 ... 6-87
#
0.4,6.VS,11.VA,16.VB,21.1098:VX:av:vminfp %VD, %VA, %VB:Vector Minimum Floating Point
  int i;
  unsigned32 f;
  sim_fpu a, b, d;
  for (i = 0; i < 4; i++) {
    sim_fpu_32to (&a, (*vA).w[i]);
    sim_fpu_32to (&b, (*vB).w[i]);
    sim_fpu_min (&d, &a, &b);
    sim_fpu_to32 (&f, &d);
    (*vS).w[i] = f;
  }
  PPC_INSN_VR(VS_BITMASK, VA_BITMASK | VB_BITMASK);

0.4,6.VS,11.VA,16.VB,21.770:VX:av:vminsb %VD, %VA, %VB:Vector Minimum Signed Byte
  int i;
  signed8 a, b;
  for (i = 0; i < 16; i++) {
    a = (*vA).b[i];
    b = (*vB).b[i];
    if (a < b)
      (*vS).b[i] = a;
    else
      (*vS).b[i] = b;
  }
  PPC_INSN_VR(VS_BITMASK, VA_BITMASK | VB_BITMASK);

0.4,6.VS,11.VA,16.VB,21.834:VX:av:vminsh %VD, %VA, %VB:Vector Minimum Signed Half Word
  int i;
  signed16 a, b;
  for (i = 0; i < 8; i++) {
    a = (*vA).h[i];
    b = (*vB).h[i];
    if (a < b)
      (*vS).h[i] = a;
    else
      (*vS).h[i] = b;
  }
  PPC_INSN_VR(VS_BITMASK, VA_BITMASK | VB_BITMASK);

0.4,6.VS,11.VA,16.VB,21.898:VX:av:vminsw %VD, %VA, %VB:Vector Minimum Signed Word
  int i;
  signed32 a, b;
  for (i = 0; i < 4; i++) {
    a = (*vA).w[i];
    b = (*vB).w[i];
    if (a < b)
      (*vS).w[i] = a;
    else
      (*vS).w[i] = b;
  }
  PPC_INSN_VR(VS_BITMASK, VA_BITMASK | VB_BITMASK);

0.4,6.VS,11.VA,16.VB,21.514:VX:av:vminub %VD, %VA, %VB:Vector Minimum Unsigned Byte
  int i;
  unsigned8 a, b;
  for (i = 0; i < 16; i++) {
    a = (*vA).b[i];
    b = (*vB).b[i];
    if (a < b)
      (*vS).b[i] = a;
    else
      (*vS).b[i] = b;
  }
  PPC_INSN_VR(VS_BITMASK, VA_BITMASK | VB_BITMASK);

0.4,6.VS,11.VA,16.VB,21.578:VX:av:vminuh %VD, %VA, %VB:Vector Minimum Unsigned Half Word
  int i;
  unsigned16 a, b;
  for (i = 0; i < 8; i++) {
    a = (*vA).h[i];
    b = (*vB).h[i];
    if (a < b)
      (*vS).h[i] = a;
    else
      (*vS).h[i] = b;
  }
  PPC_INSN_VR(VS_BITMASK, VA_BITMASK | VB_BITMASK);

0.4,6.VS,11.VA,16.VB,21.642:VX:av:vminuw %VD, %VA, %VB:Vector Minimum Unsigned Word
  int i;
  unsigned32 a, b;
  for (i = 0; i < 4; i++) {
    a = (*vA).w[i];
    b = (*vB).w[i];
    if (a < b)
      (*vS).w[i] = a;
    else
      (*vS).w[i] = b;
  }
  PPC_INSN_VR(VS_BITMASK, VA_BITMASK | VB_BITMASK);

#
# Vector Multiply Low instruction, 6-88
#
0.4,6.VS,11.VA,16.VB,21.VC,26.34:VAX:av:vmladduhm %VD, %VA, %VB, %VC:Vector Multiply Low and Add Unsigned Half Word Modulo
  int i;
  unsigned16 a, b, c;
  unsigned32 prod;
  for (i = 0; i < 8; i++) {
    a = (*vA).h[i];
    b = (*vB).h[i];
    c = (*vC).h[i];
    prod = (unsigned32)a * (unsigned32)b;
    (*vS).h[i] = (prod + c) & 0xffff;
  }
  PPC_INSN_VR(VS_BITMASK, VA_BITMASK | VB_BITMASK | VC_BITMASK);

#
# Vector Merge instructions, 6-89 ... 6-94
#
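# The merge-high forms interleave elements drawn from the high halves of vA
# and vB; the merge-low forms interleave elements from the low halves.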
0.4,6.VS,11.VA,16.VB,21.12:VX:av:vmrghb %VD, %VA, %VB:Vector Merge High Byte
  int i;
  for (i = 0; i < 16; i += 2) {
    (*vS).b[AV_BINDEX(i)] = (*vA).b[AV_BINDEX(i/2)];
    (*vS).b[AV_BINDEX(i+1)] = (*vB).b[AV_BINDEX(i/2)];
  }
  PPC_INSN_VR(VS_BITMASK, VA_BITMASK | VB_BITMASK);

0.4,6.VS,11.VA,16.VB,21.76:VX:av:vmrghh %VD, %VA, %VB:Vector Merge High Half Word
  int i;
  for (i = 0; i < 8; i += 2) {
    (*vS).h[AV_HINDEX(i)] = (*vA).h[AV_HINDEX(i/2)];
    (*vS).h[AV_HINDEX(i+1)] = (*vB).h[AV_HINDEX(i/2)];
  }
  PPC_INSN_VR(VS_BITMASK, VA_BITMASK | VB_BITMASK);

0.4,6.VS,11.VA,16.VB,21.140:VX:av:vmrghw %VD, %VA, %VB:Vector Merge High Word
  int i;
  for (i = 0; i < 4; i += 2) {
    (*vS).w[i] = (*vA).w[i/2];
    (*vS).w[i+1] = (*vB).w[i/2];
  }
  PPC_INSN_VR(VS_BITMASK, VA_BITMASK | VB_BITMASK);

0.4,6.VS,11.VA,16.VB,21.268:VX:av:vmrglb %VD, %VA, %VB:Vector Merge Low Byte
  int i;
  for (i = 0; i < 16; i += 2) {
    (*vS).b[AV_BINDEX(i)] = (*vA).b[AV_BINDEX((i/2) + 8)];
    (*vS).b[AV_BINDEX(i+1)] = (*vB).b[AV_BINDEX((i/2) + 8)];
  }
  PPC_INSN_VR(VS_BITMASK, VA_BITMASK | VB_BITMASK);

0.4,6.VS,11.VA,16.VB,21.332:VX:av:vmrglh %VD, %VA, %VB:Vector Merge Low Half Word
  int i;
  for (i = 0; i < 8; i += 2) {
    (*vS).h[AV_HINDEX(i)] = (*vA).h[AV_HINDEX((i/2) + 4)];
    (*vS).h[AV_HINDEX(i+1)] = (*vB).h[AV_HINDEX((i/2) + 4)];
  }
  PPC_INSN_VR(VS_BITMASK, VA_BITMASK | VB_BITMASK);

0.4,6.VS,11.VA,16.VB,21.396:VX:av:vmrglw %VD, %VA, %VB:Vector Merge Low Word
  int i;
  for (i = 0; i < 4; i += 2) {
    (*vS).w[i] = (*vA).w[(i/2) + 2];
    (*vS).w[i+1] = (*vB).w[(i/2) + 2];
  }
  PPC_INSN_VR(VS_BITMASK, VA_BITMASK | VB_BITMASK);

#
# Vector Multiply Sum instructions, 6-95 ... 6-100
#
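# Each result word accumulates the corresponding word of vC plus the sum of
# the partial products of the vA and vB sub-elements that map onto it.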
0.4,6.VS,11.VA,16.VB,21.VC,26.37:VAX:av:vmsummbm %VD, %VA, %VB, %VC:Vector Multiply Sum Mixed-Sign Byte Modulo
  int i, j;
  signed32 temp;
  signed16 prod, a;
  unsigned16 b;
  for (i = 0; i < 4; i++) {
    temp = (*vC).w[i];
    for (j = 0; j < 4; j++) {
      a = (signed16)(signed8)(*vA).b[i*4+j];
      b = (*vB).b[i*4+j];
      prod = a * b;
      temp += (signed32)prod;
    }
    (*vS).w[i] = temp;
  }
  PPC_INSN_VR(VS_BITMASK, VA_BITMASK | VB_BITMASK | VC_BITMASK);

0.4,6.VS,11.VA,16.VB,21.VC,26.40:VAX:av:vmsumshm %VD, %VA, %VB, %VC:Vector Multiply Sum Signed Half Word Modulo
  int i, j;
  signed32 temp, prod, a, b;
  for (i = 0; i < 4; i++) {
    temp = (*vC).w[i];
    for (j = 0; j < 2; j++) {
      a = (signed32)(signed16)(*vA).h[i*2+j];
      b = (signed32)(signed16)(*vB).h[i*2+j];
      prod = a * b;
      temp += prod;
    }
    (*vS).w[i] = temp;
  }
  PPC_INSN_VR(VS_BITMASK, VA_BITMASK | VB_BITMASK | VC_BITMASK);

0.4,6.VS,11.VA,16.VB,21.VC,26.41:VAX:av:vmsumshs %VD, %VA, %VB, %VC:Vector Multiply Sum Signed Half Word Saturate
  int i, j, sat, tempsat;
  signed64 temp;
  signed32 prod, a, b;
  sat = 0;
  for (i = 0; i < 4; i++) {
    temp = (signed64)(signed32)(*vC).w[i];
    for (j = 0; j < 2; j++) {
      a = (signed32)(signed16)(*vA).h[i*2+j];
      b = (signed32)(signed16)(*vB).h[i*2+j];
      prod = a * b;
      temp += (signed64)prod;
    }
    (*vS).w[i] = altivec_signed_saturate_32(temp, &tempsat);
    sat |= tempsat;
  }
  ALTIVEC_SET_SAT(sat);
  PPC_INSN_VR_VSCR(VS_BITMASK, VA_BITMASK | VB_BITMASK | VC_BITMASK);

0.4,6.VS,11.VA,16.VB,21.VC,26.36:VAX:av:vmsumubm %VD, %VA, %VB, %VC:Vector Multiply Sum Unsigned Byte Modulo
  int i, j;
  unsigned32 temp;
  unsigned16 prod, a, b;
  for (i = 0; i < 4; i++) {
    temp = (*vC).w[i];
    for (j = 0; j < 4; j++) {
      a = (*vA).b[i*4+j];
      b = (*vB).b[i*4+j];
      prod = a * b;
      temp += prod;
    }
    (*vS).w[i] = temp;
  }
  PPC_INSN_VR(VS_BITMASK, VA_BITMASK | VB_BITMASK | VC_BITMASK);

0.4,6.VS,11.VA,16.VB,21.VC,26.38:VAX:av:vmsumuhm %VD, %VA, %VB, %VC:Vector Multiply Sum Unsigned Half Word Modulo
  int i, j;
  unsigned32 temp, prod, a, b;
  for (i = 0; i < 4; i++) {
    temp = (*vC).w[i];
    for (j = 0; j < 2; j++) {
      a = (*vA).h[i*2+j];
      b = (*vB).h[i*2+j];
      prod = a * b;
      temp += prod;
    }
    (*vS).w[i] = temp;
  }
  PPC_INSN_VR(VS_BITMASK, VA_BITMASK | VB_BITMASK | VC_BITMASK);

0.4,6.VS,11.VA,16.VB,21.VC,26.39:VAX:av:vmsumuhs %VD, %VA, %VB, %VC:Vector Multiply Sum Unsigned Half Word Saturate
  int i, j, sat, tempsat;
  unsigned32 temp, prod, a, b;
  sat = 0;
  for (i = 0; i < 4; i++) {
    temp = (*vC).w[i];
    for (j = 0; j < 2; j++) {
      a = (*vA).h[i*2+j];
      b = (*vB).h[i*2+j];
      prod = a * b;
      temp += prod;
    }
    (*vS).w[i] = altivec_unsigned_saturate_32(temp, &tempsat);
    sat |= tempsat;
  }
  ALTIVEC_SET_SAT(sat);
  PPC_INSN_VR_VSCR(VS_BITMASK, VA_BITMASK | VB_BITMASK | VC_BITMASK);

#
# Vector Multiply Even/Odd instructions, 6-101 ... 6-108
#
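# The "even" forms multiply elements 0, 2, 4, ... and the "odd" forms
# elements 1, 3, 5, ...; each product widens into a double-width result
# element.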
0.4,6.VS,11.VA,16.VB,21.776:VX:av:vmulesb %VD, %VA, %VB:Vector Multiply Even Signed Byte
  int i;
  signed8 a, b;
  signed16 prod;
  for (i = 0; i < 8; i++) {
    a = (*vA).b[AV_BINDEX(i*2)];
    b = (*vB).b[AV_BINDEX(i*2)];
    prod = a * b;
    (*vS).h[AV_HINDEX(i)] = prod;
  }
  PPC_INSN_VR(VS_BITMASK, VA_BITMASK | VB_BITMASK);

0.4,6.VS,11.VA,16.VB,21.840:VX:av:vmulesh %VD, %VA, %VB:Vector Multiply Even Signed Half Word
  int i;
  signed16 a, b;
  signed32 prod;
  for (i = 0; i < 4; i++) {
    a = (*vA).h[AV_HINDEX(i*2)];
    b = (*vB).h[AV_HINDEX(i*2)];
    prod = a * b;
    (*vS).w[i] = prod;
  }
  PPC_INSN_VR(VS_BITMASK, VA_BITMASK | VB_BITMASK);

0.4,6.VS,11.VA,16.VB,21.520:VX:av:vmuleub %VD, %VA, %VB:Vector Multiply Even Unsigned Byte
  int i;
  unsigned8 a, b;
  unsigned16 prod;
  for (i = 0; i < 8; i++) {
    a = (*vA).b[AV_BINDEX(i*2)];
    b = (*vB).b[AV_BINDEX(i*2)];
    prod = a * b;
    (*vS).h[AV_HINDEX(i)] = prod;
  }
  PPC_INSN_VR(VS_BITMASK, VA_BITMASK | VB_BITMASK);

0.4,6.VS,11.VA,16.VB,21.584:VX:av:vmuleuh %VD, %VA, %VB:Vector Multiply Even Unsigned Half Word
  int i;
  unsigned16 a, b;
  unsigned32 prod;
  for (i = 0; i < 4; i++) {
    a = (*vA).h[AV_HINDEX(i*2)];
    b = (*vB).h[AV_HINDEX(i*2)];
    prod = a * b;
    (*vS).w[i] = prod;
  }
  PPC_INSN_VR(VS_BITMASK, VA_BITMASK | VB_BITMASK);

0.4,6.VS,11.VA,16.VB,21.264:VX:av:vmulosb %VD, %VA, %VB:Vector Multiply Odd Signed Byte
  int i;
  signed8 a, b;
  signed16 prod;
  for (i = 0; i < 8; i++) {
    a = (*vA).b[AV_BINDEX((i*2)+1)];
    b = (*vB).b[AV_BINDEX((i*2)+1)];
    prod = a * b;
    (*vS).h[AV_HINDEX(i)] = prod;
  }
  PPC_INSN_VR(VS_BITMASK, VA_BITMASK | VB_BITMASK);

0.4,6.VS,11.VA,16.VB,21.328:VX:av:vmulosh %VD, %VA, %VB:Vector Multiply Odd Signed Half Word
  int i;
  signed16 a, b;
  signed32 prod;
  for (i = 0; i < 4; i++) {
    a = (*vA).h[AV_HINDEX((i*2)+1)];
    b = (*vB).h[AV_HINDEX((i*2)+1)];
    prod = a * b;
    (*vS).w[i] = prod;
  }
  PPC_INSN_VR(VS_BITMASK, VA_BITMASK | VB_BITMASK);

0.4,6.VS,11.VA,16.VB,21.8:VX:av:vmuloub %VD, %VA, %VB:Vector Multiply Odd Unsigned Byte
  int i;
  unsigned8 a, b;
  unsigned16 prod;
  for (i = 0; i < 8; i++) {
    a = (*vA).b[AV_BINDEX((i*2)+1)];
    b = (*vB).b[AV_BINDEX((i*2)+1)];
    prod = a * b;
    (*vS).h[AV_HINDEX(i)] = prod;
  }
  PPC_INSN_VR(VS_BITMASK, VA_BITMASK | VB_BITMASK);

0.4,6.VS,11.VA,16.VB,21.72:VX:av:vmulouh %VD, %VA, %VB:Vector Multiply Odd Unsigned Half Word
  int i;
  unsigned16 a, b;
  unsigned32 prod;
  for (i = 0; i < 4; i++) {
    a = (*vA).h[AV_HINDEX((i*2)+1)];
    b = (*vB).h[AV_HINDEX((i*2)+1)];
    prod = a * b;
    (*vS).w[i] = prod;
  }
  PPC_INSN_VR(VS_BITMASK, VA_BITMASK | VB_BITMASK);

#
# Vector Negative Multiply-Subtract instruction, 6-109
#
0.4,6.VS,11.VA,16.VB,21.VC,26.47:VX:av:vnmsubfp %VD, %VA, %VB, %VC:Vector Negative Multiply-Subtract Floating Point
  int i;
  unsigned32 f;
  sim_fpu a, b, c, d, i1, i2;
  for (i = 0; i < 4; i++) {
    sim_fpu_32to (&a, (*vA).w[i]);
    sim_fpu_32to (&b, (*vB).w[i]);
    sim_fpu_32to (&c, (*vC).w[i]);
    sim_fpu_mul (&i1, &a, &c);
    sim_fpu_sub (&i2, &i1, &b);
    sim_fpu_neg (&d, &i2);
    sim_fpu_to32 (&f, &d);
    (*vS).w[i] = f;
  }
  PPC_INSN_VR(VS_BITMASK, VA_BITMASK | VB_BITMASK | VC_BITMASK);

#
# Vector Logical OR instructions, 6-110, 6-111, 6-177
#
0.4,6.VS,11.VA,16.VB,21.1284:VX:av:vnor %VD, %VA, %VB:Vector Logical NOR
  int i;
  for (i = 0; i < 4; i++)
    (*vS).w[i] = ~((*vA).w[i] | (*vB).w[i]);
  PPC_INSN_VR(VS_BITMASK, VA_BITMASK | VB_BITMASK);

0.4,6.VS,11.VA,16.VB,21.1156:VX:av:vor %VD, %VA, %VB:Vector Logical OR
  int i;
  for (i = 0; i < 4; i++)
    (*vS).w[i] = (*vA).w[i] | (*vB).w[i];
  PPC_INSN_VR(VS_BITMASK, VA_BITMASK | VB_BITMASK);

0.4,6.VS,11.VA,16.VB,21.1220:VX:av:vxor %VD, %VA, %VB:Vector Logical XOR
  int i;
  for (i = 0; i < 4; i++)
    (*vS).w[i] = (*vA).w[i] ^ (*vB).w[i];
  PPC_INSN_VR(VS_BITMASK, VA_BITMASK | VB_BITMASK);

#
# Vector Permute instruction, 6-112
#
0.4,6.VS,11.VA,16.VB,21.VC,26.43:VX:av:vperm %VD, %VA, %VB, %VC:Vector Permute
  int i, who;
  /* The permutation vector might have us read into the source vectors
     back at positions before the iteration index, so we must latch the
     sources to prevent early-clobbering in case the destination vector
     is the same as one of them.  */
  vreg myvA = (*vA), myvB = (*vB);
  for (i = 0; i < 16; i++) {
    who = (*vC).b[AV_BINDEX(i)] & 0x1f;
    if (who & 0x10)
      (*vS).b[AV_BINDEX(i)] = myvB.b[AV_BINDEX(who & 0xf)];
    else
      (*vS).b[AV_BINDEX(i)] = myvA.b[AV_BINDEX(who & 0xf)];
  }
  PPC_INSN_VR(VS_BITMASK, VA_BITMASK | VB_BITMASK | VC_BITMASK);

#
# Vector Pack instructions, 6-113 ... 6-121
#
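# The pack instructions narrow each source element to half its width, with
# vA supplying the first half of the result and vB the second.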
0.4,6.VS,11.VA,16.VB,21.782:VX:av:vpkpx %VD, %VA, %VB:Vector Pack Pixel32
  int i;
  for (i = 0; i < 4; i++) {
    (*vS).h[AV_HINDEX(i+4)] = ((((*vB).w[i]) >> 9) & 0xfc00)
                            | ((((*vB).w[i]) >> 6) & 0x03e0)
                            | ((((*vB).w[i]) >> 3) & 0x001f);
    (*vS).h[AV_HINDEX(i)] = ((((*vA).w[i]) >> 9) & 0xfc00)
                          | ((((*vA).w[i]) >> 6) & 0x03e0)
                          | ((((*vA).w[i]) >> 3) & 0x001f);
  }
  PPC_INSN_VR(VS_BITMASK, VA_BITMASK | VB_BITMASK);

0.4,6.VS,11.VA,16.VB,21.398:VX:av:vpkshss %VD, %VA, %VB:Vector Pack Signed Half Word Signed Saturate
  int i, sat, tempsat;
  signed16 temp;
  sat = 0;
  for (i = 0; i < 16; i++) {
    if (i < 8)
      temp = (*vA).h[AV_HINDEX(i)];
    else
      temp = (*vB).h[AV_HINDEX(i-8)];
    (*vS).b[AV_BINDEX(i)] = altivec_signed_saturate_8(temp, &tempsat);
    sat |= tempsat;
  }
  ALTIVEC_SET_SAT(sat);
  PPC_INSN_VR_VSCR(VS_BITMASK, VA_BITMASK | VB_BITMASK);

0.4,6.VS,11.VA,16.VB,21.270:VX:av:vpkshus %VD, %VA, %VB:Vector Pack Signed Half Word Unsigned Saturate
  int i, sat, tempsat;
  signed16 temp;
  sat = 0;
  for (i = 0; i < 16; i++) {
    if (i < 8)
      temp = (*vA).h[AV_HINDEX(i)];
    else
      temp = (*vB).h[AV_HINDEX(i-8)];
    (*vS).b[AV_BINDEX(i)] = altivec_unsigned_saturate_8(temp, &tempsat);
    sat |= tempsat;
  }
  ALTIVEC_SET_SAT(sat);
  PPC_INSN_VR_VSCR(VS_BITMASK, VA_BITMASK | VB_BITMASK);

0.4,6.VS,11.VA,16.VB,21.462:VX:av:vpkswss %VD, %VA, %VB:Vector Pack Signed Word Signed Saturate
  int i, sat, tempsat;
  signed32 temp;
  sat = 0;
  for (i = 0; i < 8; i++) {
    if (i < 4)
      temp = (*vA).w[i];
    else
      temp = (*vB).w[i-4];
    (*vS).h[AV_HINDEX(i)] = altivec_signed_saturate_16(temp, &tempsat);
    sat |= tempsat;
  }
  ALTIVEC_SET_SAT(sat);
  PPC_INSN_VR_VSCR(VS_BITMASK, VA_BITMASK | VB_BITMASK);

0.4,6.VS,11.VA,16.VB,21.334:VX:av:vpkswus %VD, %VA, %VB:Vector Pack Signed Word Unsigned Saturate
  int i, sat, tempsat;
  signed32 temp;
  sat = 0;
  for (i = 0; i < 8; i++) {
    if (i < 4)
      temp = (*vA).w[i];
    else
      temp = (*vB).w[i-4];
    (*vS).h[AV_HINDEX(i)] = altivec_unsigned_saturate_16(temp, &tempsat);
    sat |= tempsat;
  }
  ALTIVEC_SET_SAT(sat);
  PPC_INSN_VR_VSCR(VS_BITMASK, VA_BITMASK | VB_BITMASK);

0.4,6.VS,11.VA,16.VB,21.14:VX:av:vpkuhum %VD, %VA, %VB:Vector Pack Unsigned Half Word Unsigned Modulo
  int i;
  for (i = 0; i < 16; i++)
    if (i < 8)
      (*vS).b[AV_BINDEX(i)] = (*vA).h[AV_HINDEX(i)];
    else
      (*vS).b[AV_BINDEX(i)] = (*vB).h[AV_HINDEX(i-8)];
  PPC_INSN_VR(VS_BITMASK, VA_BITMASK | VB_BITMASK);

0.4,6.VS,11.VA,16.VB,21.142:VX:av:vpkuhus %VD, %

Large files are truncated by the code viewer; see the repository for the full file.