/target/mips/tcg/lmmi_helper.c

https://gitlab.com/paelzer/qemu · C · 747 lines · 609 code · 114 blank · 24 comment · 51 complexity · 6866f9a2291acf02389143af10f3abd1 MD5 · raw file

  1. /*
  2. * Loongson Multimedia Instruction emulation helpers for QEMU.
  3. *
  4. * Copyright (c) 2011 Richard Henderson <rth@twiddle.net>
  5. *
  6. * This library is free software; you can redistribute it and/or
  7. * modify it under the terms of the GNU Lesser General Public
  8. * License as published by the Free Software Foundation; either
  9. * version 2.1 of the License, or (at your option) any later version.
  10. *
  11. * This library is distributed in the hope that it will be useful,
  12. * but WITHOUT ANY WARRANTY; without even the implied warranty of
  13. * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
  14. * Lesser General Public License for more details.
  15. *
  16. * You should have received a copy of the GNU Lesser General Public
  17. * License along with this library; if not, see <http://www.gnu.org/licenses/>.
  18. */
  19. #include "qemu/osdep.h"
  20. #include "cpu.h"
  21. #include "exec/helper-proto.h"
  22. /*
  23. * If the byte ordering doesn't matter, i.e. all columns are treated
  24. * identically, then this union can be used directly. If byte ordering
  25. * does matter, we generally ignore dumping to memory.
  26. */
  /*
   * One 64-bit multimedia register, viewable as the whole doubleword or
   * as packed 8-, 16- or 32-bit lanes (signed or unsigned).  Which lane
   * index maps to which bits of 'd' depends on the host byte order.
   */
  typedef union {
      uint8_t  ub[8];   /* eight unsigned bytes */
      int8_t   sb[8];   /* eight signed bytes */
      uint16_t uh[4];   /* four unsigned halfwords */
      int16_t  sh[4];   /* four signed halfwords */
      uint32_t uw[2];   /* two unsigned words */
      int32_t  sw[2];   /* two signed words */
      uint64_t d;       /* the full 64-bit value */
  } LMIValue;
  /*
   * Some byte ordering issues can be mitigated by XORing in the following:
   * on a big-endian host BYTE_ORDER_XOR(N) yields N, otherwise 0.
   */
  #ifdef HOST_WORDS_BIGENDIAN
  # define BYTE_ORDER_XOR(N) (N)
  #else
  # define BYTE_ORDER_XOR(N) 0
  #endif

  /*
   * Saturation helpers.  SATS* clamp to the signed range of the named
   * width; SATU* clamp only the upper bound of the unsigned range.
   * Every use of the argument is parenthesized so that expressions such
   * as "a & b" are compared as a whole.  The argument is evaluated more
   * than once, so it must be free of side effects.
   */
  #define SATSB(x) ((x) < -0x80 ? -0x80 : (x) > 0x7f ? 0x7f : (x))
  #define SATUB(x) ((x) > 0xff ? 0xff : (x))
  #define SATSH(x) ((x) < -0x8000 ? -0x8000 : (x) > 0x7fff ? 0x7fff : (x))
  #define SATUH(x) ((x) > 0xffff ? 0xffff : (x))
  #define SATSW(x) \
      ((x) < -0x80000000ll ? -0x80000000ll : (x) > 0x7fffffff ? 0x7fffffff : (x))
  #define SATUW(x) ((x) > 0xffffffffull ? 0xffffffffull : (x))
  49. uint64_t helper_paddsb(uint64_t fs, uint64_t ft)
  50. {
  51. LMIValue vs, vt;
  52. unsigned int i;
  53. vs.d = fs;
  54. vt.d = ft;
  55. for (i = 0; i < 8; ++i) {
  56. int r = vs.sb[i] + vt.sb[i];
  57. vs.sb[i] = SATSB(r);
  58. }
  59. return vs.d;
  60. }
  61. uint64_t helper_paddusb(uint64_t fs, uint64_t ft)
  62. {
  63. LMIValue vs, vt;
  64. unsigned int i;
  65. vs.d = fs;
  66. vt.d = ft;
  67. for (i = 0; i < 8; ++i) {
  68. int r = vs.ub[i] + vt.ub[i];
  69. vs.ub[i] = SATUB(r);
  70. }
  71. return vs.d;
  72. }
  73. uint64_t helper_paddsh(uint64_t fs, uint64_t ft)
  74. {
  75. LMIValue vs, vt;
  76. unsigned int i;
  77. vs.d = fs;
  78. vt.d = ft;
  79. for (i = 0; i < 4; ++i) {
  80. int r = vs.sh[i] + vt.sh[i];
  81. vs.sh[i] = SATSH(r);
  82. }
  83. return vs.d;
  84. }
  85. uint64_t helper_paddush(uint64_t fs, uint64_t ft)
  86. {
  87. LMIValue vs, vt;
  88. unsigned int i;
  89. vs.d = fs;
  90. vt.d = ft;
  91. for (i = 0; i < 4; ++i) {
  92. int r = vs.uh[i] + vt.uh[i];
  93. vs.uh[i] = SATUH(r);
  94. }
  95. return vs.d;
  96. }
  97. uint64_t helper_paddb(uint64_t fs, uint64_t ft)
  98. {
  99. LMIValue vs, vt;
  100. unsigned int i;
  101. vs.d = fs;
  102. vt.d = ft;
  103. for (i = 0; i < 8; ++i) {
  104. vs.ub[i] += vt.ub[i];
  105. }
  106. return vs.d;
  107. }
  108. uint64_t helper_paddh(uint64_t fs, uint64_t ft)
  109. {
  110. LMIValue vs, vt;
  111. unsigned int i;
  112. vs.d = fs;
  113. vt.d = ft;
  114. for (i = 0; i < 4; ++i) {
  115. vs.uh[i] += vt.uh[i];
  116. }
  117. return vs.d;
  118. }
  119. uint64_t helper_paddw(uint64_t fs, uint64_t ft)
  120. {
  121. LMIValue vs, vt;
  122. unsigned int i;
  123. vs.d = fs;
  124. vt.d = ft;
  125. for (i = 0; i < 2; ++i) {
  126. vs.uw[i] += vt.uw[i];
  127. }
  128. return vs.d;
  129. }
  130. uint64_t helper_psubsb(uint64_t fs, uint64_t ft)
  131. {
  132. LMIValue vs, vt;
  133. unsigned int i;
  134. vs.d = fs;
  135. vt.d = ft;
  136. for (i = 0; i < 8; ++i) {
  137. int r = vs.sb[i] - vt.sb[i];
  138. vs.sb[i] = SATSB(r);
  139. }
  140. return vs.d;
  141. }
  /*
   * PSUBUSB: subtract the eight unsigned bytes of ft from those of fs,
   * lane by lane, with unsigned saturation.
   *
   * A byte difference can never exceed 0xff, so the only bound that
   * matters is the lower one: a lane whose subtrahend is larger than its
   * minuend yields 0 instead of wrapping around.  (Clamping only the
   * upper bound, as SATUB does, leaves negative differences to wrap.)
   *
   * The lanes are extracted with shifts so the result does not depend on
   * the host byte order.
   */
  uint64_t helper_psubusb(uint64_t fs, uint64_t ft)
  {
      uint64_t fd = 0;

      for (unsigned shift = 0; shift < 64; shift += 8) {
          uint64_t a = (fs >> shift) & 0xff;
          uint64_t b = (ft >> shift) & 0xff;
          uint64_t diff = a > b ? a - b : 0;

          fd |= diff << shift;
      }
      return fd;
  }
  154. uint64_t helper_psubsh(uint64_t fs, uint64_t ft)
  155. {
  156. LMIValue vs, vt;
  157. unsigned int i;
  158. vs.d = fs;
  159. vt.d = ft;
  160. for (i = 0; i < 4; ++i) {
  161. int r = vs.sh[i] - vt.sh[i];
  162. vs.sh[i] = SATSH(r);
  163. }
  164. return vs.d;
  165. }
  /*
   * PSUBUSH: subtract the four unsigned halfwords of ft from those of
   * fs, lane by lane, with unsigned saturation.
   *
   * A halfword difference can never exceed 0xffff, so the only bound
   * that matters is the lower one: a lane that would go negative yields
   * 0 instead of wrapping around.  (Clamping only the upper bound, as
   * SATUH does, leaves negative differences to wrap.)
   *
   * The lanes are extracted with shifts so the result does not depend on
   * the host byte order.
   */
  uint64_t helper_psubush(uint64_t fs, uint64_t ft)
  {
      uint64_t fd = 0;

      for (unsigned shift = 0; shift < 64; shift += 16) {
          uint64_t a = (fs >> shift) & 0xffff;
          uint64_t b = (ft >> shift) & 0xffff;
          uint64_t diff = a > b ? a - b : 0;

          fd |= diff << shift;
      }
      return fd;
  }
  178. uint64_t helper_psubb(uint64_t fs, uint64_t ft)
  179. {
  180. LMIValue vs, vt;
  181. unsigned int i;
  182. vs.d = fs;
  183. vt.d = ft;
  184. for (i = 0; i < 8; ++i) {
  185. vs.ub[i] -= vt.ub[i];
  186. }
  187. return vs.d;
  188. }
  189. uint64_t helper_psubh(uint64_t fs, uint64_t ft)
  190. {
  191. LMIValue vs, vt;
  192. unsigned int i;
  193. vs.d = fs;
  194. vt.d = ft;
  195. for (i = 0; i < 4; ++i) {
  196. vs.uh[i] -= vt.uh[i];
  197. }
  198. return vs.d;
  199. }
  200. uint64_t helper_psubw(uint64_t fs, uint64_t ft)
  201. {
  202. LMIValue vs, vt;
  203. unsigned int i;
  204. vs.d = fs;
  205. vt.d = ft;
  206. for (i = 0; i < 2; ++i) {
  207. vs.uw[i] -= vt.uw[i];
  208. }
  209. return vs.d;
  210. }
  211. uint64_t helper_pshufh(uint64_t fs, uint64_t ft)
  212. {
  213. unsigned host = BYTE_ORDER_XOR(3);
  214. LMIValue vd, vs;
  215. unsigned i;
  216. vs.d = fs;
  217. vd.d = 0;
  218. for (i = 0; i < 4; i++, ft >>= 2) {
  219. vd.uh[i ^ host] = vs.uh[(ft & 3) ^ host];
  220. }
  221. return vd.d;
  222. }
  /*
   * PACKSSWH: narrow four signed 32-bit words to signed 16-bit halfwords
   * with signed saturation.  Result halfwords 0 and 1 come from the low
   * and high words of fs, halfwords 2 and 3 from the low and high words
   * of ft.
   *
   * The narrowed value is converted to uint64_t before being shifted
   * into place: left-shifting a signed 64-bit 0xffff by 48 would overflow
   * the signed type, which is undefined behaviour.
   */
  uint64_t helper_packsswh(uint64_t fs, uint64_t ft)
  {
      const uint64_t src[2] = { fs, ft };
      uint64_t fd = 0;

      for (unsigned n = 0; n < 4; n++) {
          int32_t word = (int32_t)(src[n / 2] >> ((n % 2) * 32));

          /* Clamp to the int16_t range. */
          if (word < -0x8000) {
              word = -0x8000;
          } else if (word > 0x7fff) {
              word = 0x7fff;
          }
          fd |= ((uint64_t)word & 0xffff) << (n * 16);
      }
      return fd;
  }
  /*
   * PACKSSHB: narrow eight signed halfwords to bytes, clamping each to
   * [-0x80, 0x7f] with SATSB.  Result bytes 0-3 come from the halfwords
   * of fs, bytes 4-7 from the halfwords of ft.
   */
  uint64_t helper_packsshb(uint64_t fs, uint64_t ft)
  {
      uint64_t packed = 0;

      for (unsigned n = 0; n < 8; n++) {
          uint64_t src = n < 4 ? fs : ft;
          int16_t half = src >> ((n & 3) * 16);

          half = SATSB(half);
          packed |= (uint64_t)(half & 0xff) << (n * 8);
      }
      return packed;
  }
  /*
   * PACKUSHB: narrow eight signed halfwords to unsigned bytes with
   * unsigned saturation.  Result bytes 0-3 come from the halfwords of
   * fs, bytes 4-7 from the halfwords of ft.
   *
   * Each halfword is read as a signed value and clamped to [0, 0xff]:
   * values above 0xff become 0xff and negative values become 0.  (An
   * upper-bound-only clamp would let the low byte of a negative
   * halfword leak into the result.)
   */
  uint64_t helper_packushb(uint64_t fs, uint64_t ft)
  {
      const uint64_t src[2] = { fs, ft };
      uint64_t fd = 0;

      for (unsigned n = 0; n < 8; n++) {
          int16_t half = (int16_t)(src[n / 4] >> ((n % 4) * 16));
          uint64_t byte;

          if (half < 0) {
              byte = 0;
          } else if (half > 0xff) {
              byte = 0xff;
          } else {
              byte = (uint64_t)half;
          }
          fd |= byte << (n * 8);
      }
      return fd;
  }
  /*
   * PUNPCKLWD: combine the low 32-bit words of the operands; the low
   * word of fs stays in the low half, the low word of ft moves to the
   * high half.
   */
  uint64_t helper_punpcklwd(uint64_t fs, uint64_t ft)
  {
      uint64_t low_half = (uint32_t)fs;
      uint64_t high_half = ft << 32;

      return high_half | low_half;
  }
  /*
   * PUNPCKHWD: combine the high 32-bit words of the operands; the high
   * word of fs moves to the low half, the high word of ft stays in the
   * high half.
   */
  uint64_t helper_punpckhwd(uint64_t fs, uint64_t ft)
  {
      uint64_t low_half = fs >> 32;
      uint64_t high_half = (ft >> 32) << 32;

      return high_half | low_half;
  }
  281. uint64_t helper_punpcklhw(uint64_t fs, uint64_t ft)
  282. {
  283. unsigned host = BYTE_ORDER_XOR(3);
  284. LMIValue vd, vs, vt;
  285. vs.d = fs;
  286. vt.d = ft;
  287. vd.uh[0 ^ host] = vs.uh[0 ^ host];
  288. vd.uh[1 ^ host] = vt.uh[0 ^ host];
  289. vd.uh[2 ^ host] = vs.uh[1 ^ host];
  290. vd.uh[3 ^ host] = vt.uh[1 ^ host];
  291. return vd.d;
  292. }
  293. uint64_t helper_punpckhhw(uint64_t fs, uint64_t ft)
  294. {
  295. unsigned host = BYTE_ORDER_XOR(3);
  296. LMIValue vd, vs, vt;
  297. vs.d = fs;
  298. vt.d = ft;
  299. vd.uh[0 ^ host] = vs.uh[2 ^ host];
  300. vd.uh[1 ^ host] = vt.uh[2 ^ host];
  301. vd.uh[2 ^ host] = vs.uh[3 ^ host];
  302. vd.uh[3 ^ host] = vt.uh[3 ^ host];
  303. return vd.d;
  304. }
  305. uint64_t helper_punpcklbh(uint64_t fs, uint64_t ft)
  306. {
  307. unsigned host = BYTE_ORDER_XOR(7);
  308. LMIValue vd, vs, vt;
  309. vs.d = fs;
  310. vt.d = ft;
  311. vd.ub[0 ^ host] = vs.ub[0 ^ host];
  312. vd.ub[1 ^ host] = vt.ub[0 ^ host];
  313. vd.ub[2 ^ host] = vs.ub[1 ^ host];
  314. vd.ub[3 ^ host] = vt.ub[1 ^ host];
  315. vd.ub[4 ^ host] = vs.ub[2 ^ host];
  316. vd.ub[5 ^ host] = vt.ub[2 ^ host];
  317. vd.ub[6 ^ host] = vs.ub[3 ^ host];
  318. vd.ub[7 ^ host] = vt.ub[3 ^ host];
  319. return vd.d;
  320. }
  321. uint64_t helper_punpckhbh(uint64_t fs, uint64_t ft)
  322. {
  323. unsigned host = BYTE_ORDER_XOR(7);
  324. LMIValue vd, vs, vt;
  325. vs.d = fs;
  326. vt.d = ft;
  327. vd.ub[0 ^ host] = vs.ub[4 ^ host];
  328. vd.ub[1 ^ host] = vt.ub[4 ^ host];
  329. vd.ub[2 ^ host] = vs.ub[5 ^ host];
  330. vd.ub[3 ^ host] = vt.ub[5 ^ host];
  331. vd.ub[4 ^ host] = vs.ub[6 ^ host];
  332. vd.ub[5 ^ host] = vt.ub[6 ^ host];
  333. vd.ub[6 ^ host] = vs.ub[7 ^ host];
  334. vd.ub[7 ^ host] = vt.ub[7 ^ host];
  335. return vd.d;
  336. }
  337. uint64_t helper_pavgh(uint64_t fs, uint64_t ft)
  338. {
  339. LMIValue vs, vt;
  340. unsigned i;
  341. vs.d = fs;
  342. vt.d = ft;
  343. for (i = 0; i < 4; i++) {
  344. vs.uh[i] = (vs.uh[i] + vt.uh[i] + 1) >> 1;
  345. }
  346. return vs.d;
  347. }
  348. uint64_t helper_pavgb(uint64_t fs, uint64_t ft)
  349. {
  350. LMIValue vs, vt;
  351. unsigned i;
  352. vs.d = fs;
  353. vt.d = ft;
  354. for (i = 0; i < 8; i++) {
  355. vs.ub[i] = (vs.ub[i] + vt.ub[i] + 1) >> 1;
  356. }
  357. return vs.d;
  358. }
  359. uint64_t helper_pmaxsh(uint64_t fs, uint64_t ft)
  360. {
  361. LMIValue vs, vt;
  362. unsigned i;
  363. vs.d = fs;
  364. vt.d = ft;
  365. for (i = 0; i < 4; i++) {
  366. vs.sh[i] = (vs.sh[i] >= vt.sh[i] ? vs.sh[i] : vt.sh[i]);
  367. }
  368. return vs.d;
  369. }
  370. uint64_t helper_pminsh(uint64_t fs, uint64_t ft)
  371. {
  372. LMIValue vs, vt;
  373. unsigned i;
  374. vs.d = fs;
  375. vt.d = ft;
  376. for (i = 0; i < 4; i++) {
  377. vs.sh[i] = (vs.sh[i] <= vt.sh[i] ? vs.sh[i] : vt.sh[i]);
  378. }
  379. return vs.d;
  380. }
  /*
   * PMAXUB: lane-wise maximum of eight unsigned bytes.
   *
   * All eight byte lanes of the 64-bit operands are compared; stopping
   * after four would pass half of fs through unmodified.  The lanes are
   * extracted with shifts so the result does not depend on the host byte
   * order.
   */
  uint64_t helper_pmaxub(uint64_t fs, uint64_t ft)
  {
      uint64_t fd = 0;

      for (unsigned shift = 0; shift < 64; shift += 8) {
          uint64_t a = (fs >> shift) & 0xff;
          uint64_t b = (ft >> shift) & 0xff;

          fd |= (a >= b ? a : b) << shift;
      }
      return fd;
  }
  /*
   * PMINUB: lane-wise minimum of eight unsigned bytes.
   *
   * All eight byte lanes of the 64-bit operands are compared; stopping
   * after four would pass half of fs through unmodified.  The lanes are
   * extracted with shifts so the result does not depend on the host byte
   * order.
   */
  uint64_t helper_pminub(uint64_t fs, uint64_t ft)
  {
      uint64_t fd = 0;

      for (unsigned shift = 0; shift < 64; shift += 8) {
          uint64_t a = (fs >> shift) & 0xff;
          uint64_t b = (ft >> shift) & 0xff;

          fd |= (a <= b ? a : b) << shift;
      }
      return fd;
  }
  403. uint64_t helper_pcmpeqw(uint64_t fs, uint64_t ft)
  404. {
  405. LMIValue vs, vt;
  406. unsigned i;
  407. vs.d = fs;
  408. vt.d = ft;
  409. for (i = 0; i < 2; i++) {
  410. vs.uw[i] = -(vs.uw[i] == vt.uw[i]);
  411. }
  412. return vs.d;
  413. }
  414. uint64_t helper_pcmpgtw(uint64_t fs, uint64_t ft)
  415. {
  416. LMIValue vs, vt;
  417. unsigned i;
  418. vs.d = fs;
  419. vt.d = ft;
  420. for (i = 0; i < 2; i++) {
  421. vs.uw[i] = -(vs.uw[i] > vt.uw[i]);
  422. }
  423. return vs.d;
  424. }
  425. uint64_t helper_pcmpeqh(uint64_t fs, uint64_t ft)
  426. {
  427. LMIValue vs, vt;
  428. unsigned i;
  429. vs.d = fs;
  430. vt.d = ft;
  431. for (i = 0; i < 4; i++) {
  432. vs.uh[i] = -(vs.uh[i] == vt.uh[i]);
  433. }
  434. return vs.d;
  435. }
  436. uint64_t helper_pcmpgth(uint64_t fs, uint64_t ft)
  437. {
  438. LMIValue vs, vt;
  439. unsigned i;
  440. vs.d = fs;
  441. vt.d = ft;
  442. for (i = 0; i < 4; i++) {
  443. vs.uh[i] = -(vs.uh[i] > vt.uh[i]);
  444. }
  445. return vs.d;
  446. }
  447. uint64_t helper_pcmpeqb(uint64_t fs, uint64_t ft)
  448. {
  449. LMIValue vs, vt;
  450. unsigned i;
  451. vs.d = fs;
  452. vt.d = ft;
  453. for (i = 0; i < 8; i++) {
  454. vs.ub[i] = -(vs.ub[i] == vt.ub[i]);
  455. }
  456. return vs.d;
  457. }
  458. uint64_t helper_pcmpgtb(uint64_t fs, uint64_t ft)
  459. {
  460. LMIValue vs, vt;
  461. unsigned i;
  462. vs.d = fs;
  463. vt.d = ft;
  464. for (i = 0; i < 8; i++) {
  465. vs.ub[i] = -(vs.ub[i] > vt.ub[i]);
  466. }
  467. return vs.d;
  468. }
  469. uint64_t helper_psllw(uint64_t fs, uint64_t ft)
  470. {
  471. LMIValue vs;
  472. unsigned i;
  473. ft &= 0x7f;
  474. if (ft > 31) {
  475. return 0;
  476. }
  477. vs.d = fs;
  478. for (i = 0; i < 2; ++i) {
  479. vs.uw[i] <<= ft;
  480. }
  481. return vs.d;
  482. }
  483. uint64_t helper_psrlw(uint64_t fs, uint64_t ft)
  484. {
  485. LMIValue vs;
  486. unsigned i;
  487. ft &= 0x7f;
  488. if (ft > 31) {
  489. return 0;
  490. }
  491. vs.d = fs;
  492. for (i = 0; i < 2; ++i) {
  493. vs.uw[i] >>= ft;
  494. }
  495. return vs.d;
  496. }
  497. uint64_t helper_psraw(uint64_t fs, uint64_t ft)
  498. {
  499. LMIValue vs;
  500. unsigned i;
  501. ft &= 0x7f;
  502. if (ft > 31) {
  503. ft = 31;
  504. }
  505. vs.d = fs;
  506. for (i = 0; i < 2; ++i) {
  507. vs.sw[i] >>= ft;
  508. }
  509. return vs.d;
  510. }
  511. uint64_t helper_psllh(uint64_t fs, uint64_t ft)
  512. {
  513. LMIValue vs;
  514. unsigned i;
  515. ft &= 0x7f;
  516. if (ft > 15) {
  517. return 0;
  518. }
  519. vs.d = fs;
  520. for (i = 0; i < 4; ++i) {
  521. vs.uh[i] <<= ft;
  522. }
  523. return vs.d;
  524. }
  525. uint64_t helper_psrlh(uint64_t fs, uint64_t ft)
  526. {
  527. LMIValue vs;
  528. unsigned i;
  529. ft &= 0x7f;
  530. if (ft > 15) {
  531. return 0;
  532. }
  533. vs.d = fs;
  534. for (i = 0; i < 4; ++i) {
  535. vs.uh[i] >>= ft;
  536. }
  537. return vs.d;
  538. }
  539. uint64_t helper_psrah(uint64_t fs, uint64_t ft)
  540. {
  541. LMIValue vs;
  542. unsigned i;
  543. ft &= 0x7f;
  544. if (ft > 15) {
  545. ft = 15;
  546. }
  547. vs.d = fs;
  548. for (i = 0; i < 4; ++i) {
  549. vs.sh[i] >>= ft;
  550. }
  551. return vs.d;
  552. }
  553. uint64_t helper_pmullh(uint64_t fs, uint64_t ft)
  554. {
  555. LMIValue vs, vt;
  556. unsigned i;
  557. vs.d = fs;
  558. vt.d = ft;
  559. for (i = 0; i < 4; ++i) {
  560. vs.sh[i] *= vt.sh[i];
  561. }
  562. return vs.d;
  563. }
  564. uint64_t helper_pmulhh(uint64_t fs, uint64_t ft)
  565. {
  566. LMIValue vs, vt;
  567. unsigned i;
  568. vs.d = fs;
  569. vt.d = ft;
  570. for (i = 0; i < 4; ++i) {
  571. int32_t r = vs.sh[i] * vt.sh[i];
  572. vs.sh[i] = r >> 16;
  573. }
  574. return vs.d;
  575. }
  /*
   * PMULHUH: multiply the four unsigned halfwords lane by lane and keep
   * the high 16 bits of each 32-bit product.
   *
   * The operands are widened to uint32_t before multiplying.  Multiplying
   * two uint16_t values directly promotes them to (signed) int, and
   * products such as 0xffff * 0xffff exceed INT_MAX, which is undefined
   * behaviour.  The lanes are extracted with shifts so the result does
   * not depend on the host byte order.
   */
  uint64_t helper_pmulhuh(uint64_t fs, uint64_t ft)
  {
      uint64_t fd = 0;

      for (unsigned shift = 0; shift < 64; shift += 16) {
          uint32_t a = (uint32_t)((fs >> shift) & 0xffff);
          uint32_t b = (uint32_t)((ft >> shift) & 0xffff);
          uint32_t product = a * b;

          fd |= (uint64_t)(product >> 16) << shift;
      }
      return fd;
  }
  588. uint64_t helper_pmaddhw(uint64_t fs, uint64_t ft)
  589. {
  590. unsigned host = BYTE_ORDER_XOR(3);
  591. LMIValue vs, vt;
  592. uint32_t p0, p1;
  593. vs.d = fs;
  594. vt.d = ft;
  595. p0 = vs.sh[0 ^ host] * vt.sh[0 ^ host];
  596. p0 += vs.sh[1 ^ host] * vt.sh[1 ^ host];
  597. p1 = vs.sh[2 ^ host] * vt.sh[2 ^ host];
  598. p1 += vs.sh[3 ^ host] * vt.sh[3 ^ host];
  599. return ((uint64_t)p1 << 32) | p0;
  600. }
  601. uint64_t helper_pasubub(uint64_t fs, uint64_t ft)
  602. {
  603. LMIValue vs, vt;
  604. unsigned i;
  605. vs.d = fs;
  606. vt.d = ft;
  607. for (i = 0; i < 8; ++i) {
  608. int r = vs.ub[i] - vt.ub[i];
  609. vs.ub[i] = (r < 0 ? -r : r);
  610. }
  611. return vs.d;
  612. }
  /*
   * BIADD: sum the eight bytes of fs, treated as unsigned, and return
   * the total masked to 16 bits.
   */
  uint64_t helper_biadd(uint64_t fs)
  {
      unsigned total = 0;

      /* Once the remaining bits are all zero they cannot add anything. */
      for (uint64_t rest = fs; rest != 0; rest >>= 8) {
          total += rest & 0xff;
      }
      return total & 0xffff;
  }
  /*
   * PMOVMSKB: gather the most significant bit of each of the eight
   * bytes of fs into an 8-bit mask; bit n of the result is bit 7 of
   * byte n.
   */
  uint64_t helper_pmovmskb(uint64_t fs)
  {
      unsigned mask = 0;

      for (unsigned n = 0; n < 8; n++) {
          unsigned top_bit = (fs >> (8 * n + 7)) & 1;

          mask |= top_bit << n;
      }
      return mask & 0xff;
  }