
/src/netbsd/src/external/gpl3/gcc/dist/gcc/config/xtensa/ieee754-sf.S

/* IEEE-754 single-precision functions for Xtensa
   Copyright (C) 2006, 2007, 2009 Free Software Foundation, Inc.
   Contributed by Bob Wilson (bwilson@tensilica.com) at Tensilica.

   This file is part of GCC.

   GCC is free software; you can redistribute it and/or modify it
   under the terms of the GNU General Public License as published by
   the Free Software Foundation; either version 3, or (at your option)
   any later version.

   GCC is distributed in the hope that it will be useful, but WITHOUT
   ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
   or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public
   License for more details.

   Under Section 7 of GPL version 3, you are granted additional
   permissions described in the GCC Runtime Library Exception, version
   3.1, as published by the Free Software Foundation.

   You should have received a copy of the GNU General Public License and
   a copy of the GCC Runtime Library Exception along with this program;
   see the files COPYING3 and COPYING.RUNTIME respectively. If not, see
   <http://www.gnu.org/licenses/>. */

#ifdef __XTENSA_EB__
#define xh a2
#define xl a3
#define yh a4
#define yl a5
#else
#define xh a3
#define xl a2
#define yh a5
#define yl a4
#endif

/* Warning! The branch displacements for some Xtensa branch instructions
   are quite small, and this code has been carefully laid out to keep
   branch targets in range. If you change anything, be sure to check that
   the assembler is not relaxing anything to branch over a jump. */

#ifdef L_negsf2

        .align  4
        .global __negsf2
        .type   __negsf2, @function
__negsf2:
        leaf_entry sp, 16
        movi    a4, 0x80000000
        xor     a2, a2, a4
        leaf_return

#endif /* L_negsf2 */

#ifdef L_addsubsf3

/* Addition */
__addsf3_aux:

        /* Handle NaNs and Infinities. (This code is placed before the
           start of the function just to keep it in range of the limited
           branch displacements.) */

.Ladd_xnan_or_inf:
        /* If y is neither Infinity nor NaN, return x. */
        bnall   a3, a6, 1f
        /* If x is a NaN, return it. Otherwise, return y. */
        slli    a7, a2, 9
        beqz    a7, .Ladd_ynan_or_inf
1:      leaf_return

.Ladd_ynan_or_inf:
        /* Return y. */
        mov     a2, a3
        leaf_return

.Ladd_opposite_signs:
        /* Operand signs differ. Do a subtraction. */
        slli    a7, a6, 8
        xor     a3, a3, a7
        j       .Lsub_same_sign

        .align  4
        .global __addsf3
        .type   __addsf3, @function
__addsf3:
        leaf_entry sp, 16
        movi    a6, 0x7f800000

        /* Check if the two operands have the same sign. */
        xor     a7, a2, a3
        bltz    a7, .Ladd_opposite_signs

.Ladd_same_sign:
        /* Check if either exponent == 0x7f8 (i.e., NaN or Infinity). */
        ball    a2, a6, .Ladd_xnan_or_inf
        ball    a3, a6, .Ladd_ynan_or_inf

        /* Compare the exponents. The smaller operand will be shifted
           right by the exponent difference and added to the larger
           one. */
        extui   a7, a2, 23, 9
        extui   a8, a3, 23, 9
        bltu    a7, a8, .Ladd_shiftx

.Ladd_shifty:
        /* Check if the smaller (or equal) exponent is zero. */
        bnone   a3, a6, .Ladd_yexpzero
        /* Replace y sign/exponent with 0x008. */
        or      a3, a3, a6
        slli    a3, a3, 8
        srli    a3, a3, 8

.Ladd_yexpdiff:
        /* Compute the exponent difference. */
        sub     a10, a7, a8
        /* Exponent difference > 32 -- just return the bigger value. */
        bgeui   a10, 32, 1f

        /* Shift y right by the exponent difference. Any bits that are
           shifted out of y are saved in a9 for rounding the result. */
        ssr     a10
        movi    a9, 0
        src     a9, a3, a9
        srl     a3, a3

        /* Do the addition. */
        add     a2, a2, a3

        /* Check if the add overflowed into the exponent. */
        extui   a10, a2, 23, 9
        beq     a10, a7, .Ladd_round
        mov     a8, a7
        j       .Ladd_carry

.Ladd_yexpzero:
        /* y is a subnormal value. Replace its sign/exponent with zero,
           i.e., no implicit "1.0", and increment the apparent exponent
           because subnormals behave as if they had the minimum (nonzero)
           exponent. Test for the case when both exponents are zero. */
        slli    a3, a3, 9
        srli    a3, a3, 9
        bnone   a2, a6, .Ladd_bothexpzero
        addi    a8, a8, 1
        j       .Ladd_yexpdiff

.Ladd_bothexpzero:
        /* Both exponents are zero. Handle this as a special case. There
           is no need to shift or round, and the normal code for handling
           a carry into the exponent field will not work because it
           assumes there is an implicit "1.0" that needs to be added. */
        add     a2, a2, a3
1:      leaf_return

.Ladd_xexpzero:
        /* Same as "yexpzero" except skip handling the case when both
           exponents are zero. */
        slli    a2, a2, 9
        srli    a2, a2, 9
        addi    a7, a7, 1
        j       .Ladd_xexpdiff

.Ladd_shiftx:
        /* Same thing as the "shifty" code, but with x and y swapped. Also,
           because the exponent difference is always nonzero in this version,
           the shift sequence can use SLL and skip loading a constant zero. */
        bnone   a2, a6, .Ladd_xexpzero
        or      a2, a2, a6
        slli    a2, a2, 8
        srli    a2, a2, 8

.Ladd_xexpdiff:
        sub     a10, a8, a7
        bgeui   a10, 32, .Ladd_returny
        ssr     a10
        sll     a9, a2
        srl     a2, a2
        add     a2, a2, a3

        /* Check if the add overflowed into the exponent. */
        extui   a10, a2, 23, 9
        bne     a10, a8, .Ladd_carry

.Ladd_round:
        /* Round up if the leftover fraction is >= 1/2. */
        bgez    a9, 1f
        addi    a2, a2, 1

        /* Check if the leftover fraction is exactly 1/2. */
        slli    a9, a9, 1
        beqz    a9, .Ladd_exactlyhalf
1:      leaf_return

.Ladd_returny:
        mov     a2, a3
        leaf_return

.Ladd_carry:
        /* The addition has overflowed into the exponent field, so the
           value needs to be renormalized. The mantissa of the result
           can be recovered by subtracting the original exponent and
           adding 0x800000 (which is the explicit "1.0" for the
           mantissa of the non-shifted operand -- the "1.0" for the
           shifted operand was already added). The mantissa can then
           be shifted right by one bit. The explicit "1.0" of the
           shifted mantissa then needs to be replaced by the exponent,
           incremented by one to account for the normalizing shift.
           It is faster to combine these operations: do the shift first
           and combine the additions and subtractions. If x is the
           original exponent, the result is:
               shifted mantissa - (x << 22) + (1 << 22) + (x << 23)
           or:
               shifted mantissa + ((x + 1) << 22)
           Note that the exponent is incremented here by leaving the
           explicit "1.0" of the mantissa in the exponent field. */
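        /* To see that the two expressions above agree, note that
           (x << 23) - (x << 22) = x << 22, so
               shifted mantissa - (x << 22) + (1 << 22) + (x << 23)
             = shifted mantissa + (x << 22) + (1 << 22)
             = shifted mantissa + ((x + 1) << 22),
           which is what the code below computes by incrementing a8
           before shifting it into the exponent position. */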
        /* Shift x right by one bit. Save the lsb. */
        mov     a10, a2
        srli    a2, a2, 1

        /* See explanation above. The original exponent is in a8. */
        addi    a8, a8, 1
        slli    a8, a8, 22
        add     a2, a2, a8

        /* Return an Infinity if the exponent overflowed. */
        ball    a2, a6, .Ladd_infinity

        /* Same thing as the "round" code except the msb of the leftover
           fraction is bit 0 of a10, with the rest of the fraction in a9. */
        bbci.l  a10, 0, 1f
        addi    a2, a2, 1
        beqz    a9, .Ladd_exactlyhalf
1:      leaf_return

.Ladd_infinity:
        /* Clear the mantissa. */
        srli    a2, a2, 23
        slli    a2, a2, 23

        /* The sign bit may have been lost in a carry-out. Put it back. */
        slli    a8, a8, 1
        or      a2, a2, a8
        leaf_return

.Ladd_exactlyhalf:
        /* Round down to the nearest even value. */
        srli    a2, a2, 1
        slli    a2, a2, 1
        leaf_return
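        /* A note on the rounding done at .Ladd_round and .Ladd_exactlyhalf
           above: a9 holds the bits shifted out of the operand with the
           smaller exponent, with the guard bit in its msb. "bgez a9"
           therefore rounds down when the guard bit is clear; otherwise the
           mantissa is incremented, and when the remaining bits are all zero
           (the value is exactly halfway) the lsb is cleared to round to the
           nearest even value. */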
/* Subtraction */
__subsf3_aux:

        /* Handle NaNs and Infinities. (This code is placed before the
           start of the function just to keep it in range of the limited
           branch displacements.) */

.Lsub_xnan_or_inf:
        /* If y is neither Infinity nor NaN, return x. */
        bnall   a3, a6, 1f
        /* Both x and y are either NaN or Inf, so the result is NaN. */
        movi    a4, 0x400000    /* make it a quiet NaN */
        or      a2, a2, a4
1:      leaf_return

.Lsub_ynan_or_inf:
        /* Negate y and return it. */
        slli    a7, a6, 8
        xor     a2, a3, a7
        leaf_return

.Lsub_opposite_signs:
        /* Operand signs differ. Do an addition. */
        slli    a7, a6, 8
        xor     a3, a3, a7
        j       .Ladd_same_sign

        .align  4
        .global __subsf3
        .type   __subsf3, @function
__subsf3:
        leaf_entry sp, 16
        movi    a6, 0x7f800000

        /* Check if the two operands have the same sign. */
        xor     a7, a2, a3
        bltz    a7, .Lsub_opposite_signs

.Lsub_same_sign:
        /* Check if either exponent == 0x7f8 (i.e., NaN or Infinity). */
        ball    a2, a6, .Lsub_xnan_or_inf
        ball    a3, a6, .Lsub_ynan_or_inf

        /* Compare the operands. In contrast to addition, the entire
           value matters here. */
        extui   a7, a2, 23, 8
        extui   a8, a3, 23, 8
        bltu    a2, a3, .Lsub_xsmaller

.Lsub_ysmaller:
        /* Check if the smaller (or equal) exponent is zero. */
        bnone   a3, a6, .Lsub_yexpzero
        /* Replace y sign/exponent with 0x008. */
        or      a3, a3, a6
        slli    a3, a3, 8
        srli    a3, a3, 8

.Lsub_yexpdiff:
        /* Compute the exponent difference. */
        sub     a10, a7, a8
        /* Exponent difference > 32 -- just return the bigger value. */
        bgeui   a10, 32, 1f

        /* Shift y right by the exponent difference. Any bits that are
           shifted out of y are saved in a9 for rounding the result. */
        ssr     a10
        movi    a9, 0
        src     a9, a3, a9
        srl     a3, a3
        sub     a2, a2, a3

        /* Subtract the leftover bits in a9 from zero and propagate any
           borrow from a2. */
        neg     a9, a9
        addi    a10, a2, -1
        movnez  a2, a10, a9

        /* Check if the subtract underflowed into the exponent. */
        extui   a10, a2, 23, 8
        beq     a10, a7, .Lsub_round
        j       .Lsub_borrow

.Lsub_yexpzero:
        /* Return zero if the inputs are equal. (For the non-subnormal
           case, subtracting the "1.0" will cause a borrow from the exponent
           and this case can be detected when handling the borrow.) */
        beq     a2, a3, .Lsub_return_zero

        /* y is a subnormal value. Replace its sign/exponent with zero,
           i.e., no implicit "1.0". Unless x is also a subnormal, increment
           y's apparent exponent because subnormals behave as if they had
           the minimum (nonzero) exponent. */
        slli    a3, a3, 9
        srli    a3, a3, 9
        bnone   a2, a6, .Lsub_yexpdiff
        addi    a8, a8, 1
        j       .Lsub_yexpdiff

.Lsub_returny:
        /* Negate and return y. */
        slli    a7, a6, 8
        xor     a2, a3, a7
1:      leaf_return

.Lsub_xsmaller:
        /* Same thing as the "ysmaller" code, but with x and y swapped and
           with y negated. */
        bnone   a2, a6, .Lsub_xexpzero
        or      a2, a2, a6
        slli    a2, a2, 8
        srli    a2, a2, 8

.Lsub_xexpdiff:
        sub     a10, a8, a7
        bgeui   a10, 32, .Lsub_returny
        ssr     a10
        movi    a9, 0
        src     a9, a2, a9
        srl     a2, a2

        /* Negate y. */
        slli    a11, a6, 8
        xor     a3, a3, a11

        sub     a2, a3, a2
        neg     a9, a9
        addi    a10, a2, -1
        movnez  a2, a10, a9

        /* Check if the subtract underflowed into the exponent. */
        extui   a10, a2, 23, 8
        bne     a10, a8, .Lsub_borrow

.Lsub_round:
        /* Round up if the leftover fraction is >= 1/2. */
        bgez    a9, 1f
        addi    a2, a2, 1

        /* Check if the leftover fraction is exactly 1/2. */
        slli    a9, a9, 1
        beqz    a9, .Lsub_exactlyhalf
1:      leaf_return

.Lsub_xexpzero:
        /* Same as "yexpzero". */
        beq     a2, a3, .Lsub_return_zero
        slli    a2, a2, 9
        srli    a2, a2, 9
        bnone   a3, a6, .Lsub_xexpdiff
        addi    a7, a7, 1
        j       .Lsub_xexpdiff

.Lsub_return_zero:
        movi    a2, 0
        leaf_return

.Lsub_borrow:
        /* The subtraction has underflowed into the exponent field, so the
           value needs to be renormalized. Shift the mantissa left as
           needed to remove any leading zeros and adjust the exponent
           accordingly. If the exponent is not large enough to remove
           all the leading zeros, the result will be a subnormal value. */
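        /* The do_nsau macro used below is defined in the file that
           includes this one; it is assumed here to compute the number of
           leading zero bits of its second operand (the normalization
           shift amount), using the NSAU instruction when the processor
           provides it. */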
        slli    a8, a2, 9
        beqz    a8, .Lsub_xzero
        do_nsau a6, a8, a7, a11
        srli    a8, a8, 9
        bge     a6, a10, .Lsub_subnormal
        addi    a6, a6, 1

.Lsub_normalize_shift:
        /* Shift the mantissa (a8/a9) left by a6. */
        ssl     a6
        src     a8, a8, a9
        sll     a9, a9

        /* Combine the shifted mantissa with the sign and exponent,
           decrementing the exponent by a6. (The exponent has already
           been decremented by one due to the borrow from the subtraction,
           but adding the mantissa will increment the exponent by one.) */
        srli    a2, a2, 23
        sub     a2, a2, a6
        slli    a2, a2, 23
        add     a2, a2, a8
        j       .Lsub_round

.Lsub_exactlyhalf:
        /* Round down to the nearest even value. */
        srli    a2, a2, 1
        slli    a2, a2, 1
        leaf_return

.Lsub_xzero:
        /* If there was a borrow from the exponent, and the mantissa and
           guard digits are all zero, then the inputs were equal and the
           result should be zero. */
        beqz    a9, .Lsub_return_zero

        /* Only the guard digit is nonzero. Shift by min(24, a10). */
        addi    a11, a10, -24
        movi    a6, 24
        movltz  a6, a10, a11
        j       .Lsub_normalize_shift

.Lsub_subnormal:
        /* The exponent is too small to shift away all the leading zeros.
           Set a6 to the current exponent (which has already been
           decremented by the borrow) so that the exponent of the result
           will be zero. Do not add 1 to a6 in this case, because: (1)
           adding the mantissa will not increment the exponent, so there is
           no need to subtract anything extra from the exponent to
           compensate, and (2) the effective exponent of a subnormal is 1
           not 0 so the shift amount must be 1 smaller than normal. */
        mov     a6, a10
        j       .Lsub_normalize_shift

#endif /* L_addsubsf3 */

#ifdef L_mulsf3

/* Multiplication */
#if !XCHAL_HAVE_MUL16 && !XCHAL_HAVE_MUL32 && !XCHAL_HAVE_MAC16
#define XCHAL_NO_MUL 1
#endif

__mulsf3_aux:

        /* Handle unusual cases (zeros, subnormals, NaNs and Infinities).
           (This code is placed before the start of the function just to
           keep it in range of the limited branch displacements.) */

.Lmul_xexpzero:
        /* Clear the sign bit of x. */
        slli    a2, a2, 1
        srli    a2, a2, 1

        /* If x is zero, return zero. */
        beqz    a2, .Lmul_return_zero

        /* Normalize x. Adjust the exponent in a8. */
        do_nsau a10, a2, a11, a12
        addi    a10, a10, -8
        ssl     a10
        sll     a2, a2
        movi    a8, 1
        sub     a8, a8, a10
        j       .Lmul_xnormalized

.Lmul_yexpzero:
        /* Clear the sign bit of y. */
        slli    a3, a3, 1
        srli    a3, a3, 1

        /* If y is zero, return zero. */
        beqz    a3, .Lmul_return_zero

        /* Normalize y. Adjust the exponent in a9. */
        do_nsau a10, a3, a11, a12
        addi    a10, a10, -8
        ssl     a10
        sll     a3, a3
        movi    a9, 1
        sub     a9, a9, a10
        j       .Lmul_ynormalized

.Lmul_return_zero:
        /* Return zero with the appropriate sign bit. */
        srli    a2, a7, 31
        slli    a2, a2, 31
        j       .Lmul_done

.Lmul_xnan_or_inf:
        /* If y is zero, return NaN. */
        slli    a8, a3, 1
        bnez    a8, 1f
        movi    a4, 0x400000    /* make it a quiet NaN */
        or      a2, a2, a4
        j       .Lmul_done
1:
        /* If y is NaN, return y. */
        bnall   a3, a6, .Lmul_returnx
        slli    a8, a3, 9
        beqz    a8, .Lmul_returnx

.Lmul_returny:
        mov     a2, a3

.Lmul_returnx:
        /* Set the sign bit and return. */
        extui   a7, a7, 31, 1
        slli    a2, a2, 1
        ssai    1
        src     a2, a7, a2
        j       .Lmul_done

.Lmul_ynan_or_inf:
        /* If x is zero, return NaN. */
        slli    a8, a2, 1
        bnez    a8, .Lmul_returny
        movi    a7, 0x400000    /* make it a quiet NaN */
        or      a2, a3, a7
        j       .Lmul_done

        .align  4
        .global __mulsf3
        .type   __mulsf3, @function
__mulsf3:
#if __XTENSA_CALL0_ABI__
        leaf_entry sp, 32
        addi    sp, sp, -32
        s32i    a12, sp, 16
        s32i    a13, sp, 20
        s32i    a14, sp, 24
        s32i    a15, sp, 28
#elif XCHAL_NO_MUL
        /* This is not really a leaf function; allocate enough stack space
           to allow CALL12s to a helper function. */
        leaf_entry sp, 64
#else
        leaf_entry sp, 32
#endif
        movi    a6, 0x7f800000

        /* Get the sign of the result. */
        xor     a7, a2, a3

        /* Check for NaN and infinity. */
        ball    a2, a6, .Lmul_xnan_or_inf
        ball    a3, a6, .Lmul_ynan_or_inf

        /* Extract the exponents. */
        extui   a8, a2, 23, 8
        extui   a9, a3, 23, 8

        beqz    a8, .Lmul_xexpzero
.Lmul_xnormalized:
        beqz    a9, .Lmul_yexpzero
.Lmul_ynormalized:

        /* Add the exponents. */
        add     a8, a8, a9

        /* Replace sign/exponent fields with explicit "1.0". */
        movi    a10, 0xffffff
        or      a2, a2, a6
        and     a2, a2, a10
        or      a3, a3, a6
        and     a3, a3, a10

        /* Multiply 32x32 to 64 bits. The result ends up in a2/a6. */

#if XCHAL_HAVE_MUL32_HIGH

        mull    a6, a2, a3
        muluh   a2, a2, a3

#else

        /* Break the inputs into 16-bit chunks and compute 4 32-bit partial
           products. These partial products are:
               0 xl * yl
               1 xl * yh
               2 xh * yl
               3 xh * yh
           If using the Mul16 or Mul32 multiplier options, these input
           chunks must be stored in separate registers. For Mac16, the
           UMUL.AA.* opcodes can specify that the inputs come from either
           half of the registers, so there is no need to shift them out
           ahead of time. If there is no multiply hardware, the 16-bit
           chunks can be extracted when setting up the arguments to the
           separate multiply function. */
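        /* Writing x = (xh << 16) + xl and y = (yh << 16) + yl, the full
           48-bit product of the two 24-bit mantissas is
               x * y = ((xh * yh) << 32) + ((xl * yh + xh * yl) << 16) + xl * yl,
           and the code below accumulates these four partial products into
           the a2 (high word) / a6 (low word) register pair. */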
#if __XTENSA_CALL0_ABI__ && XCHAL_NO_MUL
        /* Calling a separate multiply function will clobber a0 and requires
           use of a8 as a temporary, so save those values now. (The function
           uses a custom ABI so nothing else needs to be saved.) */
        s32i    a0, sp, 0
        s32i    a8, sp, 4
#endif

#if XCHAL_HAVE_MUL16 || XCHAL_HAVE_MUL32

#define a2h a4
#define a3h a5

        /* Get the high halves of the inputs into registers. */
        srli    a2h, a2, 16
        srli    a3h, a3, 16

#define a2l a2
#define a3l a3

#if XCHAL_HAVE_MUL32 && !XCHAL_HAVE_MUL16
        /* Clear the high halves of the inputs. This does not matter
           for MUL16 because the high bits are ignored. */
        extui   a2, a2, 0, 16
        extui   a3, a3, 0, 16
#endif
#endif /* MUL16 || MUL32 */

#if XCHAL_HAVE_MUL16

#define do_mul(dst, xreg, xhalf, yreg, yhalf) \
        mul16u  dst, xreg ## xhalf, yreg ## yhalf

#elif XCHAL_HAVE_MUL32

#define do_mul(dst, xreg, xhalf, yreg, yhalf) \
        mull    dst, xreg ## xhalf, yreg ## yhalf

#elif XCHAL_HAVE_MAC16

/* The preprocessor insists on inserting a space when concatenating after
   a period in the definition of do_mul below. These macros are a workaround
   using underscores instead of periods when doing the concatenation. */
#define umul_aa_ll umul.aa.ll
#define umul_aa_lh umul.aa.lh
#define umul_aa_hl umul.aa.hl
#define umul_aa_hh umul.aa.hh

#define do_mul(dst, xreg, xhalf, yreg, yhalf) \
        umul_aa_ ## xhalf ## yhalf xreg, yreg; \
        rsr     dst, ACCLO

#else /* no multiply hardware */

#define set_arg_l(dst, src) \
        extui   dst, src, 0, 16
#define set_arg_h(dst, src) \
        srli    dst, src, 16

#if __XTENSA_CALL0_ABI__
#define do_mul(dst, xreg, xhalf, yreg, yhalf) \
        set_arg_ ## xhalf (a13, xreg); \
        set_arg_ ## yhalf (a14, yreg); \
        call0   .Lmul_mulsi3; \
        mov     dst, a12
#else
#define do_mul(dst, xreg, xhalf, yreg, yhalf) \
        set_arg_ ## xhalf (a14, xreg); \
        set_arg_ ## yhalf (a15, yreg); \
        call12  .Lmul_mulsi3; \
        mov     dst, a14
#endif /* __XTENSA_CALL0_ABI__ */

#endif /* no multiply hardware */

        /* Add pp1 and pp2 into a6 with carry-out in a9. */
        do_mul(a6, a2, l, a3, h)        /* pp 1 */
        do_mul(a11, a2, h, a3, l)       /* pp 2 */
        movi    a9, 0
        add     a6, a6, a11
        bgeu    a6, a11, 1f
        addi    a9, a9, 1
1:
        /* Shift the high half of a9/a6 into position in a9. Note that
           this value can be safely incremented without any carry-outs. */
        ssai    16
        src     a9, a9, a6

        /* Compute the low word into a6. */
        do_mul(a11, a2, l, a3, l)       /* pp 0 */
        sll     a6, a6
        add     a6, a6, a11
        bgeu    a6, a11, 1f
        addi    a9, a9, 1
1:
        /* Compute the high word into a2. */
        do_mul(a2, a2, h, a3, h)        /* pp 3 */
        add     a2, a2, a9

#if __XTENSA_CALL0_ABI__ && XCHAL_NO_MUL
        /* Restore values saved on the stack during the multiplication. */
        l32i    a0, sp, 0
        l32i    a8, sp, 4
#endif
#endif /* ! XCHAL_HAVE_MUL32_HIGH */

        /* Shift left by 9 bits, unless there was a carry-out from the
           multiply, in which case, shift by 8 bits and increment the
           exponent. */
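        /* Each mantissa lies in [0x800000, 0xffffff], so the 64-bit
           product lies in [2^46, 2^48) and its leading one is at bit 46
           or bit 47. Shifting the a2/a6 pair left by 9 (or by 8 when bit
           47 is set, which is what the nonzero test of a5 below detects)
           brings that leading one to bit 23 of a2, the implicit "1.0"
           position. */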
        movi    a4, 9
        srli    a5, a2, 24 - 9
        beqz    a5, 1f
        addi    a4, a4, -1
        addi    a8, a8, 1
1:      ssl     a4
        src     a2, a2, a6
        sll     a6, a6

        /* Subtract the extra bias from the exponent sum (plus one to account
           for the explicit "1.0" of the mantissa that will be added to the
           exponent in the final result). */
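        /* With biased exponents e_x and e_y, the biased exponent of the
           product is e_x + e_y - 127. a8 currently holds e_x + e_y (as
           adjusted for any subnormal inputs), so subtracting 0x80 = 128
           leaves a8 one less than the final exponent; the missing one is
           supplied later when the mantissa, with its explicit "1.0" in
           bit 23, is added into the exponent field. */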
        movi    a4, 0x80
        sub     a8, a8, a4

        /* Check for over/underflow. The value in a8 is one less than the
           final exponent, so values in the range 0..fd are OK here. */
        movi    a4, 0xfe
        bgeu    a8, a4, .Lmul_overflow

.Lmul_round:
        /* Round. */
        bgez    a6, .Lmul_rounded
        addi    a2, a2, 1
        slli    a6, a6, 1
        beqz    a6, .Lmul_exactlyhalf

.Lmul_rounded:
        /* Add the exponent to the mantissa. */
        slli    a8, a8, 23
        add     a2, a2, a8

.Lmul_addsign:
        /* Add the sign bit. */
        srli    a7, a7, 31
        slli    a7, a7, 31
        or      a2, a2, a7

.Lmul_done:
#if __XTENSA_CALL0_ABI__
        l32i    a12, sp, 16
        l32i    a13, sp, 20
        l32i    a14, sp, 24
        l32i    a15, sp, 28
        addi    sp, sp, 32
#endif
        leaf_return

.Lmul_exactlyhalf:
        /* Round down to the nearest even value. */
        srli    a2, a2, 1
        slli    a2, a2, 1
        j       .Lmul_rounded

.Lmul_overflow:
        bltz    a8, .Lmul_underflow
        /* Return +/- Infinity. */
        movi    a8, 0xff
        slli    a2, a8, 23
        j       .Lmul_addsign

.Lmul_underflow:
        /* Create a subnormal value, where the exponent field contains zero,
           but the effective exponent is 1. The value of a8 is one less than
           the actual exponent, so just negate it to get the shift amount. */
        neg     a8, a8
        mov     a9, a6
        ssr     a8
        bgeui   a8, 32, .Lmul_flush_to_zero

        /* Shift a2 right. Any bits that are shifted out of a2 are saved
           in a6 (combined with the shifted-out bits currently in a6) for
           rounding the result. */
        sll     a6, a2
        srl     a2, a2

        /* Set the exponent to zero. */
        movi    a8, 0

        /* Pack any nonzero bits shifted out into a6. */
        beqz    a9, .Lmul_round
        movi    a9, 1
        or      a6, a6, a9
        j       .Lmul_round

.Lmul_flush_to_zero:
        /* Return zero with the appropriate sign bit. */
        srli    a2, a7, 31
        slli    a2, a2, 31
        j       .Lmul_done

#if XCHAL_NO_MUL

        /* For Xtensa processors with no multiply hardware, this simplified
           version of _mulsi3 is used for multiplying 16-bit chunks of
           the floating-point mantissas. When using CALL0, this function
           uses a custom ABI: the inputs are passed in a13 and a14, the
           result is returned in a12, and a8 and a15 are clobbered. */
        .align  4
.Lmul_mulsi3:
        leaf_entry sp, 16

        .macro mul_mulsi3_body dst, src1, src2, tmp1, tmp2
        movi    \dst, 0
1:      add     \tmp1, \src2, \dst
        extui   \tmp2, \src1, 0, 1
        movnez  \dst, \tmp1, \tmp2

        do_addx2 \tmp1, \src2, \dst, \tmp1
        extui   \tmp2, \src1, 1, 1
        movnez  \dst, \tmp1, \tmp2

        do_addx4 \tmp1, \src2, \dst, \tmp1
        extui   \tmp2, \src1, 2, 1
        movnez  \dst, \tmp1, \tmp2

        do_addx8 \tmp1, \src2, \dst, \tmp1
        extui   \tmp2, \src1, 3, 1
        movnez  \dst, \tmp1, \tmp2

        srli    \src1, \src1, 4
        slli    \src2, \src2, 4
        bnez    \src1, 1b
        .endm

#if __XTENSA_CALL0_ABI__
        mul_mulsi3_body a12, a13, a14, a15, a8
#else
        /* The result will be written into a2, so save that argument in a4. */
        mov     a4, a2
        mul_mulsi3_body a2, a4, a3, a5, a6
#endif
        leaf_return
#endif /* XCHAL_NO_MUL */
#endif /* L_mulsf3 */

#ifdef L_divsf3

/* Division */
__divsf3_aux:

        /* Handle unusual cases (zeros, subnormals, NaNs and Infinities).
           (This code is placed before the start of the function just to
           keep it in range of the limited branch displacements.) */

.Ldiv_yexpzero:
        /* Clear the sign bit of y. */
        slli    a3, a3, 1
        srli    a3, a3, 1

        /* Check for division by zero. */
        beqz    a3, .Ldiv_yzero

        /* Normalize y. Adjust the exponent in a9. */
        do_nsau a10, a3, a4, a5
        addi    a10, a10, -8
        ssl     a10
        sll     a3, a3
        movi    a9, 1
        sub     a9, a9, a10
        j       .Ldiv_ynormalized

.Ldiv_yzero:
        /* y is zero. Return NaN if x is also zero; otherwise, infinity. */
        slli    a4, a2, 1
        srli    a4, a4, 1
        srli    a2, a7, 31
        slli    a2, a2, 31
        or      a2, a2, a6
        bnez    a4, 1f
        movi    a4, 0x400000    /* make it a quiet NaN */
        or      a2, a2, a4
1:      leaf_return

.Ldiv_xexpzero:
        /* Clear the sign bit of x. */
        slli    a2, a2, 1
        srli    a2, a2, 1

        /* If x is zero, return zero. */
        beqz    a2, .Ldiv_return_zero

        /* Normalize x. Adjust the exponent in a8. */
        do_nsau a10, a2, a4, a5
        addi    a10, a10, -8
        ssl     a10
        sll     a2, a2
        movi    a8, 1
        sub     a8, a8, a10
        j       .Ldiv_xnormalized

.Ldiv_return_zero:
        /* Return zero with the appropriate sign bit. */
        srli    a2, a7, 31
        slli    a2, a2, 31
        leaf_return

.Ldiv_xnan_or_inf:
        /* Set the sign bit of the result. */
        srli    a7, a3, 31
        slli    a7, a7, 31
        xor     a2, a2, a7
        /* If y is NaN or Inf, return NaN. */
        bnall   a3, a6, 1f
        movi    a4, 0x400000    /* make it a quiet NaN */
        or      a2, a2, a4
1:      leaf_return

.Ldiv_ynan_or_inf:
        /* If y is Infinity, return zero. */
        slli    a8, a3, 9
        beqz    a8, .Ldiv_return_zero
        /* y is NaN; return it. */
        mov     a2, a3
        leaf_return

        .align  4
        .global __divsf3
        .type   __divsf3, @function
__divsf3:
        leaf_entry sp, 16
        movi    a6, 0x7f800000

        /* Get the sign of the result. */
        xor     a7, a2, a3

        /* Check for NaN and infinity. */
        ball    a2, a6, .Ldiv_xnan_or_inf
        ball    a3, a6, .Ldiv_ynan_or_inf

        /* Extract the exponents. */
        extui   a8, a2, 23, 8
        extui   a9, a3, 23, 8

        beqz    a9, .Ldiv_yexpzero
.Ldiv_ynormalized:
        beqz    a8, .Ldiv_xexpzero
.Ldiv_xnormalized:

        /* Subtract the exponents. */
        sub     a8, a8, a9

        /* Replace sign/exponent fields with explicit "1.0". */
        movi    a10, 0xffffff
        or      a2, a2, a6
        and     a2, a2, a10
        or      a3, a3, a6
        and     a3, a3, a10

        /* The first digit of the mantissa division must be a one.
           Shift x (and adjust the exponent) as needed to make this true. */
        bltu    a3, a2, 1f
        slli    a2, a2, 1
        addi    a8, a8, -1
1:
        /* Do the first subtraction and shift. */
        sub     a2, a2, a3
        slli    a2, a2, 1

        /* Put the quotient into a10. */
        movi    a10, 1

        /* Divide one bit at a time for 23 bits. */
        movi    a9, 23
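        /* The loop below is a simple shift-and-subtract (restoring)
           division: on each iteration the quotient (a10) is shifted left,
           and if the current remainder (a2) is at least the divisor (a3),
           the divisor is subtracted and a one is written into the
           quotient's lsb; the remainder is then shifted left for the next
           bit. */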
#if XCHAL_HAVE_LOOPS
        loop    a9, .Ldiv_loopend
#endif
.Ldiv_loop:
        /* Shift the quotient << 1. */
        slli    a10, a10, 1

        /* Is this digit a 0 or 1? */
        bltu    a2, a3, 1f

        /* Output a 1 and subtract. */
        addi    a10, a10, 1
        sub     a2, a2, a3

        /* Shift the dividend << 1. */
1:      slli    a2, a2, 1

#if !XCHAL_HAVE_LOOPS
        addi    a9, a9, -1
        bnez    a9, .Ldiv_loop
#endif
.Ldiv_loopend:

        /* Add the exponent bias (less one to account for the explicit "1.0"
           of the mantissa that will be added to the exponent in the final
           result). */
        addi    a8, a8, 0x7e

        /* Check for over/underflow. The value in a8 is one less than the
           final exponent, so values in the range 0..fd are OK here. */
        movi    a4, 0xfe
        bgeu    a8, a4, .Ldiv_overflow

.Ldiv_round:
        /* Round. The remainder (<< 1) is in a2. */
        bltu    a2, a3, .Ldiv_rounded
        addi    a10, a10, 1
        beq     a2, a3, .Ldiv_exactlyhalf

.Ldiv_rounded:
        /* Add the exponent to the mantissa. */
        slli    a8, a8, 23
        add     a2, a10, a8

.Ldiv_addsign:
        /* Add the sign bit. */
        srli    a7, a7, 31
        slli    a7, a7, 31
        or      a2, a2, a7
        leaf_return

.Ldiv_overflow:
        bltz    a8, .Ldiv_underflow
        /* Return +/- Infinity. */
        addi    a8, a4, 1       /* 0xff */
        slli    a2, a8, 23
        j       .Ldiv_addsign

.Ldiv_exactlyhalf:
        /* Remainder is exactly half the divisor. Round even. */
        srli    a10, a10, 1
        slli    a10, a10, 1
        j       .Ldiv_rounded

.Ldiv_underflow:
        /* Create a subnormal value, where the exponent field contains zero,
           but the effective exponent is 1. The value of a8 is one less than
           the actual exponent, so just negate it to get the shift amount. */
        neg     a8, a8
        ssr     a8
        bgeui   a8, 32, .Ldiv_flush_to_zero

        /* Shift a10 right. Any bits that are shifted out of a10 are
           saved in a6 for rounding the result. */
        sll     a6, a10
        srl     a10, a10

        /* Set the exponent to zero. */
        movi    a8, 0

        /* Pack any nonzero remainder (in a2) into a6. */
        beqz    a2, 1f
        movi    a9, 1
        or      a6, a6, a9

        /* Round a10 based on the bits shifted out into a6. */
1:      bgez    a6, .Ldiv_rounded
        addi    a10, a10, 1
        slli    a6, a6, 1
        bnez    a6, .Ldiv_rounded
        srli    a10, a10, 1
        slli    a10, a10, 1
        j       .Ldiv_rounded

.Ldiv_flush_to_zero:
        /* Return zero with the appropriate sign bit. */
        srli    a2, a7, 31
        slli    a2, a2, 31
        leaf_return

#endif /* L_divsf3 */

#ifdef L_cmpsf2

/* Equal and Not Equal */

        .align  4
        .global __eqsf2
        .global __nesf2
        .set    __nesf2, __eqsf2
        .type   __eqsf2, @function
__eqsf2:
        leaf_entry sp, 16
        bne     a2, a3, 4f

        /* The values are equal but NaN != NaN. Check the exponent. */
        movi    a6, 0x7f800000
        ball    a2, a6, 3f

        /* Equal. */
        movi    a2, 0
        leaf_return

        /* Not equal. */
2:      movi    a2, 1
        leaf_return

        /* Check if the mantissas are nonzero. */
3:      slli    a7, a2, 9
        j       5f

        /* Check if x and y are zero with different signs. */
4:      or      a7, a2, a3
        slli    a7, a7, 1
        /* Equal if a7 == 0, where a7 is either abs(x | y) or the mantissa
           of x when exponent(x) = 0x7f8 and x == y. */
5:      movi    a2, 0
        movi    a3, 1
        movnez  a2, a3, a7
        leaf_return
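/* Note on the return conventions of the ordered comparisons below: as is
   usual for libgcc soft-float comparison helpers, each function returns an
   integer that compares with zero in the same way the floating-point
   operands compare with each other, and the value returned when either
   operand is a NaN (0 for __gtsf2 and __ltsf2, -1 for __gesf2, 1 for
   __lesf2) is chosen so that the corresponding ordered comparison comes
   out false. */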
/* Greater Than */

        .align  4
        .global __gtsf2
        .type   __gtsf2, @function
__gtsf2:
        leaf_entry sp, 16
        movi    a6, 0x7f800000
        ball    a2, a6, 2f
1:      bnall   a3, a6, .Lle_cmp

        /* Check if y is a NaN. */
        slli    a7, a3, 9
        beqz    a7, .Lle_cmp
        movi    a2, 0
        leaf_return

        /* Check if x is a NaN. */
2:      slli    a7, a2, 9
        beqz    a7, 1b
        movi    a2, 0
        leaf_return

/* Less Than or Equal */

        .align  4
        .global __lesf2
        .type   __lesf2, @function
__lesf2:
        leaf_entry sp, 16
        movi    a6, 0x7f800000
        ball    a2, a6, 2f
1:      bnall   a3, a6, .Lle_cmp

        /* Check if y is a NaN. */
        slli    a7, a3, 9
        beqz    a7, .Lle_cmp
        movi    a2, 1
        leaf_return

        /* Check if x is a NaN. */
2:      slli    a7, a2, 9
        beqz    a7, 1b
        movi    a2, 1
        leaf_return

.Lle_cmp:
        /* Check if x and y have different signs. */
        xor     a7, a2, a3
        bltz    a7, .Lle_diff_signs

        /* Check if x is negative. */
        bltz    a2, .Lle_xneg

        /* Check if x <= y. */
        bltu    a3, a2, 5f
4:      movi    a2, 0
        leaf_return

.Lle_xneg:
        /* Check if y <= x. */
        bgeu    a2, a3, 4b
5:      movi    a2, 1
        leaf_return

.Lle_diff_signs:
        bltz    a2, 4b

        /* Check if both x and y are zero. */
        or      a7, a2, a3
        slli    a7, a7, 1
        movi    a2, 1
        movi    a3, 0
        moveqz  a2, a3, a7
        leaf_return

/* Greater Than or Equal */

        .align  4
        .global __gesf2
        .type   __gesf2, @function
__gesf2:
        leaf_entry sp, 16
        movi    a6, 0x7f800000
        ball    a2, a6, 2f
1:      bnall   a3, a6, .Llt_cmp

        /* Check if y is a NaN. */
        slli    a7, a3, 9
        beqz    a7, .Llt_cmp
        movi    a2, -1
        leaf_return

        /* Check if x is a NaN. */
2:      slli    a7, a2, 9
        beqz    a7, 1b
        movi    a2, -1
        leaf_return

/* Less Than */

        .align  4
        .global __ltsf2
        .type   __ltsf2, @function
__ltsf2:
        leaf_entry sp, 16
        movi    a6, 0x7f800000
        ball    a2, a6, 2f
1:      bnall   a3, a6, .Llt_cmp

        /* Check if y is a NaN. */
        slli    a7, a3, 9
        beqz    a7, .Llt_cmp
        movi    a2, 0
        leaf_return

        /* Check if x is a NaN. */
2:      slli    a7, a2, 9
        beqz    a7, 1b
        movi    a2, 0
        leaf_return

.Llt_cmp:
        /* Check if x and y have different signs. */
        xor     a7, a2, a3
        bltz    a7, .Llt_diff_signs

        /* Check if x is negative. */
        bltz    a2, .Llt_xneg

        /* Check if x < y. */
        bgeu    a2, a3, 5f
4:      movi    a2, -1
        leaf_return

.Llt_xneg:
        /* Check if y < x. */
        bltu    a3, a2, 4b
5:      movi    a2, 0
        leaf_return

.Llt_diff_signs:
        bgez    a2, 5b

        /* Check if both x and y are nonzero. */
        or      a7, a2, a3
        slli    a7, a7, 1
        movi    a2, 0
        movi    a3, -1
        movnez  a2, a3, a7
        leaf_return

/* Unordered */

        .align  4
        .global __unordsf2
        .type   __unordsf2, @function
__unordsf2:
        leaf_entry sp, 16
        movi    a6, 0x7f800000
        ball    a2, a6, 3f
1:      ball    a3, a6, 4f
2:      movi    a2, 0
        leaf_return

3:      slli    a7, a2, 9
        beqz    a7, 1b
        movi    a2, 1
        leaf_return

4:      slli    a7, a3, 9
        beqz    a7, 2b
        movi    a2, 1
        leaf_return

#endif /* L_cmpsf2 */

#ifdef L_fixsfsi

        .align  4
        .global __fixsfsi
        .type   __fixsfsi, @function
__fixsfsi:
        leaf_entry sp, 16

        /* Check for NaN and Infinity. */
        movi    a6, 0x7f800000
        ball    a2, a6, .Lfixsfsi_nan_or_inf

        /* Extract the exponent and check if 0 < (exp - 0x7e) < 32. */
        extui   a4, a2, 23, 8
        addi    a4, a4, -0x7e
        bgei    a4, 32, .Lfixsfsi_maxint
        blti    a4, 1, .Lfixsfsi_zero

        /* Add explicit "1.0" and shift << 8. */
        or      a7, a2, a6
        slli    a5, a7, 8

        /* Shift back to the right, based on the exponent. */
        ssl     a4              /* shift by 32 - a4 */
        srl     a5, a5

        /* Negate the result if sign != 0. */
        neg     a2, a5
        movgez  a2, a5, a7
        leaf_return

.Lfixsfsi_nan_or_inf:
        /* Handle Infinity and NaN. */
        slli    a4, a2, 9
        beqz    a4, .Lfixsfsi_maxint

        /* Translate NaN to +maxint. */
        movi    a2, 0

.Lfixsfsi_maxint:
        slli    a4, a6, 8       /* 0x80000000 */
        addi    a5, a4, -1      /* 0x7fffffff */
        movgez  a4, a5, a2
        mov     a2, a4
        leaf_return

.Lfixsfsi_zero:
        movi    a2, 0
        leaf_return

#endif /* L_fixsfsi */

#ifdef L_fixsfdi

        .align  4
        .global __fixsfdi
        .type   __fixsfdi, @function
__fixsfdi:
        leaf_entry sp, 16

        /* Check for NaN and Infinity. */
        movi    a6, 0x7f800000
        ball    a2, a6, .Lfixsfdi_nan_or_inf

        /* Extract the exponent and check if 0 < (exp - 0x7e) < 64. */
        extui   a4, a2, 23, 8
        addi    a4, a4, -0x7e
        bgei    a4, 64, .Lfixsfdi_maxint
        blti    a4, 1, .Lfixsfdi_zero

        /* Add explicit "1.0" and shift << 8. */
        or      a7, a2, a6
        slli    xh, a7, 8

        /* Shift back to the right, based on the exponent. */
        ssl     a4              /* shift by 64 - a4 */
        bgei    a4, 32, .Lfixsfdi_smallshift
        srl     xl, xh
        movi    xh, 0

.Lfixsfdi_shifted:
        /* Negate the result if sign != 0. */
        bgez    a7, 1f
        neg     xl, xl
        neg     xh, xh
        beqz    xl, 1f
        addi    xh, xh, -1
1:      leaf_return

.Lfixsfdi_smallshift:
        movi    xl, 0
        sll     xl, xh
        srl     xh, xh
        j       .Lfixsfdi_shifted

.Lfixsfdi_nan_or_inf:
        /* Handle Infinity and NaN. */
        slli    a4, a2, 9
        beqz    a4, .Lfixsfdi_maxint

        /* Translate NaN to +maxint. */
        movi    a2, 0

.Lfixsfdi_maxint:
        slli    a7, a6, 8       /* 0x80000000 */
        bgez    a2, 1f
        mov     xh, a7
        movi    xl, 0
        leaf_return

1:      addi    xh, a7, -1      /* 0x7fffffff */
        movi    xl, -1
        leaf_return

.Lfixsfdi_zero:
        movi    xh, 0
        movi    xl, 0
        leaf_return

#endif /* L_fixsfdi */

#ifdef L_fixunssfsi

        .align  4
        .global __fixunssfsi
        .type   __fixunssfsi, @function
__fixunssfsi:
        leaf_entry sp, 16

        /* Check for NaN and Infinity. */
        movi    a6, 0x7f800000
        ball    a2, a6, .Lfixunssfsi_nan_or_inf

        /* Extract the exponent and check if 0 <= (exp - 0x7f) < 32. */
        extui   a4, a2, 23, 8
        addi    a4, a4, -0x7f
        bgei    a4, 32, .Lfixunssfsi_maxint
        bltz    a4, .Lfixunssfsi_zero

        /* Add explicit "1.0" and shift << 8. */
        or      a7, a2, a6
        slli    a5, a7, 8

        /* Shift back to the right, based on the exponent. */
        addi    a4, a4, 1
        beqi    a4, 32, .Lfixunssfsi_bigexp
        ssl     a4              /* shift by 32 - a4 */
        srl     a5, a5

        /* Negate the result if sign != 0. */
        neg     a2, a5
        movgez  a2, a5, a7
        leaf_return

.Lfixunssfsi_nan_or_inf:
        /* Handle Infinity and NaN. */
        slli    a4, a2, 9
        beqz    a4, .Lfixunssfsi_maxint

        /* Translate NaN to 0xffffffff. */
        movi    a2, -1
        leaf_return

.Lfixunssfsi_maxint:
        slli    a4, a6, 8       /* 0x80000000 */
        movi    a5, -1          /* 0xffffffff */
        movgez  a4, a5, a2
        mov     a2, a4
        leaf_return

.Lfixunssfsi_zero:
        movi    a2, 0
        leaf_return

.Lfixunssfsi_bigexp:
        /* Handle unsigned maximum exponent case. */
        bltz    a2, 1f
        mov     a2, a5          /* no shift needed */
        leaf_return

        /* Return 0x80000000 if negative. */
1:      slli    a2, a6, 8
        leaf_return

#endif /* L_fixunssfsi */

#ifdef L_fixunssfdi

        .align  4
        .global __fixunssfdi
        .type   __fixunssfdi, @function
__fixunssfdi:
        leaf_entry sp, 16

        /* Check for NaN and Infinity. */
        movi    a6, 0x7f800000
        ball    a2, a6, .Lfixunssfdi_nan_or_inf

        /* Extract the exponent and check if 0 <= (exp - 0x7f) < 64. */
        extui   a4, a2, 23, 8
        addi    a4, a4, -0x7f
        bgei    a4, 64, .Lfixunssfdi_maxint
        bltz    a4, .Lfixunssfdi_zero

        /* Add explicit "1.0" and shift << 8. */
        or      a7, a2, a6
        slli    xh, a7, 8

        /* Shift back to the right, based on the exponent. */
        addi    a4, a4, 1
        beqi    a4, 64, .Lfixunssfdi_bigexp
        ssl     a4              /* shift by 64 - a4 */
        bgei    a4, 32, .Lfixunssfdi_smallshift
        srl     xl, xh
        movi    xh, 0

.Lfixunssfdi_shifted:
        /* Negate the result if sign != 0. */
        bgez    a7, 1f
        neg     xl, xl
        neg     xh, xh
        beqz    xl, 1f
        addi    xh, xh, -1
1:      leaf_return

.Lfixunssfdi_smallshift:
        movi    xl, 0
        src     xl, xh, xl
        srl     xh, xh
        j       .Lfixunssfdi_shifted

.Lfixunssfdi_nan_or_inf:
        /* Handle Infinity and NaN. */
        slli    a4, a2, 9
        beqz    a4, .Lfixunssfdi_maxint

        /* Translate NaN to 0xffffffff.... */
1:      movi    xh, -1
        movi    xl, -1
        leaf_return

.Lfixunssfdi_maxint:
        bgez    a2, 1b
2:      slli    xh, a6, 8       /* 0x80000000 */
        movi    xl, 0
        leaf_return

.Lfixunssfdi_zero:
        movi    xh, 0
        movi    xl, 0
        leaf_return

.Lfixunssfdi_bigexp:
        /* Handle unsigned maximum exponent case. */
        bltz    a7, 2b
        movi    xl, 0
        leaf_return             /* no shift needed */

#endif /* L_fixunssfdi */

#ifdef L_floatsisf

        .align  4
        .global __floatunsisf
        .type   __floatunsisf, @function
__floatunsisf:
        leaf_entry sp, 16
        beqz    a2, .Lfloatsisf_return

        /* Set the sign to zero and jump to the floatsisf code. */
        movi    a7, 0
        j       .Lfloatsisf_normalize

        .align  4
        .global __floatsisf
        .type   __floatsisf, @function
__floatsisf:
        leaf_entry sp, 16

        /* Check for zero. */
        beqz    a2, .Lfloatsisf_return

        /* Save the sign. */
        extui   a7, a2, 31, 1

        /* Get the absolute value. */
#if XCHAL_HAVE_ABS
        abs     a2, a2
#else
        neg     a4, a2
        movltz  a2, a4, a2
#endif

.Lfloatsisf_normalize:
        /* Normalize with the first 1 bit in the msb. */
        do_nsau a4, a2, a5, a6
        ssl     a4
        sll     a5, a2

        /* Shift the mantissa into position, with rounding bits in a6. */
        srli    a2, a5, 8
        slli    a6, a5, (32 - 8)

        /* Set the exponent. */
        movi    a5, 0x9d        /* 0x7e + 31 */
        sub     a5, a5, a4
        slli    a5, a5, 23
        add     a2, a2, a5

        /* Add the sign. */
        slli    a7, a7, 31
        or      a2, a2, a7

        /* Round up if the leftover fraction is >= 1/2. */
        bgez    a6, .Lfloatsisf_return
        addi    a2, a2, 1       /* Overflow to the exponent is OK. */

        /* Check if the leftover fraction is exactly 1/2. */
        slli    a6, a6, 1
        beqz    a6, .Lfloatsisf_exactlyhalf

.Lfloatsisf_return:
        leaf_return

.Lfloatsisf_exactlyhalf:
        /* Round down to the nearest even value. */
        srli    a2, a2, 1
        slli    a2, a2, 1
        leaf_return

#endif /* L_floatsisf */

#ifdef L_floatdisf

        .align  4
        .global __floatundisf
        .type   __floatundisf, @function
__floatundisf:
        leaf_entry sp, 16

        /* Check for zero. */
        or      a4, xh, xl
        beqz    a4, 2f

        /* Set the sign to zero and jump to the floatdisf code. */
        movi    a7, 0
        j       .Lfloatdisf_normalize

        .align  4
        .global __floatdisf
        .type   __floatdisf, @function
__floatdisf:
        leaf_entry sp, 16

        /* Check for zero. */
        or      a4, xh, xl
        beqz    a4, 2f

        /* Save the sign. */
        extui   a7, xh, 31, 1

        /* Get the absolute value. */
        bgez    xh, .Lfloatdisf_normalize
        neg     xl, xl
        neg     xh, xh
        beqz    xl, .Lfloatdisf_normalize
        addi    xh, xh, -1

.Lfloatdisf_normalize:
        /* Normalize with the first 1 bit in the msb of xh. */
        beqz    xh, .Lfloatdisf_bigshift
        do_nsau a4, xh, a5, a6
        ssl     a4
        src     xh, xh, xl
        sll     xl, xl

.Lfloatdisf_shifted:
        /* Shift the mantissa into position, with rounding bits in a6. */
        ssai    8
        sll     a5, xl
        src     a6, xh, xl
        srl     xh, xh
        beqz    a5, 1f
        movi    a5, 1
        or      a6, a6, a5
1:
        /* Set the exponent. */
        movi    a5, 0xbd        /* 0x7e + 63 */
        sub     a5, a5, a4
        slli    a5, a5, 23
        add     a2, xh, a5

        /* Add the sign. */
        slli    a7, a7, 31
        or      a2, a2, a7

        /* Round up if the leftover fraction is >= 1/2. */
        bgez    a6, 2f
        addi    a2, a2, 1       /* Overflow to the exponent is OK. */

        /* Check if the leftover fraction is exactly 1/2. */
        slli    a6, a6, 1
        beqz    a6, .Lfloatdisf_exactlyhalf
2:      leaf_return

.Lfloatdisf_bigshift:
        /* xh is zero. Normalize with first 1 bit of xl in the msb of xh. */
        do_nsau a4, xl, a5, a6
        ssl     a4
        sll     xh, xl
        movi    xl, 0
        addi    a4, a4, 32
        j       .Lfloatdisf_shifted

.Lfloatdisf_exactlyhalf:
        /* Round down to the nearest even value. */
        srli    a2, a2, 1
        slli    a2, a2, 1
        leaf_return

#endif /* L_floatdisf */