PageRenderTime 112ms CodeModel.GetById 34ms RepoModel.GetById 1ms app.codeStats 1ms

/arch/m68k/ifpsp060/src/pfpsp.S

https://bitbucket.org/evzijst/gittest
Assembly | 14745 lines | 13552 code | 1193 blank | 0 comment | 323 complexity | 175cf69aa2f13a5f5a79a01ab9b41c33 MD5 | raw file
Possible License(s): CC-BY-SA-3.0, GPL-2.0, LGPL-2.0
  1. ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
  2. MOTOROLA MICROPROCESSOR & MEMORY TECHNOLOGY GROUP
  3. M68000 Hi-Performance Microprocessor Division
  4. M68060 Software Package
  5. Production Release P1.00 -- October 10, 1994
  6. M68060 Software Package Copyright © 1993, 1994 Motorola Inc. All rights reserved.
  7. THE SOFTWARE is provided on an "AS IS" basis and without warranty.
  8. To the maximum extent permitted by applicable law,
  9. MOTOROLA DISCLAIMS ALL WARRANTIES WHETHER EXPRESS OR IMPLIED,
  10. INCLUDING IMPLIED WARRANTIES OF MERCHANTABILITY OR FITNESS FOR A PARTICULAR PURPOSE
  11. and any warranty against infringement with regard to the SOFTWARE
  12. (INCLUDING ANY MODIFIED VERSIONS THEREOF) and any accompanying written materials.
  13. To the maximum extent permitted by applicable law,
  14. IN NO EVENT SHALL MOTOROLA BE LIABLE FOR ANY DAMAGES WHATSOEVER
  15. (INCLUDING WITHOUT LIMITATION, DAMAGES FOR LOSS OF BUSINESS PROFITS,
  16. BUSINESS INTERRUPTION, LOSS OF BUSINESS INFORMATION, OR OTHER PECUNIARY LOSS)
  17. ARISING OF THE USE OR INABILITY TO USE THE SOFTWARE.
  18. Motorola assumes no responsibility for the maintenance and support of the SOFTWARE.
  19. You are hereby granted a copyright license to use, modify, and distribute the SOFTWARE
  20. so long as this entire notice is retained without alteration in any modified and/or
  21. redistributed versions, and that such modified versions are clearly identified as such.
  22. No licenses are granted by implication, estoppel or otherwise under any patents
  23. or trademarks of Motorola, Inc.
  24. ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
  25. # freal.s:
  26. # This file is appended to the top of the 060FPSP package
  27. # and contains the entry points into the package. The user, in
  28. # effect, branches to one of the branch table entries located
  29. # after _060FPSP_TABLE.
  30. # Also, subroutine stubs exist in this file (_fpsp_done for
  31. # example) that are referenced by the FPSP package itself in order
  32. # to call a given routine. The stub routine actually performs the
  33. # callout. The FPSP code does a "bsr" to the stub routine. This
  34. # extra layer of hierarchy adds a slight performance penalty but
  35. # it makes the FPSP code easier to read and more maintainable.
  36. #
  37. set _off_bsun, 0x00
  38. set _off_snan, 0x04
  39. set _off_operr, 0x08
  40. set _off_ovfl, 0x0c
  41. set _off_unfl, 0x10
  42. set _off_dz, 0x14
  43. set _off_inex, 0x18
  44. set _off_fline, 0x1c
  45. set _off_fpu_dis, 0x20
  46. set _off_trap, 0x24
  47. set _off_trace, 0x28
  48. set _off_access, 0x2c
  49. set _off_done, 0x30
  50. set _off_imr, 0x40
  51. set _off_dmr, 0x44
  52. set _off_dmw, 0x48
  53. set _off_irw, 0x4c
  54. set _off_irl, 0x50
  55. set _off_drb, 0x54
  56. set _off_drw, 0x58
  57. set _off_drl, 0x5c
  58. set _off_dwb, 0x60
  59. set _off_dww, 0x64
  60. set _off_dwl, 0x68
  61. _060FPSP_TABLE:
  62. ###############################################################
  63. # Here's the table of ENTRY POINTS for those linking the package.
  64. bra.l _fpsp_snan
  65. short 0x0000
  66. bra.l _fpsp_operr
  67. short 0x0000
  68. bra.l _fpsp_ovfl
  69. short 0x0000
  70. bra.l _fpsp_unfl
  71. short 0x0000
  72. bra.l _fpsp_dz
  73. short 0x0000
  74. bra.l _fpsp_inex
  75. short 0x0000
  76. bra.l _fpsp_fline
  77. short 0x0000
  78. bra.l _fpsp_unsupp
  79. short 0x0000
  80. bra.l _fpsp_effadd
  81. short 0x0000
  82. space 56
  83. ###############################################################
  84. global _fpsp_done
  85. _fpsp_done:
  86. mov.l %d0,-(%sp)
  87. mov.l (_060FPSP_TABLE-0x80+_off_done,%pc),%d0
  88. pea.l (_060FPSP_TABLE-0x80,%pc,%d0)
  89. mov.l 0x4(%sp),%d0
  90. rtd &0x4
  91. global _real_ovfl
  92. _real_ovfl:
  93. mov.l %d0,-(%sp)
  94. mov.l (_060FPSP_TABLE-0x80+_off_ovfl,%pc),%d0
  95. pea.l (_060FPSP_TABLE-0x80,%pc,%d0)
  96. mov.l 0x4(%sp),%d0
  97. rtd &0x4
  98. global _real_unfl
  99. _real_unfl:
  100. mov.l %d0,-(%sp)
  101. mov.l (_060FPSP_TABLE-0x80+_off_unfl,%pc),%d0
  102. pea.l (_060FPSP_TABLE-0x80,%pc,%d0)
  103. mov.l 0x4(%sp),%d0
  104. rtd &0x4
  105. global _real_inex
  106. _real_inex:
  107. mov.l %d0,-(%sp)
  108. mov.l (_060FPSP_TABLE-0x80+_off_inex,%pc),%d0
  109. pea.l (_060FPSP_TABLE-0x80,%pc,%d0)
  110. mov.l 0x4(%sp),%d0
  111. rtd &0x4
  112. global _real_bsun
  113. _real_bsun:
  114. mov.l %d0,-(%sp)
  115. mov.l (_060FPSP_TABLE-0x80+_off_bsun,%pc),%d0
  116. pea.l (_060FPSP_TABLE-0x80,%pc,%d0)
  117. mov.l 0x4(%sp),%d0
  118. rtd &0x4
  119. global _real_operr
  120. _real_operr:
  121. mov.l %d0,-(%sp)
  122. mov.l (_060FPSP_TABLE-0x80+_off_operr,%pc),%d0
  123. pea.l (_060FPSP_TABLE-0x80,%pc,%d0)
  124. mov.l 0x4(%sp),%d0
  125. rtd &0x4
  126. global _real_snan
  127. _real_snan:
  128. mov.l %d0,-(%sp)
  129. mov.l (_060FPSP_TABLE-0x80+_off_snan,%pc),%d0
  130. pea.l (_060FPSP_TABLE-0x80,%pc,%d0)
  131. mov.l 0x4(%sp),%d0
  132. rtd &0x4
  133. global _real_dz
  134. _real_dz:
  135. mov.l %d0,-(%sp)
  136. mov.l (_060FPSP_TABLE-0x80+_off_dz,%pc),%d0
  137. pea.l (_060FPSP_TABLE-0x80,%pc,%d0)
  138. mov.l 0x4(%sp),%d0
  139. rtd &0x4
  140. global _real_fline
  141. _real_fline:
  142. mov.l %d0,-(%sp)
  143. mov.l (_060FPSP_TABLE-0x80+_off_fline,%pc),%d0
  144. pea.l (_060FPSP_TABLE-0x80,%pc,%d0)
  145. mov.l 0x4(%sp),%d0
  146. rtd &0x4
  147. global _real_fpu_disabled
  148. _real_fpu_disabled:
  149. mov.l %d0,-(%sp)
  150. mov.l (_060FPSP_TABLE-0x80+_off_fpu_dis,%pc),%d0
  151. pea.l (_060FPSP_TABLE-0x80,%pc,%d0)
  152. mov.l 0x4(%sp),%d0
  153. rtd &0x4
  154. global _real_trap
  155. _real_trap:
  156. mov.l %d0,-(%sp)
  157. mov.l (_060FPSP_TABLE-0x80+_off_trap,%pc),%d0
  158. pea.l (_060FPSP_TABLE-0x80,%pc,%d0)
  159. mov.l 0x4(%sp),%d0
  160. rtd &0x4
  161. global _real_trace
  162. _real_trace:
  163. mov.l %d0,-(%sp)
  164. mov.l (_060FPSP_TABLE-0x80+_off_trace,%pc),%d0
  165. pea.l (_060FPSP_TABLE-0x80,%pc,%d0)
  166. mov.l 0x4(%sp),%d0
  167. rtd &0x4
  168. global _real_access
  169. _real_access:
  170. mov.l %d0,-(%sp)
  171. mov.l (_060FPSP_TABLE-0x80+_off_access,%pc),%d0
  172. pea.l (_060FPSP_TABLE-0x80,%pc,%d0)
  173. mov.l 0x4(%sp),%d0
  174. rtd &0x4
  175. #######################################
  176. global _imem_read
  177. _imem_read:
  178. mov.l %d0,-(%sp)
  179. mov.l (_060FPSP_TABLE-0x80+_off_imr,%pc),%d0
  180. pea.l (_060FPSP_TABLE-0x80,%pc,%d0)
  181. mov.l 0x4(%sp),%d0
  182. rtd &0x4
  183. global _dmem_read
  184. _dmem_read:
  185. mov.l %d0,-(%sp)
  186. mov.l (_060FPSP_TABLE-0x80+_off_dmr,%pc),%d0
  187. pea.l (_060FPSP_TABLE-0x80,%pc,%d0)
  188. mov.l 0x4(%sp),%d0
  189. rtd &0x4
  190. global _dmem_write
  191. _dmem_write:
  192. mov.l %d0,-(%sp)
  193. mov.l (_060FPSP_TABLE-0x80+_off_dmw,%pc),%d0
  194. pea.l (_060FPSP_TABLE-0x80,%pc,%d0)
  195. mov.l 0x4(%sp),%d0
  196. rtd &0x4
  197. global _imem_read_word
  198. _imem_read_word:
  199. mov.l %d0,-(%sp)
  200. mov.l (_060FPSP_TABLE-0x80+_off_irw,%pc),%d0
  201. pea.l (_060FPSP_TABLE-0x80,%pc,%d0)
  202. mov.l 0x4(%sp),%d0
  203. rtd &0x4
  204. global _imem_read_long
  205. _imem_read_long:
  206. mov.l %d0,-(%sp)
  207. mov.l (_060FPSP_TABLE-0x80+_off_irl,%pc),%d0
  208. pea.l (_060FPSP_TABLE-0x80,%pc,%d0)
  209. mov.l 0x4(%sp),%d0
  210. rtd &0x4
  211. global _dmem_read_byte
  212. _dmem_read_byte:
  213. mov.l %d0,-(%sp)
  214. mov.l (_060FPSP_TABLE-0x80+_off_drb,%pc),%d0
  215. pea.l (_060FPSP_TABLE-0x80,%pc,%d0)
  216. mov.l 0x4(%sp),%d0
  217. rtd &0x4
  218. global _dmem_read_word
  219. _dmem_read_word:
  220. mov.l %d0,-(%sp)
  221. mov.l (_060FPSP_TABLE-0x80+_off_drw,%pc),%d0
  222. pea.l (_060FPSP_TABLE-0x80,%pc,%d0)
  223. mov.l 0x4(%sp),%d0
  224. rtd &0x4
  225. global _dmem_read_long
  226. _dmem_read_long:
  227. mov.l %d0,-(%sp)
  228. mov.l (_060FPSP_TABLE-0x80+_off_drl,%pc),%d0
  229. pea.l (_060FPSP_TABLE-0x80,%pc,%d0)
  230. mov.l 0x4(%sp),%d0
  231. rtd &0x4
  232. global _dmem_write_byte
  233. _dmem_write_byte:
  234. mov.l %d0,-(%sp)
  235. mov.l (_060FPSP_TABLE-0x80+_off_dwb,%pc),%d0
  236. pea.l (_060FPSP_TABLE-0x80,%pc,%d0)
  237. mov.l 0x4(%sp),%d0
  238. rtd &0x4
  239. global _dmem_write_word
  240. _dmem_write_word:
  241. mov.l %d0,-(%sp)
  242. mov.l (_060FPSP_TABLE-0x80+_off_dww,%pc),%d0
  243. pea.l (_060FPSP_TABLE-0x80,%pc,%d0)
  244. mov.l 0x4(%sp),%d0
  245. rtd &0x4
  246. global _dmem_write_long
  247. _dmem_write_long:
  248. mov.l %d0,-(%sp)
  249. mov.l (_060FPSP_TABLE-0x80+_off_dwl,%pc),%d0
  250. pea.l (_060FPSP_TABLE-0x80,%pc,%d0)
  251. mov.l 0x4(%sp),%d0
  252. rtd &0x4
  253. #
  254. # This file contains a set of define statements for constants
  255. # in order to promote readability within the corecode itself.
  256. #
  257. set LOCAL_SIZE, 192 # stack frame size(bytes)
  258. set LV, -LOCAL_SIZE # stack offset
  259. set EXC_SR, 0x4 # stack status register
  260. set EXC_PC, 0x6 # stack pc
  261. set EXC_VOFF, 0xa # stacked vector offset
  262. set EXC_EA, 0xc # stacked <ea>
  263. set EXC_FP, 0x0 # frame pointer
  264. set EXC_AREGS, -68 # offset of all address regs
  265. set EXC_DREGS, -100 # offset of all data regs
  266. set EXC_FPREGS, -36 # offset of all fp regs
  267. set EXC_A7, EXC_AREGS+(7*4) # offset of saved a7
  268. set OLD_A7, EXC_AREGS+(6*4) # extra copy of saved a7
  269. set EXC_A6, EXC_AREGS+(6*4) # offset of saved a6
  270. set EXC_A5, EXC_AREGS+(5*4)
  271. set EXC_A4, EXC_AREGS+(4*4)
  272. set EXC_A3, EXC_AREGS+(3*4)
  273. set EXC_A2, EXC_AREGS+(2*4)
  274. set EXC_A1, EXC_AREGS+(1*4)
  275. set EXC_A0, EXC_AREGS+(0*4)
  276. set EXC_D7, EXC_DREGS+(7*4)
  277. set EXC_D6, EXC_DREGS+(6*4)
  278. set EXC_D5, EXC_DREGS+(5*4)
  279. set EXC_D4, EXC_DREGS+(4*4)
  280. set EXC_D3, EXC_DREGS+(3*4)
  281. set EXC_D2, EXC_DREGS+(2*4)
  282. set EXC_D1, EXC_DREGS+(1*4)
  283. set EXC_D0, EXC_DREGS+(0*4)
  284. set EXC_FP0, EXC_FPREGS+(0*12) # offset of saved fp0
  285. set EXC_FP1, EXC_FPREGS+(1*12) # offset of saved fp1
  286. set EXC_FP2, EXC_FPREGS+(2*12) # offset of saved fp2 (not used)
  287. set FP_SCR1, LV+80 # fp scratch 1
  288. set FP_SCR1_EX, FP_SCR1+0
  289. set FP_SCR1_SGN, FP_SCR1+2
  290. set FP_SCR1_HI, FP_SCR1+4
  291. set FP_SCR1_LO, FP_SCR1+8
  292. set FP_SCR0, LV+68 # fp scratch 0
  293. set FP_SCR0_EX, FP_SCR0+0
  294. set FP_SCR0_SGN, FP_SCR0+2
  295. set FP_SCR0_HI, FP_SCR0+4
  296. set FP_SCR0_LO, FP_SCR0+8
  297. set FP_DST, LV+56 # fp destination operand
  298. set FP_DST_EX, FP_DST+0
  299. set FP_DST_SGN, FP_DST+2
  300. set FP_DST_HI, FP_DST+4
  301. set FP_DST_LO, FP_DST+8
  302. set FP_SRC, LV+44 # fp source operand
  303. set FP_SRC_EX, FP_SRC+0
  304. set FP_SRC_SGN, FP_SRC+2
  305. set FP_SRC_HI, FP_SRC+4
  306. set FP_SRC_LO, FP_SRC+8
  307. set USER_FPIAR, LV+40 # FP instr address register
  308. set USER_FPSR, LV+36 # FP status register
  309. set FPSR_CC, USER_FPSR+0 # FPSR condition codes
  310. set FPSR_QBYTE, USER_FPSR+1 # FPSR qoutient byte
  311. set FPSR_EXCEPT, USER_FPSR+2 # FPSR exception status byte
  312. set FPSR_AEXCEPT, USER_FPSR+3 # FPSR accrued exception byte
  313. set USER_FPCR, LV+32 # FP control register
  314. set FPCR_ENABLE, USER_FPCR+2 # FPCR exception enable
  315. set FPCR_MODE, USER_FPCR+3 # FPCR rounding mode control
  316. set L_SCR3, LV+28 # integer scratch 3
  317. set L_SCR2, LV+24 # integer scratch 2
  318. set L_SCR1, LV+20 # integer scratch 1
  319. set STORE_FLG, LV+19 # flag: operand store (ie. not fcmp/ftst)
  320. set EXC_TEMP2, LV+24 # temporary space
  321. set EXC_TEMP, LV+16 # temporary space
  322. set DTAG, LV+15 # destination operand type
  323. set STAG, LV+14 # source operand type
  324. set SPCOND_FLG, LV+10 # flag: special case (see below)
  325. set EXC_CC, LV+8 # saved condition codes
  326. set EXC_EXTWPTR, LV+4 # saved current PC (active)
  327. set EXC_EXTWORD, LV+2 # saved extension word
  328. set EXC_CMDREG, LV+2 # saved extension word
  329. set EXC_OPWORD, LV+0 # saved operation word
  330. ################################
  331. # Helpful macros
  332. set FTEMP, 0 # offsets within an
  333. set FTEMP_EX, 0 # extended precision
  334. set FTEMP_SGN, 2 # value saved in memory.
  335. set FTEMP_HI, 4
  336. set FTEMP_LO, 8
  337. set FTEMP_GRS, 12
  338. set LOCAL, 0 # offsets within an
  339. set LOCAL_EX, 0 # extended precision
  340. set LOCAL_SGN, 2 # value saved in memory.
  341. set LOCAL_HI, 4
  342. set LOCAL_LO, 8
  343. set LOCAL_GRS, 12
  344. set DST, 0 # offsets within an
  345. set DST_EX, 0 # extended precision
  346. set DST_HI, 4 # value saved in memory.
  347. set DST_LO, 8
  348. set SRC, 0 # offsets within an
  349. set SRC_EX, 0 # extended precision
  350. set SRC_HI, 4 # value saved in memory.
  351. set SRC_LO, 8
  352. set SGL_LO, 0x3f81 # min sgl prec exponent
  353. set SGL_HI, 0x407e # max sgl prec exponent
  354. set DBL_LO, 0x3c01 # min dbl prec exponent
  355. set DBL_HI, 0x43fe # max dbl prec exponent
  356. set EXT_LO, 0x0 # min ext prec exponent
  357. set EXT_HI, 0x7ffe # max ext prec exponent
  358. set EXT_BIAS, 0x3fff # extended precision bias
  359. set SGL_BIAS, 0x007f # single precision bias
  360. set DBL_BIAS, 0x03ff # double precision bias
  361. set NORM, 0x00 # operand type for STAG/DTAG
  362. set ZERO, 0x01 # operand type for STAG/DTAG
  363. set INF, 0x02 # operand type for STAG/DTAG
  364. set QNAN, 0x03 # operand type for STAG/DTAG
  365. set DENORM, 0x04 # operand type for STAG/DTAG
  366. set SNAN, 0x05 # operand type for STAG/DTAG
  367. set UNNORM, 0x06 # operand type for STAG/DTAG
  368. ##################
  369. # FPSR/FPCR bits #
  370. ##################
  371. set neg_bit, 0x3 # negative result
  372. set z_bit, 0x2 # zero result
  373. set inf_bit, 0x1 # infinite result
  374. set nan_bit, 0x0 # NAN result
  375. set q_sn_bit, 0x7 # sign bit of quotient byte
  376. set bsun_bit, 7 # branch on unordered
  377. set snan_bit, 6 # signalling NAN
  378. set operr_bit, 5 # operand error
  379. set ovfl_bit, 4 # overflow
  380. set unfl_bit, 3 # underflow
  381. set dz_bit, 2 # divide by zero
  382. set inex2_bit, 1 # inexact result 2
  383. set inex1_bit, 0 # inexact result 1
  384. set aiop_bit, 7 # accrued inexact operation bit
  385. set aovfl_bit, 6 # accrued overflow bit
  386. set aunfl_bit, 5 # accrued underflow bit
  387. set adz_bit, 4 # accrued dz bit
  388. set ainex_bit, 3 # accrued inexact bit
  389. #############################
  390. # FPSR individual bit masks #
  391. #############################
  392. set neg_mask, 0x08000000 # negative bit mask (lw)
  393. set inf_mask, 0x02000000 # infinity bit mask (lw)
  394. set z_mask, 0x04000000 # zero bit mask (lw)
  395. set nan_mask, 0x01000000 # nan bit mask (lw)
  396. set neg_bmask, 0x08 # negative bit mask (byte)
  397. set inf_bmask, 0x02 # infinity bit mask (byte)
  398. set z_bmask, 0x04 # zero bit mask (byte)
  399. set nan_bmask, 0x01 # nan bit mask (byte)
  400. set bsun_mask, 0x00008000 # bsun exception mask
  401. set snan_mask, 0x00004000 # snan exception mask
  402. set operr_mask, 0x00002000 # operr exception mask
  403. set ovfl_mask, 0x00001000 # overflow exception mask
  404. set unfl_mask, 0x00000800 # underflow exception mask
  405. set dz_mask, 0x00000400 # dz exception mask
  406. set inex2_mask, 0x00000200 # inex2 exception mask
  407. set inex1_mask, 0x00000100 # inex1 exception mask
  408. set aiop_mask, 0x00000080 # accrued illegal operation
  409. set aovfl_mask, 0x00000040 # accrued overflow
  410. set aunfl_mask, 0x00000020 # accrued underflow
  411. set adz_mask, 0x00000010 # accrued divide by zero
  412. set ainex_mask, 0x00000008 # accrued inexact
  413. ######################################
  414. # FPSR combinations used in the FPSP #
  415. ######################################
  416. set dzinf_mask, inf_mask+dz_mask+adz_mask
  417. set opnan_mask, nan_mask+operr_mask+aiop_mask
  418. set nzi_mask, 0x01ffffff #clears N, Z, and I
  419. set unfinx_mask, unfl_mask+inex2_mask+aunfl_mask+ainex_mask
  420. set unf2inx_mask, unfl_mask+inex2_mask+ainex_mask
  421. set ovfinx_mask, ovfl_mask+inex2_mask+aovfl_mask+ainex_mask
  422. set inx1a_mask, inex1_mask+ainex_mask
  423. set inx2a_mask, inex2_mask+ainex_mask
  424. set snaniop_mask, nan_mask+snan_mask+aiop_mask
  425. set snaniop2_mask, snan_mask+aiop_mask
  426. set naniop_mask, nan_mask+aiop_mask
  427. set neginf_mask, neg_mask+inf_mask
  428. set infaiop_mask, inf_mask+aiop_mask
  429. set negz_mask, neg_mask+z_mask
  430. set opaop_mask, operr_mask+aiop_mask
  431. set unfl_inx_mask, unfl_mask+aunfl_mask+ainex_mask
  432. set ovfl_inx_mask, ovfl_mask+aovfl_mask+ainex_mask
  433. #########
  434. # misc. #
  435. #########
  436. set rnd_stky_bit, 29 # stky bit pos in longword
  437. set sign_bit, 0x7 # sign bit
  438. set signan_bit, 0x6 # signalling nan bit
  439. set sgl_thresh, 0x3f81 # minimum sgl exponent
  440. set dbl_thresh, 0x3c01 # minimum dbl exponent
  441. set x_mode, 0x0 # extended precision
  442. set s_mode, 0x4 # single precision
  443. set d_mode, 0x8 # double precision
  444. set rn_mode, 0x0 # round-to-nearest
  445. set rz_mode, 0x1 # round-to-zero
  446. set rm_mode, 0x2 # round-tp-minus-infinity
  447. set rp_mode, 0x3 # round-to-plus-infinity
  448. set mantissalen, 64 # length of mantissa in bits
  449. set BYTE, 1 # len(byte) == 1 byte
  450. set WORD, 2 # len(word) == 2 bytes
  451. set LONG, 4 # len(longword) == 2 bytes
  452. set BSUN_VEC, 0xc0 # bsun vector offset
  453. set INEX_VEC, 0xc4 # inexact vector offset
  454. set DZ_VEC, 0xc8 # dz vector offset
  455. set UNFL_VEC, 0xcc # unfl vector offset
  456. set OPERR_VEC, 0xd0 # operr vector offset
  457. set OVFL_VEC, 0xd4 # ovfl vector offset
  458. set SNAN_VEC, 0xd8 # snan vector offset
  459. ###########################
  460. # SPecial CONDition FLaGs #
  461. ###########################
  462. set ftrapcc_flg, 0x01 # flag bit: ftrapcc exception
  463. set fbsun_flg, 0x02 # flag bit: bsun exception
  464. set mia7_flg, 0x04 # flag bit: (a7)+ <ea>
  465. set mda7_flg, 0x08 # flag bit: -(a7) <ea>
  466. set fmovm_flg, 0x40 # flag bit: fmovm instruction
  467. set immed_flg, 0x80 # flag bit: &<data> <ea>
  468. set ftrapcc_bit, 0x0
  469. set fbsun_bit, 0x1
  470. set mia7_bit, 0x2
  471. set mda7_bit, 0x3
  472. set immed_bit, 0x7
  473. ##################################
  474. # TRANSCENDENTAL "LAST-OP" FLAGS #
  475. ##################################
  476. set FMUL_OP, 0x0 # fmul instr performed last
  477. set FDIV_OP, 0x1 # fdiv performed last
  478. set FADD_OP, 0x2 # fadd performed last
  479. set FMOV_OP, 0x3 # fmov performed last
  480. #############
  481. # CONSTANTS #
  482. #############
  483. T1: long 0x40C62D38,0xD3D64634 # 16381 LOG2 LEAD
  484. T2: long 0x3D6F90AE,0xB1E75CC7 # 16381 LOG2 TRAIL
  485. PI: long 0x40000000,0xC90FDAA2,0x2168C235,0x00000000
  486. PIBY2: long 0x3FFF0000,0xC90FDAA2,0x2168C235,0x00000000
  487. TWOBYPI:
  488. long 0x3FE45F30,0x6DC9C883
  489. #########################################################################
  490. # XDEF **************************************************************** #
  491. # _fpsp_ovfl(): 060FPSP entry point for FP Overflow exception. #
  492. # #
  493. # This handler should be the first code executed upon taking the #
  494. # FP Overflow exception in an operating system. #
  495. # #
  496. # XREF **************************************************************** #
  497. # _imem_read_long() - read instruction longword #
  498. # fix_skewed_ops() - adjust src operand in fsave frame #
  499. # set_tag_x() - determine optype of src/dst operands #
  500. # store_fpreg() - store opclass 0 or 2 result to FP regfile #
  501. # unnorm_fix() - change UNNORM operands to NORM or ZERO #
  502. # load_fpn2() - load dst operand from FP regfile #
  503. # fout() - emulate an opclass 3 instruction #
  504. # tbl_unsupp - add of table of emulation routines for opclass 0,2 #
  505. # _fpsp_done() - "callout" for 060FPSP exit (all work done!) #
  506. # _real_ovfl() - "callout" for Overflow exception enabled code #
  507. # _real_inex() - "callout" for Inexact exception enabled code #
  508. # _real_trace() - "callout" for Trace exception code #
  509. # #
  510. # INPUT *************************************************************** #
  511. # - The system stack contains the FP Ovfl exception stack frame #
  512. # - The fsave frame contains the source operand #
  513. # #
  514. # OUTPUT ************************************************************** #
  515. # Overflow Exception enabled: #
  516. # - The system stack is unchanged #
  517. # - The fsave frame contains the adjusted src op for opclass 0,2 #
  518. # Overflow Exception disabled: #
  519. # - The system stack is unchanged #
  520. # - The "exception present" flag in the fsave frame is cleared #
  521. # #
  522. # ALGORITHM *********************************************************** #
  523. # On the 060, if an FP overflow is present as the result of any #
  524. # instruction, the 060 will take an overflow exception whether the #
  525. # exception is enabled or disabled in the FPCR. For the disabled case, #
  526. # This handler emulates the instruction to determine what the correct #
  527. # default result should be for the operation. This default result is #
  528. # then stored in either the FP regfile, data regfile, or memory. #
  529. # Finally, the handler exits through the "callout" _fpsp_done() #
  530. # denoting that no exceptional conditions exist within the machine. #
  531. # If the exception is enabled, then this handler must create the #
  532. # exceptional operand and place it in the fsave state frame, and store #
  533. # the default result (only if the instruction is opclass 3). For #
  534. # exceptions enabled, this handler must exit through the "callout" #
  535. # _real_ovfl() so that the operating system enabled overflow handler #
  536. # can handle this case. #
  537. # Two other conditions exist. First, if overflow was disabled #
  538. # but the inexact exception was enabled, this handler must exit #
  539. # through the "callout" _real_inex() regardless of whether the result #
  540. # was inexact. #
  541. # Also, in the case of an opclass three instruction where #
  542. # overflow was disabled and the trace exception was enabled, this #
  543. # handler must exit through the "callout" _real_trace(). #
  544. # #
  545. #########################################################################
  546. global _fpsp_ovfl
  547. _fpsp_ovfl:
  548. #$# sub.l &24,%sp # make room for src/dst
  549. link.w %a6,&-LOCAL_SIZE # init stack frame
  550. fsave FP_SRC(%a6) # grab the "busy" frame
  551. movm.l &0x0303,EXC_DREGS(%a6) # save d0-d1/a0-a1
  552. fmovm.l %fpcr,%fpsr,%fpiar,USER_FPCR(%a6) # save ctrl regs
  553. fmovm.x &0xc0,EXC_FPREGS(%a6) # save fp0-fp1 on stack
  554. # the FPIAR holds the "current PC" of the faulting instruction
  555. mov.l USER_FPIAR(%a6),EXC_EXTWPTR(%a6)
  556. mov.l EXC_EXTWPTR(%a6),%a0 # fetch instruction addr
  557. addq.l &0x4,EXC_EXTWPTR(%a6) # incr instruction ptr
  558. bsr.l _imem_read_long # fetch the instruction words
  559. mov.l %d0,EXC_OPWORD(%a6)
  560. ##############################################################################
  561. btst &0x5,EXC_CMDREG(%a6) # is instr an fmove out?
  562. bne.w fovfl_out
  563. lea FP_SRC(%a6),%a0 # pass: ptr to src op
  564. bsr.l fix_skewed_ops # fix src op
  565. # since, I believe, only NORMs and DENORMs can come through here,
  566. # maybe we can avoid the subroutine call.
  567. lea FP_SRC(%a6),%a0 # pass: ptr to src op
  568. bsr.l set_tag_x # tag the operand type
  569. mov.b %d0,STAG(%a6) # maybe NORM,DENORM
  570. # bit five of the fp extension word separates the monadic and dyadic operations
  571. # that can pass through fpsp_ovfl(). remember that fcmp, ftst, and fsincos
  572. # will never take this exception.
  573. btst &0x5,1+EXC_CMDREG(%a6) # is operation monadic or dyadic?
  574. beq.b fovfl_extract # monadic
  575. bfextu EXC_CMDREG(%a6){&6:&3},%d0 # dyadic; load dst reg
  576. bsr.l load_fpn2 # load dst into FP_DST
  577. lea FP_DST(%a6),%a0 # pass: ptr to dst op
  578. bsr.l set_tag_x # tag the operand type
  579. cmpi.b %d0,&UNNORM # is operand an UNNORM?
  580. bne.b fovfl_op2_done # no
  581. bsr.l unnorm_fix # yes; convert to NORM,DENORM,or ZERO
  582. fovfl_op2_done:
  583. mov.b %d0,DTAG(%a6) # save dst optype tag
  584. fovfl_extract:
  585. #$# mov.l FP_SRC_EX(%a6),TRAP_SRCOP_EX(%a6)
  586. #$# mov.l FP_SRC_HI(%a6),TRAP_SRCOP_HI(%a6)
  587. #$# mov.l FP_SRC_LO(%a6),TRAP_SRCOP_LO(%a6)
  588. #$# mov.l FP_DST_EX(%a6),TRAP_DSTOP_EX(%a6)
  589. #$# mov.l FP_DST_HI(%a6),TRAP_DSTOP_HI(%a6)
  590. #$# mov.l FP_DST_LO(%a6),TRAP_DSTOP_LO(%a6)
  591. clr.l %d0
  592. mov.b FPCR_MODE(%a6),%d0 # pass rnd prec/mode
  593. mov.b 1+EXC_CMDREG(%a6),%d1
  594. andi.w &0x007f,%d1 # extract extension
  595. andi.l &0x00ff01ff,USER_FPSR(%a6) # zero all but accured field
  596. fmov.l &0x0,%fpcr # zero current control regs
  597. fmov.l &0x0,%fpsr
  598. lea FP_SRC(%a6),%a0
  599. lea FP_DST(%a6),%a1
  600. # maybe we can make these entry points ONLY the OVFL entry points of each routine.
  601. mov.l (tbl_unsupp.l,%pc,%d1.w*4),%d1 # fetch routine addr
  602. jsr (tbl_unsupp.l,%pc,%d1.l*1)
  603. # the operation has been emulated. the result is in fp0.
  604. # the EXOP, if an exception occurred, is in fp1.
  605. # we must save the default result regardless of whether
  606. # traps are enabled or disabled.
  607. bfextu EXC_CMDREG(%a6){&6:&3},%d0
  608. bsr.l store_fpreg
  609. # the exceptional possibilities we have left ourselves with are ONLY overflow
  610. # and inexact. and, the inexact is such that overflow occurred and was disabled
  611. # but inexact was enabled.
  612. btst &ovfl_bit,FPCR_ENABLE(%a6)
  613. bne.b fovfl_ovfl_on
  614. btst &inex2_bit,FPCR_ENABLE(%a6)
  615. bne.b fovfl_inex_on
  616. fmovm.x EXC_FPREGS(%a6),&0xc0 # restore fp0-fp1
  617. fmovm.l USER_FPCR(%a6),%fpcr,%fpsr,%fpiar # restore ctrl regs
  618. movm.l EXC_DREGS(%a6),&0x0303 # restore d0-d1/a0-a1
  619. unlk %a6
  620. #$# add.l &24,%sp
  621. bra.l _fpsp_done
  622. # overflow is enabled AND overflow, of course, occurred. so, we have the EXOP
  623. # in fp1. now, simply jump to _real_ovfl()!
  624. fovfl_ovfl_on:
  625. fmovm.x &0x40,FP_SRC(%a6) # save EXOP (fp1) to stack
  626. mov.w &0xe005,2+FP_SRC(%a6) # save exc status
  627. fmovm.x EXC_FPREGS(%a6),&0xc0 # restore fp0-fp1
  628. fmovm.l USER_FPCR(%a6),%fpcr,%fpsr,%fpiar # restore ctrl regs
  629. movm.l EXC_DREGS(%a6),&0x0303 # restore d0-d1/a0-a1
  630. frestore FP_SRC(%a6) # do this after fmovm,other f<op>s!
  631. unlk %a6
  632. bra.l _real_ovfl
  633. # overflow occurred but is disabled. meanwhile, inexact is enabled. therefore,
  634. # we must jump to real_inex().
  635. fovfl_inex_on:
  636. fmovm.x &0x40,FP_SRC(%a6) # save EXOP (fp1) to stack
  637. mov.b &0xc4,1+EXC_VOFF(%a6) # vector offset = 0xc4
  638. mov.w &0xe001,2+FP_SRC(%a6) # save exc status
  639. fmovm.x EXC_FPREGS(%a6),&0xc0 # restore fp0-fp1
  640. fmovm.l USER_FPCR(%a6),%fpcr,%fpsr,%fpiar # restore ctrl regs
  641. movm.l EXC_DREGS(%a6),&0x0303 # restore d0-d1/a0-a1
  642. frestore FP_SRC(%a6) # do this after fmovm,other f<op>s!
  643. unlk %a6
  644. bra.l _real_inex
  645. ########################################################################
  646. fovfl_out:
  647. #$# mov.l FP_SRC_EX(%a6),TRAP_SRCOP_EX(%a6)
  648. #$# mov.l FP_SRC_HI(%a6),TRAP_SRCOP_HI(%a6)
  649. #$# mov.l FP_SRC_LO(%a6),TRAP_SRCOP_LO(%a6)
  650. # the src operand is definitely a NORM(!), so tag it as such
  651. mov.b &NORM,STAG(%a6) # set src optype tag
  652. clr.l %d0
  653. mov.b FPCR_MODE(%a6),%d0 # pass rnd prec/mode
  654. and.l &0xffff00ff,USER_FPSR(%a6) # zero all but accured field
  655. fmov.l &0x0,%fpcr # zero current control regs
  656. fmov.l &0x0,%fpsr
  657. lea FP_SRC(%a6),%a0 # pass ptr to src operand
  658. bsr.l fout
  659. btst &ovfl_bit,FPCR_ENABLE(%a6)
  660. bne.w fovfl_ovfl_on
  661. btst &inex2_bit,FPCR_ENABLE(%a6)
  662. bne.w fovfl_inex_on
  663. fmovm.x EXC_FPREGS(%a6),&0xc0 # restore fp0-fp1
  664. fmovm.l USER_FPCR(%a6),%fpcr,%fpsr,%fpiar # restore ctrl regs
  665. movm.l EXC_DREGS(%a6),&0x0303 # restore d0-d1/a0-a1
  666. unlk %a6
  667. #$# add.l &24,%sp
  668. btst &0x7,(%sp) # is trace on?
  669. beq.l _fpsp_done # no
  670. fmov.l %fpiar,0x8(%sp) # "Current PC" is in FPIAR
  671. mov.w &0x2024,0x6(%sp) # stk fmt = 0x2; voff = 0x024
  672. bra.l _real_trace
  673. #########################################################################
  674. # XDEF **************************************************************** #
  675. # _fpsp_unfl(): 060FPSP entry point for FP Underflow exception. #
  676. # #
  677. # This handler should be the first code executed upon taking the #
  678. # FP Underflow exception in an operating system. #
  679. # #
  680. # XREF **************************************************************** #
  681. # _imem_read_long() - read instruction longword #
  682. # fix_skewed_ops() - adjust src operand in fsave frame #
  683. # set_tag_x() - determine optype of src/dst operands #
  684. # store_fpreg() - store opclass 0 or 2 result to FP regfile #
  685. # unnorm_fix() - change UNNORM operands to NORM or ZERO #
  686. # load_fpn2() - load dst operand from FP regfile #
  687. # fout() - emulate an opclass 3 instruction #
  688. # tbl_unsupp - add of table of emulation routines for opclass 0,2 #
  689. # _fpsp_done() - "callout" for 060FPSP exit (all work done!) #
  690. # _real_unfl() - "callout" for Underflow exception enabled code #
  691. # _real_inex() - "callout" for Inexact exception enabled code #
  692. # _real_trace() - "callout" for Trace exception code #
  693. # #
  694. # INPUT *************************************************************** #
  695. # - The system stack contains the FP Unfl exception stack frame #
  696. # - The fsave frame contains the source operand #
  697. # #
  698. # OUTPUT ************************************************************** #
  699. # Underflow Exception enabled: #
  700. # - The system stack is unchanged #
  701. # - The fsave frame contains the adjusted src op for opclass 0,2 #
  702. # Underflow Exception disabled: #
  703. # - The system stack is unchanged #
  704. # - The "exception present" flag in the fsave frame is cleared #
  705. # #
  706. # ALGORITHM *********************************************************** #
  707. # On the 060, if an FP underflow is present as the result of any #
  708. # instruction, the 060 will take an underflow exception whether the #
  709. # exception is enabled or disabled in the FPCR. For the disabled case, #
  710. # This handler emulates the instruction to determine what the correct #
  711. # default result should be for the operation. This default result is #
  712. # then stored in either the FP regfile, data regfile, or memory. #
  713. # Finally, the handler exits through the "callout" _fpsp_done() #
  714. # denoting that no exceptional conditions exist within the machine. #
  715. # If the exception is enabled, then this handler must create the #
  716. # exceptional operand and place it in the fsave state frame, and store #
  717. # the default result (only if the instruction is opclass 3). For #
  718. # exceptions enabled, this handler must exit through the "callout" #
  719. # _real_unfl() so that the operating system enabled underflow handler #
  720. # can handle this case. #
  721. # Two other conditions exist. First, if underflow was disabled #
  722. # but the inexact exception was enabled and the result was inexact, #
  723. # this handler must exit through the "callout" _real_inex(). #
  724. # #
  725. # Also, in the case of an opclass three instruction where #
  726. # underflow was disabled and the trace exception was enabled, this #
  727. # handler must exit through the "callout" _real_trace(). #
  728. # #
  729. #########################################################################
  730. global _fpsp_unfl
  731. _fpsp_unfl:
  732. #$# sub.l &24,%sp # make room for src/dst
  733. link.w %a6,&-LOCAL_SIZE # init stack frame
  734. fsave FP_SRC(%a6) # grab the "busy" frame
  735. movm.l &0x0303,EXC_DREGS(%a6) # save d0-d1/a0-a1
  736. fmovm.l %fpcr,%fpsr,%fpiar,USER_FPCR(%a6) # save ctrl regs
  737. fmovm.x &0xc0,EXC_FPREGS(%a6) # save fp0-fp1 on stack
  738. # the FPIAR holds the "current PC" of the faulting instruction
  739. mov.l USER_FPIAR(%a6),EXC_EXTWPTR(%a6)
  740. mov.l EXC_EXTWPTR(%a6),%a0 # fetch instruction addr
  741. addq.l &0x4,EXC_EXTWPTR(%a6) # incr instruction ptr
  742. bsr.l _imem_read_long # fetch the instruction words
  743. mov.l %d0,EXC_OPWORD(%a6)
  744. ##############################################################################
  745. btst &0x5,EXC_CMDREG(%a6) # is instr an fmove out?
  746. bne.w funfl_out
  747. lea FP_SRC(%a6),%a0 # pass: ptr to src op
  748. bsr.l fix_skewed_ops # fix src op
  749. lea FP_SRC(%a6),%a0 # pass: ptr to src op
  750. bsr.l set_tag_x # tag the operand type
  751. mov.b %d0,STAG(%a6) # maybe NORM,DENORM
  752. # bit five of the fp ext word separates the monadic and dyadic operations
  753. # that can pass through fpsp_unfl(). remember that fcmp, and ftst
  754. # will never take this exception.
  755. btst &0x5,1+EXC_CMDREG(%a6) # is op monadic or dyadic?
  756. beq.b funfl_extract # monadic
  757. # now, what's left that's not dyadic is fsincos. we can distinguish it
  758. # from all dyadics by the '0110xxx pattern
  759. btst &0x4,1+EXC_CMDREG(%a6) # is op an fsincos?
  760. bne.b funfl_extract # yes
  761. bfextu EXC_CMDREG(%a6){&6:&3},%d0 # dyadic; load dst reg
  762. bsr.l load_fpn2 # load dst into FP_DST
  763. lea FP_DST(%a6),%a0 # pass: ptr to dst op
  764. bsr.l set_tag_x # tag the operand type
  765. cmpi.b %d0,&UNNORM # is operand an UNNORM?
  766. bne.b funfl_op2_done # no
  767. bsr.l unnorm_fix # yes; convert to NORM,DENORM,or ZERO
  768. funfl_op2_done:
  769. mov.b %d0,DTAG(%a6) # save dst optype tag
  770. funfl_extract:
  771. #$# mov.l FP_SRC_EX(%a6),TRAP_SRCOP_EX(%a6)
  772. #$# mov.l FP_SRC_HI(%a6),TRAP_SRCOP_HI(%a6)
  773. #$# mov.l FP_SRC_LO(%a6),TRAP_SRCOP_LO(%a6)
  774. #$# mov.l FP_DST_EX(%a6),TRAP_DSTOP_EX(%a6)
  775. #$# mov.l FP_DST_HI(%a6),TRAP_DSTOP_HI(%a6)
  776. #$# mov.l FP_DST_LO(%a6),TRAP_DSTOP_LO(%a6)
  777. clr.l %d0
  778. mov.b FPCR_MODE(%a6),%d0 # pass rnd prec/mode
  779. mov.b 1+EXC_CMDREG(%a6),%d1
  780. andi.w &0x007f,%d1 # extract extension
  781. andi.l &0x00ff01ff,USER_FPSR(%a6)
  782. fmov.l &0x0,%fpcr # zero current control regs
  783. fmov.l &0x0,%fpsr
  784. lea FP_SRC(%a6),%a0
  785. lea FP_DST(%a6),%a1
  786. # maybe we can make these entry points ONLY the OVFL entry points of each routine.
  787. mov.l (tbl_unsupp.l,%pc,%d1.w*4),%d1 # fetch routine addr
  788. jsr (tbl_unsupp.l,%pc,%d1.l*1)
  789. bfextu EXC_CMDREG(%a6){&6:&3},%d0
  790. bsr.l store_fpreg
  791. # The `060 FPU multiplier hardware is such that if the result of a
  792. # multiply operation is the smallest possible normalized number
  793. # (0x00000000_80000000_00000000), then the machine will take an
  794. # underflow exception. Since this is incorrect, we need to check
  795. # if our emulation, after re-doing the operation, decided that
  796. # no underflow was called for. We do these checks only in
  797. # funfl_{unfl,inex}_on() because w/ both exceptions disabled, this
  798. # special case will simply exit gracefully with the correct result.
  799. # the exceptional possibilities we have left ourselves with are ONLY overflow
  800. # and inexact. and, the inexact is such that overflow occurred and was disabled
  801. # but inexact was enabled.
  802. btst &unfl_bit,FPCR_ENABLE(%a6)
  803. bne.b funfl_unfl_on
  804. funfl_chkinex:
  805. btst &inex2_bit,FPCR_ENABLE(%a6)
  806. bne.b funfl_inex_on
  807. funfl_exit:
  808. fmovm.x EXC_FPREGS(%a6),&0xc0 # restore fp0-fp1
  809. fmovm.l USER_FPCR(%a6),%fpcr,%fpsr,%fpiar # restore ctrl regs
  810. movm.l EXC_DREGS(%a6),&0x0303 # restore d0-d1/a0-a1
  811. unlk %a6
  812. #$# add.l &24,%sp
  813. bra.l _fpsp_done
  814. # overflow is enabled AND overflow, of course, occurred. so, we have the EXOP
  815. # in fp1 (don't forget to save fp0). what to do now?
  816. # well, we simply have to get to go to _real_unfl()!
  817. funfl_unfl_on:
  818. # The `060 FPU multiplier hardware is such that if the result of a
  819. # multiply operation is the smallest possible normalized number
  820. # (0x00000000_80000000_00000000), then the machine will take an
  821. # underflow exception. Since this is incorrect, we check here to see
  822. # if our emulation, after re-doing the operation, decided that
  823. # no underflow was called for.
  824. btst &unfl_bit,FPSR_EXCEPT(%a6)
  825. beq.w funfl_chkinex
  826. funfl_unfl_on2:
  827. fmovm.x &0x40,FP_SRC(%a6) # save EXOP (fp1) to stack
  828. mov.w &0xe003,2+FP_SRC(%a6) # save exc status
  829. fmovm.x EXC_FPREGS(%a6),&0xc0 # restore fp0-fp1
  830. fmovm.l USER_FPCR(%a6),%fpcr,%fpsr,%fpiar # restore ctrl regs
  831. movm.l EXC_DREGS(%a6),&0x0303 # restore d0-d1/a0-a1
  832. frestore FP_SRC(%a6) # do this after fmovm,other f<op>s!
  833. unlk %a6
  834. bra.l _real_unfl
  835. # undeflow occurred but is disabled. meanwhile, inexact is enabled. therefore,
  836. # we must jump to real_inex().
  837. funfl_inex_on:
  838. # The `060 FPU multiplier hardware is such that if the result of a
  839. # multiply operation is the smallest possible normalized number
  840. # (0x00000000_80000000_00000000), then the machine will take an
  841. # underflow exception.
  842. # But, whether bogus or not, if inexact is enabled AND it occurred,
  843. # then we have to branch to real_inex.
  844. btst &inex2_bit,FPSR_EXCEPT(%a6)
  845. beq.w funfl_exit
  846. funfl_inex_on2:
  847. fmovm.x &0x40,FP_SRC(%a6) # save EXOP to stack
  848. mov.b &0xc4,1+EXC_VOFF(%a6) # vector offset = 0xc4
  849. mov.w &0xe001,2+FP_SRC(%a6) # save exc status
  850. fmovm.x EXC_FPREGS(%a6),&0xc0 # restore fp0-fp1
  851. fmovm.l USER_FPCR(%a6),%fpcr,%fpsr,%fpiar # restore ctrl regs
  852. movm.l EXC_DREGS(%a6),&0x0303 # restore d0-d1/a0-a1
  853. frestore FP_SRC(%a6) # do this after fmovm,other f<op>s!
  854. unlk %a6
  855. bra.l _real_inex
  856. #######################################################################
  857. funfl_out:
  858. #$# mov.l FP_SRC_EX(%a6),TRAP_SRCOP_EX(%a6)
  859. #$# mov.l FP_SRC_HI(%a6),TRAP_SRCOP_HI(%a6)
  860. #$# mov.l FP_SRC_LO(%a6),TRAP_SRCOP_LO(%a6)
  861. # the src operand is definitely a NORM(!), so tag it as such
  862. mov.b &NORM,STAG(%a6) # set src optype tag
  863. clr.l %d0
  864. mov.b FPCR_MODE(%a6),%d0 # pass rnd prec/mode
  865. and.l &0xffff00ff,USER_FPSR(%a6) # zero all but accured field
  866. fmov.l &0x0,%fpcr # zero current control regs
  867. fmov.l &0x0,%fpsr
  868. lea FP_SRC(%a6),%a0 # pass ptr to src operand
  869. bsr.l fout
  870. btst &unfl_bit,FPCR_ENABLE(%a6)
  871. bne.w funfl_unfl_on2
  872. btst &inex2_bit,FPCR_ENABLE(%a6)
  873. bne.w funfl_inex_on2
  874. fmovm.x EXC_FPREGS(%a6),&0xc0 # restore fp0-fp1
  875. fmovm.l USER_FPCR(%a6),%fpcr,%fpsr,%fpiar # restore ctrl regs
  876. movm.l EXC_DREGS(%a6),&0x0303 # restore d0-d1/a0-a1
  877. unlk %a6
  878. #$# add.l &24,%sp
  879. btst &0x7,(%sp) # is trace on?
  880. beq.l _fpsp_done # no
  881. fmov.l %fpiar,0x8(%sp) # "Current PC" is in FPIAR
  882. mov.w &0x2024,0x6(%sp) # stk fmt = 0x2; voff = 0x024
  883. bra.l _real_trace
  884. #########################################################################
  885. # XDEF **************************************************************** #
  886. # _fpsp_unsupp(): 060FPSP entry point for FP "Unimplemented #
  887. # Data Type" exception. #
  888. # #
  889. # This handler should be the first code executed upon taking the #
  890. # FP Unimplemented Data Type exception in an operating system. #
  891. # #
  892. # XREF **************************************************************** #
  893. # _imem_read_{word,long}() - read instruction word/longword #
  894. # fix_skewed_ops() - adjust src operand in fsave frame #
  895. # set_tag_x() - determine optype of src/dst operands #
  896. # store_fpreg() - store opclass 0 or 2 result to FP regfile #
  897. # unnorm_fix() - change UNNORM operands to NORM or ZERO #
  898. # load_fpn2() - load dst operand from FP regfile #
  899. # load_fpn1() - load src operand from FP regfile #
  900. # fout() - emulate an opclass 3 instruction #
  901. # tbl_unsupp - add of table of emulation routines for opclass 0,2 #
  902. # _real_inex() - "callout" to operating system inexact handler #
  903. # _fpsp_done() - "callout" for exit; work all done #
  904. # _real_trace() - "callout" for Trace enabled exception #
  905. # funimp_skew() - adjust fsave src ops to "incorrect" value #
  906. # _real_snan() - "callout" for SNAN exception #
  907. # _real_operr() - "callout" for OPERR exception #
  908. # _real_ovfl() - "callout" for OVFL exception #
  909. # _real_unfl() - "callout" for UNFL exception #
  910. # get_packed() - fetch packed operand from memory #
  911. # #
  912. # INPUT *************************************************************** #
  913. # - The system stack contains the "Unimp Data Type" stk frame #
  914. # - The fsave frame contains the src op (for UNNORM/DENORM) #
  915. # #
  916. # OUTPUT ************************************************************** #
  917. # If Inexact exception (opclass 3): #
  918. # - The system stack is changed to an Inexact exception stk frame #
  919. # If SNAN exception (opclass 3): #
  920. # - The system stack is changed to an SNAN exception stk frame #
  921. # If OPERR exception (opclass 3): #
  922. # - The system stack is changed to an OPERR exception stk frame #
  923. # If OVFL exception (opclass 3): #
  924. # - The system stack is changed to an OVFL exception stk frame #
  925. # If UNFL exception (opclass 3): #
  926. # - The system stack is changed to an UNFL exception stack frame #
  927. # If Trace exception enabled: #
  928. # - The system stack is changed to a Trace exception stack frame #
  929. # Else: (normal case) #
  930. # - Correct result has been stored as appropriate #
  931. # #
  932. # ALGORITHM *********************************************************** #
  933. # Two main instruction types can enter here: (1) DENORM or UNNORM #
  934. # unimplemented data types. These can be either opclass 0,2 or 3 #
  935. # instructions, and (2) PACKED unimplemented data format instructions #
  936. # also of opclasses 0,2, or 3. #
  937. # For UNNORM/DENORM opclass 0 and 2, the handler fetches the src #
  938. # operand from the fsave state frame and the dst operand (if dyadic) #
  939. # from the FP register file. The instruction is then emulated by #
  940. # choosing an emulation routine from a table of routines indexed by #
  941. # instruction type. Once the instruction has been emulated and result #
  942. # saved, then we check to see if any enabled exceptions resulted from #
  943. # instruction emulation. If none, then we exit through the "callout" #
  944. # _fpsp_done(). If there is an enabled FP exception, then we insert #
  945. # this exception into the FPU in the fsave state frame and then exit #
  946. # through _fpsp_done(). #
  947. # PACKED opclass 0 and 2 is similar in how the instruction is #
  948. # emulated and exceptions handled. The differences occur in how the #
  949. # handler loads the packed op (by calling get_packed() routine) and #
  950. # by the fact that a Trace exception could be pending for PACKED ops. #
  951. # If a Trace exception is pending, then the current exception stack #
  952. # frame is changed to a Trace exception stack frame and an exit is #
  953. # made through _real_trace(). #
  954. # For UNNORM/DENORM opclass 3, the actual move out to memory is #
  955. # performed by calling the routine fout(). If no exception should occur #
  956. # as the result of emulation, then an exit either occurs through #
  957. # _fpsp_done() or through _real_trace() if a Trace exception is pending #
  958. # (a Trace stack frame must be created here, too). If an FP exception #
  959. # should occur, then we must create an exception stack frame of that #
  960. # type and jump to either _real_snan(), _real_operr(), _real_inex(), #
  961. # _real_unfl(), or _real_ovfl() as appropriate. PACKED opclass 3 #
  962. # emulation is performed in a similar manner. #
  963. # #
  964. #########################################################################
  965. #
  966. # (1) DENORM and UNNORM (unimplemented) data types:
  967. #
  968. # post-instruction
  969. # *****************
  970. # * EA *
  971. # pre-instruction * *
  972. # ***************** *****************
  973. # * 0x0 * 0x0dc * * 0x3 * 0x0dc *
  974. # ***************** *****************
  975. # * Next * * Next *
  976. # * PC * * PC *
  977. # ***************** *****************
  978. # * SR * * SR *
  979. # ***************** *****************
  980. #
  981. # (2) PACKED format (unsupported) opclasses two and three:
  982. # *****************
  983. # * EA *
  984. # * *
  985. # *****************
  986. # * 0x2 * 0x0dc *
  987. # *****************
  988. # * Next *
  989. # * PC *
  990. # *****************
  991. # * SR *
  992. # *****************
  993. #
  994. global _fpsp_unsupp
  995. _fpsp_unsupp:
  996. link.w %a6,&-LOCAL_SIZE # init stack frame
  997. fsave FP_SRC(%a6) # save fp state
  998. movm.l &0x0303,EXC_DREGS(%a6) # save d0-d1/a0-a1
  999. fmovm.l %fpcr,%fpsr,%fpiar,USER_FPCR(%a6) # save ctrl regs
  1000. fmovm.x &0xc0,EXC_FPREGS(%a6) # save fp0-fp1 on stack
  1001. btst &0x5,EXC_SR(%a6) # user or supervisor mode?
  1002. bne.b fu_s
  1003. fu_u:
  1004. mov.l %usp,%a0 # fetch user stack pointer
  1005. mov.l %a0,EXC_A7(%a6) # save on stack
  1006. bra.b fu_cont
  1007. # if the exception is an opclass zero or two unimplemented data type
  1008. # exception, then the a7' calculated here is wrong since it doesn't
  1009. # stack an ea. however, we don't need an a7' for this case anyways.
  1010. fu_s:
  1011. lea 0x4+EXC_EA(%a6),%a0 # load old a7'
  1012. mov.l %a0,EXC_A7(%a6) # save on stack
  1013. fu_cont:
  1014. # the FPIAR holds the "current PC" of the faulting instruction
  1015. # the FPIAR should be set correctly for ALL exceptions passing through
  1016. # this point.
  1017. mov.l USER_FPIAR(%a6),EXC_EXTWPTR(%a6)
  1018. mov.l EXC_EXTWPTR(%a6),%a0 # fetch instruction addr
  1019. addq.l &0x4,EXC_EXTWPTR(%a6) # incr instruction ptr
  1020. bsr.l _imem_read_long # fetch the instruction words
  1021. mov.l %d0,EXC_OPWORD(%a6) # store OPWORD and EXTWORD
  1022. ############################
  1023. clr.b SPCOND_FLG(%a6) # clear special condition flag
  1024. # Separate opclass three (fpn-to-mem) ops since they have a different
  1025. # stack frame and protocol.
  1026. btst &0x5,EXC_CMDREG(%a6) # is it an fmove out?
  1027. bne.w fu_out # yes
  1028. # Separate packed opclass two instructions.
  1029. bfextu EXC_CMDREG(%a6){&0:&6},%d0
  1030. cmpi.b %d0,&0x13
  1031. beq.w fu_in_pack
  1032. # I'm not sure at this point what FPSR bits are valid for this instruction.
  1033. # so, since the emulation routines re-create them anyways, zero exception field
  1034. andi.l &0x00ff00ff,USER_FPSR(%a6) # zero exception field
  1035. fmov.l &0x0,%fpcr # zero current control regs
  1036. fmov.l &0x0,%fpsr
  1037. # Opclass two w/ memory-to-fpn operation will have an incorrect extended
  1038. # precision format if the src format was single or double and the
  1039. # source data type was an INF, NAN, DENORM, or UNNORM
  1040. lea FP_SRC(%a6),%a0 # pass ptr to input
  1041. bsr.l fix_skewed_ops
  1042. # we don't know whether the src operand or the dst operand (or both) is the
  1043. # UNNORM or DENORM. call the function that tags the operand type. if the
  1044. # input is an UNNORM, then convert it to a NORM, DENORM, or ZERO.
  1045. lea FP_SRC(%a6),%a0 # pass: ptr to src op
  1046. bsr.l set_tag_x # tag the operand type
  1047. cmpi.b %d0,&UNNORM # is operand an UNNORM?
  1048. bne.b fu_op2 # no
  1049. bsr.l unnorm_fix # yes; convert to NORM,DENORM,or ZERO
  1050. fu_op2:
  1051. mov.b %d0,STAG(%a6) # save src optype tag
  1052. bfextu EXC_CMDREG(%a6){&6:&3},%d0 # dyadic; load dst reg
  1053. # bit five of the fp extension word separates the monadic and dyadic operations
  1054. # at this point
  1055. btst &0x5,1+EXC_CMDREG(%a6) # is operation monadic or dyadic?
  1056. beq.b fu_extract # monadic
  1057. cmpi.b 1+EXC_CMDREG(%a6),&0x3a # is operation an ftst?
  1058. beq.b fu_extract # yes, so it's monadic, too
  1059. bsr.l load_fpn2 # load dst into FP_DST
  1060. lea FP_DST(%a6),%a0 # pass: ptr to dst op
  1061. bsr.l set_tag_x # tag the operand type
  1062. cmpi.b %d0,&UNNORM # is operand an UNNORM?
  1063. bne.b fu_op2_done # no
  1064. bsr.l unnorm_fix # yes; convert to NORM,DENORM,or ZERO
  1065. fu_op2_done:
  1066. mov.b %d0,DTAG(%a6) # save dst optype tag
  1067. fu_extract:
  1068. clr.l %d0
  1069. mov.b FPCR_MODE(%a6),%d0 # fetch rnd mode/prec
  1070. bfextu 1+EXC_CMDREG(%a6){&1:&7},%d1 # extract extension
  1071. lea FP_SRC(%a6),%a0
  1072. lea FP_DST(%a6),%a1
  1073. mov.l (tbl_unsupp.l,%pc,%d1.l*4),%d1 # fetch routine addr
  1074. jsr (tbl_unsupp.l,%pc,%d1.l*1)
  1075. #
  1076. # Exceptions in order of precedence:
  1077. # BSUN : none
  1078. # SNAN : all dyadic ops
  1079. # OPERR : fsqrt(-NORM)
  1080. # OVFL : all except ftst,fcmp
  1081. # UNFL : all except ftst,fcmp
  1082. # DZ : fdiv
  1083. # INEX2 : all except ftst,fcmp
  1084. # INEX1 : none (packed doesn't go through here)
  1085. #
  1086. # we determine the highest priority exception(if any) set by the
  1087. # emulation routine that has also been enabled by the user.
  1088. mov.b FPCR_ENABLE(%a6),%d0 # fetch exceptions set
  1089. bne.b fu_in_ena # some are enabled
  1090. fu_in_cont:
  1091. # fcmp and ftst do not store any result.
  1092. mov.b 1+EXC_CMDREG(%a6),%d0 # fetch extension
  1093. andi.b &0x38,%d0 # extract bits 3-5
  1094. cmpi.b %d0,&0x38 # is instr fcmp or ftst?
  1095. beq.b fu_in_exit # yes
  1096. bfextu EXC_CMDREG(%a6){&6:&3},%d0 # dyadic; load dst reg
  1097. bsr.l store_fpreg # store the result
  1098. fu_in_exit:
  1099. fmovm.x EXC_FPREGS(%a6),&0xc0 # restore fp0/fp1
  1100. fmovm.l USER_FPCR(%a6),%fpcr,%fpsr,%fpiar # restore ctrl regs
  1101. movm.l EXC_DREGS(%a6),&0x0303 # restore d0-d1/a0-a1
  1102. unlk %a6
  1103. bra.l _fpsp_done
  1104. fu_in_ena:
  1105. and.b FPSR_EXCEPT(%a6),%d0 # keep only ones enabled
  1106. bfffo %d0{&24:&8},%d0 # find highest priority exception
  1107. bne.b fu_in_exc # there is at least one set
  1108. #
  1109. # No exceptions occurred that were also enabled. Now:
  1110. #
  1111. # if (OVFL && ovfl_disabled && inexact_enabled) {
  1112. # branch to _real_inex() (even if the result was exact!);
  1113. # } else {
  1114. # save the result in the proper fp reg (unless the op is fcmp or ftst);
  1115. # return;
  1116. # }
  1117. #
  1118. btst &ovfl_bit,FPSR_EXCEPT(%a6) # was overflow set?
  1119. beq.b fu_in_cont # no
  1120. fu_in_ovflchk:
  1121. btst &inex2_bit,FPCR_ENABLE(%a6) # was inexact enabled?
  1122. beq.b fu_in_cont # no
  1123. bra.w fu_in_exc_ovfl # go insert overflow frame
  1124. #
  1125. # An exception occurred and that exception was enabled:
  1126. #
  1127. # shift enabled exception field into lo byte of d0;
  1128. # if (((INEX2 || INEX1) && inex_enabled && OVFL && ovfl_disabled) ||
  1129. # ((INEX2 || INEX1) && inex_enabled && UNFL && unfl_disabled)) {
  1130. # /*
  1131. # * this is the case where we must call _real_inex() now or else
  1132. # * there will be no other way to pass it the exceptional operand
  1133. # */
  1134. # call _real_inex();
  1135. # } else {
  1136. # restore exc state (SNAN||OPERR||OVFL||UNFL||DZ||INEX) into the FPU;
  1137. # }
  1138. #
  1139. fu_in_exc:
  1140. subi.l &24,%d0 # fix offset to be 0-8
  1141. cmpi.b %d0,&0x6 # is exception INEX? (6)
  1142. bne.b fu_in_exc_exit # no
  1143. # the enabled exception was inexact
  1144. btst &unfl_bit,FPSR_EXCEPT(%a6) # did disabled underflow occur?
  1145. bne.w fu_in_exc_unfl # yes
  1146. btst &ovfl_bit,FPSR_EXCEPT(%a6) # did disabled overflow occur?
  1147. bne.w fu_in_exc_ovfl # yes
  1148. # here, we insert the correct fsave status value into the fsave frame for the
  1149. # corresponding exception. the operand in the fsave frame should be the original
  1150. # src operand.
  1151. fu_in_exc_exit:
  1152. mov.l %d0,-(%sp) # save d0
  1153. bsr.l funimp_skew # skew sgl or dbl inputs
  1154. mov.l (%sp)+,%d0 # restore d0
  1155. mov.w (tbl_except.b,%pc,%d0.w*2),2+FP_SRC(%a6) # create exc status
  1156. fmovm.x EXC_FPREGS(%a6),&0xc0 # restore fp0/fp1
  1157. fmovm.l USER_FPCR(%a6),%fpcr,%fpsr,%fpiar # restore ctrl regs
  1158. movm.l EXC_DREGS(%a6),&0x0303 # restore d0-d1/a0-a1
  1159. frestore FP_SRC(%a6) # restore src op
  1160. unlk %a6
  1161. bra.l _fpsp_done
  1162. tbl_except:
  1163. short 0xe000,0xe006,0xe004,0xe005
  1164. short 0xe003,0xe002,0xe001,0xe001
  1165. fu_in_exc_unfl:
  1166. mov.w &0x4,%d0
  1167. bra.b fu_in_exc_exit
  1168. fu_in_exc_ovfl:
  1169. mov.w &0x03,%d0
  1170. bra.b fu_in_exc_exit
  1171. # If the input operand to this operation was opclass two and a single
  1172. # or double precision denorm, inf, or nan, the operand needs to be
  1173. # "corrected" in order to have the proper equivalent extended precision
  1174. # number.
  1175. global fix_skewed_ops
  1176. fix_skewed_ops:
  1177. bfextu EXC_CMDREG(%a6){&0:&6},%d0 # extract opclass,src fmt
  1178. cmpi.b %d0,&0x11 # is class = 2 & fmt = sgl?
  1179. beq.b fso_sgl # yes
  1180. cmpi.b %d0,&0x15 # is class = 2 & fmt = dbl?
  1181. beq.b fso_dbl # yes
  1182. rts # no
  1183. fso_sgl:
  1184. mov.w LOCAL_EX(%a0),%d0 # fetch src exponent
  1185. andi.w &0x7fff,%d0 # strip sign
  1186. cmpi.w %d0,&0x3f80 # is |exp| == $3f80?
  1187. beq.b fso_sgl_dnrm_zero # yes
  1188. cmpi.w %d0,&0x407f # no; is |exp| == $407f?
  1189. beq.b fso_infnan # yes
  1190. rts # no
  1191. fso_sgl_dnrm_zero:
  1192. andi.l &0x7fffffff,LOCAL_HI(%a0) # clear j-bit
  1193. beq.b fso_zero # it's a skewed zero
  1194. fso_sgl_dnrm:
  1195. # here, we count on norm not to alter a0...
  1196. bsr.l norm # normalize mantissa
  1197. neg.w %d0 # -shft amt
  1198. addi.w &0x3f81,%d0 # adjust new exponent
  1199. andi.w &0x8000,LOCAL_EX(%a0) # clear old exponent
  1200. or.w %d0,LOCAL_EX(%a0) # insert new exponent
  1201. rts
  1202. fso_zero:
  1203. andi.w &0x8000,LOCAL_EX(%a0) # clear bogus exponent
  1204. rts
  1205. fso_infnan:
  1206. andi.b &0x7f,LOCAL_HI(%a0) # clear j-bit
  1207. ori.w &0x7fff,LOCAL_EX(%a0) # make exponent = $7fff
  1208. rts
  1209. fso_dbl:
  1210. mov.w LOCAL_EX(%a0),%d0 # fetch src exponent
  1211. andi.w &0x7fff,%d0 # strip sign
  1212. cmpi.w %d0,&0x3c00 # is |exp| == $3c00?
  1213. beq.b fso_dbl_dnrm_zero # yes
  1214. cmpi.w %d0,&0x43ff # no; is |exp| == $43ff?
  1215. beq.b fso_infnan # yes
  1216. rts # no
  1217. fso_dbl_dnrm_zero:
  1218. andi.l &0x7fffffff,LOCAL_HI(%a0) # clear j-bit
  1219. bne.b fso_dbl_dnrm # it's a skewed denorm
  1220. tst.l LOCAL_LO(%a0) # is it a zero?
  1221. beq.b fso_zero # yes
  1222. fso_dbl_dnrm:
  1223. # here, we count on norm not to alter a0...
  1224. bsr.l norm # normalize mantissa
  1225. neg.w %d0 # -shft amt
  1226. addi.w &0x3c01,%d0 # adjust new exponent
  1227. andi.w &0x8000,LOCAL_EX(%a0) # clear old exponent
  1228. or.w %d0,LOCAL_EX(%a0) # insert new exponent
  1229. rts
  1230. #################################################################
  1231. # fmove out took an unimplemented data type exception.
  1232. # the src operand is in FP_SRC. Call _fout() to write out the result and
  1233. # to determine which exceptions, if any, to take.
  1234. fu_out:
  1235. # Separate packed move outs from the UNNORM and DENORM move outs.
  1236. bfextu EXC_CMDREG(%a6){&3:&3},%d0
  1237. cmpi.b %d0,&0x3
  1238. beq.w fu_out_pack
  1239. cmpi.b %d0,&0x7
  1240. beq.w fu_out_pack
  1241. # I'm not sure at this point what FPSR bits are valid for this instruction.
  1242. # so, since the emulation routines re-create them anyways, zero exception field.
  1243. # fmove out doesn't affect ccodes.
  1244. and.l &0xffff00ff,USER_FPSR(%a6) # zero exception field
  1245. fmov.l &0x0,%fpcr # zero current control regs
  1246. fmov.l &0x0,%fpsr
  1247. # the src can ONLY be a DENORM or an UNNORM! so, don't make any big subroutine
  1248. # call here. just figure out what it is...
  1249. mov.w FP_SRC_EX(%a6),%d0 # get exponent
  1250. andi.w &0x7fff,%d0 # strip sign
  1251. beq.b fu_out_denorm # it's a DENORM
  1252. lea FP_SRC(%a6),%a0
  1253. bsr.l unnorm_fix # yes; fix it
  1254. mov.b %d0,STAG(%a6)
  1255. bra.b fu_out_cont
  1256. fu_out_denorm:
  1257. mov.b &DENORM,STAG(%a6)
  1258. fu_out_cont:
  1259. clr.l %d0
  1260. mov.b FPCR_MODE(%a6),%d0 # fetch rnd mode/prec
  1261. lea FP_SRC(%a6),%a0 # pass ptr to src operand
  1262. mov.l (%a6),EXC_A6(%a6) # in case a6 changes
  1263. bsr.l fout # call fmove out routine
  1264. # Exceptions in order of precedence:
  1265. # BSUN : none
  1266. # SNAN : none
  1267. # OPERR : fmove.{b,w,l} out of large UNNORM
  1268. # OVFL : fmove.{s,d}
  1269. # UNFL : fmove.{s,d,x}
  1270. # DZ : none
  1271. # INEX2 : all
  1272. # INEX1 : none (packed doesn't travel through here)
  1273. # determine the highest priority exception(if any) set by the
  1274. # emulation routine that has also been enabled by the user.
  1275. mov.b FPCR_ENABLE(%a6),%d0 # fetch exceptions enabled
  1276. bne.w fu_out_ena # some are enabled
  1277. fu_out_done:
  1278. mov.l EXC_A6(%a6),(%a6) # in case a6 changed
  1279. # on extended precision opclass three instructions using pre-decrement or
  1280. # post-increment addressing mode, the address register is not updated. is the
  1281. # address register was the stack pointer used from user mode, then let's update
  1282. # it here. if it was used from supervisor mode, then we have to handle this
  1283. # as a special case.
  1284. btst &0x5,EXC_SR(%a6)
  1285. bne.b fu_out_done_s
  1286. mov.l EXC_A7(%a6),%a0 # restore a7
  1287. mov.l %a0,%usp
  1288. fu_out_done_cont:
  1289. fmovm.x EXC_FPREGS(%a6),&0xc0 # restore fp0/fp1
  1290. fmovm.l USER_FPCR(%a6),%fpcr,%fpsr,%fpiar # restore ctrl regs
  1291. movm.l EXC_DREGS(%a6),&0x0303 # restore d0-d1/a0-a1
  1292. unlk %a6
  1293. btst &0x7,(%sp) # is trace on?
  1294. bne.b fu_out_trace # yes
  1295. bra.l _fpsp_done
  1296. # is the ea mode pre-decrement of the stack pointer from supervisor mode?
  1297. # ("fmov.x fpm,-(a7)") if so,
  1298. fu_out_done_s:
  1299. cmpi.b SPCOND_FLG(%a6),&mda7_flg
  1300. bne.b fu_out_done_cont
  1301. # the extended precision result is still in fp0. but, we need to save it
  1302. # somewhere on the stack until we can copy it to its final resting place.
  1303. # here, we're counting on the top of the stack to be the old place-holders
  1304. # for fp0/fp1 which have already been restored. that way, we can write
  1305. # over those destinations with the shifted stack frame.
  1306. fmovm.x &0x80,FP_SRC(%a6) # put answer on stack
  1307. fmovm.x EXC_FPREGS(%a6),&0xc0 # restore fp0/fp1
  1308. fmovm.l USER_FPCR(%a6),%fpcr,%fpsr,%fpiar # restore ctrl regs
  1309. movm.l EXC_DREGS(%a6),&0x0303 # restore d0-d1/a0-a1
  1310. mov.l (%a6),%a6 # restore frame pointer
  1311. mov.l LOCAL_SIZE+EXC_SR(%sp),LOCAL_SIZE+EXC_SR-0xc(%sp)
  1312. mov.l LOCAL_SIZE+2+EXC_PC(%sp),LOCAL_SIZE+2+EXC_PC-0xc(%sp)
  1313. # now, copy the result to the proper place on the stack
  1314. mov.l LOCAL_SIZE+FP_SRC_EX(%sp),LOCAL_SIZE+EXC_SR+0x0(%sp)
  1315. mov.l LOCAL_SIZE+FP_SRC_HI(%sp),LOCAL_SIZE+EXC_SR+0x4(%sp)
  1316. mov.l LOCAL_SIZE+FP_SRC_LO(%sp),LOCAL_SIZE+EXC_SR+0x8(%sp)
  1317. add.l &LOCAL_SIZE-0x8,%sp
  1318. btst &0x7,(%sp)
  1319. bne.b fu_out_trace
  1320. bra.l _fpsp_done
  1321. fu_out_ena:
  1322. and.b FPSR_EXCEPT(%a6),%d0 # keep only ones enabled
  1323. bfffo %d0{&24:&8},%d0 # find highest priority exception
  1324. bne.b fu_out_exc # there is at least one set
  1325. # no exceptions were set.
  1326. # if a disabled overflow occurred and inexact was enabled but the result
  1327. # was exact, then a branch to _real_inex() is made.
  1328. btst &ovfl_bit,FPSR_EXCEPT(%a6) # was overflow set?
  1329. beq.w fu_out_done # no
  1330. fu_out_ovflchk:
  1331. btst &inex2_bit,FPCR_ENABLE(%a6) # was inexact enabled?
  1332. beq.w fu_out_done # no
  1333. bra.w fu_inex # yes
  1334. #
  1335. # The fp move out that took the "Unimplemented Data Type" exception was
  1336. # being traced. Since the stack frames are similar, get the "current" PC
  1337. # from FPIAR and put it in the trace stack frame then jump to _real_trace().
  1338. #
  1339. # UNSUPP FRAME TRACE FRAME
  1340. # ***************** *****************
  1341. # * EA * * Current *
  1342. # * * * PC *
  1343. # ***************** *****************
  1344. # * 0x3 * 0x0dc * * 0x2 * 0x024 *
  1345. # ***************** *****************
  1346. # * Next * * Next *
  1347. # * PC * * PC *
  1348. # ***************** *****************
  1349. # * SR * * SR *
  1350. # ***************** *****************
  1351. #
  1352. fu_out_trace:
  1353. mov.w &0x2024,0x6(%sp)
  1354. fmov.l %fpiar,0x8(%sp)
  1355. bra.l _real_trace
  1356. # an exception occurred and that exception was enabled.
  1357. fu_out_exc:
  1358. subi.l &24,%d0 # fix offset to be 0-8
  1359. # we don't mess with the existing fsave frame. just re-insert it and
  1360. # jump to the "_real_{}()" handler...
  1361. mov.w (tbl_fu_out.b,%pc,%d0.w*2),%d0
  1362. jmp (tbl_fu_out.b,%pc,%d0.w*1)
  1363. swbeg &0x8
  1364. tbl_fu_out:
  1365. short tbl_fu_out - tbl_fu_out # BSUN can't happen
  1366. short tbl_fu_out - tbl_fu_out # SNAN can't happen
  1367. short fu_operr - tbl_fu_out # OPERR
  1368. short fu_ovfl - tbl_fu_out # OVFL
  1369. short fu_unfl - tbl_fu_out # UNFL
  1370. short tbl_fu_out - tbl_fu_out # DZ can't happen
  1371. short fu_inex - tbl_fu_out # INEX2
  1372. short tbl_fu_out - tbl_fu_out # INEX1 won't make it here
  1373. # for snan,operr,ovfl,unfl, src op is still in FP_SRC so just
  1374. # frestore it.
  1375. fu_snan:
  1376. fmovm.x EXC_FPREGS(%a6),&0xc0 # restore fp0/fp1
  1377. fmovm.l USER_FPCR(%a6),%fpcr,%fpsr,%fpiar # restore ctrl regs
  1378. movm.l EXC_DREGS(%a6),&0x0303 # restore d0-d1/a0-a1
  1379. mov.w &0x30d8,EXC_VOFF(%a6) # vector offset = 0xd8
  1380. mov.w &0xe006,2+FP_SRC(%a6)
  1381. frestore FP_SRC(%a6)
  1382. unlk %a6
  1383. bra.l _real_snan
  1384. fu_operr:
  1385. fmovm.x EXC_FPREGS(%a6),&0xc0 # restore fp0/fp1
  1386. fmovm.l USER_FPCR(%a6),%fpcr,%fpsr,%fpiar # restore ctrl regs
  1387. movm.l EXC_DREGS(%a6),&0x0303 # restore d0-d1/a0-a1
  1388. mov.w &0x30d0,EXC_VOFF(%a6) # vector offset = 0xd0
  1389. mov.w &0xe004,2+FP_SRC(%a6)
  1390. frestore FP_SRC(%a6)
  1391. unlk %a6
  1392. bra.l _real_operr
# OVFL: first save the exceptional operand (EXOP, expected in fp1) into
# FP_SRC so it can be handed to the OS handler through the fsave frame,
# then restore user state, stuff the OVFL status word (0xe005), set
# vector offset 0xd4 (FP OVFL), and exit through the OS OVFL handler.
fu_ovfl:
	fmovm.x		&0x40,FP_SRC(%a6)	# save EXOP (fp1) to the stack

	fmovm.x		EXC_FPREGS(%a6),&0xc0	# restore fp0/fp1
	fmovm.l		USER_FPCR(%a6),%fpcr,%fpsr,%fpiar # restore ctrl regs
	movm.l		EXC_DREGS(%a6),&0x0303	# restore d0-d1/a0-a1

	mov.w		&0x30d4,EXC_VOFF(%a6)	# vector offset = 0xd4
	mov.w		&0xe005,2+FP_SRC(%a6)	# set fsave status = OVFL

	frestore	FP_SRC(%a6)		# restore EXOP

	unlk		%a6

	bra.l		_real_ovfl
# underflow can happen for extended precision. extended precision opclass
# three instruction exceptions don't update the stack pointer. so, if the
# exception occurred from user mode, then simply update a7 and exit normally.
# if the exception occurred from supervisor mode, check if the <ea> mode was
# -(a7); in that case the exception frame itself must be moved "down" 12
# bytes so the result can be stored where the frame used to be.
fu_unfl:
	mov.l		EXC_A6(%a6),(%a6)	# restore a6

	btst		&0x5,EXC_SR(%a6)	# supervisor mode at exception?
	bne.w		fu_unfl_s		# yes

	mov.l		EXC_A7(%a6),%a0		# restore a7 whether we need
	mov.l		%a0,%usp		# to or not...

# common exit: hand EXOP to the OS through the fsave frame, status = UNFL.
fu_unfl_cont:
	fmovm.x		&0x40,FP_SRC(%a6)	# save EXOP (fp1) to the stack

	fmovm.x		EXC_FPREGS(%a6),&0xc0	# restore fp0/fp1
	fmovm.l		USER_FPCR(%a6),%fpcr,%fpsr,%fpiar # restore ctrl regs
	movm.l		EXC_DREGS(%a6),&0x0303	# restore d0-d1/a0-a1

	mov.w		&0x30cc,EXC_VOFF(%a6)	# vector offset = 0xcc (FP UNFL)
	mov.w		&0xe003,2+FP_SRC(%a6)	# set fsave status = UNFL

	frestore	FP_SRC(%a6)		# restore EXOP

	unlk		%a6

	bra.l		_real_unfl

# supervisor mode: only the "fmove.x fpn,-(a7)" case needs special care.
fu_unfl_s:
	cmpi.b		SPCOND_FLG(%a6),&mda7_flg # was the <ea> mode -(sp)?
	bne.b		fu_unfl_cont		# no; take the normal exit

# the extended precision result is still in fp0. but, we need to save it
# somewhere on the stack until we can copy it to its final resting place
# (where the exc frame is currently). make sure it's not at the top of the
# frame or it will get overwritten when the exc stack frame is shifted "down".
	fmovm.x		&0x80,FP_SRC(%a6)	# put answer (fp0) on stack
	fmovm.x		&0x40,FP_DST(%a6)	# put EXOP (fp1) on stack

	fmovm.x		EXC_FPREGS(%a6),&0xc0	# restore fp0/fp1
	fmovm.l		USER_FPCR(%a6),%fpcr,%fpsr,%fpiar # restore ctrl regs
	movm.l		EXC_DREGS(%a6),&0x0303	# restore d0-d1/a0-a1

	mov.w		&0x30cc,EXC_VOFF(%a6)	# vector offset = 0xcc (FP UNFL)
	mov.w		&0xe003,2+FP_DST(%a6)	# set fsave status = UNFL

	frestore	FP_DST(%a6)		# restore EXOP

	mov.l		(%a6),%a6		# restore frame pointer

# move the SR/PC/EA words of the exception frame down 12 bytes...
	mov.l		LOCAL_SIZE+EXC_SR(%sp),LOCAL_SIZE+EXC_SR-0xc(%sp)
	mov.l		LOCAL_SIZE+2+EXC_PC(%sp),LOCAL_SIZE+2+EXC_PC-0xc(%sp)
	mov.l		LOCAL_SIZE+EXC_EA(%sp),LOCAL_SIZE+EXC_EA-0xc(%sp)

# now, copy the result to the proper place on the stack
	mov.l		LOCAL_SIZE+FP_SRC_EX(%sp),LOCAL_SIZE+EXC_SR+0x0(%sp)
	mov.l		LOCAL_SIZE+FP_SRC_HI(%sp),LOCAL_SIZE+EXC_SR+0x4(%sp)
	mov.l		LOCAL_SIZE+FP_SRC_LO(%sp),LOCAL_SIZE+EXC_SR+0x8(%sp)

	add.l		&LOCAL_SIZE-0x8,%sp	# pop locals; sp -> shifted frame

	bra.l		_real_unfl
# fmove in and out enter here.
# INEX: save EXOP into the fsave frame image, restore user state, stuff
# the INEX status word (0xe001), set vector offset 0xc4 (FP INEX), and
# exit through the OS inexact handler.
fu_inex:
	fmovm.x		&0x40,FP_SRC(%a6)	# save EXOP (fp1) to the stack

	fmovm.x		EXC_FPREGS(%a6),&0xc0	# restore fp0/fp1
	fmovm.l		USER_FPCR(%a6),%fpcr,%fpsr,%fpiar # restore ctrl regs
	movm.l		EXC_DREGS(%a6),&0x0303	# restore d0-d1/a0-a1

	mov.w		&0x30c4,EXC_VOFF(%a6)	# vector offset = 0xc4
	mov.w		&0xe001,2+FP_SRC(%a6)	# set fsave status = INEX

	frestore	FP_SRC(%a6)		# restore EXOP

	unlk		%a6

	bra.l		_real_inex
  1459. #########################################################################
  1460. #########################################################################
  1461. fu_in_pack:
  1462. # I'm not sure at this point what FPSR bits are valid for this instruction.
  1463. # so, since the emulation routines re-create them anyways, zero exception field
  1464. andi.l &0x0ff00ff,USER_FPSR(%a6) # zero exception field
  1465. fmov.l &0x0,%fpcr # zero current control regs
  1466. fmov.l &0x0,%fpsr
  1467. bsr.l get_packed # fetch packed src operand
  1468. lea FP_SRC(%a6),%a0 # pass ptr to src
  1469. bsr.l set_tag_x # set src optype tag
  1470. mov.b %d0,STAG(%a6) # save src optype tag
  1471. bfextu EXC_CMDREG(%a6){&6:&3},%d0 # dyadic; load dst reg
  1472. # bit five of the fp extension word separates the monadic and dyadic operations
  1473. # at this point
  1474. btst &0x5,1+EXC_CMDREG(%a6) # is operation monadic or dyadic?
  1475. beq.b fu_extract_p # monadic
  1476. cmpi.b 1+EXC_CMDREG(%a6),&0x3a # is operation an ftst?
  1477. beq.b fu_extract_p # yes, so it's monadic, too
  1478. bsr.l load_fpn2 # load dst into FP_DST
  1479. lea FP_DST(%a6),%a0 # pass: ptr to dst op
  1480. bsr.l set_tag_x # tag the operand type
  1481. cmpi.b %d0,&UNNORM # is operand an UNNORM?
  1482. bne.b fu_op2_done_p # no
  1483. bsr.l unnorm_fix # yes; convert to NORM,DENORM,or ZERO
  1484. fu_op2_done_p:
  1485. mov.b %d0,DTAG(%a6) # save dst optype tag
  1486. fu_extract_p:
  1487. clr.l %d0
  1488. mov.b FPCR_MODE(%a6),%d0 # fetch rnd mode/prec
  1489. bfextu 1+EXC_CMDREG(%a6){&1:&7},%d1 # extract extension
  1490. lea FP_SRC(%a6),%a0
  1491. lea FP_DST(%a6),%a1
  1492. mov.l (tbl_unsupp.l,%pc,%d1.l*4),%d1 # fetch routine addr
  1493. jsr (tbl_unsupp.l,%pc,%d1.l*1)
  1494. #
  1495. # Exceptions in order of precedence:
  1496. # BSUN : none
  1497. # SNAN : all dyadic ops
  1498. # OPERR : fsqrt(-NORM)
  1499. # OVFL : all except ftst,fcmp
  1500. # UNFL : all except ftst,fcmp
  1501. # DZ : fdiv
  1502. # INEX2 : all except ftst,fcmp
  1503. # INEX1 : all
  1504. #
  1505. # we determine the highest priority exception(if any) set by the
  1506. # emulation routine that has also been enabled by the user.
  1507. mov.b FPCR_ENABLE(%a6),%d0 # fetch exceptions enabled
  1508. bne.w fu_in_ena_p # some are enabled
  1509. fu_in_cont_p:
  1510. # fcmp and ftst do not store any result.
  1511. mov.b 1+EXC_CMDREG(%a6),%d0 # fetch extension
  1512. andi.b &0x38,%d0 # extract bits 3-5
  1513. cmpi.b %d0,&0x38 # is instr fcmp or ftst?
  1514. beq.b fu_in_exit_p # yes
  1515. bfextu EXC_CMDREG(%a6){&6:&3},%d0 # dyadic; load dst reg
  1516. bsr.l store_fpreg # store the result
  1517. fu_in_exit_p:
  1518. btst &0x5,EXC_SR(%a6) # user or supervisor?
  1519. bne.w fu_in_exit_s_p # supervisor
  1520. mov.l EXC_A7(%a6),%a0 # update user a7
  1521. mov.l %a0,%usp
  1522. fu_in_exit_cont_p:
  1523. fmovm.x EXC_FPREGS(%a6),&0xc0 # restore fp0/fp1
  1524. fmovm.l USER_FPCR(%a6),%fpcr,%fpsr,%fpiar # restore ctrl regs
  1525. movm.l EXC_DREGS(%a6),&0x0303 # restore d0-d1/a0-a1
  1526. unlk %a6 # unravel stack frame
  1527. btst &0x7,(%sp) # is trace on?
  1528. bne.w fu_trace_p # yes
  1529. bra.l _fpsp_done # exit to os
  1530. # the exception occurred in supervisor mode. check to see if the
  1531. # addressing mode was (a7)+. if so, we'll need to shift the
  1532. # stack frame "up".
  1533. fu_in_exit_s_p:
  1534. btst &mia7_bit,SPCOND_FLG(%a6) # was ea mode (a7)+
  1535. beq.b fu_in_exit_cont_p # no
  1536. fmovm.x EXC_FPREGS(%a6),&0xc0 # restore fp0/fp1
  1537. fmovm.l USER_FPCR(%a6),%fpcr,%fpsr,%fpiar # restore ctrl regs
  1538. movm.l EXC_DREGS(%a6),&0x0303 # restore d0-d1/a0-a1
  1539. unlk %a6 # unravel stack frame
  1540. # shift the stack frame "up". we don't really care about the <ea> field.
  1541. mov.l 0x4(%sp),0x10(%sp)
  1542. mov.l 0x0(%sp),0xc(%sp)
  1543. add.l &0xc,%sp
  1544. btst &0x7,(%sp) # is trace on?
  1545. bne.w fu_trace_p # yes
  1546. bra.l _fpsp_done # exit to os
  1547. fu_in_ena_p:
  1548. and.b FPSR_EXCEPT(%a6),%d0 # keep only ones enabled & set
  1549. bfffo %d0{&24:&8},%d0 # find highest priority exception
  1550. bne.b fu_in_exc_p # at least one was set
  1551. #
  1552. # No exceptions occurred that were also enabled. Now:
  1553. #
  1554. # if (OVFL && ovfl_disabled && inexact_enabled) {
  1555. # branch to _real_inex() (even if the result was exact!);
  1556. # } else {
  1557. # save the result in the proper fp reg (unless the op is fcmp or ftst);
  1558. # return;
  1559. # }
  1560. #
  1561. btst &ovfl_bit,FPSR_EXCEPT(%a6) # was overflow set?
  1562. beq.w fu_in_cont_p # no
  1563. fu_in_ovflchk_p:
  1564. btst &inex2_bit,FPCR_ENABLE(%a6) # was inexact enabled?
  1565. beq.w fu_in_cont_p # no
  1566. bra.w fu_in_exc_ovfl_p # do _real_inex() now
  1567. #
  1568. # An exception occurred and that exception was enabled:
  1569. #
  1570. # shift enabled exception field into lo byte of d0;
  1571. # if (((INEX2 || INEX1) && inex_enabled && OVFL && ovfl_disabled) ||
  1572. # ((INEX2 || INEX1) && inex_enabled && UNFL && unfl_disabled)) {
  1573. # /*
  1574. # * this is the case where we must call _real_inex() now or else
  1575. # * there will be no other way to pass it the exceptional operand
  1576. # */
  1577. # call _real_inex();
  1578. # } else {
  1579. # restore exc state (SNAN||OPERR||OVFL||UNFL||DZ||INEX) into the FPU;
  1580. # }
  1581. #
  1582. fu_in_exc_p:
  1583. subi.l &24,%d0 # fix offset to be 0-8
  1584. cmpi.b %d0,&0x6 # is exception INEX? (6 or 7)
  1585. blt.b fu_in_exc_exit_p # no
  1586. # the enabled exception was inexact
  1587. btst &unfl_bit,FPSR_EXCEPT(%a6) # did disabled underflow occur?
  1588. bne.w fu_in_exc_unfl_p # yes
  1589. btst &ovfl_bit,FPSR_EXCEPT(%a6) # did disabled overflow occur?
  1590. bne.w fu_in_exc_ovfl_p # yes
  1591. # here, we insert the correct fsave status value into the fsave frame for the
  1592. # corresponding exception. the operand in the fsave frame should be the original
  1593. # src operand.
  1594. # as a reminder for future predicted pain and agony, we are passing in fsave the
  1595. # "non-skewed" operand for cases of sgl and dbl src INFs,NANs, and DENORMs.
  1596. # this is INCORRECT for enabled SNAN which would give to the user the skewed SNAN!!!
  1597. fu_in_exc_exit_p:
  1598. btst &0x5,EXC_SR(%a6) # user or supervisor?
  1599. bne.w fu_in_exc_exit_s_p # supervisor
  1600. mov.l EXC_A7(%a6),%a0 # update user a7
  1601. mov.l %a0,%usp
  1602. fu_in_exc_exit_cont_p:
  1603. mov.w (tbl_except_p.b,%pc,%d0.w*2),2+FP_SRC(%a6)
  1604. fmovm.x EXC_FPREGS(%a6),&0xc0 # restore fp0/fp1
  1605. fmovm.l USER_FPCR(%a6),%fpcr,%fpsr,%fpiar # restore ctrl regs
  1606. movm.l EXC_DREGS(%a6),&0x0303 # restore d0-d1/a0-a1
  1607. frestore FP_SRC(%a6) # restore src op
  1608. unlk %a6
  1609. btst &0x7,(%sp) # is trace enabled?
  1610. bne.w fu_trace_p # yes
  1611. bra.l _fpsp_done
  1612. tbl_except_p:
  1613. short 0xe000,0xe006,0xe004,0xe005
  1614. short 0xe003,0xe002,0xe001,0xe001
  1615. fu_in_exc_ovfl_p:
  1616. mov.w &0x3,%d0
  1617. bra.w fu_in_exc_exit_p
  1618. fu_in_exc_unfl_p:
  1619. mov.w &0x4,%d0
  1620. bra.w fu_in_exc_exit_p
  1621. fu_in_exc_exit_s_p:
  1622. btst &mia7_bit,SPCOND_FLG(%a6)
  1623. beq.b fu_in_exc_exit_cont_p
  1624. mov.w (tbl_except_p.b,%pc,%d0.w*2),2+FP_SRC(%a6)
  1625. fmovm.x EXC_FPREGS(%a6),&0xc0 # restore fp0/fp1
  1626. fmovm.l USER_FPCR(%a6),%fpcr,%fpsr,%fpiar # restore ctrl regs
  1627. movm.l EXC_DREGS(%a6),&0x0303 # restore d0-d1/a0-a1
  1628. frestore FP_SRC(%a6) # restore src op
  1629. unlk %a6 # unravel stack frame
  1630. # shift stack frame "up". who cares about <ea> field.
  1631. mov.l 0x4(%sp),0x10(%sp)
  1632. mov.l 0x0(%sp),0xc(%sp)
  1633. add.l &0xc,%sp
  1634. btst &0x7,(%sp) # is trace on?
  1635. bne.b fu_trace_p # yes
  1636. bra.l _fpsp_done # exit to os
  1637. #
  1638. # The opclass two PACKED instruction that took an "Unimplemented Data Type"
  1639. # exception was being traced. Make the "current" PC the FPIAR and put it in the
  1640. # trace stack frame then jump to _real_trace().
  1641. #
  1642. # UNSUPP FRAME TRACE FRAME
  1643. # ***************** *****************
  1644. # * EA * * Current *
  1645. # * * * PC *
  1646. # ***************** *****************
  1647. # * 0x2 * 0x0dc * * 0x2 * 0x024 *
  1648. # ***************** *****************
  1649. # * Next * * Next *
  1650. # * PC * * PC *
  1651. # ***************** *****************
  1652. # * SR * * SR *
  1653. # ***************** *****************
  1654. fu_trace_p:
  1655. mov.w &0x2024,0x6(%sp)
  1656. fmov.l %fpiar,0x8(%sp)
  1657. bra.l _real_trace
  1658. #########################################################
  1659. #########################################################
  1660. fu_out_pack:
  1661. # I'm not sure at this point what FPSR bits are valid for this instruction.
  1662. # so, since the emulation routines re-create them anyways, zero exception field.
  1663. # fmove out doesn't affect ccodes.
  1664. and.l &0xffff00ff,USER_FPSR(%a6) # zero exception field
  1665. fmov.l &0x0,%fpcr # zero current control regs
  1666. fmov.l &0x0,%fpsr
  1667. bfextu EXC_CMDREG(%a6){&6:&3},%d0
  1668. bsr.l load_fpn1
  1669. # unlike other opclass 3, unimplemented data type exceptions, packed must be
  1670. # able to detect all operand types.
  1671. lea FP_SRC(%a6),%a0
  1672. bsr.l set_tag_x # tag the operand type
  1673. cmpi.b %d0,&UNNORM # is operand an UNNORM?
  1674. bne.b fu_op2_p # no
  1675. bsr.l unnorm_fix # yes; convert to NORM,DENORM,or ZERO
  1676. fu_op2_p:
  1677. mov.b %d0,STAG(%a6) # save src optype tag
  1678. clr.l %d0
  1679. mov.b FPCR_MODE(%a6),%d0 # fetch rnd mode/prec
  1680. lea FP_SRC(%a6),%a0 # pass ptr to src operand
  1681. mov.l (%a6),EXC_A6(%a6) # in case a6 changes
  1682. bsr.l fout # call fmove out routine
  1683. # Exceptions in order of precedence:
  1684. # BSUN : no
  1685. # SNAN : yes
  1686. # OPERR : if ((k_factor > +17) || (dec. exp exceeds 3 digits))
  1687. # OVFL : no
  1688. # UNFL : no
  1689. # DZ : no
  1690. # INEX2 : yes
  1691. # INEX1 : no
  1692. # determine the highest priority exception(if any) set by the
  1693. # emulation routine that has also been enabled by the user.
  1694. mov.b FPCR_ENABLE(%a6),%d0 # fetch exceptions enabled
  1695. bne.w fu_out_ena_p # some are enabled
  1696. fu_out_exit_p:
  1697. mov.l EXC_A6(%a6),(%a6) # restore a6
  1698. btst &0x5,EXC_SR(%a6) # user or supervisor?
  1699. bne.b fu_out_exit_s_p # supervisor
  1700. mov.l EXC_A7(%a6),%a0 # update user a7
  1701. mov.l %a0,%usp
  1702. fu_out_exit_cont_p:
  1703. fmovm.x EXC_FPREGS(%a6),&0xc0 # restore fp0/fp1
  1704. fmovm.l USER_FPCR(%a6),%fpcr,%fpsr,%fpiar # restore ctrl regs
  1705. movm.l EXC_DREGS(%a6),&0x0303 # restore d0-d1/a0-a1
  1706. unlk %a6 # unravel stack frame
  1707. btst &0x7,(%sp) # is trace on?
  1708. bne.w fu_trace_p # yes
  1709. bra.l _fpsp_done # exit to os
  1710. # the exception occurred in supervisor mode. check to see if the
  1711. # addressing mode was -(a7). if so, we'll need to shift the
  1712. # stack frame "down".
  1713. fu_out_exit_s_p:
  1714. btst &mda7_bit,SPCOND_FLG(%a6) # was ea mode -(a7)
  1715. beq.b fu_out_exit_cont_p # no
  1716. fmovm.x EXC_FPREGS(%a6),&0xc0 # restore fp0/fp1
  1717. fmovm.l USER_FPCR(%a6),%fpcr,%fpsr,%fpiar # restore ctrl regs
  1718. movm.l EXC_DREGS(%a6),&0x0303 # restore d0-d1/a0-a1
  1719. mov.l (%a6),%a6 # restore frame pointer
  1720. mov.l LOCAL_SIZE+EXC_SR(%sp),LOCAL_SIZE+EXC_SR-0xc(%sp)
  1721. mov.l LOCAL_SIZE+2+EXC_PC(%sp),LOCAL_SIZE+2+EXC_PC-0xc(%sp)
  1722. # now, copy the result to the proper place on the stack
  1723. mov.l LOCAL_SIZE+FP_DST_EX(%sp),LOCAL_SIZE+EXC_SR+0x0(%sp)
  1724. mov.l LOCAL_SIZE+FP_DST_HI(%sp),LOCAL_SIZE+EXC_SR+0x4(%sp)
  1725. mov.l LOCAL_SIZE+FP_DST_LO(%sp),LOCAL_SIZE+EXC_SR+0x8(%sp)
  1726. add.l &LOCAL_SIZE-0x8,%sp
  1727. btst &0x7,(%sp)
  1728. bne.w fu_trace_p
  1729. bra.l _fpsp_done
  1730. fu_out_ena_p:
  1731. and.b FPSR_EXCEPT(%a6),%d0 # keep only ones enabled
  1732. bfffo %d0{&24:&8},%d0 # find highest priority exception
  1733. beq.w fu_out_exit_p
  1734. mov.l EXC_A6(%a6),(%a6) # restore a6
  1735. # an exception occurred and that exception was enabled.
  1736. # the only exception possible on packed move out are INEX, OPERR, and SNAN.
  1737. fu_out_exc_p:
  1738. cmpi.b %d0,&0x1a
  1739. bgt.w fu_inex_p2
  1740. beq.w fu_operr_p
  1741. fu_snan_p:
  1742. btst &0x5,EXC_SR(%a6)
  1743. bne.b fu_snan_s_p
  1744. mov.l EXC_A7(%a6),%a0
  1745. mov.l %a0,%usp
  1746. bra.w fu_snan
  1747. fu_snan_s_p:
  1748. cmpi.b SPCOND_FLG(%a6),&mda7_flg
  1749. bne.w fu_snan
  1750. # the instruction was "fmove.p fpn,-(a7)" from supervisor mode.
  1751. # the strategy is to move the exception frame "down" 12 bytes. then, we
  1752. # can store the default result where the exception frame was.
  1753. fmovm.x EXC_FPREGS(%a6),&0xc0 # restore fp0/fp1
  1754. fmovm.l USER_FPCR(%a6),%fpcr,%fpsr,%fpiar # restore ctrl regs
  1755. movm.l EXC_DREGS(%a6),&0x0303 # restore d0-d1/a0-a1
  1756. mov.w &0x30d8,EXC_VOFF(%a6) # vector offset = 0xd0
  1757. mov.w &0xe006,2+FP_SRC(%a6) # set fsave status
  1758. frestore FP_SRC(%a6) # restore src operand
  1759. mov.l (%a6),%a6 # restore frame pointer
  1760. mov.l LOCAL_SIZE+EXC_SR(%sp),LOCAL_SIZE+EXC_SR-0xc(%sp)
  1761. mov.l LOCAL_SIZE+2+EXC_PC(%sp),LOCAL_SIZE+2+EXC_PC-0xc(%sp)
  1762. mov.l LOCAL_SIZE+EXC_EA(%sp),LOCAL_SIZE+EXC_EA-0xc(%sp)
  1763. # now, we copy the default result to its proper location
  1764. mov.l LOCAL_SIZE+FP_DST_EX(%sp),LOCAL_SIZE+0x4(%sp)
  1765. mov.l LOCAL_SIZE+FP_DST_HI(%sp),LOCAL_SIZE+0x8(%sp)
  1766. mov.l LOCAL_SIZE+FP_DST_LO(%sp),LOCAL_SIZE+0xc(%sp)
  1767. add.l &LOCAL_SIZE-0x8,%sp
  1768. bra.l _real_snan
  1769. fu_operr_p:
  1770. btst &0x5,EXC_SR(%a6)
  1771. bne.w fu_operr_p_s
  1772. mov.l EXC_A7(%a6),%a0
  1773. mov.l %a0,%usp
  1774. bra.w fu_operr
  1775. fu_operr_p_s:
  1776. cmpi.b SPCOND_FLG(%a6),&mda7_flg
  1777. bne.w fu_operr
  1778. # the instruction was "fmove.p fpn,-(a7)" from supervisor mode.
  1779. # the strategy is to move the exception frame "down" 12 bytes. then, we
  1780. # can store the default result where the exception frame was.
  1781. fmovm.x EXC_FPREGS(%a6),&0xc0 # restore fp0/fp1
  1782. fmovm.l USER_FPCR(%a6),%fpcr,%fpsr,%fpiar # restore ctrl regs
  1783. movm.l EXC_DREGS(%a6),&0x0303 # restore d0-d1/a0-a1
  1784. mov.w &0x30d0,EXC_VOFF(%a6) # vector offset = 0xd0
  1785. mov.w &0xe004,2+FP_SRC(%a6) # set fsave status
  1786. frestore FP_SRC(%a6) # restore src operand
  1787. mov.l (%a6),%a6 # restore frame pointer
  1788. mov.l LOCAL_SIZE+EXC_SR(%sp),LOCAL_SIZE+EXC_SR-0xc(%sp)
  1789. mov.l LOCAL_SIZE+2+EXC_PC(%sp),LOCAL_SIZE+2+EXC_PC-0xc(%sp)
  1790. mov.l LOCAL_SIZE+EXC_EA(%sp),LOCAL_SIZE+EXC_EA-0xc(%sp)
  1791. # now, we copy the default result to its proper location
  1792. mov.l LOCAL_SIZE+FP_DST_EX(%sp),LOCAL_SIZE+0x4(%sp)
  1793. mov.l LOCAL_SIZE+FP_DST_HI(%sp),LOCAL_SIZE+0x8(%sp)
  1794. mov.l LOCAL_SIZE+FP_DST_LO(%sp),LOCAL_SIZE+0xc(%sp)
  1795. add.l &LOCAL_SIZE-0x8,%sp
  1796. bra.l _real_operr
  1797. fu_inex_p2:
  1798. btst &0x5,EXC_SR(%a6)
  1799. bne.w fu_inex_s_p2
  1800. mov.l EXC_A7(%a6),%a0
  1801. mov.l %a0,%usp
  1802. bra.w fu_inex
  1803. fu_inex_s_p2:
  1804. cmpi.b SPCOND_FLG(%a6),&mda7_flg
  1805. bne.w fu_inex
  1806. # the instruction was "fmove.p fpn,-(a7)" from supervisor mode.
  1807. # the strategy is to move the exception frame "down" 12 bytes. then, we
  1808. # can store the default result where the exception frame was.
  1809. fmovm.x EXC_FPREGS(%a6),&0xc0 # restore fp0/fp1
  1810. fmovm.l USER_FPCR(%a6),%fpcr,%fpsr,%fpiar # restore ctrl regs
  1811. movm.l EXC_DREGS(%a6),&0x0303 # restore d0-d1/a0-a1
  1812. mov.w &0x30c4,EXC_VOFF(%a6) # vector offset = 0xc4
  1813. mov.w &0xe001,2+FP_SRC(%a6) # set fsave status
  1814. frestore FP_SRC(%a6) # restore src operand
  1815. mov.l (%a6),%a6 # restore frame pointer
  1816. mov.l LOCAL_SIZE+EXC_SR(%sp),LOCAL_SIZE+EXC_SR-0xc(%sp)
  1817. mov.l LOCAL_SIZE+2+EXC_PC(%sp),LOCAL_SIZE+2+EXC_PC-0xc(%sp)
  1818. mov.l LOCAL_SIZE+EXC_EA(%sp),LOCAL_SIZE+EXC_EA-0xc(%sp)
  1819. # now, we copy the default result to its proper location
  1820. mov.l LOCAL_SIZE+FP_DST_EX(%sp),LOCAL_SIZE+0x4(%sp)
  1821. mov.l LOCAL_SIZE+FP_DST_HI(%sp),LOCAL_SIZE+0x8(%sp)
  1822. mov.l LOCAL_SIZE+FP_DST_LO(%sp),LOCAL_SIZE+0xc(%sp)
  1823. add.l &LOCAL_SIZE-0x8,%sp
  1824. bra.l _real_inex
  1825. #########################################################################
  1826. #
  1827. # if we're stuffing a source operand back into an fsave frame then we
  1828. # have to make sure that for single or double source operands that the
  1829. # format stuffed is as weird as the hardware usually makes it.
  1830. #
  1831. global funimp_skew
  1832. funimp_skew:
  1833. bfextu EXC_EXTWORD(%a6){&3:&3},%d0 # extract src specifier
  1834. cmpi.b %d0,&0x1 # was src sgl?
  1835. beq.b funimp_skew_sgl # yes
  1836. cmpi.b %d0,&0x5 # was src dbl?
  1837. beq.b funimp_skew_dbl # yes
  1838. rts
  1839. funimp_skew_sgl:
  1840. mov.w FP_SRC_EX(%a6),%d0 # fetch DENORM exponent
  1841. andi.w &0x7fff,%d0 # strip sign
  1842. beq.b funimp_skew_sgl_not
  1843. cmpi.w %d0,&0x3f80
  1844. bgt.b funimp_skew_sgl_not
  1845. neg.w %d0 # make exponent negative
  1846. addi.w &0x3f81,%d0 # find amt to shift
  1847. mov.l FP_SRC_HI(%a6),%d1 # fetch DENORM hi(man)
  1848. lsr.l %d0,%d1 # shift it
  1849. bset &31,%d1 # set j-bit
  1850. mov.l %d1,FP_SRC_HI(%a6) # insert new hi(man)
  1851. andi.w &0x8000,FP_SRC_EX(%a6) # clear old exponent
  1852. ori.w &0x3f80,FP_SRC_EX(%a6) # insert new "skewed" exponent
  1853. funimp_skew_sgl_not:
  1854. rts
  1855. funimp_skew_dbl:
  1856. mov.w FP_SRC_EX(%a6),%d0 # fetch DENORM exponent
  1857. andi.w &0x7fff,%d0 # strip sign
  1858. beq.b funimp_skew_dbl_not
  1859. cmpi.w %d0,&0x3c00
  1860. bgt.b funimp_skew_dbl_not
  1861. tst.b FP_SRC_EX(%a6) # make "internal format"
  1862. smi.b 0x2+FP_SRC(%a6)
  1863. mov.w %d0,FP_SRC_EX(%a6) # insert exponent with cleared sign
  1864. clr.l %d0 # clear g,r,s
  1865. lea FP_SRC(%a6),%a0 # pass ptr to src op
  1866. mov.w &0x3c01,%d1 # pass denorm threshold
  1867. bsr.l dnrm_lp # denorm it
  1868. mov.w &0x3c00,%d0 # new exponent
  1869. tst.b 0x2+FP_SRC(%a6) # is sign set?
  1870. beq.b fss_dbl_denorm_done # no
  1871. bset &15,%d0 # set sign
  1872. fss_dbl_denorm_done:
  1873. bset &0x7,FP_SRC_HI(%a6) # set j-bit
  1874. mov.w %d0,FP_SRC_EX(%a6) # insert new exponent
  1875. funimp_skew_dbl_not:
  1876. rts
  1877. #########################################################################
  1878. global _mem_write2
  1879. _mem_write2:
  1880. btst &0x5,EXC_SR(%a6)
  1881. beq.l _dmem_write
  1882. mov.l 0x0(%a0),FP_DST_EX(%a6)
  1883. mov.l 0x4(%a0),FP_DST_HI(%a6)
  1884. mov.l 0x8(%a0),FP_DST_LO(%a6)
  1885. clr.l %d1
  1886. rts
  1887. #########################################################################
  1888. # XDEF **************************************************************** #
  1889. # _fpsp_effadd(): 060FPSP entry point for FP "Unimplemented #
  1890. # effective address" exception. #
  1891. # #
  1892. # This handler should be the first code executed upon taking the #
  1893. # FP Unimplemented Effective Address exception in an operating #
  1894. # system. #
  1895. # #
  1896. # XREF **************************************************************** #
  1897. # _imem_read_long() - read instruction longword #
  1898. # fix_skewed_ops() - adjust src operand in fsave frame #
  1899. # set_tag_x() - determine optype of src/dst operands #
  1900. # store_fpreg() - store opclass 0 or 2 result to FP regfile #
  1901. # unnorm_fix() - change UNNORM operands to NORM or ZERO #
  1902. # load_fpn2() - load dst operand from FP regfile #
# tbl_unsupp - addr of table of emulation routines for opclass 0,2 #
  1904. # decbin() - convert packed data to FP binary data #
  1905. # _real_fpu_disabled() - "callout" for "FPU disabled" exception #
  1906. # _real_access() - "callout" for access error exception #
  1907. # _mem_read() - read extended immediate operand from memory #
  1908. # _fpsp_done() - "callout" for exit; work all done #
  1909. # _real_trace() - "callout" for Trace enabled exception #
  1910. # fmovm_dynamic() - emulate dynamic fmovm instruction #
  1911. # fmovm_ctrl() - emulate fmovm control instruction #
  1912. # #
  1913. # INPUT *************************************************************** #
  1914. # - The system stack contains the "Unimplemented <ea>" stk frame #
  1915. # #
  1916. # OUTPUT ************************************************************** #
  1917. # If access error: #
  1918. # - The system stack is changed to an access error stack frame #
  1919. # If FPU disabled: #
  1920. # - The system stack is changed to an FPU disabled stack frame #
  1921. # If Trace exception enabled: #
  1922. # - The system stack is changed to a Trace exception stack frame #
  1923. # Else: (normal case) #
  1924. # - None (correct result has been stored as appropriate) #
  1925. # #
  1926. # ALGORITHM *********************************************************** #
  1927. # This exception handles 3 types of operations: #
  1928. # (1) FP Instructions using extended precision or packed immediate #
  1929. # addressing mode. #
  1930. # (2) The "fmovm.x" instruction w/ dynamic register specification. #
  1931. # (3) The "fmovm.l" instruction w/ 2 or 3 control registers. #
  1932. # #
  1933. # For immediate data operations, the data is read in w/ a #
  1934. # _mem_read() "callout", converted to FP binary (if packed), and used #
  1935. # as the source operand to the instruction specified by the instruction #
# word. If no FP exception should be reported as a result of the #
  1937. # emulation, then the result is stored to the destination register and #
  1938. # the handler exits through _fpsp_done(). If an enabled exc has been #
  1939. # signalled as a result of emulation, then an fsave state frame #
  1940. # corresponding to the FP exception type must be entered into the 060 #
  1941. # FPU before exiting. In either the enabled or disabled cases, we #
  1942. # must also check if a Trace exception is pending, in which case, we #
  1943. # must create a Trace exception stack frame from the current exception #
  1944. # stack frame. If no Trace is pending, we simply exit through #
  1945. # _fpsp_done(). #
  1946. # For "fmovm.x", call the routine fmovm_dynamic() which will #
  1947. # decode and emulate the instruction. No FP exceptions can be pending #
  1948. # as a result of this operation emulation. A Trace exception can be #
  1949. # pending, though, which means the current stack frame must be changed #
  1950. # to a Trace stack frame and an exit made through _real_trace(). #
  1951. # For the case of "fmovm.x Dn,-(a7)", where the offending instruction #
  1952. # was executed from supervisor mode, this handler must store the FP #
  1953. # register file values to the system stack by itself since #
  1954. # fmovm_dynamic() can't handle this. A normal exit is made through #
  1955. # fpsp_done(). #
  1956. # For "fmovm.l", fmovm_ctrl() is used to emulate the instruction. #
  1957. # Again, a Trace exception may be pending and an exit made through #
  1958. # _real_trace(). Else, a normal exit is made through _fpsp_done(). #
  1959. # #
  1960. # Before any of the above is attempted, it must be checked to #
  1961. # see if the FPU is disabled. Since the "Unimp <ea>" exception is taken #
  1962. # before the "FPU disabled" exception, but the "FPU disabled" exception #
  1963. # has higher priority, we check the disabled bit in the PCR. If set, #
  1964. # then we must create an 8 word "FPU disabled" exception stack frame #
  1965. # from the current 4 word exception stack frame. This includes #
  1966. # reproducing the effective address of the instruction to put on the #
  1967. # new stack frame. #
  1968. # #
  1969. # In the process of all emulation work, if a _mem_read() #
  1970. # "callout" returns a failing result indicating an access error, then #
  1971. # we must create an access error stack frame from the current stack #
  1972. # frame. This information includes a faulting address and a fault- #
  1973. # status-longword. These are created within this handler. #
  1974. # #
  1975. #########################################################################
  1976. global _fpsp_effadd
  1977. _fpsp_effadd:
  1978. # This exception type takes priority over the "Line F Emulator"
  1979. # exception. Therefore, the FPU could be disabled when entering here.
  1980. # So, we must check to see if it's disabled and handle that case separately.
  1981. mov.l %d0,-(%sp) # save d0
  1982. movc %pcr,%d0 # load proc cr
  1983. btst &0x1,%d0 # is FPU disabled?
  1984. bne.w iea_disabled # yes
  1985. mov.l (%sp)+,%d0 # restore d0
  1986. link %a6,&-LOCAL_SIZE # init stack frame
  1987. movm.l &0x0303,EXC_DREGS(%a6) # save d0-d1/a0-a1
  1988. fmovm.l %fpcr,%fpsr,%fpiar,USER_FPCR(%a6) # save ctrl regs
  1989. fmovm.x &0xc0,EXC_FPREGS(%a6) # save fp0-fp1 on stack
  1990. # PC of instruction that took the exception is the PC in the frame
  1991. mov.l EXC_PC(%a6),EXC_EXTWPTR(%a6)
  1992. mov.l EXC_EXTWPTR(%a6),%a0 # fetch instruction addr
  1993. addq.l &0x4,EXC_EXTWPTR(%a6) # incr instruction ptr
  1994. bsr.l _imem_read_long # fetch the instruction words
  1995. mov.l %d0,EXC_OPWORD(%a6) # store OPWORD and EXTWORD
  1996. #########################################################################
  1997. tst.w %d0 # is operation fmovem?
  1998. bmi.w iea_fmovm # yes
  1999. #
  2000. # here, we will have:
  2001. # fabs fdabs fsabs facos fmod
  2002. # fadd fdadd fsadd fasin frem
  2003. # fcmp fatan fscale
  2004. # fdiv fddiv fsdiv fatanh fsin
  2005. # fint fcos fsincos
  2006. # fintrz fcosh fsinh
  2007. # fmove fdmove fsmove fetox ftan
  2008. # fmul fdmul fsmul fetoxm1 ftanh
  2009. # fneg fdneg fsneg fgetexp ftentox
  2010. # fsgldiv fgetman ftwotox
  2011. # fsglmul flog10
  2012. # fsqrt flog2
  2013. # fsub fdsub fssub flogn
  2014. # ftst flognp1
  2015. # which can all use f<op>.{x,p}
  2016. # so, now it's immediate data extended precision AND PACKED FORMAT!
  2017. #
  2018. iea_op:
  2019. andi.l &0x00ff00ff,USER_FPSR(%a6)
  2020. btst &0xa,%d0 # is src fmt x or p?
  2021. bne.b iea_op_pack # packed
  2022. mov.l EXC_EXTWPTR(%a6),%a0 # pass: ptr to #<data>
  2023. lea FP_SRC(%a6),%a1 # pass: ptr to super addr
  2024. mov.l &0xc,%d0 # pass: 12 bytes
  2025. bsr.l _imem_read # read extended immediate
  2026. tst.l %d1 # did ifetch fail?
  2027. bne.w iea_iacc # yes
  2028. bra.b iea_op_setsrc
  2029. iea_op_pack:
  2030. mov.l EXC_EXTWPTR(%a6),%a0 # pass: ptr to #<data>
  2031. lea FP_SRC(%a6),%a1 # pass: ptr to super dst
  2032. mov.l &0xc,%d0 # pass: 12 bytes
  2033. bsr.l _imem_read # read packed operand
  2034. tst.l %d1 # did ifetch fail?
  2035. bne.w iea_iacc # yes
  2036. # The packed operand is an INF or a NAN if the exponent field is all ones.
  2037. bfextu FP_SRC(%a6){&1:&15},%d0 # get exp
  2038. cmpi.w %d0,&0x7fff # INF or NAN?
  2039. beq.b iea_op_setsrc # operand is an INF or NAN
  2040. # The packed operand is a zero if the mantissa is all zero, else it's
  2041. # a normal packed op.
  2042. mov.b 3+FP_SRC(%a6),%d0 # get byte 4
  2043. andi.b &0x0f,%d0 # clear all but last nybble
  2044. bne.b iea_op_gp_not_spec # not a zero
  2045. tst.l FP_SRC_HI(%a6) # is lw 2 zero?
  2046. bne.b iea_op_gp_not_spec # not a zero
  2047. tst.l FP_SRC_LO(%a6) # is lw 3 zero?
  2048. beq.b iea_op_setsrc # operand is a ZERO
  2049. iea_op_gp_not_spec:
  2050. lea FP_SRC(%a6),%a0 # pass: ptr to packed op
  2051. bsr.l decbin # convert to extended
  2052. fmovm.x &0x80,FP_SRC(%a6) # make this the srcop
  2053. iea_op_setsrc:
  2054. addi.l &0xc,EXC_EXTWPTR(%a6) # update extension word pointer
  2055. # FP_SRC now holds the src operand.
  2056. lea FP_SRC(%a6),%a0 # pass: ptr to src op
  2057. bsr.l set_tag_x # tag the operand type
  2058. mov.b %d0,STAG(%a6) # could be ANYTHING!!!
  2059. cmpi.b %d0,&UNNORM # is operand an UNNORM?
  2060. bne.b iea_op_getdst # no
  2061. bsr.l unnorm_fix # yes; convert to NORM/DENORM/ZERO
  2062. mov.b %d0,STAG(%a6) # set new optype tag
  2063. iea_op_getdst:
  2064. clr.b STORE_FLG(%a6) # clear "store result" boolean
  2065. btst &0x5,1+EXC_CMDREG(%a6) # is operation monadic or dyadic?
  2066. beq.b iea_op_extract # monadic
  2067. btst &0x4,1+EXC_CMDREG(%a6) # is operation fsincos,ftst,fcmp?
  2068. bne.b iea_op_spec # yes
  2069. iea_op_loaddst:
  2070. bfextu EXC_CMDREG(%a6){&6:&3},%d0 # fetch dst regno
  2071. bsr.l load_fpn2 # load dst operand
  2072. lea FP_DST(%a6),%a0 # pass: ptr to dst op
  2073. bsr.l set_tag_x # tag the operand type
  2074. mov.b %d0,DTAG(%a6) # could be ANYTHING!!!
  2075. cmpi.b %d0,&UNNORM # is operand an UNNORM?
  2076. bne.b iea_op_extract # no
  2077. bsr.l unnorm_fix # yes; convert to NORM/DENORM/ZERO
  2078. mov.b %d0,DTAG(%a6) # set new optype tag
  2079. bra.b iea_op_extract
  2080. # the operation is fsincos, ftst, or fcmp. only fcmp is dyadic
  2081. iea_op_spec:
  2082. btst &0x3,1+EXC_CMDREG(%a6) # is operation fsincos?
  2083. beq.b iea_op_extract # yes
  2084. # now, we're left with ftst and fcmp. so, first let's tag them so that they don't
  2085. # store a result. then, only fcmp will branch back and pick up a dst operand.
  2086. st STORE_FLG(%a6) # don't store a final result
  2087. btst &0x1,1+EXC_CMDREG(%a6) # is operation fcmp?
  2088. beq.b iea_op_loaddst # yes
  2089. iea_op_extract:
  2090. clr.l %d0
  2091. mov.b FPCR_MODE(%a6),%d0 # pass: rnd mode,prec
  2092. mov.b 1+EXC_CMDREG(%a6),%d1
  2093. andi.w &0x007f,%d1 # extract extension
  2094. fmov.l &0x0,%fpcr
  2095. fmov.l &0x0,%fpsr
  2096. lea FP_SRC(%a6),%a0
  2097. lea FP_DST(%a6),%a1
  2098. mov.l (tbl_unsupp.l,%pc,%d1.w*4),%d1 # fetch routine addr
  2099. jsr (tbl_unsupp.l,%pc,%d1.l*1)
  2100. #
  2101. # Exceptions in order of precedence:
  2102. # BSUN : none
  2103. # SNAN : all operations
  2104. # OPERR : all reg-reg or mem-reg operations that can normally operr
  2105. # OVFL : same as OPERR
  2106. # UNFL : same as OPERR
  2107. # DZ : same as OPERR
  2108. # INEX2 : same as OPERR
  2109. # INEX1 : all packed immediate operations
  2110. #
  2111. # we determine the highest priority exception(if any) set by the
  2112. # emulation routine that has also been enabled by the user.
  2113. mov.b FPCR_ENABLE(%a6),%d0 # fetch exceptions enabled
  2114. bne.b iea_op_ena # some are enabled
  2115. # now, we save the result, unless, of course, the operation was ftst or fcmp.
  2116. # these don't save results.
  2117. iea_op_save:
  2118. tst.b STORE_FLG(%a6) # does this op store a result?
  2119. bne.b iea_op_exit1 # exit with no frestore
  2120. iea_op_store:
  2121. bfextu EXC_CMDREG(%a6){&6:&3},%d0 # fetch dst regno
  2122. bsr.l store_fpreg # store the result
  2123. iea_op_exit1:
  2124. mov.l EXC_PC(%a6),USER_FPIAR(%a6) # set FPIAR to "Current PC"
  2125. mov.l EXC_EXTWPTR(%a6),EXC_PC(%a6) # set "Next PC" in exc frame
  2126. fmovm.x EXC_FPREGS(%a6),&0xc0 # restore fp0-fp1
  2127. fmovm.l USER_FPCR(%a6),%fpcr,%fpsr,%fpiar # restore ctrl regs
  2128. movm.l EXC_DREGS(%a6),&0x0303 # restore d0-d1/a0-a1
  2129. unlk %a6 # unravel the frame
  2130. btst &0x7,(%sp) # is trace on?
  2131. bne.w iea_op_trace # yes
  2132. bra.l _fpsp_done # exit to os
  2133. iea_op_ena:
  2134. and.b FPSR_EXCEPT(%a6),%d0 # keep only ones enable and set
  2135. bfffo %d0{&24:&8},%d0 # find highest priority exception
  2136. bne.b iea_op_exc # at least one was set
  2137. # no exception occurred. now, did a disabled, exact overflow occur with inexact
  2138. # enabled? if so, then we have to stuff an overflow frame into the FPU.
  2139. btst &ovfl_bit,FPSR_EXCEPT(%a6) # did overflow occur?
  2140. beq.b iea_op_save
  2141. iea_op_ovfl:
  2142. btst &inex2_bit,FPCR_ENABLE(%a6) # is inexact enabled?
  2143. beq.b iea_op_store # no
  2144. bra.b iea_op_exc_ovfl # yes
  2145. # an enabled exception occurred. we have to insert the exception type back into
  2146. # the machine.
  2147. iea_op_exc:
  2148. subi.l &24,%d0 # fix offset to be 0-8
  2149. cmpi.b %d0,&0x6 # is exception INEX?
  2150. bne.b iea_op_exc_force # no
  2151. # the enabled exception was inexact. so, if it occurs with an overflow
  2152. # or underflow that was disabled, then we have to force an overflow or
  2153. # underflow frame.
  2154. btst &ovfl_bit,FPSR_EXCEPT(%a6) # did overflow occur?
  2155. bne.b iea_op_exc_ovfl # yes
  2156. btst &unfl_bit,FPSR_EXCEPT(%a6) # did underflow occur?
  2157. bne.b iea_op_exc_unfl # yes
  2158. iea_op_exc_force:
  2159. mov.w (tbl_iea_except.b,%pc,%d0.w*2),2+FP_SRC(%a6)
  2160. bra.b iea_op_exit2 # exit with frestore
  2161. tbl_iea_except:
  2162. short 0xe002, 0xe006, 0xe004, 0xe005
  2163. short 0xe003, 0xe002, 0xe001, 0xe001
  2164. iea_op_exc_ovfl:
  2165. mov.w &0xe005,2+FP_SRC(%a6)
  2166. bra.b iea_op_exit2
  2167. iea_op_exc_unfl:
  2168. mov.w &0xe003,2+FP_SRC(%a6)
  2169. iea_op_exit2:
  2170. mov.l EXC_PC(%a6),USER_FPIAR(%a6) # set FPIAR to "Current PC"
  2171. mov.l EXC_EXTWPTR(%a6),EXC_PC(%a6) # set "Next PC" in exc frame
  2172. fmovm.x EXC_FPREGS(%a6),&0xc0 # restore fp0-fp1
  2173. fmovm.l USER_FPCR(%a6),%fpcr,%fpsr,%fpiar # restore ctrl regs
  2174. movm.l EXC_DREGS(%a6),&0x0303 # restore d0-d1/a0-a1
  2175. frestore FP_SRC(%a6) # restore exceptional state
  2176. unlk %a6 # unravel the frame
  2177. btst &0x7,(%sp) # is trace on?
  2178. bne.b iea_op_trace # yes
  2179. bra.l _fpsp_done # exit to os
  2180. #
  2181. # The opclass two instruction that took an "Unimplemented Effective Address"
  2182. # exception was being traced. Make the "current" PC the FPIAR and put it in
  2183. # the trace stack frame then jump to _real_trace().
  2184. #
  2185. # UNIMP EA FRAME TRACE FRAME
  2186. # ***************** *****************
  2187. # * 0x0 * 0x0f0 * * Current *
  2188. # ***************** * PC *
  2189. # * Current * *****************
  2190. # * PC * * 0x2 * 0x024 *
  2191. # ***************** *****************
  2192. # * SR * * Next *
  2193. # ***************** * PC *
  2194. # *****************
  2195. # * SR *
  2196. # *****************
  2197. iea_op_trace:
  2198. mov.l (%sp),-(%sp) # shift stack frame "down"
  2199. mov.w 0x8(%sp),0x4(%sp)
  2200. mov.w &0x2024,0x6(%sp) # stk fmt = 0x2; voff = 0x024
  2201. fmov.l %fpiar,0x8(%sp) # "Current PC" is in FPIAR
  2202. bra.l _real_trace
  2203. #########################################################################
#########################################################################
# fmovm took an "Unimplemented Effective Address" exception.
# Bit 14 of the instruction longword (still in d0 from the earlier
# _imem_read_long) distinguishes an fmovm of the FP data registers
# (bit set) from an fmovm of the FP control registers (bit clear).
#########################################################################
iea_fmovm:
	btst		&14,%d0			# ctrl or data reg
	beq.w		iea_fmovm_ctrl

iea_fmovm_data:
	btst		&0x5,EXC_SR(%a6)	# user or supervisor mode
	bne.b		iea_fmovm_data_s

iea_fmovm_data_u:
	mov.l		%usp,%a0
	mov.l		%a0,EXC_A7(%a6)		# store current a7
	bsr.l		fmovm_dynamic		# do dynamic fmovm
	mov.l		EXC_A7(%a6),%a0		# load possibly new a7
	mov.l		%a0,%usp		# update usp
	bra.w		iea_fmovm_exit

# supervisor mode: the "user" a7 for the emulated fmovm is really the
# system stack just above the exception frame. fmovm_dynamic reports
# -(a7)/(a7)+ addressing through SPCOND_FLG so the frame can be fixed up.
iea_fmovm_data_s:
	clr.b		SPCOND_FLG(%a6)
	lea		0x2+EXC_VOFF(%a6),%a0	# point "a7" past the exc frame
	mov.l		%a0,EXC_A7(%a6)
	bsr.l		fmovm_dynamic		# do dynamic fmovm
	cmpi.b		SPCOND_FLG(%a6),&mda7_flg # was <ea> -(a7)?
	beq.w		iea_fmovm_data_predec
	cmpi.b		SPCOND_FLG(%a6),&mia7_flg # was <ea> (a7)+?
	bne.w		iea_fmovm_exit
# right now, d0 = the size.
# the data has been fetched from the supervisor stack, but we have not
# incremented the stack pointer by the appropriate number of bytes.
# do it here: rebuild the exception frame "up" the stack by d0 bytes so
# that the bytes consumed by the (a7)+ fmovm disappear on exit.
iea_fmovm_data_postinc:
	btst		&0x7,EXC_SR(%a6)	# is trace enabled?
	bne.b		iea_fmovm_data_pi_trace

# no trace: build a 4-word format-0x0 frame (voff 0x0f0) shifted up by
# d0 bytes, with the Next PC as the return PC.
	mov.w		EXC_SR(%a6),(EXC_SR,%a6,%d0)
	mov.l		EXC_EXTWPTR(%a6),(EXC_PC,%a6,%d0)
	mov.w		&0x00f0,(EXC_VOFF,%a6,%d0)

	lea		(EXC_SR,%a6,%d0),%a0	# addr of shifted frame
	mov.l		%a0,EXC_SR(%a6)		# save it for the final sp load

	fmovm.x		EXC_FP0(%a6),&0xc0	# restore fp0-fp1
	fmovm.l		USER_FPCR(%a6),%fpcr,%fpsr,%fpiar # restore ctrl regs
	movm.l		EXC_DREGS(%a6),&0x0303	# restore d0-d1/a0-a1

	unlk		%a6
	mov.l		(%sp)+,%sp		# sp = shifted frame addr
	bra.l		_fpsp_done

# trace enabled: build a 6-word format-0x2 trace frame (voff 0x024)
# instead, with Current PC in the format-dependent slot, and exit
# through the OS trace handler.
iea_fmovm_data_pi_trace:
	mov.w		EXC_SR(%a6),(EXC_SR-0x4,%a6,%d0)
	mov.l		EXC_EXTWPTR(%a6),(EXC_PC-0x4,%a6,%d0)
	mov.w		&0x2024,(EXC_VOFF-0x4,%a6,%d0) # fmt 0x2; voff 0x024
	mov.l		EXC_PC(%a6),(EXC_VOFF+0x2-0x4,%a6,%d0)

	lea		(EXC_SR-0x4,%a6,%d0),%a0 # addr of shifted frame
	mov.l		%a0,EXC_SR(%a6)

	fmovm.x		EXC_FP0(%a6),&0xc0	# restore fp0-fp1
	fmovm.l		USER_FPCR(%a6),%fpcr,%fpsr,%fpiar # restore ctrl regs
	movm.l		EXC_DREGS(%a6),&0x0303	# restore d0-d1/a0-a1

	unlk		%a6
	mov.l		(%sp)+,%sp		# sp = shifted frame addr
	bra.l		_real_trace
# right now, d1 = size and d0 = the strg.
# the <ea> was -(a7) in supervisor mode: the FP registers must be stored
# to the system stack *below* the exception frame. we first shift the
# frame down to make room, restore the user's registers, and only then
# perform the actual fmovm.x stores (one register at a time, driven by
# the register-select string), since fmovm.x itself clobbers no more
# user state at that point.
iea_fmovm_data_predec:
	mov.b		%d1,EXC_VOFF(%a6)	# store strg
	mov.b		%d0,0x1+EXC_VOFF(%a6)	# store size

	fmovm.x		EXC_FP0(%a6),&0xc0	# restore fp0-fp1
	fmovm.l		USER_FPCR(%a6),%fpcr,%fpsr,%fpiar # restore ctrl regs
	movm.l		EXC_DREGS(%a6),&0x0303	# restore d0-d1/a0-a1

	mov.l		(%a6),-(%sp)		# make a copy of a6
	mov.l		%d0,-(%sp)		# save d0
	mov.l		%d1,-(%sp)		# save d1
	mov.l		EXC_EXTWPTR(%a6),-(%sp)	# make a copy of Next PC

	clr.l		%d0
	mov.b		0x1+EXC_VOFF(%a6),%d0	# fetch size
	neg.l		%d0			# get negative of size

# building the frame differs depending on whether trace is enabled:
# format 0x2 (voff 0x024) trace frame vs. format 0x0 (voff 0x0f0) frame.
	btst		&0x7,EXC_SR(%a6)	# is trace enabled?
	beq.b		iea_fmovm_data_p2

	mov.w		EXC_SR(%a6),(EXC_SR-0x4,%a6,%d0)
	mov.l		EXC_PC(%a6),(EXC_VOFF-0x2,%a6,%d0) # Current PC
	mov.l		(%sp)+,(EXC_PC-0x4,%a6,%d0) # Next PC
	mov.w		&0x2024,(EXC_VOFF-0x4,%a6,%d0) # fmt 0x2; voff 0x024

	pea		(%a6,%d0)		# create final sp
	bra.b		iea_fmovm_data_p3

iea_fmovm_data_p2:
	mov.w		EXC_SR(%a6),(EXC_SR,%a6,%d0)
	mov.l		(%sp)+,(EXC_PC,%a6,%d0)	# Next PC
	mov.w		&0x00f0,(EXC_VOFF,%a6,%d0) # fmt 0x0; voff 0x0f0

	pea		(0x4,%a6,%d0)		# create final sp

# store each selected FP register in turn. d1 = register-select string
# (fp0 in the msb); each set bit stores one 12-byte extended operand
# just below the shifted frame and advances the store offset in d0.
iea_fmovm_data_p3:
	clr.l		%d1
	mov.b		EXC_VOFF(%a6),%d1	# fetch strg

	tst.b		%d1			# fp0 selected?
	bpl.b		fm_1
	fmovm.x		&0x80,(0x4+0x8,%a6,%d0)
	addi.l		&0xc,%d0
fm_1:
	lsl.b		&0x1,%d1		# fp1 selected?
	bpl.b		fm_2
	fmovm.x		&0x40,(0x4+0x8,%a6,%d0)
	addi.l		&0xc,%d0
fm_2:
	lsl.b		&0x1,%d1		# fp2 selected?
	bpl.b		fm_3
	fmovm.x		&0x20,(0x4+0x8,%a6,%d0)
	addi.l		&0xc,%d0
fm_3:
	lsl.b		&0x1,%d1		# fp3 selected?
	bpl.b		fm_4
	fmovm.x		&0x10,(0x4+0x8,%a6,%d0)
	addi.l		&0xc,%d0
fm_4:
	lsl.b		&0x1,%d1		# fp4 selected?
	bpl.b		fm_5
	fmovm.x		&0x08,(0x4+0x8,%a6,%d0)
	addi.l		&0xc,%d0
fm_5:
	lsl.b		&0x1,%d1		# fp5 selected?
	bpl.b		fm_6
	fmovm.x		&0x04,(0x4+0x8,%a6,%d0)
	addi.l		&0xc,%d0
fm_6:
	lsl.b		&0x1,%d1		# fp6 selected?
	bpl.b		fm_7
	fmovm.x		&0x02,(0x4+0x8,%a6,%d0)
	addi.l		&0xc,%d0
fm_7:
	lsl.b		&0x1,%d1		# fp7 selected?
	bpl.b		fm_end
	fmovm.x		&0x01,(0x4+0x8,%a6,%d0)
fm_end:
	mov.l		0x4(%sp),%d1		# restore d1
	mov.l		0x8(%sp),%d0		# restore d0
	mov.l		0xc(%sp),%a6		# restore a6
	mov.l		(%sp)+,%sp		# sp = final frame addr

	btst		&0x7,(%sp)		# is trace enabled?
	beq.l		_fpsp_done
	bra.l		_real_trace
  2333. #########################################################################
  2334. iea_fmovm_ctrl:
  2335. bsr.l fmovm_ctrl # load ctrl regs
  2336. iea_fmovm_exit:
  2337. fmovm.x EXC_FPREGS(%a6),&0xc0 # restore fp0-fp1
  2338. fmovm.l USER_FPCR(%a6),%fpcr,%fpsr,%fpiar # restore ctrl regs
  2339. movm.l EXC_DREGS(%a6),&0x0303 # restore d0-d1/a0-a1
  2340. btst &0x7,EXC_SR(%a6) # is trace on?
  2341. bne.b iea_fmovm_trace # yes
  2342. mov.l EXC_EXTWPTR(%a6),EXC_PC(%a6) # set Next PC
  2343. unlk %a6 # unravel the frame
  2344. bra.l _fpsp_done # exit to os
  2345. #
  2346. # The control reg instruction that took an "Unimplemented Effective Address"
  2347. # exception was being traced. The "Current PC" for the trace frame is the
  2348. # PC stacked for Unimp EA. The "Next PC" is in EXC_EXTWPTR.
  2349. # After fixing the stack frame, jump to _real_trace().
  2350. #
  2351. # UNIMP EA FRAME TRACE FRAME
  2352. # ***************** *****************
  2353. # * 0x0 * 0x0f0 * * Current *
  2354. # ***************** * PC *
  2355. # * Current * *****************
  2356. # * PC * * 0x2 * 0x024 *
  2357. # ***************** *****************
  2358. # * SR * * Next *
  2359. # ***************** * PC *
  2360. # *****************
  2361. # * SR *
  2362. # *****************
  2363. # this ain't a pretty solution, but it works:
  2364. # -restore a6 (not with unlk)
  2365. # -shift stack frame down over where old a6 used to be
  2366. # -add LOCAL_SIZE to stack pointer
  2367. iea_fmovm_trace:
  2368. mov.l (%a6),%a6 # restore frame pointer
  2369. mov.w EXC_SR+LOCAL_SIZE(%sp),0x0+LOCAL_SIZE(%sp)
  2370. mov.l EXC_PC+LOCAL_SIZE(%sp),0x8+LOCAL_SIZE(%sp)
  2371. mov.l EXC_EXTWPTR+LOCAL_SIZE(%sp),0x2+LOCAL_SIZE(%sp)
  2372. mov.w &0x2024,0x6+LOCAL_SIZE(%sp) # stk fmt = 0x2; voff = 0x024
  2373. add.l &LOCAL_SIZE,%sp # clear stack frame
  2374. bra.l _real_trace
  2375. #########################################################################
  2376. # The FPU is disabled and so we should really have taken the "Line
  2377. # F Emulator" exception. So, here we create an 8-word stack frame
  2378. # from our 4-word stack frame. This means we must calculate the length
  2379. # the faulting instruction to get the "next PC". This is trivial for
  2380. # immediate operands but requires some extra work for fmovm dynamic
  2381. # which can use most addressing modes.
  2382. iea_disabled:
  2383. mov.l (%sp)+,%d0 # restore d0
  2384. link %a6,&-LOCAL_SIZE # init stack frame
  2385. movm.l &0x0303,EXC_DREGS(%a6) # save d0-d1/a0-a1
  2386. # PC of instruction that took the exception is the PC in the frame
  2387. mov.l EXC_PC(%a6),EXC_EXTWPTR(%a6)
  2388. mov.l EXC_EXTWPTR(%a6),%a0 # fetch instruction addr
  2389. addq.l &0x4,EXC_EXTWPTR(%a6) # incr instruction ptr
  2390. bsr.l _imem_read_long # fetch the instruction words
  2391. mov.l %d0,EXC_OPWORD(%a6) # store OPWORD and EXTWORD
  2392. tst.w %d0 # is instr fmovm?
  2393. bmi.b iea_dis_fmovm # yes
  2394. # instruction is using an extended precision immediate operand. therefore,
  2395. # the total instruction length is 16 bytes.
  2396. iea_dis_immed:
  2397. mov.l &0x10,%d0 # 16 bytes of instruction
  2398. bra.b iea_dis_cont
  2399. iea_dis_fmovm:
  2400. btst &0xe,%d0 # is instr fmovm ctrl
  2401. bne.b iea_dis_fmovm_data # no
  2402. # the instruction is a fmovm.l with 2 or 3 registers.
  2403. bfextu %d0{&19:&3},%d1
  2404. mov.l &0xc,%d0
  2405. cmpi.b %d1,&0x7 # move all regs?
  2406. bne.b iea_dis_cont
  2407. addq.l &0x4,%d0
  2408. bra.b iea_dis_cont
  2409. # the instruction is an fmovm.x dynamic which can use many addressing
  2410. # modes and thus can have several different total instruction lengths.
  2411. # call fmovm_calc_ea which will go through the ea calc process and,
  2412. # as a by-product, will tell us how long the instruction is.
  2413. iea_dis_fmovm_data:
  2414. clr.l %d0
  2415. bsr.l fmovm_calc_ea
  2416. mov.l EXC_EXTWPTR(%a6),%d0
  2417. sub.l EXC_PC(%a6),%d0
  2418. iea_dis_cont:
  2419. mov.w %d0,EXC_VOFF(%a6) # store stack shift value
  2420. movm.l EXC_DREGS(%a6),&0x0303 # restore d0-d1/a0-a1
  2421. unlk %a6
  2422. # here, we actually create the 8-word frame from the 4-word frame,
  2423. # with the "next PC" as additional info.
  2424. # the <ea> field is let as undefined.
  2425. subq.l &0x8,%sp # make room for new stack
  2426. mov.l %d0,-(%sp) # save d0
  2427. mov.w 0xc(%sp),0x4(%sp) # move SR
  2428. mov.l 0xe(%sp),0x6(%sp) # move Current PC
  2429. clr.l %d0
  2430. mov.w 0x12(%sp),%d0
  2431. mov.l 0x6(%sp),0x10(%sp) # move Current PC
  2432. add.l %d0,0x6(%sp) # make Next PC
  2433. mov.w &0x402c,0xa(%sp) # insert offset,frame format
  2434. mov.l (%sp)+,%d0 # restore d0
  2435. bra.l _real_fpu_disabled
  2436. ##########
  2437. iea_iacc:
  2438. movc %pcr,%d0
  2439. btst &0x1,%d0
  2440. bne.b iea_iacc_cont
  2441. fmovm.l USER_FPCR(%a6),%fpcr,%fpsr,%fpiar # restore ctrl regs
  2442. fmovm.x EXC_FPREGS(%a6),&0xc0 # restore fp0-fp1 on stack
  2443. iea_iacc_cont:
  2444. movm.l EXC_DREGS(%a6),&0x0303 # restore d0-d1/a0-a1
  2445. unlk %a6
  2446. subq.w &0x8,%sp # make stack frame bigger
  2447. mov.l 0x8(%sp),(%sp) # store SR,hi(PC)
  2448. mov.w 0xc(%sp),0x4(%sp) # store lo(PC)
  2449. mov.w &0x4008,0x6(%sp) # store voff
  2450. mov.l 0x2(%sp),0x8(%sp) # store ea
  2451. mov.l &0x09428001,0xc(%sp) # store fslw
  2452. iea_acc_done:
  2453. btst &0x5,(%sp) # user or supervisor mode?
  2454. beq.b iea_acc_done2 # user
  2455. bset &0x2,0xd(%sp) # set supervisor TM bit
  2456. iea_acc_done2:
  2457. bra.l _real_access
# a data access during the emulated fmovm failed: build an access-error
# frame (voff 0x008) in place of the current one. a0 = faulting <ea>,
# d0 = fault status; both are stored into the new frame before sharing
# the common exit at iea_acc_done above.
iea_dacc:
	lea		-LOCAL_SIZE(%a6),%sp	# sp -> start of save area

	movc		%pcr,%d1
	btst		&0x1,%d1
	bne.b		iea_dacc_cont
	fmovm.x		EXC_FPREGS(%a6),&0xc0	# restore fp0-fp1 on stack
	fmovm.l		LOCAL_SIZE+USER_FPCR(%sp),%fpcr,%fpsr,%fpiar # restore ctrl regs

iea_dacc_cont:
	mov.l		(%a6),%a6		# restore frame pointer

	mov.l		0x4+LOCAL_SIZE(%sp),-0x8+0x4+LOCAL_SIZE(%sp) # SR,hi(PC)
	mov.w		0x8+LOCAL_SIZE(%sp),-0x8+0x8+LOCAL_SIZE(%sp) # lo(PC)
	mov.w		&0x4008,-0x8+0xa+LOCAL_SIZE(%sp) # voff
	mov.l		%a0,-0x8+0xc+LOCAL_SIZE(%sp)	# faulting <ea>
	mov.w		%d0,-0x8+0x10+LOCAL_SIZE(%sp)	# fault status
	mov.w		&0x0001,-0x8+0x12+LOCAL_SIZE(%sp)

	movm.l		LOCAL_SIZE+EXC_DREGS(%sp),&0x0303 # restore d0-d1/a0-a1

	add.w		&LOCAL_SIZE-0x4,%sp	# sp -> new frame

	bra.b		iea_acc_done
  2476. #########################################################################
  2477. # XDEF **************************************************************** #
  2478. # _fpsp_operr(): 060FPSP entry point for FP Operr exception. #
  2479. # #
  2480. # This handler should be the first code executed upon taking the #
  2481. # FP Operand Error exception in an operating system. #
  2482. # #
  2483. # XREF **************************************************************** #
  2484. # _imem_read_long() - read instruction longword #
  2485. # fix_skewed_ops() - adjust src operand in fsave frame #
  2486. # _real_operr() - "callout" to operating system operr handler #
  2487. # _dmem_write_{byte,word,long}() - store data to mem (opclass 3) #
  2488. # store_dreg_{b,w,l}() - store data to data regfile (opclass 3) #
  2489. # facc_out_{b,w,l}() - store to memory took access error (opcl 3) #
  2490. # #
  2491. # INPUT *************************************************************** #
  2492. # - The system stack contains the FP Operr exception frame #
  2493. # - The fsave frame contains the source operand #
  2494. # #
  2495. # OUTPUT ************************************************************** #
  2496. # No access error: #
  2497. # - The system stack is unchanged #
  2498. # - The fsave frame contains the adjusted src op for opclass 0,2 #
  2499. # #
  2500. # ALGORITHM *********************************************************** #
  2501. # In a system where the FP Operr exception is enabled, the goal #
  2502. # is to get to the handler specified at _real_operr(). But, on the 060, #
  2503. # for opclass zero and two instruction taking this exception, the #
  2504. # input operand in the fsave frame may be incorrect for some cases #
  2505. # and needs to be corrected. This handler calls fix_skewed_ops() to #
  2506. # do just this and then exits through _real_operr(). #
  2507. # For opclass 3 instructions, the 060 doesn't store the default #
  2508. # operr result out to memory or data register file as it should. #
  2509. # This code must emulate the move out before finally exiting through #
  2510. # _real_operr(). The move out, if to memory, is performed using #
  2511. # _mem_write() "callout" routines that may return a failing result. #
  2512. # In this special case, the handler must exit through facc_out() #
  2513. # which creates an access error stack frame from the current operr #
  2514. # stack frame. #
  2515. # #
  2516. #########################################################################
global		_fpsp_operr
# Entry point for the FP Operand Error exception (see header above):
# unskew the src operand for opclass 0/2, or emulate the default move-out
# result for opclass 3, then exit through _real_operr().
_fpsp_operr:
	link.w		%a6,&-LOCAL_SIZE	# init stack frame

	fsave		FP_SRC(%a6)		# grab the "busy" frame

	movm.l		&0x0303,EXC_DREGS(%a6)	# save d0-d1/a0-a1
	fmovm.l		%fpcr,%fpsr,%fpiar,USER_FPCR(%a6) # save ctrl regs
	fmovm.x		&0xc0,EXC_FPREGS(%a6)	# save fp0-fp1 on stack

# the FPIAR holds the "current PC" of the faulting instruction
	mov.l		USER_FPIAR(%a6),EXC_EXTWPTR(%a6)
	mov.l		EXC_EXTWPTR(%a6),%a0	# fetch instruction addr
	addq.l		&0x4,EXC_EXTWPTR(%a6)	# incr instruction ptr
	bsr.l		_imem_read_long		# fetch the instruction words
	mov.l		%d0,EXC_OPWORD(%a6)

##############################################################################

	btst		&13,%d0			# is instr an fmove out?
	bne.b		foperr_out		# fmove out

# here, we simply see if the operand in the fsave frame needs to be "unskewed".
# this would be the case for opclass two operations with a source infinity or
# denorm operand in the sgl or dbl format. NANs also become skewed, but can't
# cause an operr so we don't need to check for them here.
	lea		FP_SRC(%a6),%a0		# pass: ptr to src op
	bsr.l		fix_skewed_ops		# fix src op

foperr_exit:
	fmovm.x		EXC_FPREGS(%a6),&0xc0	# restore fp0-fp1
	fmovm.l		USER_FPCR(%a6),%fpcr,%fpsr,%fpiar # restore ctrl regs
	movm.l		EXC_DREGS(%a6),&0x0303	# restore d0-d1/a0-a1

	frestore	FP_SRC(%a6)

	unlk		%a6
	bra.l		_real_operr

########################################################################

#
# the hardware does not save the default result to memory on enabled
# operand error exceptions. we do this here before passing control to
# the user operand error handler.
#
# byte, word, and long destination format operations can pass
# through here. we simply need to test the sign of the src
# operand and save the appropriate minimum or maximum integer value
# to the effective address as pointed to by the stacked effective address.
#
# although packed opclass three operations can take operand error
# exceptions, they won't pass through here since they are caught
# first by the unsupported data format exception handler. that handler
# sends them directly to _real_operr() if necessary.
#
foperr_out:

# determine the default result: for a NAN source, the NAN's mantissa;
# otherwise 0x7fffffff (positive src) or 0x80000000 (negative src),
# truncated below to the destination size.
	mov.w		FP_SRC_EX(%a6),%d1	# fetch exponent
	andi.w		&0x7fff,%d1
	cmpi.w		%d1,&0x7fff
	bne.b		foperr_out_not_qnan

# the operand is either an infinity or a QNAN.
	tst.l		FP_SRC_LO(%a6)
	bne.b		foperr_out_qnan
	mov.l		FP_SRC_HI(%a6),%d1	# isolate mantissa
	andi.l		&0x7fffffff,%d1		# clear explicit integer bit
	beq.b		foperr_out_not_qnan	# mantissa zero => infinity

foperr_out_qnan:
	mov.l		FP_SRC_HI(%a6),L_SCR1(%a6)
	bra.b		foperr_out_jmp

foperr_out_not_qnan:
	mov.l		&0x7fffffff,%d1
	tst.b		FP_SRC_EX(%a6)		# src negative?
	bpl.b		foperr_out_not_qnan2
	addq.l		&0x1,%d1		# 0x7fffffff -> 0x80000000
foperr_out_not_qnan2:
	mov.l		%d1,L_SCR1(%a6)

foperr_out_jmp:
	bfextu		%d0{&19:&3},%d0		# extract dst format field
	mov.b		1+EXC_OPWORD(%a6),%d1	# extract <ea> mode,reg
	mov.w		(tbl_operr.b,%pc,%d0.w*2),%a0
	jmp		(tbl_operr.b,%pc,%a0)

tbl_operr:
	short		foperr_out_l - tbl_operr # long word integer
	short		tbl_operr - tbl_operr	# sgl prec shouldn't happen
	short		tbl_operr - tbl_operr	# ext prec shouldn't happen
	short		foperr_exit - tbl_operr	# packed won't enter here
	short		foperr_out_w - tbl_operr # word integer
	short		tbl_operr - tbl_operr	# dbl prec shouldn't happen
	short		foperr_out_b - tbl_operr # byte integer
	short		tbl_operr - tbl_operr	# packed won't enter here

foperr_out_b:
	mov.b		L_SCR1(%a6),%d0		# load positive default result
	cmpi.b		%d1,&0x7		# is <ea> mode a data reg?
	ble.b		foperr_out_b_save_dn	# yes
	mov.l		EXC_EA(%a6),%a0		# pass: <ea> of default result
	bsr.l		_dmem_write_byte	# write the default result

	tst.l		%d1			# did dstore fail?
	bne.l		facc_out_b		# yes

	bra.w		foperr_exit
foperr_out_b_save_dn:
	andi.w		&0x0007,%d1		# keep Dn number only
	bsr.l		store_dreg_b		# store result to regfile
	bra.w		foperr_exit

foperr_out_w:
	mov.w		L_SCR1(%a6),%d0		# load positive default result
	cmpi.b		%d1,&0x7		# is <ea> mode a data reg?
	ble.b		foperr_out_w_save_dn	# yes
	mov.l		EXC_EA(%a6),%a0		# pass: <ea> of default result
	bsr.l		_dmem_write_word	# write the default result

	tst.l		%d1			# did dstore fail?
	bne.l		facc_out_w		# yes

	bra.w		foperr_exit
foperr_out_w_save_dn:
	andi.w		&0x0007,%d1		# keep Dn number only
	bsr.l		store_dreg_w		# store result to regfile
	bra.w		foperr_exit

foperr_out_l:
	mov.l		L_SCR1(%a6),%d0		# load positive default result
	cmpi.b		%d1,&0x7		# is <ea> mode a data reg?
	ble.b		foperr_out_l_save_dn	# yes
	mov.l		EXC_EA(%a6),%a0		# pass: <ea> of default result
	bsr.l		_dmem_write_long	# write the default result

	tst.l		%d1			# did dstore fail?
	bne.l		facc_out_l		# yes

	bra.w		foperr_exit
foperr_out_l_save_dn:
	andi.w		&0x0007,%d1		# keep Dn number only
	bsr.l		store_dreg_l		# store result to regfile
	bra.w		foperr_exit
  2636. #########################################################################
  2637. # XDEF **************************************************************** #
  2638. # _fpsp_snan(): 060FPSP entry point for FP SNAN exception. #
  2639. # #
  2640. # This handler should be the first code executed upon taking the #
  2641. # FP Signalling NAN exception in an operating system. #
  2642. # #
  2643. # XREF **************************************************************** #
  2644. # _imem_read_long() - read instruction longword #
  2645. # fix_skewed_ops() - adjust src operand in fsave frame #
  2646. # _real_snan() - "callout" to operating system SNAN handler #
  2647. # _dmem_write_{byte,word,long}() - store data to mem (opclass 3) #
  2648. # store_dreg_{b,w,l}() - store data to data regfile (opclass 3) #
  2649. # facc_out_{b,w,l,d,x}() - store to mem took acc error (opcl 3) #
  2650. # _calc_ea_fout() - fix An if <ea> is -() or ()+; also get <ea> #
  2651. # #
  2652. # INPUT *************************************************************** #
  2653. # - The system stack contains the FP SNAN exception frame #
  2654. # - The fsave frame contains the source operand #
  2655. # #
  2656. # OUTPUT ************************************************************** #
  2657. # No access error: #
  2658. # - The system stack is unchanged #
  2659. # - The fsave frame contains the adjusted src op for opclass 0,2 #
  2660. # #
  2661. # ALGORITHM *********************************************************** #
  2662. # In a system where the FP SNAN exception is enabled, the goal #
  2663. # is to get to the handler specified at _real_snan(). But, on the 060, #
  2664. # for opclass zero and two instructions taking this exception, the #
  2665. # input operand in the fsave frame may be incorrect for some cases #
  2666. # and needs to be corrected. This handler calls fix_skewed_ops() to #
  2667. # do just this and then exits through _real_snan(). #
  2668. # For opclass 3 instructions, the 060 doesn't store the default #
  2669. # SNAN result out to memory or data register file as it should. #
  2670. # This code must emulate the move out before finally exiting through #
  2671. # _real_snan(). The move out, if to memory, is performed using #
  2672. # _mem_write() "callout" routines that may return a failing result. #
  2673. # In this special case, the handler must exit through facc_out() #
  2674. # which creates an access error stack frame from the current SNAN #
  2675. # stack frame. #
  2676. # For the case of an extended precision opclass 3 instruction, #
  2677. # if the effective addressing mode was -() or ()+, then the address #
  2678. # register must get updated by calling _calc_ea_fout(). If the <ea> #
  2679. # was -(a7) from supervisor mode, then the exception frame currently #
  2680. # on the system stack must be carefully moved "down" to make room #
  2681. # for the operand being moved. #
  2682. # #
  2683. #########################################################################
global		_fpsp_snan
# Entry point for the FP Signalling NAN exception (see header above):
# unskew the src operand for opclass 0/2, or emulate the default move-out
# result for opclass 3, then exit through _real_snan().
_fpsp_snan:
	link.w		%a6,&-LOCAL_SIZE	# init stack frame

	fsave		FP_SRC(%a6)		# grab the "busy" frame

	movm.l		&0x0303,EXC_DREGS(%a6)	# save d0-d1/a0-a1
	fmovm.l		%fpcr,%fpsr,%fpiar,USER_FPCR(%a6) # save ctrl regs
	fmovm.x		&0xc0,EXC_FPREGS(%a6)	# save fp0-fp1 on stack

# the FPIAR holds the "current PC" of the faulting instruction
	mov.l		USER_FPIAR(%a6),EXC_EXTWPTR(%a6)
	mov.l		EXC_EXTWPTR(%a6),%a0	# fetch instruction addr
	addq.l		&0x4,EXC_EXTWPTR(%a6)	# incr instruction ptr
	bsr.l		_imem_read_long		# fetch the instruction words
	mov.l		%d0,EXC_OPWORD(%a6)

##############################################################################

	btst		&13,%d0			# is instr an fmove out?
	bne.w		fsnan_out		# fmove out

# here, we simply see if the operand in the fsave frame needs to be "unskewed".
# this would be the case for opclass two operations with a source infinity or
# denorm operand in the sgl or dbl format. NANs also become skewed and must be
# fixed here.
	lea		FP_SRC(%a6),%a0		# pass: ptr to src op
	bsr.l		fix_skewed_ops		# fix src op

fsnan_exit:
	fmovm.x		EXC_FPREGS(%a6),&0xc0	# restore fp0-fp1
	fmovm.l		USER_FPCR(%a6),%fpcr,%fpsr,%fpiar # restore ctrl regs
	movm.l		EXC_DREGS(%a6),&0x0303	# restore d0-d1/a0-a1

	frestore	FP_SRC(%a6)

	unlk		%a6
	bra.l		_real_snan

########################################################################

#
# the hardware does not save the default result to memory on enabled
# snan exceptions. we do this here before passing control to
# the user snan handler.
#
# byte, word, long, and packed destination format operations can pass
# through here. since packed format operations already were handled by
# fpsp_unsupp(), then we need to do nothing else for them here.
# for byte, word, and long, we simply need to test the sign of the src
# operand and save the appropriate minimum or maximum integer value
# to the effective address as pointed to by the stacked effective address.
#
fsnan_out:
	bfextu		%d0{&19:&3},%d0		# extract dst format field
	mov.b		1+EXC_OPWORD(%a6),%d1	# extract <ea> mode,reg
	mov.w		(tbl_snan.b,%pc,%d0.w*2),%a0
	jmp		(tbl_snan.b,%pc,%a0)

tbl_snan:
	short		fsnan_out_l - tbl_snan	# long word integer
	short		fsnan_out_s - tbl_snan	# sgl prec
	short		fsnan_out_x - tbl_snan	# ext prec
	short		tbl_snan - tbl_snan	# packed needs no help
	short		fsnan_out_w - tbl_snan	# word integer
	short		fsnan_out_d - tbl_snan	# dbl prec
	short		fsnan_out_b - tbl_snan	# byte integer
	short		tbl_snan - tbl_snan	# packed needs no help

# for the integer formats, the default result is the upper bits of the
# SNAN mantissa with the SNAN bit (msb-1 of the mantissa) forced set,
# i.e. the SNAN converted to a QNAN of the destination width.
fsnan_out_b:
	mov.b		FP_SRC_HI(%a6),%d0	# load upper byte of SNAN
	bset		&6,%d0			# set SNAN bit
	cmpi.b		%d1,&0x7		# is <ea> mode a data reg?
	ble.b		fsnan_out_b_dn		# yes
	mov.l		EXC_EA(%a6),%a0		# pass: <ea> of default result
	bsr.l		_dmem_write_byte	# write the default result

	tst.l		%d1			# did dstore fail?
	bne.l		facc_out_b		# yes

	bra.w		fsnan_exit
fsnan_out_b_dn:
	andi.w		&0x0007,%d1		# keep Dn number only
	bsr.l		store_dreg_b		# store result to regfile
	bra.w		fsnan_exit

fsnan_out_w:
	mov.w		FP_SRC_HI(%a6),%d0	# load upper word of SNAN
	bset		&14,%d0			# set SNAN bit
	cmpi.b		%d1,&0x7		# is <ea> mode a data reg?
	ble.b		fsnan_out_w_dn		# yes
	mov.l		EXC_EA(%a6),%a0		# pass: <ea> of default result
	bsr.l		_dmem_write_word	# write the default result

	tst.l		%d1			# did dstore fail?
	bne.l		facc_out_w		# yes

	bra.w		fsnan_exit
fsnan_out_w_dn:
	andi.w		&0x0007,%d1		# keep Dn number only
	bsr.l		store_dreg_w		# store result to regfile
	bra.w		fsnan_exit

fsnan_out_l:
	mov.l		FP_SRC_HI(%a6),%d0	# load upper longword of SNAN
	bset		&30,%d0			# set SNAN bit
	cmpi.b		%d1,&0x7		# is <ea> mode a data reg?
	ble.b		fsnan_out_l_dn		# yes
	mov.l		EXC_EA(%a6),%a0		# pass: <ea> of default result
	bsr.l		_dmem_write_long	# write the default result

	tst.l		%d1			# did dstore fail?
	bne.l		facc_out_l		# yes

	bra.w		fsnan_exit
fsnan_out_l_dn:
	andi.w		&0x0007,%d1		# keep Dn number only
	bsr.l		store_dreg_l		# store result to regfile
	bra.w		fsnan_exit

# single precision: rebuild the SNAN as a sgl-format QNAN
# (sign | 0x7fc00000 | mantissa>>8) and move it out.
fsnan_out_s:
	cmpi.b		%d1,&0x7		# is <ea> mode a data reg?
	ble.b		fsnan_out_d_dn		# yes
	mov.l		FP_SRC_EX(%a6),%d0	# fetch SNAN sign
	andi.l		&0x80000000,%d0		# keep sign
	ori.l		&0x7fc00000,%d0		# insert new exponent,SNAN bit
	mov.l		FP_SRC_HI(%a6),%d1	# load mantissa
	lsr.l		&0x8,%d1		# shift mantissa for sgl
	or.l		%d1,%d0			# create sgl SNAN
	mov.l		EXC_EA(%a6),%a0		# pass: <ea> of default result
	bsr.l		_dmem_write_long	# write the default result

	tst.l		%d1			# did dstore fail?
	bne.l		facc_out_l		# yes

	bra.w		fsnan_exit
fsnan_out_d_dn:
	mov.l		FP_SRC_EX(%a6),%d0	# fetch SNAN sign
	andi.l		&0x80000000,%d0		# keep sign
	ori.l		&0x7fc00000,%d0		# insert new exponent,SNAN bit
	mov.l		%d1,-(%sp)		# d1 still holds <ea> mode,reg
	mov.l		FP_SRC_HI(%a6),%d1	# load mantissa
	lsr.l		&0x8,%d1		# shift mantissa for sgl
	or.l		%d1,%d0			# create sgl SNAN
	mov.l		(%sp)+,%d1
	andi.w		&0x0007,%d1		# keep Dn number only
	bsr.l		store_dreg_l		# store result to regfile
	bra.w		fsnan_exit
  2808. fsnan_out_d:
  2809. mov.l FP_SRC_EX(%a6),%d0 # fetch SNAN sign
  2810. andi.l &0x80000000,%d0 # keep sign
  2811. ori.l &0x7ff80000,%d0 # insert new exponent,SNAN bit
  2812. mov.l FP_SRC_HI(%a6),%d1 # load hi mantissa
  2813. mov.l %d0,FP_SCR0_EX(%a6) # store to temp space
  2814. mov.l &11,%d0 # load shift amt
  2815. lsr.l %d0,%d1
  2816. or.l %d1,FP_SCR0_EX(%a6) # create dbl hi
  2817. mov.l FP_SRC_HI(%a6),%d1 # load hi mantissa
  2818. andi.l &0x000007ff,%d1
  2819. ror.l %d0,%d1
  2820. mov.l %d1,FP_SCR0_HI(%a6) # store to temp space
  2821. mov.l FP_SRC_LO(%a6),%d1 # load lo mantissa
  2822. lsr.l %d0,%d1
  2823. or.l %d1,FP_SCR0_HI(%a6) # create dbl lo
  2824. lea FP_SCR0(%a6),%a0 # pass: ptr to operand
  2825. mov.l EXC_EA(%a6),%a1 # pass: dst addr
  2826. movq.l &0x8,%d0 # pass: size of 8 bytes
  2827. bsr.l _dmem_write # write the default result
  2828. tst.l %d1 # did dstore fail?
  2829. bne.l facc_out_d # yes
  2830. bra.w fsnan_exit
  2831. # for extended precision, if the addressing mode is pre-decrement or
  2832. # post-increment, then the address register did not get updated.
  2833. # in addition, for pre-decrement, the stacked <ea> is incorrect.
  2834. fsnan_out_x:
  2835. clr.b SPCOND_FLG(%a6) # clear special case flag
  2836. mov.w FP_SRC_EX(%a6),FP_SCR0_EX(%a6)
  2837. clr.w 2+FP_SCR0(%a6)
  2838. mov.l FP_SRC_HI(%a6),%d0
  2839. bset &30,%d0
  2840. mov.l %d0,FP_SCR0_HI(%a6)
  2841. mov.l FP_SRC_LO(%a6),FP_SCR0_LO(%a6)
  2842. btst &0x5,EXC_SR(%a6) # supervisor mode exception?
  2843. bne.b fsnan_out_x_s # yes
  2844. mov.l %usp,%a0 # fetch user stack pointer
  2845. mov.l %a0,EXC_A7(%a6) # save on stack for calc_ea()
  2846. mov.l (%a6),EXC_A6(%a6)
  2847. bsr.l _calc_ea_fout # find the correct ea,update An
  2848. mov.l %a0,%a1
  2849. mov.l %a0,EXC_EA(%a6) # stack correct <ea>
  2850. mov.l EXC_A7(%a6),%a0
  2851. mov.l %a0,%usp # restore user stack pointer
  2852. mov.l EXC_A6(%a6),(%a6)
  2853. fsnan_out_x_save:
  2854. lea FP_SCR0(%a6),%a0 # pass: ptr to operand
  2855. movq.l &0xc,%d0 # pass: size of extended
  2856. bsr.l _dmem_write # write the default result
  2857. tst.l %d1 # did dstore fail?
  2858. bne.l facc_out_x # yes
  2859. bra.w fsnan_exit
  2860. fsnan_out_x_s:
  2861. mov.l (%a6),EXC_A6(%a6)
  2862. bsr.l _calc_ea_fout # find the correct ea,update An
  2863. mov.l %a0,%a1
  2864. mov.l %a0,EXC_EA(%a6) # stack correct <ea>
  2865. mov.l EXC_A6(%a6),(%a6)
  2866. cmpi.b SPCOND_FLG(%a6),&mda7_flg # is <ea> mode -(a7)?
  2867. bne.b fsnan_out_x_save # no
  2868. # the operation was "fmove.x SNAN,-(a7)" from supervisor mode.
  2869. fmovm.x EXC_FPREGS(%a6),&0xc0 # restore fp0-fp1
  2870. fmovm.l USER_FPCR(%a6),%fpcr,%fpsr,%fpiar # restore ctrl regs
  2871. movm.l EXC_DREGS(%a6),&0x0303 # restore d0-d1/a0-a1
  2872. frestore FP_SRC(%a6)
  2873. mov.l EXC_A6(%a6),%a6 # restore frame pointer
  2874. mov.l LOCAL_SIZE+EXC_SR(%sp),LOCAL_SIZE+EXC_SR-0xc(%sp)
  2875. mov.l LOCAL_SIZE+EXC_PC+0x2(%sp),LOCAL_SIZE+EXC_PC+0x2-0xc(%sp)
  2876. mov.l LOCAL_SIZE+EXC_EA(%sp),LOCAL_SIZE+EXC_EA-0xc(%sp)
  2877. mov.l LOCAL_SIZE+FP_SCR0_EX(%sp),LOCAL_SIZE+EXC_SR(%sp)
  2878. mov.l LOCAL_SIZE+FP_SCR0_HI(%sp),LOCAL_SIZE+EXC_PC+0x2(%sp)
  2879. mov.l LOCAL_SIZE+FP_SCR0_LO(%sp),LOCAL_SIZE+EXC_EA(%sp)
  2880. add.l &LOCAL_SIZE-0x8,%sp
  2881. bra.l _real_snan
  2882. #########################################################################
  2883. # XDEF **************************************************************** #
  2884. # _fpsp_inex(): 060FPSP entry point for FP Inexact exception. #
  2885. # #
  2886. # This handler should be the first code executed upon taking the #
  2887. # FP Inexact exception in an operating system. #
  2888. # #
  2889. # XREF **************************************************************** #
  2890. # _imem_read_long() - read instruction longword #
  2891. # fix_skewed_ops() - adjust src operand in fsave frame #
  2892. # set_tag_x() - determine optype of src/dst operands #
  2893. # store_fpreg() - store opclass 0 or 2 result to FP regfile #
  2894. # unnorm_fix() - change UNNORM operands to NORM or ZERO #
  2895. # load_fpn2() - load dst operand from FP regfile #
  2896. # smovcr() - emulate an "fmovcr" instruction #
  2897. # fout() - emulate an opclass 3 instruction #
# tbl_unsupp - addr of table of emulation routines for opclass 0,2 #
  2899. # _real_inex() - "callout" to operating system inexact handler #
  2900. # #
  2901. # INPUT *************************************************************** #
  2902. # - The system stack contains the FP Inexact exception frame #
  2903. # - The fsave frame contains the source operand #
  2904. # #
  2905. # OUTPUT ************************************************************** #
  2906. # - The system stack is unchanged #
  2907. # - The fsave frame contains the adjusted src op for opclass 0,2 #
  2908. # #
  2909. # ALGORITHM *********************************************************** #
  2910. # In a system where the FP Inexact exception is enabled, the goal #
  2911. # is to get to the handler specified at _real_inex(). But, on the 060, #
  2912. # for opclass zero and two instruction taking this exception, the #
  2913. # hardware doesn't store the correct result to the destination FP #
  2914. # register as did the '040 and '881/2. This handler must emulate the #
  2915. # instruction in order to get this value and then store it to the #
  2916. # correct register before calling _real_inex(). #
  2917. # For opclass 3 instructions, the 060 doesn't store the default #
  2918. # inexact result out to memory or data register file as it should. #
  2919. # This code must emulate the move out by calling fout() before finally #
  2920. # exiting through _real_inex(). #
  2921. # #
  2922. #########################################################################
global _fpsp_inex
# entry: build a LOCAL_SIZE frame, save the integer/FP context, then
# fetch the faulting instruction (addressed by the FPIAR) and decide
# between the opclass 0/2 path (emulate, store result to the FP
# regfile) and the opclass 3 path (finex_out: emulate the fmove out).
_fpsp_inex:
link.w %a6,&-LOCAL_SIZE # init stack frame
fsave FP_SRC(%a6) # grab the "busy" frame
movm.l &0x0303,EXC_DREGS(%a6) # save d0-d1/a0-a1
fmovm.l %fpcr,%fpsr,%fpiar,USER_FPCR(%a6) # save ctrl regs
fmovm.x &0xc0,EXC_FPREGS(%a6) # save fp0-fp1 on stack
# the FPIAR holds the "current PC" of the faulting instruction
mov.l USER_FPIAR(%a6),EXC_EXTWPTR(%a6)
mov.l EXC_EXTWPTR(%a6),%a0 # fetch instruction addr
addq.l &0x4,EXC_EXTWPTR(%a6) # incr instruction ptr
bsr.l _imem_read_long # fetch the instruction words
mov.l %d0,EXC_OPWORD(%a6)
##############################################################################
btst &13,%d0 # is instr an fmove out?
bne.w finex_out # fmove out
# the hardware, for "fabs" and "fneg" w/ a long source format, puts the
# longword integer directly into the upper longword of the mantissa along
# w/ an exponent value of 0x401e. we convert this to extended precision here.
bfextu %d0{&19:&3},%d0 # fetch instr size
bne.b finex_cont # instr size is not long
cmpi.w FP_SRC_EX(%a6),&0x401e # is exponent 0x401e?
bne.b finex_cont # no
fmov.l &0x0,%fpcr
fmov.l FP_SRC_HI(%a6),%fp0 # load integer src
fmov.x %fp0,FP_SRC(%a6) # store integer as extended precision
mov.w &0xe001,0x2+FP_SRC(%a6)
finex_cont:
lea FP_SRC(%a6),%a0 # pass: ptr to src op
bsr.l fix_skewed_ops # fix src op
# Here, we zero the ccode and exception byte field since we're going to
# emulate the whole instruction. Notice, though, that we don't kill the
# INEX1 bit. This is because a packed op has long since been converted
# to extended before arriving here. Therefore, we need to retain the
# INEX1 bit from when the operand was first converted.
andi.l &0x00ff01ff,USER_FPSR(%a6) # zero all but accrued field
fmov.l &0x0,%fpcr # zero current control regs
fmov.l &0x0,%fpsr
bfextu EXC_EXTWORD(%a6){&0:&6},%d1 # extract upper 6 of cmdreg
cmpi.b %d1,&0x17 # is op an fmovecr?
beq.w finex_fmovcr # yes
lea FP_SRC(%a6),%a0 # pass: ptr to src op
bsr.l set_tag_x # tag the operand type
mov.b %d0,STAG(%a6) # maybe NORM,DENORM
# bits four and five of the fp extension word separate the monadic and dyadic
# operations that can pass through fpsp_inex(). remember that fcmp and ftst
# will never take this exception, but fsincos will.
btst &0x5,1+EXC_CMDREG(%a6) # is operation monadic or dyadic?
beq.b finex_extract # monadic
btst &0x4,1+EXC_CMDREG(%a6) # is operation an fsincos?
bne.b finex_extract # yes
bfextu EXC_CMDREG(%a6){&6:&3},%d0 # dyadic; load dst reg
bsr.l load_fpn2 # load dst into FP_DST
lea FP_DST(%a6),%a0 # pass: ptr to dst op
bsr.l set_tag_x # tag the operand type
cmpi.b %d0,&UNNORM # is operand an UNNORM?
bne.b finex_op2_done # no
bsr.l unnorm_fix # yes; convert to NORM,DENORM,or ZERO
finex_op2_done:
mov.b %d0,DTAG(%a6) # save dst optype tag
# dispatch to the emulation routine through tbl_unsupp, indexed by the
# 7-bit extension field of the command word.
finex_extract:
clr.l %d0
mov.b FPCR_MODE(%a6),%d0 # pass rnd prec/mode
mov.b 1+EXC_CMDREG(%a6),%d1
andi.w &0x007f,%d1 # extract extension
lea FP_SRC(%a6),%a0
lea FP_DST(%a6),%a1
mov.l (tbl_unsupp.l,%pc,%d1.w*4),%d1 # fetch routine addr
jsr (tbl_unsupp.l,%pc,%d1.l*1)
# the operation has been emulated. the result is in fp0.
finex_save:
bfextu EXC_CMDREG(%a6){&6:&3},%d0
bsr.l store_fpreg
# common exit: restore all saved state and pass control to the
# operating system's inexact handler.
finex_exit:
fmovm.x EXC_FPREGS(%a6),&0xc0 # restore fp0-fp1
fmovm.l USER_FPCR(%a6),%fpcr,%fpsr,%fpiar # restore ctrl regs
movm.l EXC_DREGS(%a6),&0x0303 # restore d0-d1/a0-a1
frestore FP_SRC(%a6)
unlk %a6
bra.l _real_inex
finex_fmovcr:
clr.l %d0
mov.b FPCR_MODE(%a6),%d0 # pass rnd prec,mode
mov.b 1+EXC_CMDREG(%a6),%d1
andi.l &0x0000007f,%d1 # pass rom offset
bsr.l smovcr
bra.b finex_save
########################################################################
#
# the hardware does not save the default result to memory on enabled
# inexact exceptions. we do this here before passing control to
# the user inexact handler.
#
# byte, word, and long destination format operations can pass
# through here. so can double and single precision.
# although packed opclass three operations can take inexact
# exceptions, they won't pass through here since they are caught
# first by the unsupported data format exception handler. that handler
# sends them directly to _real_inex() if necessary.
#
finex_out:
mov.b &NORM,STAG(%a6) # src is a NORM
clr.l %d0
mov.b FPCR_MODE(%a6),%d0 # pass rnd prec,mode
andi.l &0xffff00ff,USER_FPSR(%a6) # zero exception field
lea FP_SRC(%a6),%a0 # pass ptr to src operand
bsr.l fout # store the default result
bra.b finex_exit
  3031. #########################################################################
  3032. # XDEF **************************************************************** #
  3033. # _fpsp_dz(): 060FPSP entry point for FP DZ exception. #
  3034. # #
  3035. # This handler should be the first code executed upon taking #
  3036. # the FP DZ exception in an operating system. #
  3037. # #
  3038. # XREF **************************************************************** #
  3039. # _imem_read_long() - read instruction longword from memory #
  3040. # fix_skewed_ops() - adjust fsave operand #
  3041. # _real_dz() - "callout" exit point from FP DZ handler #
  3042. # #
  3043. # INPUT *************************************************************** #
  3044. # - The system stack contains the FP DZ exception stack. #
  3045. # - The fsave frame contains the source operand. #
  3046. # #
  3047. # OUTPUT ************************************************************** #
  3048. # - The system stack contains the FP DZ exception stack. #
  3049. # - The fsave frame contains the adjusted source operand. #
  3050. # #
  3051. # ALGORITHM *********************************************************** #
  3052. # In a system where the DZ exception is enabled, the goal is to #
  3053. # get to the handler specified at _real_dz(). But, on the 060, when the #
  3054. # exception is taken, the input operand in the fsave state frame may #
  3055. # be incorrect for some cases and need to be adjusted. So, this package #
  3056. # adjusts the operand using fix_skewed_ops() and then branches to #
  3057. # _real_dz(). #
  3058. # #
  3059. #########################################################################
global _fpsp_dz
# save the integer/FP context, fetch the faulting instruction (addressed
# by the FPIAR), "unskew" the source operand in the fsave frame if
# needed, then exit through the OS divide-by-zero handler.
_fpsp_dz:
link.w %a6,&-LOCAL_SIZE # init stack frame
fsave FP_SRC(%a6) # grab the "busy" frame
movm.l &0x0303,EXC_DREGS(%a6) # save d0-d1/a0-a1
fmovm.l %fpcr,%fpsr,%fpiar,USER_FPCR(%a6) # save ctrl regs
fmovm.x &0xc0,EXC_FPREGS(%a6) # save fp0-fp1 on stack
# the FPIAR holds the "current PC" of the faulting instruction
mov.l USER_FPIAR(%a6),EXC_EXTWPTR(%a6)
mov.l EXC_EXTWPTR(%a6),%a0 # fetch instruction addr
addq.l &0x4,EXC_EXTWPTR(%a6) # incr instruction ptr
bsr.l _imem_read_long # fetch the instruction words
mov.l %d0,EXC_OPWORD(%a6)
##############################################################################
# here, we simply see if the operand in the fsave frame needs to be "unskewed".
# this would be the case for opclass two operations with a source zero
# in the sgl or dbl format.
lea FP_SRC(%a6),%a0 # pass: ptr to src op
bsr.l fix_skewed_ops # fix src op
fdz_exit:
fmovm.x EXC_FPREGS(%a6),&0xc0 # restore fp0-fp1
fmovm.l USER_FPCR(%a6),%fpcr,%fpsr,%fpiar # restore ctrl regs
movm.l EXC_DREGS(%a6),&0x0303 # restore d0-d1/a0-a1
frestore FP_SRC(%a6)
unlk %a6
bra.l _real_dz
  3086. #########################################################################
  3087. # XDEF **************************************************************** #
  3088. # _fpsp_fline(): 060FPSP entry point for "Line F emulator" #
  3089. # exception when the "reduced" version of the #
  3090. # FPSP is implemented that does not emulate #
  3091. # FP unimplemented instructions. #
  3092. # #
  3093. # This handler should be the first code executed upon taking a #
  3094. # "Line F Emulator" exception in an operating system integrating #
  3095. # the reduced version of 060FPSP. #
  3096. # #
  3097. # XREF **************************************************************** #
  3098. # _real_fpu_disabled() - Handle "FPU disabled" exceptions #
  3099. # _real_fline() - Handle all other cases (treated equally) #
  3100. # #
  3101. # INPUT *************************************************************** #
  3102. # - The system stack contains a "Line F Emulator" exception #
  3103. # stack frame. #
  3104. # #
  3105. # OUTPUT ************************************************************** #
  3106. # - The system stack is unchanged. #
  3107. # #
  3108. # ALGORITHM *********************************************************** #
  3109. # When a "Line F Emulator" exception occurs in a system where #
  3110. # "FPU Unimplemented" instructions will not be emulated, the exception #
  3111. # can occur because then FPU is disabled or the instruction is to be #
  3112. # classifed as "Line F". This module determines which case exists and #
  3113. # calls the appropriate "callout". #
  3114. # #
  3115. #########################################################################
global _fpsp_fline
_fpsp_fline:
# check to see if the FPU is disabled. if so, jump to the OS entry
# point for that condition.
# 0x6(%sp) is the exception frame's format/vector word; 0x402c is
# frame format $4 with vector offset $02c (Line F), which the 060
# produces for the "FPU disabled" case (see the 060 User's Manual).
cmpi.w 0x6(%sp),&0x402c
beq.l _real_fpu_disabled
bra.l _real_fline
  3123. #########################################################################
  3124. # XDEF **************************************************************** #
  3125. # _dcalc_ea(): calc correct <ea> from <ea> stacked on exception #
  3126. # #
  3127. # XREF **************************************************************** #
  3128. # inc_areg() - increment an address register #
  3129. # dec_areg() - decrement an address register #
  3130. # #
  3131. # INPUT *************************************************************** #
  3132. # d0 = number of bytes to adjust <ea> by #
  3133. # #
  3134. # OUTPUT ************************************************************** #
# a0 = the corrected <ea> #
  3136. # #
  3137. # ALGORITHM *********************************************************** #
  3138. # "Dummy" CALCulate Effective Address: #
  3139. # The stacked <ea> for FP unimplemented instructions and opclass #
  3140. # two packed instructions is correct with the exception of... #
  3141. # #
  3142. # 1) -(An) : The register is not updated regardless of size. #
  3143. # Also, for extended precision and packed, the #
  3144. # stacked <ea> value is 8 bytes too big #
  3145. # 2) (An)+ : The register is not updated. #
  3146. # 3) #<data> : The upper longword of the immediate operand is #
# stacked. b,w,l and s sizes are completely stacked. #
  3148. # d,x, and p are not. #
  3149. # #
  3150. #########################################################################
global _dcalc_ea
# in:  d0 = number of bytes to adjust the <ea> by
# out: a0 = the corrected <ea>
# side effects: updates An for (An)+/-(An); for #<data>, sets
# immed_flg in SPCOND_FLG and returns the address of the immediate
# operand (instruction address + 4).
_dcalc_ea:
mov.l %d0, %a0 # move # bytes to %a0
mov.b 1+EXC_OPWORD(%a6), %d0 # fetch opcode word
mov.l %d0, %d1 # make a copy
andi.w &0x38, %d0 # extract mode field
andi.l &0x7, %d1 # extract reg field
cmpi.b %d0,&0x18 # is mode (An)+ ?
beq.b dcea_pi # yes
cmpi.b %d0,&0x20 # is mode -(An) ?
beq.b dcea_pd # yes
or.w %d1,%d0 # concat mode,reg
cmpi.b %d0,&0x3c # is mode #<data>?
beq.b dcea_imm # yes
mov.l EXC_EA(%a6),%a0 # return <ea>
rts
# need to set immediate data flag here since we'll need to do
# an imem_read to fetch this later.
dcea_imm:
mov.b &immed_flg,SPCOND_FLG(%a6)
lea ([USER_FPIAR,%a6],0x4),%a0 # no; return <ea>
rts
# here, the <ea> is stacked correctly. however, we must update the
# address register...
dcea_pi:
mov.l %a0,%d0 # pass amt to inc by
bsr.l inc_areg # inc addr register
mov.l EXC_EA(%a6),%a0 # stacked <ea> is correct
rts
# the <ea> is stacked correctly for all but extended and packed which
# the <ea>s are 8 bytes too large.
# it would make no sense to have a pre-decrement to a7 in supervisor
# mode so we don't even worry about this tricky case here : )
dcea_pd:
mov.l %a0,%d0 # pass amt to dec by
bsr.l dec_areg # dec addr register
mov.l EXC_EA(%a6),%a0 # stacked <ea> is correct
cmpi.b %d0,&0xc # is opsize ext or packed?
beq.b dcea_pd2 # yes
rts
dcea_pd2:
sub.l &0x8,%a0 # correct <ea>
mov.l %a0,EXC_EA(%a6) # put correct <ea> on stack
rts
  3195. #########################################################################
  3196. # XDEF **************************************************************** #
  3197. # _calc_ea_fout(): calculate correct stacked <ea> for extended #
  3198. # and packed data opclass 3 operations. #
  3199. # #
  3200. # XREF **************************************************************** #
  3201. # None #
  3202. # #
  3203. # INPUT *************************************************************** #
  3204. # None #
  3205. # #
  3206. # OUTPUT ************************************************************** #
  3207. # a0 = return correct effective address #
  3208. # #
  3209. # ALGORITHM *********************************************************** #
  3210. # For opclass 3 extended and packed data operations, the <ea> #
  3211. # stacked for the exception is incorrect for -(an) and (an)+ addressing #
# modes. Also, while we're at it, the address register itself must get #
  3213. # updated. #
  3214. # So, for -(an), we must subtract 8 off of the stacked <ea> value #
  3215. # and return that value as the correct <ea> and store that value in An. #
  3216. # For (an)+, the stacked <ea> is correct but we must adjust An by +12. #
  3217. # #
  3218. #########################################################################
  3219. # This calc_ea is currently used to retrieve the correct <ea>
  3220. # for fmove outs of type extended and packed.
global _calc_ea_fout
# in:  EXC_OPWORD(%a6) = stacked opcode word (mode/reg in low byte)
# out: a0 = the corrected <ea>; also updates the An image/register and,
# for a7-relative modes, flags the special case in SPCOND_FLG so the
# caller can handle stack-overlapping moves.
_calc_ea_fout:
mov.b 1+EXC_OPWORD(%a6),%d0 # fetch opcode word
mov.l %d0,%d1 # make a copy
andi.w &0x38,%d0 # extract mode field
andi.l &0x7,%d1 # extract reg field
cmpi.b %d0,&0x18 # is mode (An)+ ?
beq.b ceaf_pi # yes
cmpi.b %d0,&0x20 # is mode -(An) ?
beq.w ceaf_pd # yes
mov.l EXC_EA(%a6),%a0 # stacked <ea> is correct
rts
# (An)+ : extended and packed fmove out
# : stacked <ea> is correct
# : "An" not updated
# dispatch on the register number; each target bumps the appropriate
# An image (d0/d1/a0/a1/a6/a7 live in the exception frame; a2-a5 are
# live registers) by 12 bytes.
ceaf_pi:
mov.w (tbl_ceaf_pi.b,%pc,%d1.w*2),%d1
mov.l EXC_EA(%a6),%a0
jmp (tbl_ceaf_pi.b,%pc,%d1.w*1)
swbeg &0x8
tbl_ceaf_pi:
short ceaf_pi0 - tbl_ceaf_pi
short ceaf_pi1 - tbl_ceaf_pi
short ceaf_pi2 - tbl_ceaf_pi
short ceaf_pi3 - tbl_ceaf_pi
short ceaf_pi4 - tbl_ceaf_pi
short ceaf_pi5 - tbl_ceaf_pi
short ceaf_pi6 - tbl_ceaf_pi
short ceaf_pi7 - tbl_ceaf_pi
ceaf_pi0:
addi.l &0xc,EXC_DREGS+0x8(%a6)
rts
ceaf_pi1:
addi.l &0xc,EXC_DREGS+0xc(%a6)
rts
ceaf_pi2:
add.l &0xc,%a2
rts
ceaf_pi3:
add.l &0xc,%a3
rts
ceaf_pi4:
add.l &0xc,%a4
rts
ceaf_pi5:
add.l &0xc,%a5
rts
ceaf_pi6:
addi.l &0xc,EXC_A6(%a6)
rts
# (a7)+ : flag the "memory increment a7" special case for the caller.
ceaf_pi7:
mov.b &mia7_flg,SPCOND_FLG(%a6)
addi.l &0xc,EXC_A7(%a6)
rts
# -(An) : extended and packed fmove out
# : stacked <ea> = actual <ea> + 8
# : "An" not updated
# correct both the returned <ea> and the stacked copy by -8, then
# store the corrected address back into the selected An.
ceaf_pd:
mov.w (tbl_ceaf_pd.b,%pc,%d1.w*2),%d1
mov.l EXC_EA(%a6),%a0
sub.l &0x8,%a0
sub.l &0x8,EXC_EA(%a6)
jmp (tbl_ceaf_pd.b,%pc,%d1.w*1)
swbeg &0x8
tbl_ceaf_pd:
short ceaf_pd0 - tbl_ceaf_pd
short ceaf_pd1 - tbl_ceaf_pd
short ceaf_pd2 - tbl_ceaf_pd
short ceaf_pd3 - tbl_ceaf_pd
short ceaf_pd4 - tbl_ceaf_pd
short ceaf_pd5 - tbl_ceaf_pd
short ceaf_pd6 - tbl_ceaf_pd
short ceaf_pd7 - tbl_ceaf_pd
ceaf_pd0:
mov.l %a0,EXC_DREGS+0x8(%a6)
rts
ceaf_pd1:
mov.l %a0,EXC_DREGS+0xc(%a6)
rts
ceaf_pd2:
mov.l %a0,%a2
rts
ceaf_pd3:
mov.l %a0,%a3
rts
ceaf_pd4:
mov.l %a0,%a4
rts
ceaf_pd5:
mov.l %a0,%a5
rts
ceaf_pd6:
mov.l %a0,EXC_A6(%a6)
rts
# -(a7) : flag the "memory decrement a7" special case for the caller.
ceaf_pd7:
mov.l %a0,EXC_A7(%a6)
mov.b &mda7_flg,SPCOND_FLG(%a6)
rts
  3319. #
  3320. # This table holds the offsets of the emulation routines for each individual
  3321. # math operation relative to the address of this table. Included are
  3322. # routines like fadd/fmul/fabs. The transcendentals ARE NOT. This is because
# this table is for the version of the 060FPSP without transcendentals.
  3324. # The location within the table is determined by the extension bits of the
  3325. # operation longword.
  3326. #
swbeg &109
tbl_unsupp:
# entries whose offset resolves to tbl_unsupp itself are operations
# (the transcendentals) that this non-transcendental build does not
# emulate; they are never dispatched to through this table here.
long fin - tbl_unsupp # 00: fmove
long fint - tbl_unsupp # 01: fint
long tbl_unsupp - tbl_unsupp # 02: fsinh
long fintrz - tbl_unsupp # 03: fintrz
long fsqrt - tbl_unsupp # 04: fsqrt
long tbl_unsupp - tbl_unsupp
long tbl_unsupp - tbl_unsupp # 06: flognp1
long tbl_unsupp - tbl_unsupp
long tbl_unsupp - tbl_unsupp # 08: fetoxm1
long tbl_unsupp - tbl_unsupp # 09: ftanh
long tbl_unsupp - tbl_unsupp # 0a: fatan
long tbl_unsupp - tbl_unsupp
long tbl_unsupp - tbl_unsupp # 0c: fasin
long tbl_unsupp - tbl_unsupp # 0d: fatanh
long tbl_unsupp - tbl_unsupp # 0e: fsin
long tbl_unsupp - tbl_unsupp # 0f: ftan
long tbl_unsupp - tbl_unsupp # 10: fetox
long tbl_unsupp - tbl_unsupp # 11: ftwotox
long tbl_unsupp - tbl_unsupp # 12: ftentox
long tbl_unsupp - tbl_unsupp
long tbl_unsupp - tbl_unsupp # 14: flogn
long tbl_unsupp - tbl_unsupp # 15: flog10
long tbl_unsupp - tbl_unsupp # 16: flog2
long tbl_unsupp - tbl_unsupp
long fabs - tbl_unsupp # 18: fabs
long tbl_unsupp - tbl_unsupp # 19: fcosh
long fneg - tbl_unsupp # 1a: fneg
long tbl_unsupp - tbl_unsupp
long tbl_unsupp - tbl_unsupp # 1c: facos
long tbl_unsupp - tbl_unsupp # 1d: fcos
long tbl_unsupp - tbl_unsupp # 1e: fgetexp
long tbl_unsupp - tbl_unsupp # 1f: fgetman
long fdiv - tbl_unsupp # 20: fdiv
long tbl_unsupp - tbl_unsupp # 21: fmod
long fadd - tbl_unsupp # 22: fadd
long fmul - tbl_unsupp # 23: fmul
long fsgldiv - tbl_unsupp # 24: fsgldiv
long tbl_unsupp - tbl_unsupp # 25: frem
long tbl_unsupp - tbl_unsupp # 26: fscale
long fsglmul - tbl_unsupp # 27: fsglmul
long fsub - tbl_unsupp # 28: fsub
long tbl_unsupp - tbl_unsupp
long tbl_unsupp - tbl_unsupp
long tbl_unsupp - tbl_unsupp
long tbl_unsupp - tbl_unsupp
long tbl_unsupp - tbl_unsupp
long tbl_unsupp - tbl_unsupp
long tbl_unsupp - tbl_unsupp
long tbl_unsupp - tbl_unsupp # 30: fsincos
long tbl_unsupp - tbl_unsupp # 31: fsincos
long tbl_unsupp - tbl_unsupp # 32: fsincos
long tbl_unsupp - tbl_unsupp # 33: fsincos
long tbl_unsupp - tbl_unsupp # 34: fsincos
long tbl_unsupp - tbl_unsupp # 35: fsincos
long tbl_unsupp - tbl_unsupp # 36: fsincos
long tbl_unsupp - tbl_unsupp # 37: fsincos
long fcmp - tbl_unsupp # 38: fcmp
long tbl_unsupp - tbl_unsupp
long ftst - tbl_unsupp # 3a: ftst
long tbl_unsupp - tbl_unsupp
long tbl_unsupp - tbl_unsupp
long tbl_unsupp - tbl_unsupp
long tbl_unsupp - tbl_unsupp
long tbl_unsupp - tbl_unsupp
long fsin - tbl_unsupp # 40: fsmove
long fssqrt - tbl_unsupp # 41: fssqrt
long tbl_unsupp - tbl_unsupp
long tbl_unsupp - tbl_unsupp
long fdin - tbl_unsupp # 44: fdmove
long fdsqrt - tbl_unsupp # 45: fdsqrt
long tbl_unsupp - tbl_unsupp
long tbl_unsupp - tbl_unsupp
long tbl_unsupp - tbl_unsupp
long tbl_unsupp - tbl_unsupp
long tbl_unsupp - tbl_unsupp
long tbl_unsupp - tbl_unsupp
long tbl_unsupp - tbl_unsupp
long tbl_unsupp - tbl_unsupp
long tbl_unsupp - tbl_unsupp
long tbl_unsupp - tbl_unsupp
long tbl_unsupp - tbl_unsupp
long tbl_unsupp - tbl_unsupp
long tbl_unsupp - tbl_unsupp
long tbl_unsupp - tbl_unsupp
long tbl_unsupp - tbl_unsupp
long tbl_unsupp - tbl_unsupp
long tbl_unsupp - tbl_unsupp
long tbl_unsupp - tbl_unsupp
long fsabs - tbl_unsupp # 58: fsabs
long tbl_unsupp - tbl_unsupp
long fsneg - tbl_unsupp # 5a: fsneg
long tbl_unsupp - tbl_unsupp
long fdabs - tbl_unsupp # 5c: fdabs
long tbl_unsupp - tbl_unsupp
long fdneg - tbl_unsupp # 5e: fdneg
long tbl_unsupp - tbl_unsupp
long fsdiv - tbl_unsupp # 60: fsdiv
long tbl_unsupp - tbl_unsupp
long fsadd - tbl_unsupp # 62: fsadd
long fsmul - tbl_unsupp # 63: fsmul
long fddiv - tbl_unsupp # 64: fddiv
long tbl_unsupp - tbl_unsupp
long fdadd - tbl_unsupp # 66: fdadd
long fdmul - tbl_unsupp # 67: fdmul
long fssub - tbl_unsupp # 68: fssub
long tbl_unsupp - tbl_unsupp
long tbl_unsupp - tbl_unsupp
long tbl_unsupp - tbl_unsupp
long fdsub - tbl_unsupp # 6c: fdsub
  3438. #################################################
  3439. # Add this here so non-fp modules can compile.
  3440. # (smovcr is called from fpsp_inex.)
global smovcr
# deliberate dead-loop stub: this non-transcendental build has no real
# smovcr (fmovecr emulation); the symbol exists only so references from
# finex_fmovcr link. If control ever reaches here, it spins forever.
smovcr:
bra.b smovcr
  3444. #########################################################################
  3445. # XDEF **************************************************************** #
  3446. # fmovm_dynamic(): emulate "fmovm" dynamic instruction #
  3447. # #
  3448. # XREF **************************************************************** #
  3449. # fetch_dreg() - fetch data register #
  3450. # {i,d,}mem_read() - fetch data from memory #
  3451. # _mem_write() - write data to memory #
  3452. # iea_iacc() - instruction memory access error occurred #
  3453. # iea_dacc() - data memory access error occurred #
  3454. # restore() - restore An index regs if access error occurred #
  3455. # #
  3456. # INPUT *************************************************************** #
  3457. # None #
  3458. # #
  3459. # OUTPUT ************************************************************** #
  3460. # If instr is "fmovm Dn,-(A7)" from supervisor mode, #
  3461. # d0 = size of dump #
  3462. # d1 = Dn #
  3463. # Else if instruction access error, #
  3464. # d0 = FSLW #
  3465. # Else if data access error, #
  3466. # d0 = FSLW #
  3467. # a0 = address of fault #
  3468. # Else #
  3469. # none. #
  3470. # #
  3471. # ALGORITHM *********************************************************** #
  3472. # The effective address must be calculated since this is entered #
  3473. # from an "Unimplemented Effective Address" exception handler. So, we #
  3474. # have our own fcalc_ea() routine here. If an access error is flagged #
  3475. # by a _{i,d,}mem_read() call, we must exit through the special #
  3476. # handler. #
  3477. # The data register is determined and its value loaded to get the #
  3478. # string of FP registers affected. This value is used as an index into #
  3479. # a lookup table such that we can determine the number of bytes #
  3480. # involved. #
  3481. # If the instruction is "fmovm.x <ea>,Dn", a _mem_read() is used #
  3482. # to read in all FP values. Again, _mem_read() may fail and require a #
  3483. # special exit. #
  3484. # If the instruction is "fmovm.x DN,<ea>", a _mem_write() is used #
  3485. # to write all FP values. _mem_write() may also fail. #
  3486. # If the instruction is "fmovm.x DN,-(a7)" from supervisor mode, #
  3487. # then we return the size of the dump and the string to the caller #
  3488. # so that the move can occur outside of this routine. This special #
  3489. # case is required so that moves to the system stack are handled #
  3490. # correctly. #
  3491. # #
  3492. # DYNAMIC: #
  3493. # fmovm.x dn, <ea> #
  3494. # fmovm.x <ea>, dn #
  3495. # #
  3496. # <WORD 1> <WORD2> #
  3497. # 1111 0010 00 |<ea>| 11@& 1000 0$$$ 0000 #
  3498. # #
  3499. # & = (0): predecrement addressing mode #
  3500. # (1): postincrement or control addressing mode #
  3501. # @ = (0): move listed regs from memory to the FPU #
  3502. # (1): move listed regs from the FPU to memory #
  3503. # $$$ : index of data register holding reg select mask #
  3504. # #
  3505. # NOTES: #
  3506. # If the data register holds a zero, then the #
  3507. # instruction is a nop. #
  3508. # #
  3509. #########################################################################
  3510. global fmovm_dynamic
  3511. fmovm_dynamic:
# Emulate the dynamic-register-list "fmovm.x" (see the header block above
# for the instruction encoding). The FP register select string lives in a
# data register named by extension-word bits 6-4.
  3512. # extract the data register in which the bit string resides...
  3513. mov.b 1+EXC_EXTWORD(%a6),%d1 # fetch extword
  3514. andi.w &0x70,%d1 # extract reg bits
  3515. lsr.b &0x4,%d1 # shift into lo bits
  3516. # fetch the bit string into d0...
  3517. bsr.l fetch_dreg # fetch reg string
  3518. andi.l &0x000000ff,%d0 # keep only lo byte
  3519. mov.l %d0,-(%sp) # save strg
# transfer size in bytes = lookup on the 8-bit string (popcount * 12)
  3520. mov.b (tbl_fmovm_size.w,%pc,%d0),%d0
  3521. mov.l %d0,-(%sp) # save size
  3522. bsr.l fmovm_calc_ea # calculate <ea>
  3523. mov.l (%sp)+,%d0 # restore size
  3524. mov.l (%sp)+,%d1 # restore strg
  3525. # if the bit string is a zero, then the operation is a no-op
  3526. # but, make sure that we've calculated ea and advanced the opword pointer
  3527. beq.w fmovm_data_done
  3528. # separate move ins from move outs...
# extword bit 13 is the "@" direction bit from the header:
# 0 = move listed regs from memory to the FPU, 1 = FPU to memory
  3529. btst &0x5,EXC_EXTWORD(%a6) # is it a move in or out?
  3530. beq.w fmovm_data_in # bit clear -> move in (mem to FPU)
  3531. #############
  3532. # MOVE OUT: #
  3533. #############
  3534. fmovm_data_out:
  3535. btst &0x4,EXC_EXTWORD(%a6) # control or predecrement?
  3536. bne.w fmovm_out_ctrl # control
  3537. ############################
  3538. fmovm_out_predec:
  3539. # for predecrement mode, the bit string is the opposite of both control
  3540. # operations and postincrement mode. (bit7 = FP7 ... bit0 = FP0)
  3541. # here, we convert it to be just like the others...
  3542. mov.b (tbl_fmovm_convert.w,%pc,%d1.w*1),%d1
  3543. btst &0x5,EXC_SR(%a6) # user or supervisor mode?
  3544. beq.b fmovm_out_ctrl # user
  3545. fmovm_out_predec_s:
  3546. cmpi.b SPCOND_FLG(%a6),&mda7_flg # is <ea> mode -(a7)?
  3547. bne.b fmovm_out_ctrl
  3548. # the operation was unfortunately an: fmovm.x dn,-(sp)
  3549. # called from supervisor mode.
  3550. # we're also passing "size" and "strg" back to the calling routine
  3551. rts
  3552. ############################
  3553. fmovm_out_ctrl:
  3554. mov.l %a0,%a1 # move <ea> to a1
# Build the dump image in a scratch area carved out of the supervisor
# stack, then copy it to user memory with one _dmem_write() call below.
  3555. sub.l %d0,%sp # subtract size of dump
  3556. lea (%sp),%a0
# After conversion, bit7 of the string selects FP0 ... bit0 selects FP7.
# Each lsl.b shifts the next register's bit into the sign position so it
# can be tested with bpl (skip) / fall-through (move).
# FP0/FP1 are taken from the saved exception frame (EXC_FP0/EXC_FP1);
# FP2-FP7 are dumped with real fmovm.x instructions.
  3557. tst.b %d1 # should FP0 be moved?
  3558. bpl.b fmovm_out_ctrl_fp1 # no
  3559. mov.l 0x0+EXC_FP0(%a6),(%a0)+ # yes
  3560. mov.l 0x4+EXC_FP0(%a6),(%a0)+
  3561. mov.l 0x8+EXC_FP0(%a6),(%a0)+
  3562. fmovm_out_ctrl_fp1:
  3563. lsl.b &0x1,%d1 # should FP1 be moved?
  3564. bpl.b fmovm_out_ctrl_fp2 # no
  3565. mov.l 0x0+EXC_FP1(%a6),(%a0)+ # yes
  3566. mov.l 0x4+EXC_FP1(%a6),(%a0)+
  3567. mov.l 0x8+EXC_FP1(%a6),(%a0)+
  3568. fmovm_out_ctrl_fp2:
  3569. lsl.b &0x1,%d1 # should FP2 be moved?
  3570. bpl.b fmovm_out_ctrl_fp3 # no
  3571. fmovm.x &0x20,(%a0) # yes
  3572. add.l &0xc,%a0
  3573. fmovm_out_ctrl_fp3:
  3574. lsl.b &0x1,%d1 # should FP3 be moved?
  3575. bpl.b fmovm_out_ctrl_fp4 # no
  3576. fmovm.x &0x10,(%a0) # yes
  3577. add.l &0xc,%a0
  3578. fmovm_out_ctrl_fp4:
  3579. lsl.b &0x1,%d1 # should FP4 be moved?
  3580. bpl.b fmovm_out_ctrl_fp5 # no
  3581. fmovm.x &0x08,(%a0) # yes
  3582. add.l &0xc,%a0
  3583. fmovm_out_ctrl_fp5:
  3584. lsl.b &0x1,%d1 # should FP5 be moved?
  3585. bpl.b fmovm_out_ctrl_fp6 # no
  3586. fmovm.x &0x04,(%a0) # yes
  3587. add.l &0xc,%a0
  3588. fmovm_out_ctrl_fp6:
  3589. lsl.b &0x1,%d1 # should FP6 be moved?
  3590. bpl.b fmovm_out_ctrl_fp7 # no
  3591. fmovm.x &0x02,(%a0) # yes
  3592. add.l &0xc,%a0
  3593. fmovm_out_ctrl_fp7:
  3594. lsl.b &0x1,%d1 # should FP7 be moved?
  3595. bpl.b fmovm_out_ctrl_done # no
  3596. fmovm.x &0x01,(%a0) # yes
  3597. add.l &0xc,%a0
  3598. fmovm_out_ctrl_done:
# L_SCR1 holds the destination <ea> so the error path (fmovm_out_err /
# fmovm_err) can report the fault address if _dmem_write fails.
  3599. mov.l %a1,L_SCR1(%a6)
  3600. lea (%sp),%a0 # pass: supervisor src
  3601. mov.l %d0,-(%sp) # save size
  3602. bsr.l _dmem_write # copy data to user mem
  3603. mov.l (%sp)+,%d0
  3604. add.l %d0,%sp # clear fpreg data from stack
  3605. tst.l %d1 # did dstore err?
  3606. bne.w fmovm_out_err # yes
  3607. rts
  3608. ############
  3609. # MOVE IN: #
  3610. ############
  3611. fmovm_data_in:
  3612. mov.l %a0,L_SCR1(%a6)
# Read the whole image from user memory into a stack scratch area first,
# then scatter it into the frame / FP registers below.
  3613. sub.l %d0,%sp # make room for fpregs
  3614. lea (%sp),%a1
  3615. mov.l %d1,-(%sp) # save bit string for later
  3616. mov.l %d0,-(%sp) # save # of bytes
  3617. bsr.l _dmem_read # copy data from user mem
  3618. mov.l (%sp)+,%d0 # retrieve # of bytes
  3619. tst.l %d1 # did dfetch fail?
  3620. bne.w fmovm_in_err # yes
  3621. mov.l (%sp)+,%d1 # load bit string
  3622. lea (%sp),%a0 # addr of stack
# same sign-bit walk as the move-out path: bit7 = FP0 ... bit0 = FP7
  3623. tst.b %d1 # should FP0 be moved?
  3624. bpl.b fmovm_data_in_fp1 # no
  3625. mov.l (%a0)+,0x0+EXC_FP0(%a6) # yes
  3626. mov.l (%a0)+,0x4+EXC_FP0(%a6)
  3627. mov.l (%a0)+,0x8+EXC_FP0(%a6)
  3628. fmovm_data_in_fp1:
  3629. lsl.b &0x1,%d1 # should FP1 be moved?
  3630. bpl.b fmovm_data_in_fp2 # no
  3631. mov.l (%a0)+,0x0+EXC_FP1(%a6) # yes
  3632. mov.l (%a0)+,0x4+EXC_FP1(%a6)
  3633. mov.l (%a0)+,0x8+EXC_FP1(%a6)
  3634. fmovm_data_in_fp2:
  3635. lsl.b &0x1,%d1 # should FP2 be moved?
  3636. bpl.b fmovm_data_in_fp3 # no
  3637. fmovm.x (%a0)+,&0x20 # yes
  3638. fmovm_data_in_fp3:
  3639. lsl.b &0x1,%d1 # should FP3 be moved?
  3640. bpl.b fmovm_data_in_fp4 # no
  3641. fmovm.x (%a0)+,&0x10 # yes
  3642. fmovm_data_in_fp4:
  3643. lsl.b &0x1,%d1 # should FP4 be moved?
  3644. bpl.b fmovm_data_in_fp5 # no
  3645. fmovm.x (%a0)+,&0x08 # yes
  3646. fmovm_data_in_fp5:
  3647. lsl.b &0x1,%d1 # should FP5 be moved?
  3648. bpl.b fmovm_data_in_fp6 # no
  3649. fmovm.x (%a0)+,&0x04 # yes
  3650. fmovm_data_in_fp6:
  3651. lsl.b &0x1,%d1 # should FP6 be moved?
  3652. bpl.b fmovm_data_in_fp7 # no
  3653. fmovm.x (%a0)+,&0x02 # yes
  3654. fmovm_data_in_fp7:
  3655. lsl.b &0x1,%d1 # should FP7 be moved?
  3656. bpl.b fmovm_data_in_done # no
  3657. fmovm.x (%a0)+,&0x01 # yes
  3658. fmovm_data_in_done:
  3659. add.l %d0,%sp # remove fpregs from stack
  3660. rts
  3661. #####################################
  3662. fmovm_data_done:
  3663. rts
  3664. ##############################################################################
  3665. #
  3666. # table indexed by the operation's bit string that gives the number
  3667. # of bytes that will be moved.
  3668. #
  3669. # number of bytes = (# of 1's in bit string) * 12(bytes/fpreg)
  3670. #
  3671. tbl_fmovm_size:
# 256 one-byte entries (32 rows x 8); entry i = popcount(i) * 12 (0x0c).
  3672. byte 0x00,0x0c,0x0c,0x18,0x0c,0x18,0x18,0x24
  3673. byte 0x0c,0x18,0x18,0x24,0x18,0x24,0x24,0x30
  3674. byte 0x0c,0x18,0x18,0x24,0x18,0x24,0x24,0x30
  3675. byte 0x18,0x24,0x24,0x30,0x24,0x30,0x30,0x3c
  3676. byte 0x0c,0x18,0x18,0x24,0x18,0x24,0x24,0x30
  3677. byte 0x18,0x24,0x24,0x30,0x24,0x30,0x30,0x3c
  3678. byte 0x18,0x24,0x24,0x30,0x24,0x30,0x30,0x3c
  3679. byte 0x24,0x30,0x30,0x3c,0x30,0x3c,0x3c,0x48
  3680. byte 0x0c,0x18,0x18,0x24,0x18,0x24,0x24,0x30
  3681. byte 0x18,0x24,0x24,0x30,0x24,0x30,0x30,0x3c
  3682. byte 0x18,0x24,0x24,0x30,0x24,0x30,0x30,0x3c
  3683. byte 0x24,0x30,0x30,0x3c,0x30,0x3c,0x3c,0x48
  3684. byte 0x18,0x24,0x24,0x30,0x24,0x30,0x30,0x3c
  3685. byte 0x24,0x30,0x30,0x3c,0x30,0x3c,0x3c,0x48
  3686. byte 0x24,0x30,0x30,0x3c,0x30,0x3c,0x3c,0x48
  3687. byte 0x30,0x3c,0x3c,0x48,0x3c,0x48,0x48,0x54
  3688. byte 0x0c,0x18,0x18,0x24,0x18,0x24,0x24,0x30
  3689. byte 0x18,0x24,0x24,0x30,0x24,0x30,0x30,0x3c
  3690. byte 0x18,0x24,0x24,0x30,0x24,0x30,0x30,0x3c
  3691. byte 0x24,0x30,0x30,0x3c,0x30,0x3c,0x3c,0x48
  3692. byte 0x18,0x24,0x24,0x30,0x24,0x30,0x30,0x3c
  3693. byte 0x24,0x30,0x30,0x3c,0x30,0x3c,0x3c,0x48
  3694. byte 0x24,0x30,0x30,0x3c,0x30,0x3c,0x3c,0x48
  3695. byte 0x30,0x3c,0x3c,0x48,0x3c,0x48,0x48,0x54
  3696. byte 0x18,0x24,0x24,0x30,0x24,0x30,0x30,0x3c
  3697. byte 0x24,0x30,0x30,0x3c,0x30,0x3c,0x3c,0x48
  3698. byte 0x24,0x30,0x30,0x3c,0x30,0x3c,0x3c,0x48
  3699. byte 0x30,0x3c,0x3c,0x48,0x3c,0x48,0x48,0x54
  3700. byte 0x24,0x30,0x30,0x3c,0x30,0x3c,0x3c,0x48
  3701. byte 0x30,0x3c,0x3c,0x48,0x3c,0x48,0x48,0x54
  3702. byte 0x30,0x3c,0x3c,0x48,0x3c,0x48,0x48,0x54
  3703. byte 0x3c,0x48,0x48,0x54,0x48,0x54,0x54,0x60
  3704. #
  3705. # table to convert a pre-decrement bit string into a post-increment
  3706. # or control bit string.
  3707. # ex: 0x00 ==> 0x00
  3708. # 0x01 ==> 0x80
  3709. # 0x02 ==> 0x40
  3710. # .
  3711. # .
  3712. # 0xfd ==> 0xbf
  3713. # 0xfe ==> 0x7f
  3714. # 0xff ==> 0xff
  3715. #
  3716. tbl_fmovm_convert:
# 256 one-byte entries: entry i is i with its bit order mirrored
# (bit0<->bit7, bit1<->bit6, ...), per the examples above.
  3717. byte 0x00,0x80,0x40,0xc0,0x20,0xa0,0x60,0xe0
  3718. byte 0x10,0x90,0x50,0xd0,0x30,0xb0,0x70,0xf0
  3719. byte 0x08,0x88,0x48,0xc8,0x28,0xa8,0x68,0xe8
  3720. byte 0x18,0x98,0x58,0xd8,0x38,0xb8,0x78,0xf8
  3721. byte 0x04,0x84,0x44,0xc4,0x24,0xa4,0x64,0xe4
  3722. byte 0x14,0x94,0x54,0xd4,0x34,0xb4,0x74,0xf4
  3723. byte 0x0c,0x8c,0x4c,0xcc,0x2c,0xac,0x6c,0xec
  3724. byte 0x1c,0x9c,0x5c,0xdc,0x3c,0xbc,0x7c,0xfc
  3725. byte 0x02,0x82,0x42,0xc2,0x22,0xa2,0x62,0xe2
  3726. byte 0x12,0x92,0x52,0xd2,0x32,0xb2,0x72,0xf2
  3727. byte 0x0a,0x8a,0x4a,0xca,0x2a,0xaa,0x6a,0xea
  3728. byte 0x1a,0x9a,0x5a,0xda,0x3a,0xba,0x7a,0xfa
  3729. byte 0x06,0x86,0x46,0xc6,0x26,0xa6,0x66,0xe6
  3730. byte 0x16,0x96,0x56,0xd6,0x36,0xb6,0x76,0xf6
  3731. byte 0x0e,0x8e,0x4e,0xce,0x2e,0xae,0x6e,0xee
  3732. byte 0x1e,0x9e,0x5e,0xde,0x3e,0xbe,0x7e,0xfe
  3733. byte 0x01,0x81,0x41,0xc1,0x21,0xa1,0x61,0xe1
  3734. byte 0x11,0x91,0x51,0xd1,0x31,0xb1,0x71,0xf1
  3735. byte 0x09,0x89,0x49,0xc9,0x29,0xa9,0x69,0xe9
  3736. byte 0x19,0x99,0x59,0xd9,0x39,0xb9,0x79,0xf9
  3737. byte 0x05,0x85,0x45,0xc5,0x25,0xa5,0x65,0xe5
  3738. byte 0x15,0x95,0x55,0xd5,0x35,0xb5,0x75,0xf5
  3739. byte 0x0d,0x8d,0x4d,0xcd,0x2d,0xad,0x6d,0xed
  3740. byte 0x1d,0x9d,0x5d,0xdd,0x3d,0xbd,0x7d,0xfd
  3741. byte 0x03,0x83,0x43,0xc3,0x23,0xa3,0x63,0xe3
  3742. byte 0x13,0x93,0x53,0xd3,0x33,0xb3,0x73,0xf3
  3743. byte 0x0b,0x8b,0x4b,0xcb,0x2b,0xab,0x6b,0xeb
  3744. byte 0x1b,0x9b,0x5b,0xdb,0x3b,0xbb,0x7b,0xfb
  3745. byte 0x07,0x87,0x47,0xc7,0x27,0xa7,0x67,0xe7
  3746. byte 0x17,0x97,0x57,0xd7,0x37,0xb7,0x77,0xf7
  3747. byte 0x0f,0x8f,0x4f,0xcf,0x2f,0xaf,0x6f,0xef
  3748. byte 0x1f,0x9f,0x5f,0xdf,0x3f,0xbf,0x7f,0xff
  3749. global fmovm_calc_ea
  3750. ###############################################
  3751. # _fmovm_calc_ea: calculate effective address #
  3752. ###############################################
  3753. fmovm_calc_ea:
# Entry: d0 = number of bytes to be transferred; it is parked in a0 so the
# (An)+ / -(An) handlers below can use it as the increment/decrement.
# Exit: a0 = calculated <ea>; may exit through iea_iacc on ifetch error.
  3754. mov.l %d0,%a0 # move # bytes to a0
  3755. # currently, MODE and REG are taken from the EXC_OPWORD. this could be
  3756. # easily changed if they were inputs passed in registers.
  3757. mov.w EXC_OPWORD(%a6),%d0 # fetch opcode word
  3758. mov.w %d0,%d1 # make a copy
  3759. andi.w &0x3f,%d0 # extract mode field
  3760. andi.l &0x7,%d1 # extract reg field
  3761. # jump to the corresponding function for each {MODE,REG} pair.
  3762. mov.w (tbl_fea_mode.b,%pc,%d0.w*2),%d0 # fetch jmp distance
  3763. jmp (tbl_fea_mode.b,%pc,%d0.w*1) # jmp to correct ea mode
  3764. swbeg &64
  3765. tbl_fea_mode:
# 64 16-bit offsets from tbl_fea_mode, one per {mode,reg} combination.
# Zero entries (tbl_fea_mode - tbl_fea_mode) are modes not handled here
# (Dn/An direct and reserved encodings).
  3766. short tbl_fea_mode - tbl_fea_mode
  3767. short tbl_fea_mode - tbl_fea_mode
  3768. short tbl_fea_mode - tbl_fea_mode
  3769. short tbl_fea_mode - tbl_fea_mode
  3770. short tbl_fea_mode - tbl_fea_mode
  3771. short tbl_fea_mode - tbl_fea_mode
  3772. short tbl_fea_mode - tbl_fea_mode
  3773. short tbl_fea_mode - tbl_fea_mode
  3774. short tbl_fea_mode - tbl_fea_mode
  3775. short tbl_fea_mode - tbl_fea_mode
  3776. short tbl_fea_mode - tbl_fea_mode
  3777. short tbl_fea_mode - tbl_fea_mode
  3778. short tbl_fea_mode - tbl_fea_mode
  3779. short tbl_fea_mode - tbl_fea_mode
  3780. short tbl_fea_mode - tbl_fea_mode
  3781. short tbl_fea_mode - tbl_fea_mode
  3782. short faddr_ind_a0 - tbl_fea_mode
  3783. short faddr_ind_a1 - tbl_fea_mode
  3784. short faddr_ind_a2 - tbl_fea_mode
  3785. short faddr_ind_a3 - tbl_fea_mode
  3786. short faddr_ind_a4 - tbl_fea_mode
  3787. short faddr_ind_a5 - tbl_fea_mode
  3788. short faddr_ind_a6 - tbl_fea_mode
  3789. short faddr_ind_a7 - tbl_fea_mode
  3790. short faddr_ind_p_a0 - tbl_fea_mode
  3791. short faddr_ind_p_a1 - tbl_fea_mode
  3792. short faddr_ind_p_a2 - tbl_fea_mode
  3793. short faddr_ind_p_a3 - tbl_fea_mode
  3794. short faddr_ind_p_a4 - tbl_fea_mode
  3795. short faddr_ind_p_a5 - tbl_fea_mode
  3796. short faddr_ind_p_a6 - tbl_fea_mode
  3797. short faddr_ind_p_a7 - tbl_fea_mode
  3798. short faddr_ind_m_a0 - tbl_fea_mode
  3799. short faddr_ind_m_a1 - tbl_fea_mode
  3800. short faddr_ind_m_a2 - tbl_fea_mode
  3801. short faddr_ind_m_a3 - tbl_fea_mode
  3802. short faddr_ind_m_a4 - tbl_fea_mode
  3803. short faddr_ind_m_a5 - tbl_fea_mode
  3804. short faddr_ind_m_a6 - tbl_fea_mode
  3805. short faddr_ind_m_a7 - tbl_fea_mode
  3806. short faddr_ind_disp_a0 - tbl_fea_mode
  3807. short faddr_ind_disp_a1 - tbl_fea_mode
  3808. short faddr_ind_disp_a2 - tbl_fea_mode
  3809. short faddr_ind_disp_a3 - tbl_fea_mode
  3810. short faddr_ind_disp_a4 - tbl_fea_mode
  3811. short faddr_ind_disp_a5 - tbl_fea_mode
  3812. short faddr_ind_disp_a6 - tbl_fea_mode
  3813. short faddr_ind_disp_a7 - tbl_fea_mode
  3814. short faddr_ind_ext - tbl_fea_mode
  3815. short faddr_ind_ext - tbl_fea_mode
  3816. short faddr_ind_ext - tbl_fea_mode
  3817. short faddr_ind_ext - tbl_fea_mode
  3818. short faddr_ind_ext - tbl_fea_mode
  3819. short faddr_ind_ext - tbl_fea_mode
  3820. short faddr_ind_ext - tbl_fea_mode
  3821. short faddr_ind_ext - tbl_fea_mode
  3822. short fabs_short - tbl_fea_mode
  3823. short fabs_long - tbl_fea_mode
  3824. short fpc_ind - tbl_fea_mode
  3825. short fpc_ind_ext - tbl_fea_mode
  3826. short tbl_fea_mode - tbl_fea_mode
  3827. short tbl_fea_mode - tbl_fea_mode
  3828. short tbl_fea_mode - tbl_fea_mode
  3829. short tbl_fea_mode - tbl_fea_mode
  3830. ###################################
  3831. # Address register indirect: (An) #
  3832. ###################################
# Note where each An lives at this point: a0/a1 were saved in the frame
# at EXC_DREGS+0x8/+0xc, a7 at EXC_A7, the caller's a6 at (%a6) (frame
# link); a2-a5 are still live in their hardware registers.
  3833. faddr_ind_a0:
  3834. mov.l EXC_DREGS+0x8(%a6),%a0 # Get current a0
  3835. rts
  3836. faddr_ind_a1:
  3837. mov.l EXC_DREGS+0xc(%a6),%a0 # Get current a1
  3838. rts
  3839. faddr_ind_a2:
  3840. mov.l %a2,%a0 # Get current a2
  3841. rts
  3842. faddr_ind_a3:
  3843. mov.l %a3,%a0 # Get current a3
  3844. rts
  3845. faddr_ind_a4:
  3846. mov.l %a4,%a0 # Get current a4
  3847. rts
  3848. faddr_ind_a5:
  3849. mov.l %a5,%a0 # Get current a5
  3850. rts
  3851. faddr_ind_a6:
  3852. mov.l (%a6),%a0 # Get current a6
  3853. rts
  3854. faddr_ind_a7:
  3855. mov.l EXC_A7(%a6),%a0 # Get current a7
  3856. rts
  3857. #####################################################
  3858. # Address register indirect w/ postincrement: (An)+ #
  3859. #####################################################
# On entry a0 = transfer size in bytes (parked there by fmovm_calc_ea).
# The returned <ea> is the pre-increment address; An is written back
# already incremented by the transfer size.
  3860. faddr_ind_p_a0:
  3861. mov.l EXC_DREGS+0x8(%a6),%d0 # Get current a0
  3862. mov.l %d0,%d1
  3863. add.l %a0,%d1 # Increment
  3864. mov.l %d1,EXC_DREGS+0x8(%a6) # Save incr value
  3865. mov.l %d0,%a0
  3866. rts
  3867. faddr_ind_p_a1:
  3868. mov.l EXC_DREGS+0xc(%a6),%d0 # Get current a1
  3869. mov.l %d0,%d1
  3870. add.l %a0,%d1 # Increment
  3871. mov.l %d1,EXC_DREGS+0xc(%a6) # Save incr value
  3872. mov.l %d0,%a0
  3873. rts
  3874. faddr_ind_p_a2:
  3875. mov.l %a2,%d0 # Get current a2
  3876. mov.l %d0,%d1
  3877. add.l %a0,%d1 # Increment
  3878. mov.l %d1,%a2 # Save incr value
  3879. mov.l %d0,%a0
  3880. rts
  3881. faddr_ind_p_a3:
  3882. mov.l %a3,%d0 # Get current a3
  3883. mov.l %d0,%d1
  3884. add.l %a0,%d1 # Increment
  3885. mov.l %d1,%a3 # Save incr value
  3886. mov.l %d0,%a0
  3887. rts
  3888. faddr_ind_p_a4:
  3889. mov.l %a4,%d0 # Get current a4
  3890. mov.l %d0,%d1
  3891. add.l %a0,%d1 # Increment
  3892. mov.l %d1,%a4 # Save incr value
  3893. mov.l %d0,%a0
  3894. rts
  3895. faddr_ind_p_a5:
  3896. mov.l %a5,%d0 # Get current a5
  3897. mov.l %d0,%d1
  3898. add.l %a0,%d1 # Increment
  3899. mov.l %d1,%a5 # Save incr value
  3900. mov.l %d0,%a0
  3901. rts
  3902. faddr_ind_p_a6:
  3903. mov.l (%a6),%d0 # Get current a6
  3904. mov.l %d0,%d1
  3905. add.l %a0,%d1 # Increment
  3906. mov.l %d1,(%a6) # Save incr value
  3907. mov.l %d0,%a0
  3908. rts
  3909. faddr_ind_p_a7:
# flag the special (a7)+ case for the caller via SPCOND_FLG
  3910. mov.b &mia7_flg,SPCOND_FLG(%a6) # set "special case" flag
  3911. mov.l EXC_A7(%a6),%d0 # Get current a7
  3912. mov.l %d0,%d1
  3913. add.l %a0,%d1 # Increment
  3914. mov.l %d1,EXC_A7(%a6) # Save incr value
  3915. mov.l %d0,%a0
  3916. rts
  3917. ####################################################
  3918. # Address register indirect w/ predecrement: -(An) #
  3919. ####################################################
# On entry a0 = transfer size in bytes. An is decremented by that amount;
# the decremented value is both written back and returned as the <ea>.
  3920. faddr_ind_m_a0:
  3921. mov.l EXC_DREGS+0x8(%a6),%d0 # Get current a0
  3922. sub.l %a0,%d0 # Decrement
  3923. mov.l %d0,EXC_DREGS+0x8(%a6) # Save decr value
  3924. mov.l %d0,%a0
  3925. rts
  3926. faddr_ind_m_a1:
  3927. mov.l EXC_DREGS+0xc(%a6),%d0 # Get current a1
  3928. sub.l %a0,%d0 # Decrement
  3929. mov.l %d0,EXC_DREGS+0xc(%a6) # Save decr value
  3930. mov.l %d0,%a0
  3931. rts
  3932. faddr_ind_m_a2:
  3933. mov.l %a2,%d0 # Get current a2
  3934. sub.l %a0,%d0 # Decrement
  3935. mov.l %d0,%a2 # Save decr value
  3936. mov.l %d0,%a0
  3937. rts
  3938. faddr_ind_m_a3:
  3939. mov.l %a3,%d0 # Get current a3
  3940. sub.l %a0,%d0 # Decrement
  3941. mov.l %d0,%a3 # Save decr value
  3942. mov.l %d0,%a0
  3943. rts
  3944. faddr_ind_m_a4:
  3945. mov.l %a4,%d0 # Get current a4
  3946. sub.l %a0,%d0 # Decrement
  3947. mov.l %d0,%a4 # Save decr value
  3948. mov.l %d0,%a0
  3949. rts
  3950. faddr_ind_m_a5:
  3951. mov.l %a5,%d0 # Get current a5
  3952. sub.l %a0,%d0 # Decrement
  3953. mov.l %d0,%a5 # Save decr value
  3954. mov.l %d0,%a0
  3955. rts
  3956. faddr_ind_m_a6:
  3957. mov.l (%a6),%d0 # Get current a6
  3958. sub.l %a0,%d0 # Decrement
  3959. mov.l %d0,(%a6) # Save decr value
  3960. mov.l %d0,%a0
  3961. rts
  3962. faddr_ind_m_a7:
# flag the special -(a7) case; fmovm_out_predec_s tests this flag
  3963. mov.b &mda7_flg,SPCOND_FLG(%a6) # set "special case" flag
  3964. mov.l EXC_A7(%a6),%d0 # Get current a7
  3965. sub.l %a0,%d0 # Decrement
  3966. mov.l %d0,EXC_A7(%a6) # Save decr value
  3967. mov.l %d0,%a0
  3968. rts
  3969. ########################################################
  3970. # Address register indirect w/ displacement: (d16, An) #
  3971. ########################################################
# A 16-bit displacement follows in the instruction stream. The mov.w into
# an address register sign-extends it to 32 bits before adding An.
# _imem_read_word may fail; that exits through iea_iacc.
  3972. faddr_ind_disp_a0:
  3973. mov.l EXC_EXTWPTR(%a6),%a0 # fetch instruction addr
  3974. addq.l &0x2,EXC_EXTWPTR(%a6) # incr instruction ptr
  3975. bsr.l _imem_read_word
  3976. tst.l %d1 # did ifetch fail?
  3977. bne.l iea_iacc # yes
  3978. mov.w %d0,%a0 # sign extend displacement
  3979. add.l EXC_DREGS+0x8(%a6),%a0 # a0 + d16
  3980. rts
  3981. faddr_ind_disp_a1:
  3982. mov.l EXC_EXTWPTR(%a6),%a0 # fetch instruction addr
  3983. addq.l &0x2,EXC_EXTWPTR(%a6) # incr instruction ptr
  3984. bsr.l _imem_read_word
  3985. tst.l %d1 # did ifetch fail?
  3986. bne.l iea_iacc # yes
  3987. mov.w %d0,%a0 # sign extend displacement
  3988. add.l EXC_DREGS+0xc(%a6),%a0 # a1 + d16
  3989. rts
  3990. faddr_ind_disp_a2:
  3991. mov.l EXC_EXTWPTR(%a6),%a0 # fetch instruction addr
  3992. addq.l &0x2,EXC_EXTWPTR(%a6) # incr instruction ptr
  3993. bsr.l _imem_read_word
  3994. tst.l %d1 # did ifetch fail?
  3995. bne.l iea_iacc # yes
  3996. mov.w %d0,%a0 # sign extend displacement
  3997. add.l %a2,%a0 # a2 + d16
  3998. rts
  3999. faddr_ind_disp_a3:
  4000. mov.l EXC_EXTWPTR(%a6),%a0 # fetch instruction addr
  4001. addq.l &0x2,EXC_EXTWPTR(%a6) # incr instruction ptr
  4002. bsr.l _imem_read_word
  4003. tst.l %d1 # did ifetch fail?
  4004. bne.l iea_iacc # yes
  4005. mov.w %d0,%a0 # sign extend displacement
  4006. add.l %a3,%a0 # a3 + d16
  4007. rts
  4008. faddr_ind_disp_a4:
  4009. mov.l EXC_EXTWPTR(%a6),%a0 # fetch instruction addr
  4010. addq.l &0x2,EXC_EXTWPTR(%a6) # incr instruction ptr
  4011. bsr.l _imem_read_word
  4012. tst.l %d1 # did ifetch fail?
  4013. bne.l iea_iacc # yes
  4014. mov.w %d0,%a0 # sign extend displacement
  4015. add.l %a4,%a0 # a4 + d16
  4016. rts
  4017. faddr_ind_disp_a5:
  4018. mov.l EXC_EXTWPTR(%a6),%a0 # fetch instruction addr
  4019. addq.l &0x2,EXC_EXTWPTR(%a6) # incr instruction ptr
  4020. bsr.l _imem_read_word
  4021. tst.l %d1 # did ifetch fail?
  4022. bne.l iea_iacc # yes
  4023. mov.w %d0,%a0 # sign extend displacement
  4024. add.l %a5,%a0 # a5 + d16
  4025. rts
  4026. faddr_ind_disp_a6:
  4027. mov.l EXC_EXTWPTR(%a6),%a0 # fetch instruction addr
  4028. addq.l &0x2,EXC_EXTWPTR(%a6) # incr instruction ptr
  4029. bsr.l _imem_read_word
  4030. tst.l %d1 # did ifetch fail?
  4031. bne.l iea_iacc # yes
  4032. mov.w %d0,%a0 # sign extend displacement
  4033. add.l (%a6),%a0 # a6 + d16
  4034. rts
  4035. faddr_ind_disp_a7:
  4036. mov.l EXC_EXTWPTR(%a6),%a0 # fetch instruction addr
  4037. addq.l &0x2,EXC_EXTWPTR(%a6) # incr instruction ptr
  4038. bsr.l _imem_read_word
  4039. tst.l %d1 # did ifetch fail?
  4040. bne.l iea_iacc # yes
  4041. mov.w %d0,%a0 # sign extend displacement
  4042. add.l EXC_A7(%a6),%a0 # a7 + d16
  4043. rts
  4044. ########################################################################
  4045. # Address register indirect w/ index(8-bit displacement): (d8, An, Xn) #
  4046. # " " " w/ " (base displacement): (bd, An, Xn) #
  4047. # Memory indirect postindexed: ([bd, An], Xn, od) #
  4048. # Memory indirect preindexed: ([bd, An, Xn], od) #
  4049. ########################################################################
  4050. faddr_ind_ext:
# d1 holds the opword reg field; +8 turns it into the address-register
# index that fetch_dreg() expects for the base An.
  4051. addq.l &0x8,%d1
  4052. bsr.l fetch_dreg # fetch base areg
  4053. mov.l %d0,-(%sp)
  4054. mov.l EXC_EXTWPTR(%a6),%a0 # fetch instruction addr
  4055. addq.l &0x2,EXC_EXTWPTR(%a6) # incr instruction ptr
  4056. bsr.l _imem_read_word # fetch extword in d0
  4057. tst.l %d1 # did ifetch fail?
  4058. bne.l iea_iacc # yes
  4059. mov.l (%sp)+,%a0
# extension-word bit 8 set selects the full (memory indirect) format
  4060. btst &0x8,%d0
  4061. bne.w fcalc_mem_ind
# brief format: ea = An + sign-extended d8 + scaled index register
  4062. mov.l %d0,L_SCR1(%a6) # hold opword
  4063. mov.l %d0,%d1
  4064. rol.w &0x4,%d1
  4065. andi.w &0xf,%d1 # extract index regno
  4066. # count on fetch_dreg() not to alter a0...
  4067. bsr.l fetch_dreg # fetch index
  4068. mov.l %d2,-(%sp) # save d2
  4069. mov.l L_SCR1(%a6),%d2 # fetch opword
  4070. btst &0xb,%d2 # is it word or long?
  4071. bne.b faii8_long
  4072. ext.l %d0 # sign extend word index
  4073. faii8_long:
  4074. mov.l %d2,%d1
  4075. rol.w &0x7,%d1
  4076. andi.l &0x3,%d1 # extract scale value
  4077. lsl.l %d1,%d0 # shift index by scale
  4078. extb.l %d2 # sign extend displacement
  4079. add.l %d2,%d0 # index + disp
  4080. add.l %d0,%a0 # An + (index + disp)
  4081. mov.l (%sp)+,%d2 # restore old d2
  4082. rts
  4083. ###########################
  4084. # Absolute short: (XXX).W #
  4085. ###########################
  4086. fabs_short:
  4087. mov.l EXC_EXTWPTR(%a6),%a0 # fetch instruction addr
  4088. addq.l &0x2,EXC_EXTWPTR(%a6) # incr instruction ptr
  4089. bsr.l _imem_read_word # fetch short address
  4090. tst.l %d1 # did ifetch fail?
  4091. bne.l iea_iacc # yes
# mov.w to an address register sign-extends the 16-bit address to 32 bits
  4092. mov.w %d0,%a0 # return <ea> in a0
  4093. rts
  4094. ##########################
  4095. # Absolute long: (XXX).L #
  4096. ##########################
  4097. fabs_long:
  4098. mov.l EXC_EXTWPTR(%a6),%a0 # fetch instruction addr
  4099. addq.l &0x4,EXC_EXTWPTR(%a6) # incr instruction ptr
  4100. bsr.l _imem_read_long # fetch long address
  4101. tst.l %d1 # did ifetch fail?
  4102. bne.l iea_iacc # yes
  4103. mov.l %d0,%a0 # return <ea> in a0
  4104. rts
  4105. #######################################################
  4106. # Program counter indirect w/ displacement: (d16, PC) #
  4107. #######################################################
  4108. fpc_ind:
  4109. mov.l EXC_EXTWPTR(%a6),%a0 # fetch instruction addr
  4110. addq.l &0x2,EXC_EXTWPTR(%a6) # incr instruction ptr
  4111. bsr.l _imem_read_word # fetch word displacement
  4112. tst.l %d1 # did ifetch fail?
  4113. bne.l iea_iacc # yes
  4114. mov.w %d0,%a0 # sign extend displacement
# PC base for (d16,PC) is the address of the extension word; EXC_EXTWPTR
# has already been advanced past it, so back it out by 2 below.
  4115. add.l EXC_EXTWPTR(%a6),%a0 # pc + d16
  4116. # _imem_read_word() increased the extwptr by 2. need to adjust here.
  4117. subq.l &0x2,%a0 # adjust <ea>
  4118. rts
  4119. ##########################################################
  4120. # PC indirect w/ index(8-bit displacement): (d8, PC, An) #
  4121. # " " w/ " (base displacement): (bd, PC, An) #
  4122. # PC memory indirect postindexed: ([bd, PC], Xn, od) #
  4123. # PC memory indirect preindexed: ([bd, PC, Xn], od) #
  4124. ##########################################################
  4125. fpc_ind_ext:
# Same as faddr_ind_ext, except the base is the PC (the address of the
# extension word just fetched) instead of an address register.
  4126. mov.l EXC_EXTWPTR(%a6),%a0 # fetch instruction addr
  4127. addq.l &0x2,EXC_EXTWPTR(%a6) # incr instruction ptr
  4128. bsr.l _imem_read_word # fetch ext word
  4129. tst.l %d1 # did ifetch fail?
  4130. bne.l iea_iacc # yes
  4131. mov.l EXC_EXTWPTR(%a6),%a0 # put base in a0
  4132. subq.l &0x2,%a0 # adjust base
# extension-word bit 8 set selects the full (memory indirect) format
  4133. btst &0x8,%d0 # is disp only 8 bits?
  4134. bne.w fcalc_mem_ind # calc memory indirect
  4135. mov.l %d0,L_SCR1(%a6) # store opword
  4136. mov.l %d0,%d1 # make extword copy
  4137. rol.w &0x4,%d1 # rotate reg num into place
  4138. andi.w &0xf,%d1 # extract register number
  4139. # count on fetch_dreg() not to alter a0...
  4140. bsr.l fetch_dreg # fetch index
  4141. mov.l %d2,-(%sp) # save d2
  4142. mov.l L_SCR1(%a6),%d2 # fetch opword
  4143. btst &0xb,%d2 # is index word or long?
  4144. bne.b fpii8_long # long
  4145. ext.l %d0 # sign extend word index
  4146. fpii8_long:
  4147. mov.l %d2,%d1
  4148. rol.w &0x7,%d1 # rotate scale value into place
  4149. andi.l &0x3,%d1 # extract scale value
  4150. lsl.l %d1,%d0 # shift index by scale
  4151. extb.l %d2 # sign extend displacement
  4152. add.l %d2,%d0 # disp + index
  4153. add.l %d0,%a0 # An + (index + disp)
  4154. mov.l (%sp)+,%d2 # restore temp register
  4155. rts
  4156. # d2 = index
  4157. # d3 = base
  4158. # d4 = od
  4159. # d5 = extword
  4160. fcalc_mem_ind:
# Full-format extension word handler: optional scaled index, optional
# base displacement (word/long), optional memory indirection with
# pre/post indexing, and optional outer displacement. d2-d5 are saved
# and used as scratch per the register map above. Entry: d0 = extension
# word, a0 = base address. Exit: a0 = final <ea>.
  4161. btst &0x6,%d0 # is the index suppressed?
  4162. beq.b fcalc_index
  4163. movm.l &0x3c00,-(%sp) # save d2-d5
  4164. mov.l %d0,%d5 # put extword in d5
  4165. mov.l %a0,%d3 # put base in d3
  4166. clr.l %d2 # yes, so index = 0
  4167. bra.b fbase_supp_ck
  4168. # index:
  4169. fcalc_index:
  4170. mov.l %d0,L_SCR1(%a6) # save d0 (opword)
  4171. bfextu %d0{&16:&4},%d1 # fetch dreg index
  4172. bsr.l fetch_dreg
  4173. movm.l &0x3c00,-(%sp) # save d2-d5
  4174. mov.l %d0,%d2 # put index in d2
  4175. mov.l L_SCR1(%a6),%d5
  4176. mov.l %a0,%d3
  4177. btst &0xb,%d5 # is index word or long?
  4178. bne.b fno_ext
  4179. ext.l %d2
  4180. fno_ext:
# scale the index by the 2-bit scale field of the extension word
  4181. bfextu %d5{&21:&2},%d0
  4182. lsl.l %d0,%d2
  4183. # base address (passed as parameter in d3):
  4184. # we clear the value here if it should actually be suppressed.
  4185. fbase_supp_ck:
  4186. btst &0x7,%d5 # is the bd suppressed?
  4187. beq.b fno_base_sup
  4188. clr.l %d3
  4189. # base displacement:
  4190. fno_base_sup:
  4191. bfextu %d5{&26:&2},%d0 # get bd size
  4192. # beq.l fmovm_error # if (size == 0) it's reserved
# bd size: 1 = null, 2 = word, 3 = long (fetched from instruction stream)
  4193. cmpi.b %d0,&0x2
  4194. blt.b fno_bd
  4195. beq.b fget_word_bd
  4196. mov.l EXC_EXTWPTR(%a6),%a0 # fetch instruction addr
  4197. addq.l &0x4,EXC_EXTWPTR(%a6) # incr instruction ptr
  4198. bsr.l _imem_read_long
  4199. tst.l %d1 # did ifetch fail?
  4200. bne.l fcea_iacc # yes
  4201. bra.b fchk_ind
  4202. fget_word_bd:
  4203. mov.l EXC_EXTWPTR(%a6),%a0 # fetch instruction addr
  4204. addq.l &0x2,EXC_EXTWPTR(%a6) # incr instruction ptr
  4205. bsr.l _imem_read_word
  4206. tst.l %d1 # did ifetch fail?
  4207. bne.l fcea_iacc # yes
  4208. ext.l %d0 # sign extend bd
  4209. fchk_ind:
  4210. add.l %d0,%d3 # base += bd
  4211. # outer displacement:
  4212. fno_bd:
# I/IS field zero means no memory indirection at all
  4213. bfextu %d5{&30:&2},%d0 # is od suppressed?
  4214. beq.w faii_bd
  4215. cmpi.b %d0,&0x2
  4216. blt.b fnull_od
  4217. beq.b fword_od
  4218. mov.l EXC_EXTWPTR(%a6),%a0 # fetch instruction addr
  4219. addq.l &0x4,EXC_EXTWPTR(%a6) # incr instruction ptr
  4220. bsr.l _imem_read_long
  4221. tst.l %d1 # did ifetch fail?
  4222. bne.l fcea_iacc # yes
  4223. bra.b fadd_them
  4224. fword_od:
  4225. mov.l EXC_EXTWPTR(%a6),%a0 # fetch instruction addr
  4226. addq.l &0x2,EXC_EXTWPTR(%a6) # incr instruction ptr
  4227. bsr.l _imem_read_word
  4228. tst.l %d1 # did ifetch fail?
  4229. bne.l fcea_iacc # yes
  4230. ext.l %d0 # sign extend od
  4231. bra.b fadd_them
  4232. fnull_od:
  4233. clr.l %d0
  4234. fadd_them:
  4235. mov.l %d0,%d4
  4236. btst &0x2,%d5 # pre or post indexing?
  4237. beq.b fpre_indexed
# post-indexed: fetch the pointer at (base + bd), then add index and od
  4238. mov.l %d3,%a0
  4239. bsr.l _dmem_read_long
  4240. tst.l %d1 # did dfetch fail?
  4241. bne.w fcea_err # yes
  4242. add.l %d2,%d0 # <ea> += index
  4243. add.l %d4,%d0 # <ea> += od
  4244. bra.b fdone_ea
  4245. fpre_indexed:
# pre-indexed: add the index before the memory fetch, then add od
  4246. add.l %d2,%d3 # preindexing
  4247. mov.l %d3,%a0
  4248. bsr.l _dmem_read_long
  4249. tst.l %d1 # did dfetch fail?
  4250. bne.w fcea_err # yes
  4251. add.l %d4,%d0 # ea += od
  4252. bra.b fdone_ea
  4253. faii_bd:
# no memory indirection: ea = base + bd + index
  4254. add.l %d2,%d3 # ea = (base + bd) + index
  4255. mov.l %d3,%d0
  4256. fdone_ea:
  4257. mov.l %d0,%a0
  4258. movm.l (%sp)+,&0x003c # restore d2-d5
  4259. rts
  4260. #########################################################
# Error exits. Per the routine header: on a data access error d0 = FSLW
# and a0 = fault address for iea_dacc; on an instruction access error
# d0 = FSLW for iea_iacc.
  4261. fcea_err:
# data fetch during memory indirection failed: fault addr is the base in d3
  4262. mov.l %d3,%a0
  4263. movm.l (%sp)+,&0x003c # restore d2-d5
  4264. mov.w &0x0101,%d0 # FSLW value for this fault — TODO confirm bits
  4265. bra.l iea_dacc
  4266. fcea_iacc:
  4267. movm.l (%sp)+,&0x003c # restore d2-d5
  4268. bra.l iea_iacc
  4269. fmovm_out_err:
# _dmem_write failed during move-out; restore An index regs first
  4270. bsr.l restore
  4271. mov.w &0x00e1,%d0 # FSLW for failed write — TODO confirm bits
  4272. bra.b fmovm_err
  4273. fmovm_in_err:
# _dmem_read failed during move-in; restore An index regs first
  4274. bsr.l restore
  4275. mov.w &0x0161,%d0 # FSLW for failed read — TODO confirm bits
  4276. fmovm_err:
# L_SCR1 holds the <ea> saved before the transfer; pass it as fault addr
  4277. mov.l L_SCR1(%a6),%a0
  4278. bra.l iea_dacc
  4279. #########################################################################
  4280. # XDEF **************************************************************** #
  4281. # fmovm_ctrl(): emulate fmovm.l of control registers instr #
  4282. # #
  4283. # XREF **************************************************************** #
  4284. # _imem_read_long() - read longword from memory #
  4285. # iea_iacc() - _imem_read_long() failed; error recovery #
  4286. # #
  4287. # INPUT *************************************************************** #
  4288. # None #
  4289. # #
  4290. # OUTPUT ************************************************************** #
  4291. # If _imem_read_long() doesn't fail: #
  4292. # USER_FPCR(a6) = new FPCR value #
  4293. # USER_FPSR(a6) = new FPSR value #
  4294. # USER_FPIAR(a6) = new FPIAR value #
  4295. # #
  4296. # ALGORITHM *********************************************************** #
  4297. # Decode the instruction type by looking at the extension word #
  4298. # in order to see how many control registers to fetch from memory. #
  4299. # Fetch them using _imem_read_long(). If this fetch fails, exit through #
  4300. # the special access error exit handler iea_iacc(). #
  4301. # #
  4302. # Instruction word decoding: #
  4303. # #
  4304. # fmovem.l #<data>, {FPIAR&|FPCR&|FPSR} #
  4305. # #
  4306. # WORD1 WORD2 #
  4307. # 1111 0010 00 111100 100$ $$00 0000 0000 #
  4308. # #
  4309. # $$$ (100): FPCR #
  4310. # (010): FPSR #
  4311. # (001): FPIAR #
  4312. # (000): FPIAR #
  4313. # #
  4314. #########################################################################
  4315. global fmovm_ctrl
  4316. fmovm_ctrl:
# d0 = register-select byte from the instruction's extension word;
# it encodes which of {fpcr,fpsr,fpiar} are loaded (see header above).
  4317. mov.b EXC_EXTWORD(%a6),%d0 # fetch reg select bits
  4318. cmpi.b %d0,&0x9c # fpcr & fpsr & fpiar ?
  4319. beq.w fctrl_in_7 # yes
  4320. cmpi.b %d0,&0x98 # fpcr & fpsr ?
  4321. beq.w fctrl_in_6 # yes
  4322. cmpi.b %d0,&0x94 # fpcr & fpiar ?
  4323. beq.b fctrl_in_5 # yes
# fall-through: remaining combination handled here.
# Each case below reads one longword per selected register with
# _imem_read_long (d1 != 0 on failure -> iea_iacc) and stores the
# value into the USER_FP* slots of the exception stack frame.
  4324. # fmovem.l #<data>, fpsr/fpiar
  4325. fctrl_in_3:
  4326. mov.l EXC_EXTWPTR(%a6),%a0 # fetch instruction addr
  4327. addq.l &0x4,EXC_EXTWPTR(%a6) # incr instruction ptr
  4328. bsr.l _imem_read_long # fetch FPSR from mem
  4329. tst.l %d1 # did ifetch fail?
  4330. bne.l iea_iacc # yes
  4331. mov.l %d0,USER_FPSR(%a6) # store new FPSR to stack
  4332. mov.l EXC_EXTWPTR(%a6),%a0 # fetch instruction addr
  4333. addq.l &0x4,EXC_EXTWPTR(%a6) # incr instruction ptr
  4334. bsr.l _imem_read_long # fetch FPIAR from mem
  4335. tst.l %d1 # did ifetch fail?
  4336. bne.l iea_iacc # yes
  4337. mov.l %d0,USER_FPIAR(%a6) # store new FPIAR to stack
  4338. rts
  4339. # fmovem.l #<data>, fpcr/fpiar
  4340. fctrl_in_5:
  4341. mov.l EXC_EXTWPTR(%a6),%a0 # fetch instruction addr
  4342. addq.l &0x4,EXC_EXTWPTR(%a6) # incr instruction ptr
  4343. bsr.l _imem_read_long # fetch FPCR from mem
  4344. tst.l %d1 # did ifetch fail?
  4345. bne.l iea_iacc # yes
  4346. mov.l %d0,USER_FPCR(%a6) # store new FPCR to stack
  4347. mov.l EXC_EXTWPTR(%a6),%a0 # fetch instruction addr
  4348. addq.l &0x4,EXC_EXTWPTR(%a6) # incr instruction ptr
  4349. bsr.l _imem_read_long # fetch FPIAR from mem
  4350. tst.l %d1 # did ifetch fail?
  4351. bne.l iea_iacc # yes
  4352. mov.l %d0,USER_FPIAR(%a6) # store new FPIAR to stack
  4353. rts
  4354. # fmovem.l #<data>, fpcr/fpsr
  4355. fctrl_in_6:
  4356. mov.l EXC_EXTWPTR(%a6),%a0 # fetch instruction addr
  4357. addq.l &0x4,EXC_EXTWPTR(%a6) # incr instruction ptr
  4358. bsr.l _imem_read_long # fetch FPCR from mem
  4359. tst.l %d1 # did ifetch fail?
  4360. bne.l iea_iacc # yes
  4361. mov.l %d0,USER_FPCR(%a6) # store new FPCR to mem
  4362. mov.l EXC_EXTWPTR(%a6),%a0 # fetch instruction addr
  4363. addq.l &0x4,EXC_EXTWPTR(%a6) # incr instruction ptr
  4364. bsr.l _imem_read_long # fetch FPSR from mem
  4365. tst.l %d1 # did ifetch fail?
  4366. bne.l iea_iacc # yes
  4367. mov.l %d0,USER_FPSR(%a6) # store new FPSR to mem
  4368. rts
  4369. # fmovem.l #<data>, fpcr/fpsr/fpiar
  4370. fctrl_in_7:
  4371. mov.l EXC_EXTWPTR(%a6),%a0 # fetch instruction addr
  4372. addq.l &0x4,EXC_EXTWPTR(%a6) # incr instruction ptr
  4373. bsr.l _imem_read_long # fetch FPCR from mem
  4374. tst.l %d1 # did ifetch fail?
  4375. bne.l iea_iacc # yes
  4376. mov.l %d0,USER_FPCR(%a6) # store new FPCR to mem
  4377. mov.l EXC_EXTWPTR(%a6),%a0 # fetch instruction addr
  4378. addq.l &0x4,EXC_EXTWPTR(%a6) # incr instruction ptr
  4379. bsr.l _imem_read_long # fetch FPSR from mem
  4380. tst.l %d1 # did ifetch fail?
  4381. bne.l iea_iacc # yes
  4382. mov.l %d0,USER_FPSR(%a6) # store new FPSR to mem
  4383. mov.l EXC_EXTWPTR(%a6),%a0 # fetch instruction addr
  4384. addq.l &0x4,EXC_EXTWPTR(%a6) # incr instruction ptr
  4385. bsr.l _imem_read_long # fetch FPIAR from mem
  4386. tst.l %d1 # did ifetch fail?
  4387. bne.l iea_iacc # yes
  4388. mov.l %d0,USER_FPIAR(%a6) # store new FPIAR to mem
  4389. rts
  4390. ##########################################################################
  4391. #########################################################################
  4392. # XDEF **************************************************************** #
  4393. # addsub_scaler2(): scale inputs to fadd/fsub such that no #
  4394. # OVFL/UNFL exceptions will result #
  4395. # #
  4396. # XREF **************************************************************** #
  4397. # norm() - normalize mantissa after adjusting exponent #
  4398. # #
  4399. # INPUT *************************************************************** #
  4400. # FP_SRC(a6) = fp op1(src) #
  4401. # FP_DST(a6) = fp op2(dst) #
  4402. # #
  4403. # OUTPUT ************************************************************** #
  4404. # FP_SRC(a6) = fp op1 scaled(src) #
  4405. # FP_DST(a6) = fp op2 scaled(dst) #
  4406. # d0 = scale amount #
  4407. # #
  4408. # ALGORITHM *********************************************************** #
  4409. # If the DST exponent is > the SRC exponent, set the DST exponent #
  4410. # equal to 0x3fff and scale the SRC exponent by the value that the #
  4411. # DST exponent was scaled by. If the SRC exponent is greater or equal, #
  4412. # do the opposite. Return this scale factor in d0. #
  4413. # If the two exponents differ by > the number of mantissa bits #
  4414. # plus two, then set the smallest exponent to a very small value as a #
  4415. # quick shortcut. #
  4416. # #
  4417. #########################################################################
  4418. global addsub_scaler2
  4419. addsub_scaler2:
# copy src (a0) into FP_SCR0 and dst (a1) into FP_SCR1; keep the
# unsigned exponents in d0/d1 and in L_SCR1 (src at +0, dst at +2).
  4420. mov.l SRC_HI(%a0),FP_SCR0_HI(%a6)
  4421. mov.l DST_HI(%a1),FP_SCR1_HI(%a6)
  4422. mov.l SRC_LO(%a0),FP_SCR0_LO(%a6)
  4423. mov.l DST_LO(%a1),FP_SCR1_LO(%a6)
  4424. mov.w SRC_EX(%a0),%d0
  4425. mov.w DST_EX(%a1),%d1
  4426. mov.w %d0,FP_SCR0_EX(%a6)
  4427. mov.w %d1,FP_SCR1_EX(%a6)
  4428. andi.w &0x7fff,%d0 # strip sign; d0 = src exponent
  4429. andi.w &0x7fff,%d1 # strip sign; d1 = dst exponent
  4430. mov.w %d0,L_SCR1(%a6) # store src exponent
  4431. mov.w %d1,2+L_SCR1(%a6) # store dst exponent
  4432. cmp.w %d0, %d1 # is src exp >= dst exp?
  4433. bge.l src_exp_ge2
  4434. # dst exp is > src exp; scale dst to exp = 0x3fff
  4435. dst_exp_gt2:
  4436. bsr.l scale_to_zero_dst
  4437. mov.l %d0,-(%sp) # save scale factor
  4438. cmpi.b STAG(%a6),&DENORM # is src denormalized?
  4439. bne.b cmpexp12
  4440. lea FP_SCR0(%a6),%a0 # normalize the src copy
  4441. bsr.l norm # normalize the denorm; result is new exp
  4442. neg.w %d0 # new exp = -(shft val)
  4443. mov.w %d0,L_SCR1(%a6) # insert new src exp
  4444. cmpexp12:
  4445. mov.w 2+L_SCR1(%a6),%d0
  4446. subi.w &mantissalen+2,%d0 # subtract mantissalen+2 from larger exp
  4447. cmp.w %d0,L_SCR1(%a6) # is difference >= len(mantissa)+2?
  4448. bge.b quick_scale12
  4449. mov.w L_SCR1(%a6),%d0
  4450. add.w 0x2(%sp),%d0 # scale src exponent by scale factor
  4451. mov.w FP_SCR0_EX(%a6),%d1
  4452. and.w &0x8000,%d1 # keep only the sign bit
  4453. or.w %d1,%d0 # concat {sgn,new exp}
  4454. mov.w %d0,FP_SCR0_EX(%a6) # insert new src exponent
  4455. mov.l (%sp)+,%d0 # return SCALE factor
  4456. rts
  4457. quick_scale12:
# exponents differ too much: force the src exponent to 1 (tiny) so the
# subsequent fadd/fsub result is dominated by the dst operand.
  4458. andi.w &0x8000,FP_SCR0_EX(%a6) # zero src exponent
  4459. bset &0x0,1+FP_SCR0_EX(%a6) # set exp = 1
  4460. mov.l (%sp)+,%d0 # return SCALE factor
  4461. rts
  4462. # src exp is >= dst exp; scale src to exp = 0x3fff
  4463. src_exp_ge2:
  4464. bsr.l scale_to_zero_src
  4465. mov.l %d0,-(%sp) # save scale factor
  4466. cmpi.b DTAG(%a6),&DENORM # is dst denormalized?
  4467. bne.b cmpexp22
  4468. lea FP_SCR1(%a6),%a0 # normalize the dst copy
  4469. bsr.l norm # normalize the denorm; result is new exp
  4470. neg.w %d0 # new exp = -(shft val)
  4471. mov.w %d0,2+L_SCR1(%a6) # insert new dst exp
  4472. cmpexp22:
  4473. mov.w L_SCR1(%a6),%d0
  4474. subi.w &mantissalen+2,%d0 # subtract mantissalen+2 from larger exp
  4475. cmp.w %d0,2+L_SCR1(%a6) # is difference >= len(mantissa)+2?
  4476. bge.b quick_scale22
  4477. mov.w 2+L_SCR1(%a6),%d0
  4478. add.w 0x2(%sp),%d0 # scale dst exponent by scale factor
  4479. mov.w FP_SCR1_EX(%a6),%d1
  4480. andi.w &0x8000,%d1 # keep only the sign bit
  4481. or.w %d1,%d0 # concat {sgn,new exp}
  4482. mov.w %d0,FP_SCR1_EX(%a6) # insert new dst exponent
  4483. mov.l (%sp)+,%d0 # return SCALE factor
  4484. rts
  4485. quick_scale22:
# exponents differ too much: force the dst exponent to 1 (tiny) so the
# subsequent fadd/fsub result is dominated by the src operand.
  4486. andi.w &0x8000,FP_SCR1_EX(%a6) # zero dst exponent
  4487. bset &0x0,1+FP_SCR1_EX(%a6) # set exp = 1
  4488. mov.l (%sp)+,%d0 # return SCALE factor
  4489. rts
  4490. ##########################################################################
  4491. #########################################################################
  4492. # XDEF **************************************************************** #
  4493. # scale_to_zero_src(): scale the exponent of extended precision #
  4494. # value at FP_SCR0(a6). #
  4495. # #
  4496. # XREF **************************************************************** #
  4497. # norm() - normalize the mantissa if the operand was a DENORM #
  4498. # #
  4499. # INPUT *************************************************************** #
  4500. # FP_SCR0(a6) = extended precision operand to be scaled #
  4501. # #
  4502. # OUTPUT ************************************************************** #
  4503. # FP_SCR0(a6) = scaled extended precision operand #
  4504. # d0 = scale value #
  4505. # #
  4506. # ALGORITHM *********************************************************** #
  4507. # Set the exponent of the input operand to 0x3fff. Save the value #
  4508. # of the difference between the original and new exponent. Then, #
  4509. # normalize the operand if it was a DENORM. Add this normalization #
  4510. # value to the previous value. Return the result. #
  4511. # #
  4512. #########################################################################
  4513. global scale_to_zero_src
  4514. scale_to_zero_src:
  4515. mov.w FP_SCR0_EX(%a6),%d1 # extract operand's {sgn,exp}
  4516. mov.w %d1,%d0 # make a copy
  4517. andi.l &0x7fff,%d1 # extract operand's exponent
  4518. andi.w &0x8000,%d0 # extract operand's sgn
  4519. or.w &0x3fff,%d0 # insert new operand's exponent(=0)
  4520. mov.w %d0,FP_SCR0_EX(%a6) # insert biased exponent
  4521. cmpi.b STAG(%a6),&DENORM # is the operand a DENORM?
  4522. beq.b stzs_denorm # normalize the DENORM
  4523. stzs_norm:
  4524. mov.l &0x3fff,%d0 # d0 = BIAS
  4525. sub.l %d1,%d0 # scale = BIAS + (-exp)
  4526. rts
  4527. stzs_denorm:
  4528. lea FP_SCR0(%a6),%a0 # pass ptr to src op
  4529. bsr.l norm # normalize denorm
  4530. neg.l %d0 # new exponent = -(shft val)
  4531. mov.l %d0,%d1 # d1 = new exp; reuse normal-path scale calc
  4532. bra.b stzs_norm # finish scaling
  4533. ###
  4534. #########################################################################
  4535. # XDEF **************************************************************** #
  4536. # scale_sqrt(): scale the input operand exponent so a subsequent #
  4537. # fsqrt operation won't take an exception. #
  4538. # #
  4539. # XREF **************************************************************** #
  4540. # norm() - normalize the mantissa if the operand was a DENORM #
  4541. # #
  4542. # INPUT *************************************************************** #
  4543. # FP_SCR0(a6) = extended precision operand to be scaled #
  4544. # #
  4545. # OUTPUT ************************************************************** #
  4546. # FP_SCR0(a6) = scaled extended precision operand #
  4547. # d0 = scale value #
  4548. # #
  4549. # ALGORITHM *********************************************************** #
  4550. # If the input operand is a DENORM, normalize it. #
  4551. # If the exponent of the input operand is even, set the exponent #
  4552. # to 0x3ffe and return a scale factor of "(exp-0x3ffe)/2". If the #
  4553. # exponent of the input operand is odd, set the exponent to 0x3fff and #
  4554. # return a scale factor of "(exp-0x3fff)/2". #
  4555. # #
  4556. #########################################################################
  4557. global scale_sqrt
  4558. scale_sqrt:
  4559. cmpi.b STAG(%a6),&DENORM # is the operand a DENORM?
  4560. beq.b ss_denorm # normalize the DENORM
  4561. mov.w FP_SCR0_EX(%a6),%d1 # extract operand's {sgn,exp}
  4562. andi.l &0x7fff,%d1 # extract operand's exponent
  4563. andi.w &0x8000,FP_SCR0_EX(%a6) # extract operand's sgn
  4564. btst &0x0,%d1 # is exp even or odd?
  4565. beq.b ss_norm_even
# odd exponent: rescale to 0x3fff so (exp - bias) stays even and the
# scale factor can be halved exactly.
  4566. ori.w &0x3fff,FP_SCR0_EX(%a6) # insert new operand's exponent(=0)
  4567. mov.l &0x3fff,%d0
  4568. sub.l %d1,%d0 # scale = BIAS + (-exp)
  4569. asr.l &0x1,%d0 # divide scale factor by 2
  4570. rts
  4571. ss_norm_even:
  4572. ori.w &0x3ffe,FP_SCR0_EX(%a6) # insert new operand's exponent(=-1)
  4573. mov.l &0x3ffe,%d0
  4574. sub.l %d1,%d0 # scale = (BIAS-1) + (-exp)
  4575. asr.l &0x1,%d0 # divide scale factor by 2
  4576. rts
  4577. ss_denorm:
  4578. lea FP_SCR0(%a6),%a0 # pass ptr to src op
  4579. bsr.l norm # normalize denorm
  4580. btst &0x0,%d0 # is exp even or odd?
  4581. beq.b ss_denorm_even
  4582. ori.w &0x3fff,FP_SCR0_EX(%a6) # insert new operand's exponent(=0)
  4583. add.l &0x3fff,%d0 # scale = BIAS + shift count
  4584. asr.l &0x1,%d0 # divide scale factor by 2
  4585. rts
  4586. ss_denorm_even:
  4587. ori.w &0x3ffe,FP_SCR0_EX(%a6) # insert new operand's exponent(=-1)
  4588. add.l &0x3ffe,%d0 # scale = (BIAS-1) + shift count
  4589. asr.l &0x1,%d0 # divide scale factor by 2
  4590. rts
  4591. ###
  4592. #########################################################################
  4593. # XDEF **************************************************************** #
  4594. # scale_to_zero_dst(): scale the exponent of extended precision #
  4595. # value at FP_SCR1(a6). #
  4596. # #
  4597. # XREF **************************************************************** #
  4598. # norm() - normalize the mantissa if the operand was a DENORM #
  4599. # #
  4600. # INPUT *************************************************************** #
  4601. # FP_SCR1(a6) = extended precision operand to be scaled #
  4602. # #
  4603. # OUTPUT ************************************************************** #
  4604. # FP_SCR1(a6) = scaled extended precision operand #
  4605. # d0 = scale value #
  4606. # #
  4607. # ALGORITHM *********************************************************** #
  4608. # Set the exponent of the input operand to 0x3fff. Save the value #
  4609. # of the difference between the original and new exponent. Then, #
  4610. # normalize the operand if it was a DENORM. Add this normalization #
  4611. # value to the previous value. Return the result. #
  4612. # #
  4613. #########################################################################
  4614. global scale_to_zero_dst
  4615. scale_to_zero_dst:
  4616. mov.w FP_SCR1_EX(%a6),%d1 # extract operand's {sgn,exp}
  4617. mov.w %d1,%d0 # make a copy
  4618. andi.l &0x7fff,%d1 # extract operand's exponent
  4619. andi.w &0x8000,%d0 # extract operand's sgn
  4620. or.w &0x3fff,%d0 # insert new operand's exponent(=0)
  4621. mov.w %d0,FP_SCR1_EX(%a6) # insert biased exponent
  4622. cmpi.b DTAG(%a6),&DENORM # is the operand a DENORM?
  4623. beq.b stzd_denorm # normalize the DENORM
  4624. stzd_norm:
  4625. mov.l &0x3fff,%d0 # d0 = BIAS
  4626. sub.l %d1,%d0 # scale = BIAS + (-exp)
  4627. rts
  4628. stzd_denorm:
  4629. lea FP_SCR1(%a6),%a0 # pass ptr to dst op
  4630. bsr.l norm # normalize denorm
  4631. neg.l %d0 # new exponent = -(shft val)
  4632. mov.l %d0,%d1 # d1 = new exp; reuse normal-path scale calc
  4633. bra.b stzd_norm # finish scaling
  4634. ##########################################################################
  4635. #########################################################################
  4636. # XDEF **************************************************************** #
  4637. # res_qnan(): return default result w/ QNAN operand for dyadic #
  4638. # res_snan(): return default result w/ SNAN operand for dyadic #
  4639. # res_qnan_1op(): return dflt result w/ QNAN operand for monadic #
  4640. # res_snan_1op(): return dflt result w/ SNAN operand for monadic #
  4641. # #
  4642. # XREF **************************************************************** #
  4643. # None #
  4644. # #
  4645. # INPUT *************************************************************** #
  4646. # FP_SRC(a6) = pointer to extended precision src operand #
  4647. # FP_DST(a6) = pointer to extended precision dst operand #
  4648. # #
  4649. # OUTPUT ************************************************************** #
  4650. # fp0 = default result #
  4651. # #
  4652. # ALGORITHM *********************************************************** #
  4653. # If either operand (but not both operands) of an operation is a #
  4654. # nonsignalling NAN, then that NAN is returned as the result. If both #
  4655. # operands are nonsignalling NANs, then the destination operand #
  4656. # nonsignalling NAN is returned as the result. #
  4657. # If either operand to an operation is a signalling NAN (SNAN), #
  4658. # then, the SNAN bit is set in the FPSR EXC byte. If the SNAN trap #
  4659. # enable bit is set in the FPCR, then the trap is taken and the #
  4660. # destination is not modified. If the SNAN trap enable bit is not set, #
  4661. # then the SNAN is converted to a nonsignalling NAN (by setting the #
  4662. # SNAN bit in the operand to one), and the operation continues as #
  4663. # described in the preceding paragraph, for nonsignalling NANs. #
  4664. # Make sure the appropriate FPSR bits are set before exiting. #
  4665. # #
  4666. #########################################################################
  4667. global res_qnan
  4668. global res_snan
  4669. res_qnan:
  4670. res_snan:
# dyadic entry: pick the NAN to return (dst has priority per the header
# above), quieting an SNAN and setting the FPSR bits along the way.
  4671. cmp.b DTAG(%a6), &SNAN # is the dst an SNAN?
  4672. beq.b dst_snan2
  4673. cmp.b DTAG(%a6), &QNAN # is the dst a QNAN?
  4674. beq.b dst_qnan2
  4675. src_nan:
  4676. cmp.b STAG(%a6), &QNAN # src QNAN -> quiet path
  4677. beq.b src_qnan2
  4678. global res_snan_1op
  4679. res_snan_1op:
  4680. src_snan2:
  4681. bset &0x6, FP_SRC_HI(%a6) # quiet the SNAN (-> nonsignalling NAN)
  4682. or.l &nan_mask+aiop_mask+snan_mask, USER_FPSR(%a6) # NAN/AIOP/SNAN set
  4683. lea FP_SRC(%a6), %a0 # result = src NAN
  4684. bra.b nan_comp
  4685. global res_qnan_1op
  4686. res_qnan_1op:
  4687. src_qnan2:
  4688. or.l &nan_mask, USER_FPSR(%a6) # NAN set; QNAN raises no SNAN exc
  4689. lea FP_SRC(%a6), %a0 # result = src NAN
  4690. bra.b nan_comp
  4691. dst_snan2:
  4692. or.l &nan_mask+aiop_mask+snan_mask, USER_FPSR(%a6) # NAN/AIOP/SNAN set
  4693. bset &0x6, FP_DST_HI(%a6) # quiet the SNAN (-> nonsignalling NAN)
  4694. lea FP_DST(%a6), %a0 # result = dst NAN
  4695. bra.b nan_comp
  4696. dst_qnan2:
  4697. lea FP_DST(%a6), %a0 # result = dst NAN
  4698. cmp.b STAG(%a6), &SNAN # src SNAN still flags SNAN/AIOP
  4699. bne nan_done
  4700. or.l &aiop_mask+snan_mask, USER_FPSR(%a6)
  4701. nan_done:
  4702. or.l &nan_mask, USER_FPSR(%a6)
  4703. nan_comp:
  4704. btst &0x7, FTEMP_EX(%a0) # is NAN neg?
  4705. beq.b nan_not_neg
  4706. or.l &neg_mask, USER_FPSR(%a6) # mirror the NAN's sign into FPSR N
  4707. nan_not_neg:
  4708. fmovm.x (%a0), &0x80 # return the chosen NAN in fp0
  4709. rts
  4710. #########################################################################
  4711. # XDEF **************************************************************** #
  4712. # res_operr(): return default result during operand error #
  4713. # #
  4714. # XREF **************************************************************** #
  4715. # None #
  4716. # #
  4717. # INPUT *************************************************************** #
  4718. # None #
  4719. # #
  4720. # OUTPUT ************************************************************** #
  4721. # fp0 = default operand error result #
  4722. # #
  4723. # ALGORITHM *********************************************************** #
  4724. # A nonsignalling NAN is returned as the default result when #
  4725. # an operand error occurs for the following cases: #
  4726. # #
  4727. # Multiply: (Infinity x Zero) #
  4728. # Divide : (Zero / Zero) || (Infinity / Infinity) #
  4729. # #
  4730. #########################################################################
  4731. global res_operr
  4732. res_operr:
  4733. or.l &nan_mask+operr_mask+aiop_mask, USER_FPSR(%a6) # set NAN/OPERR/AIOP
  4734. fmovm.x nan_return(%pc), &0x80 # fp0 = default nonsignalling NAN
  4735. rts
# default NAN constant: max exponent, all-ones mantissa (extended prec.)
  4736. nan_return:
  4737. long 0x7fff0000, 0xffffffff, 0xffffffff
  4738. #########################################################################
  4739. # XDEF **************************************************************** #
  4740. # _denorm(): denormalize an intermediate result #
  4741. # #
  4742. # XREF **************************************************************** #
  4743. # None #
  4744. # #
  4745. # INPUT *************************************************************** #
  4746. # a0 = points to the operand to be denormalized #
  4747. # (in the internal extended format) #
  4748. # #
  4749. # d0 = rounding precision #
  4750. # #
  4751. # OUTPUT ************************************************************** #
  4752. # a0 = pointer to the denormalized result #
  4753. # (in the internal extended format) #
  4754. # #
  4755. # d0 = guard,round,sticky #
  4756. # #
  4757. # ALGORITHM *********************************************************** #
  4758. # According to the exponent underflow threshold for the given #
  4759. # precision, shift the mantissa bits to the right in order to raise the #
  4760. # exponent of the operand to the threshold value. While shifting the #
  4761. # mantissa bits right, maintain the value of the guard, round, and #
  4762. # sticky bits. #
  4763. # other notes: #
  4764. # (1) _denorm() is called by the underflow routines #
  4765. # (2) _denorm() does NOT affect the status register #
  4766. # #
  4767. #########################################################################
  4768. #
  4769. # table of exponent threshold values for each precision
  4770. #
# indexed by rounding precision (0=ext, 1=sgl, 2=dbl)
  4771. tbl_thresh:
  4772. short 0x0 # extended precision threshold
  4773. short sgl_thresh # single precision threshold
  4774. short dbl_thresh # double precision threshold
  4775. global _denorm
  4776. _denorm:
  4777. #
  4778. # Load the exponent threshold for the precision selected and check
  4779. # to see if (threshold - exponent) is > 65 in which case we can
  4780. # simply calculate the sticky bit and zero the mantissa. otherwise
  4781. # we have to call the denormalization routine.
  4782. #
  4783. lsr.b &0x2, %d0 # shift prec to lo bits
  4784. mov.w (tbl_thresh.b,%pc,%d0.w*2), %d1 # load prec threshold
  4785. mov.w %d1, %d0 # copy d1 into d0
  4786. sub.w FTEMP_EX(%a0), %d0 # diff = threshold - exp
  4787. cmpi.w %d0, &66 # is diff > 65? (mant + g,r bits)
  4788. bpl.b denorm_set_stky # yes; just calc sticky
  4789. clr.l %d0 # clear g,r,s
  4790. btst &inex2_bit, FPSR_EXCEPT(%a6) # was INEX2 already set?
  4791. beq.b denorm_call # no; don't change anything
  4792. bset &29, %d0 # yes; seed the sticky bit
  4793. denorm_call:
  4794. bsr.l dnrm_lp # denormalize the number
  4795. rts
  4796. #
  4797. # all bits would have been shifted off during the denorm so simply
  4798. # set the sticky and clear the entire mantissa.
  4799. #
  4800. denorm_set_stky:
  4801. mov.l &0x20000000, %d0 # set sticky bit in return value
  4802. mov.w %d1, FTEMP_EX(%a0) # load exp with threshold
  4803. clr.l FTEMP_HI(%a0) # clear hi(mantissa)
  4804. clr.l FTEMP_LO(%a0) # clear lo(mantissa)
  4805. rts
  4806. # #
  4807. # dnrm_lp(): denormalize exponent/mantissa to specified threshold #
  4808. # #
  4809. # INPUT: #
  4810. # %a0 : points to the operand to be denormalized #
  4811. # %d0{31:29} : initial guard,round,sticky #
  4812. # %d1{15:0} : denormalization threshold #
  4813. # OUTPUT: #
  4814. # %a0 : points to the denormalized operand #
  4815. # %d0{31:29} : final guard,round,sticky #
  4816. # #
  4817. # *** Local Equates *** #
  4818. set GRS, L_SCR2 # g,r,s temp storage
  4819. set FTEMP_LO2, L_SCR1 # FTEMP_LO copy
  4820. global dnrm_lp
  4821. dnrm_lp:
  4822. #
  4823. # make a copy of FTEMP_LO and place the g,r,s bits directly after it
  4824. # in memory so as to make the bitfield extraction for denormalization easier.
  4825. #
  4826. mov.l FTEMP_LO(%a0), FTEMP_LO2(%a6) # make FTEMP_LO copy
  4827. mov.l %d0, GRS(%a6) # place g,r,s after it
  4828. #
  4829. # check to see how much less than the underflow threshold the operand
  4830. # exponent is.
  4831. #
  4832. mov.l %d1, %d0 # copy the denorm threshold
  4833. sub.w FTEMP_EX(%a0), %d1 # d1 = threshold - uns exponent
  4834. ble.b dnrm_no_lp # d1 <= 0; already at/above threshold
  4835. cmpi.w %d1, &0x20 # is ( 0 <= d1 < 32) ?
  4836. blt.b case_1 # yes
  4837. cmpi.w %d1, &0x40 # is (32 <= d1 < 64) ?
  4838. blt.b case_2 # yes
  4839. bra.w case_3 # (d1 >= 64)
  4840. #
  4841. # No normalization necessary
  4842. #
  4843. dnrm_no_lp:
  4844. mov.l GRS(%a6), %d0 # restore original g,r,s
  4845. rts
  4846. #
  4847. # case (0<d1<32)
  4848. #
  4849. # %d0 = denorm threshold
  4850. # %d1 = "n" = amt to shift
  4851. #
  4852. # ---------------------------------------------------------
  4853. # | FTEMP_HI | FTEMP_LO |grs000.........000|
  4854. # ---------------------------------------------------------
  4855. # <-(32 - n)-><-(n)-><-(32 - n)-><-(n)-><-(32 - n)-><-(n)->
  4856. # \ \ \ \
  4857. # \ \ \ \
  4858. # \ \ \ \
  4859. # \ \ \ \
  4860. # \ \ \ \
  4861. # \ \ \ \
  4862. # \ \ \ \
  4863. # \ \ \ \
  4864. # <-(n)-><-(32 - n)-><------(32)-------><------(32)------->
  4865. # ---------------------------------------------------------
  4866. # |0.....0| NEW_HI | NEW_FTEMP_LO |grs |
  4867. # ---------------------------------------------------------
  4868. #
# shift right by n (0 < n < 32); see the diagram above.
  4869. case_1:
  4870. mov.l %d2, -(%sp) # create temp storage
  4871. mov.w %d0, FTEMP_EX(%a0) # exponent = denorm threshold
  4872. mov.l &32, %d0
  4873. sub.w %d1, %d0 # %d0 = 32 - %d1
# for shifts of 29-31 the old g,r,s overlap the new g,r,s field, so
# fold them into the low byte of the FTEMP_LO copy before extracting.
  4874. cmpi.w %d1, &29 # is shft amt >= 29
  4875. blt.b case1_extract # no; no fix needed
  4876. mov.b GRS(%a6), %d2
  4877. or.b %d2, 3+FTEMP_LO2(%a6)
  4878. case1_extract:
  4879. bfextu FTEMP_HI(%a0){&0:%d0}, %d2 # %d2 = new FTEMP_HI
  4880. bfextu FTEMP_HI(%a0){%d0:&32}, %d1 # %d1 = new FTEMP_LO
  4881. bfextu FTEMP_LO2(%a6){%d0:&32}, %d0 # %d0 = new G,R,S
  4882. mov.l %d2, FTEMP_HI(%a0) # store new FTEMP_HI
  4883. mov.l %d1, FTEMP_LO(%a0) # store new FTEMP_LO
  4884. bftst %d0{&2:&30} # were bits shifted off?
  4885. beq.b case1_sticky_clear # no; go finish
  4886. bset &rnd_stky_bit, %d0 # yes; set sticky bit
  4887. case1_sticky_clear:
  4888. and.l &0xe0000000, %d0 # clear all but G,R,S
  4889. mov.l (%sp)+, %d2 # restore temp register
  4890. rts
  4891. #
  4892. # case (32<=d1<64)
  4893. #
  4894. # %d0 = denorm threshold
  4895. # %d1 = "n" = amt to shift
  4896. #
  4897. # ---------------------------------------------------------
  4898. # | FTEMP_HI | FTEMP_LO |grs000.........000|
  4899. # ---------------------------------------------------------
  4900. # <-(32 - n)-><-(n)-><-(32 - n)-><-(n)-><-(32 - n)-><-(n)->
  4901. # \ \ \
  4902. # \ \ \
  4903. # \ \ -------------------
  4904. # \ -------------------- \
  4905. # ------------------- \ \
  4906. # \ \ \
  4907. # \ \ \
  4908. # \ \ \
  4909. # <-------(32)------><-(n)-><-(32 - n)-><------(32)------->
  4910. # ---------------------------------------------------------
  4911. # |0...............0|0....0| NEW_LO |grs |
  4912. # ---------------------------------------------------------
  4913. #
# shift right by n (32 <= n < 64); see the diagram above.
  4914. case_2:
  4915. mov.l %d2, -(%sp) # create temp storage
  4916. mov.w %d0, FTEMP_EX(%a0) # exponent = denorm threshold
  4917. subi.w &0x20, %d1 # %d1 now between 0 and 32
  4918. mov.l &0x20, %d0
  4919. sub.w %d1, %d0 # %d0 = 32 - %d1
  4920. # subtle step here; or in the g,r,s at the bottom of FTEMP_LO to minimize
  4921. # the number of bits to check for the sticky detect.
  4922. # it only plays a role in shift amounts of 61-63.
  4923. mov.b GRS(%a6), %d2
  4924. or.b %d2, 3+FTEMP_LO2(%a6)
  4925. bfextu FTEMP_HI(%a0){&0:%d0}, %d2 # %d2 = new FTEMP_LO
  4926. bfextu FTEMP_HI(%a0){%d0:&32}, %d1 # %d1 = new G,R,S
  4927. bftst %d1{&2:&30} # were any bits shifted off?
  4928. bne.b case2_set_sticky # yes; set sticky bit
  4929. bftst FTEMP_LO2(%a6){%d0:&31} # were any bits shifted off?
  4930. bne.b case2_set_sticky # yes; set sticky bit
  4931. mov.l %d1, %d0 # move new G,R,S to %d0
  4932. bra.b case2_end
  4933. case2_set_sticky:
  4934. mov.l %d1, %d0 # move new G,R,S to %d0
  4935. bset &rnd_stky_bit, %d0 # set sticky bit
  4936. case2_end:
  4937. clr.l FTEMP_HI(%a0) # store FTEMP_HI = 0
  4938. mov.l %d2, FTEMP_LO(%a0) # store FTEMP_LO
  4939. and.l &0xe0000000, %d0 # clear all but G,R,S
  4940. mov.l (%sp)+,%d2 # restore temp register
  4941. rts
  4942. #
  4943. # case (d1>=64)
  4944. #
  4945. # %d0 = denorm threshold
  4946. # %d1 = amt to shift
  4947. #
  4948. case_3:
  4949. mov.w %d0, FTEMP_EX(%a0) # insert denorm threshold
  4950. cmpi.w %d1, &65 # is shift amt > 65?
  4951. blt.b case3_64 # no; it's == 64
  4952. beq.b case3_65 # no; it's == 65
  4953. #
  4954. # case (d1>65)
  4955. #
  4956. # Shift value is > 65 and out of range. All bits are shifted off.
  4957. # Return a zero mantissa with the sticky bit set
  4958. #
  4959. clr.l FTEMP_HI(%a0) # clear hi(mantissa)
  4960. clr.l FTEMP_LO(%a0) # clear lo(mantissa)
  4961. mov.l &0x20000000, %d0 # set sticky bit
  4962. rts
  4963. #
  4964. # case (d1 == 64)
  4965. #
  4966. # ---------------------------------------------------------
  4967. # | FTEMP_HI | FTEMP_LO |grs000.........000|
  4968. # ---------------------------------------------------------
  4969. # <-------(32)------>
  4970. # \ \
  4971. # \ \
  4972. # \ \
  4973. # \ ------------------------------
  4974. # ------------------------------- \
  4975. # \ \
  4976. # \ \
  4977. # \ \
  4978. # <-------(32)------>
  4979. # ---------------------------------------------------------
  4980. # |0...............0|0................0|grs |
  4981. # ---------------------------------------------------------
  4982. #
  4983. case3_64:
  4984. mov.l FTEMP_HI(%a0), %d0 # fetch hi(mantissa)
  4985. mov.l %d0, %d1 # make a copy
  4986. and.l &0xc0000000, %d0 # extract G,R
  4987. and.l &0x3fffffff, %d1 # extract other bits
  4988. bra.b case3_complete
  4989. #
  4990. # case (d1 == 65)
  4991. #
  4992. # ---------------------------------------------------------
  4993. # | FTEMP_HI | FTEMP_LO |grs000.........000|
  4994. # ---------------------------------------------------------
  4995. # <-------(32)------>
  4996. # \ \
  4997. # \ \
  4998. # \ \
  4999. # \ ------------------------------
  5000. # -------------------------------- \
  5001. # \ \
  5002. # \ \
  5003. # \ \
  5004. # <-------(31)----->
  5005. # ---------------------------------------------------------
  5006. # |0...............0|0................0|0rs |
  5007. # ---------------------------------------------------------
  5008. #
  5009. case3_65:
  5010. mov.l FTEMP_HI(%a0), %d0 # fetch hi(mantissa)
  5011. and.l &0x80000000, %d0 # extract R bit
  5012. lsr.l &0x1, %d0 # shift high bit into R bit
  5013. and.l &0x7fffffff, %d1 # extract other bits
  5014. case3_complete:
  5015. # last operation done was an "and" of the bits shifted off so the condition
  5016. # codes are already set so branch accordingly.
  5017. bne.b case3_set_sticky # yes; go set new sticky
  5018. tst.l FTEMP_LO(%a0) # were any bits shifted off?
  5019. bne.b case3_set_sticky # yes; go set new sticky
  5020. tst.b GRS(%a6) # were any bits shifted off?
  5021. bne.b case3_set_sticky # yes; go set new sticky
  5022. #
  5023. # no bits were shifted off so don't set the sticky bit.
  5024. # the guard and
  5025. # the entire mantissa is zero.
  5026. #
  5027. clr.l FTEMP_HI(%a0) # clear hi(mantissa)
  5028. clr.l FTEMP_LO(%a0) # clear lo(mantissa)
  5029. rts
  5030. #
  5031. # some bits were shifted off so set the sticky bit.
  5032. # the entire mantissa is zero.
  5033. #
  5034. case3_set_sticky:
  5035. bset &rnd_stky_bit,%d0 # set new sticky bit
  5036. clr.l FTEMP_HI(%a0) # clear hi(mantissa)
  5037. clr.l FTEMP_LO(%a0) # clear lo(mantissa)
  5038. rts
  5039. #########################################################################
  5040. # XDEF **************************************************************** #
  5041. # _round(): round result according to precision/mode #
  5042. # #
  5043. # XREF **************************************************************** #
  5044. # None #
  5045. # #
  5046. # INPUT *************************************************************** #
  5047. # a0 = ptr to input operand in internal extended format #
  5048. # d1(hi) = contains rounding precision: #
  5049. # ext = $0000xxxx #
  5050. # sgl = $0004xxxx #
  5051. # dbl = $0008xxxx #
  5052. # d1(lo) = contains rounding mode: #
  5053. # RN = $xxxx0000 #
  5054. # RZ = $xxxx0001 #
  5055. # RM = $xxxx0002 #
  5056. # RP = $xxxx0003 #
  5057. # d0{31:29} = contains the g,r,s bits (extended) #
  5058. # #
  5059. # OUTPUT ************************************************************** #
  5060. # a0 = pointer to rounded result #
  5061. # #
  5062. # ALGORITHM *********************************************************** #
  5063. # On return the value pointed to by a0 is correctly rounded, #
  5064. # a0 is preserved and the g-r-s bits in d0 are cleared. #
  5065. # The result is not typed - the tag field is invalid. The #
  5066. # result is still in the internal extended format. #
  5067. # #
  5068. # The INEX bit of USER_FPSR will be set if the rounded result was #
  5069. # inexact (i.e. if any of the g-r-s bits were set). #
  5070. # #
  5071. #########################################################################
  5072. global _round
  5073. _round:
  5074. #
  5075. # ext_grs() looks at the rounding precision and sets the appropriate
  5076. # G,R,S bits.
  5077. # If (G,R,S == 0) then result is exact and round is done, else set
  5078. # the inex flag in status reg and continue.
  5079. #
  5080. bsr.l ext_grs # extract G,R,S
  5081. tst.l %d0 # are G,R,S zero?
  5082. beq.w truncate # yes; round is complete
  5083. or.w &inx2a_mask, 2+USER_FPSR(%a6) # set inex2/ainex
  5084. #
  5085. # Use rounding mode as an index into a jump table for these modes.
  5086. # All of the following assumes grs != 0.
  5087. #
  5088. mov.w (tbl_mode.b,%pc,%d1.w*2), %a1 # load jump offset
  5089. jmp (tbl_mode.b,%pc,%a1) # jmp to rnd mode handler
  5090. tbl_mode:
  5091. short rnd_near - tbl_mode
  5092. short truncate - tbl_mode # RZ always truncates
  5093. short rnd_mnus - tbl_mode
  5094. short rnd_plus - tbl_mode
  5095. #################################################################
  5096. # ROUND PLUS INFINITY #
  5097. # #
  5098. # If sign of fp number = 0 (positive), then add 1 to l. #
  5099. #################################################################
  5100. rnd_plus:
  5101. tst.b FTEMP_SGN(%a0) # check for sign
  5102. bmi.w truncate # if positive then truncate
  5103. mov.l &0xffffffff, %d0 # force g,r,s to be all f's
  5104. swap %d1 # set up d1 for round prec.
  5105. cmpi.b %d1, &s_mode # is prec = sgl?
  5106. beq.w add_sgl # yes
  5107. bgt.w add_dbl # no; it's dbl
  5108. bra.w add_ext # no; it's ext
  5109. #################################################################
  5110. # ROUND MINUS INFINITY #
  5111. # #
  5112. # If sign of fp number = 1 (negative), then add 1 to l. #
  5113. #################################################################
  5114. rnd_mnus:
  5115. tst.b FTEMP_SGN(%a0) # check for sign
  5116. bpl.w truncate # if negative then truncate
  5117. mov.l &0xffffffff, %d0 # force g,r,s to be all f's
  5118. swap %d1 # set up d1 for round prec.
  5119. cmpi.b %d1, &s_mode # is prec = sgl?
  5120. beq.w add_sgl # yes
  5121. bgt.w add_dbl # no; it's dbl
  5122. bra.w add_ext # no; it's ext
  5123. #################################################################
  5124. # ROUND NEAREST #
  5125. # #
  5126. # If (g=1), then add 1 to l and if (r=s=0), then clear l #
  5127. # Note that this will round to even in case of a tie. #
  5128. #################################################################
  5129. rnd_near:
  5130. asl.l &0x1, %d0 # shift g-bit to c-bit
  5131. bcc.w truncate # if (g=1) then
  5132. swap %d1 # set up d1 for round prec.
  5133. cmpi.b %d1, &s_mode # is prec = sgl?
  5134. beq.w add_sgl # yes
  5135. bgt.w add_dbl # no; it's dbl
  5136. bra.w add_ext # no; it's ext
  5137. # *** LOCAL EQUATES ***
  5138. set ad_1_sgl, 0x00000100 # constant to add 1 to l-bit in sgl prec
  5139. set ad_1_dbl, 0x00000800 # constant to add 1 to l-bit in dbl prec
  5140. #########################
  5141. # ADD SINGLE #
  5142. #########################
  5143. add_sgl:
  5144. add.l &ad_1_sgl, FTEMP_HI(%a0)
  5145. bcc.b scc_clr # no mantissa overflow
  5146. roxr.w FTEMP_HI(%a0) # shift v-bit back in
  5147. roxr.w FTEMP_HI+2(%a0) # shift v-bit back in
  5148. add.w &0x1, FTEMP_EX(%a0) # and incr exponent
  5149. scc_clr:
  5150. tst.l %d0 # test for rs = 0
  5151. bne.b sgl_done
  5152. and.w &0xfe00, FTEMP_HI+2(%a0) # clear the l-bit
  5153. sgl_done:
  5154. and.l &0xffffff00, FTEMP_HI(%a0) # truncate bits beyond sgl limit
  5155. clr.l FTEMP_LO(%a0) # clear d2
  5156. rts
  5157. #########################
  5158. # ADD EXTENDED #
  5159. #########################
  5160. add_ext:
  5161. addq.l &1,FTEMP_LO(%a0) # add 1 to l-bit
  5162. bcc.b xcc_clr # test for carry out
  5163. addq.l &1,FTEMP_HI(%a0) # propagate carry
  5164. bcc.b xcc_clr
  5165. roxr.w FTEMP_HI(%a0) # mant is 0 so restore v-bit
  5166. roxr.w FTEMP_HI+2(%a0) # mant is 0 so restore v-bit
  5167. roxr.w FTEMP_LO(%a0)
  5168. roxr.w FTEMP_LO+2(%a0)
  5169. add.w &0x1,FTEMP_EX(%a0) # and inc exp
  5170. xcc_clr:
  5171. tst.l %d0 # test rs = 0
  5172. bne.b add_ext_done
  5173. and.b &0xfe,FTEMP_LO+3(%a0) # clear the l bit
  5174. add_ext_done:
  5175. rts
  5176. #########################
  5177. # ADD DOUBLE #
  5178. #########################
  5179. add_dbl:
  5180. add.l &ad_1_dbl, FTEMP_LO(%a0) # add 1 to lsb
  5181. bcc.b dcc_clr # no carry
  5182. addq.l &0x1, FTEMP_HI(%a0) # propagate carry
  5183. bcc.b dcc_clr # no carry
  5184. roxr.w FTEMP_HI(%a0) # mant is 0 so restore v-bit
  5185. roxr.w FTEMP_HI+2(%a0) # mant is 0 so restore v-bit
  5186. roxr.w FTEMP_LO(%a0)
  5187. roxr.w FTEMP_LO+2(%a0)
  5188. addq.w &0x1, FTEMP_EX(%a0) # incr exponent
  5189. dcc_clr:
  5190. tst.l %d0 # test for rs = 0
  5191. bne.b dbl_done
  5192. and.w &0xf000, FTEMP_LO+2(%a0) # clear the l-bit
  5193. dbl_done:
  5194. and.l &0xfffff800,FTEMP_LO(%a0) # truncate bits beyond dbl limit
  5195. rts
  5196. ###########################
  5197. # Truncate all other bits #
  5198. ###########################
  5199. truncate:
  5200. swap %d1 # select rnd prec
  5201. cmpi.b %d1, &s_mode # is prec sgl?
  5202. beq.w sgl_done # yes
  5203. bgt.b dbl_done # no; it's dbl
  5204. rts # no; it's ext
  5205. #
  5206. # ext_grs(): extract guard, round and sticky bits according to
  5207. # rounding precision.
  5208. #
  5209. # INPUT
  5210. # d0 = extended precision g,r,s (in d0{31:29})
  5211. # d1 = {PREC,ROUND}
  5212. # OUTPUT
  5213. # d0{31:29} = guard, round, sticky
  5214. #
  5215. # The ext_grs extract the guard/round/sticky bits according to the
  5216. # selected rounding precision. It is called by the round subroutine
  5217. # only. All registers except d0 are kept intact. d0 becomes an
  5218. # updated guard,round,sticky in d0{31:29}
  5219. #
  5220. # Notes: the ext_grs uses the round PREC, and therefore has to swap d1
  5221. # prior to usage, and needs to restore d1 to original. this
  5222. # routine is tightly tied to the round routine and not meant to
  5223. # uphold standard subroutine calling practices.
  5224. #
  5225. ext_grs:
  5226. swap %d1 # have d1.w point to round precision
  5227. tst.b %d1 # is rnd prec = extended?
  5228. bne.b ext_grs_not_ext # no; go handle sgl or dbl
  5229. #
  5230. # %d0 actually already hold g,r,s since _round() had it before calling
  5231. # this function. so, as long as we don't disturb it, we are "returning" it.
  5232. #
  5233. ext_grs_ext:
  5234. swap %d1 # yes; return to correct positions
  5235. rts
  5236. ext_grs_not_ext:
  5237. movm.l &0x3000, -(%sp) # make some temp registers {d2/d3}
  5238. cmpi.b %d1, &s_mode # is rnd prec = sgl?
  5239. bne.b ext_grs_dbl # no; go handle dbl
  5240. #
  5241. # sgl:
  5242. # 96 64 40 32 0
  5243. # -----------------------------------------------------
  5244. # | EXP |XXXXXXX| |xx | |grs|
  5245. # -----------------------------------------------------
  5246. # <--(24)--->nn\ /
  5247. # ee ---------------------
  5248. # ww |
  5249. # v
  5250. # gr new sticky
  5251. #
  5252. ext_grs_sgl:
  5253. bfextu FTEMP_HI(%a0){&24:&2}, %d3 # sgl prec. g-r are 2 bits right
  5254. mov.l &30, %d2 # of the sgl prec. limits
  5255. lsl.l %d2, %d3 # shift g-r bits to MSB of d3
  5256. mov.l FTEMP_HI(%a0), %d2 # get word 2 for s-bit test
  5257. and.l &0x0000003f, %d2 # s bit is the or of all other
  5258. bne.b ext_grs_st_stky # bits to the right of g-r
  5259. tst.l FTEMP_LO(%a0) # test lower mantissa
  5260. bne.b ext_grs_st_stky # if any are set, set sticky
  5261. tst.l %d0 # test original g,r,s
  5262. bne.b ext_grs_st_stky # if any are set, set sticky
  5263. bra.b ext_grs_end_sd # if words 3 and 4 are clr, exit
  5264. #
  5265. # dbl:
  5266. # 96 64 32 11 0
  5267. # -----------------------------------------------------
  5268. # | EXP |XXXXXXX| | |xx |grs|
  5269. # -----------------------------------------------------
  5270. # nn\ /
  5271. # ee -------
  5272. # ww |
  5273. # v
  5274. # gr new sticky
  5275. #
  5276. ext_grs_dbl:
  5277. bfextu FTEMP_LO(%a0){&21:&2}, %d3 # dbl-prec. g-r are 2 bits right
  5278. mov.l &30, %d2 # of the dbl prec. limits
  5279. lsl.l %d2, %d3 # shift g-r bits to the MSB of d3
  5280. mov.l FTEMP_LO(%a0), %d2 # get lower mantissa for s-bit test
  5281. and.l &0x000001ff, %d2 # s bit is the or-ing of all
  5282. bne.b ext_grs_st_stky # other bits to the right of g-r
  5283. tst.l %d0 # test word original g,r,s
  5284. bne.b ext_grs_st_stky # if any are set, set sticky
  5285. bra.b ext_grs_end_sd # if clear, exit
  5286. ext_grs_st_stky:
  5287. bset &rnd_stky_bit, %d3 # set sticky bit
  5288. ext_grs_end_sd:
  5289. mov.l %d3, %d0 # return grs to d0
  5290. movm.l (%sp)+, &0xc # restore scratch registers {d2/d3}
  5291. swap %d1 # restore d1 to original
  5292. rts
  5293. #########################################################################
  5294. # norm(): normalize the mantissa of an extended precision input. the #
  5295. # input operand should not be normalized already. #
  5296. # #
  5297. # XDEF **************************************************************** #
  5298. # norm() #
  5299. # #
  5300. # XREF **************************************************************** #
  5301. # none #
  5302. # #
  5303. # INPUT *************************************************************** #
  5304. # a0 = pointer fp extended precision operand to normalize #
  5305. # #
  5306. # OUTPUT ************************************************************** #
  5307. # d0 = number of bit positions the mantissa was shifted #
  5308. # a0 = the input operand's mantissa is normalized; the exponent #
  5309. # is unchanged. #
  5310. # #
  5311. #########################################################################
  5312. global norm
  5313. norm:
  5314. mov.l %d2, -(%sp) # create some temp regs
  5315. mov.l %d3, -(%sp)
  5316. mov.l FTEMP_HI(%a0), %d0 # load hi(mantissa)
  5317. mov.l FTEMP_LO(%a0), %d1 # load lo(mantissa)
  5318. bfffo %d0{&0:&32}, %d2 # how many places to shift?
  5319. beq.b norm_lo # hi(man) is all zeroes!
  5320. norm_hi:
  5321. lsl.l %d2, %d0 # left shift hi(man)
  5322. bfextu %d1{&0:%d2}, %d3 # extract lo bits
  5323. or.l %d3, %d0 # create hi(man)
  5324. lsl.l %d2, %d1 # create lo(man)
  5325. mov.l %d0, FTEMP_HI(%a0) # store new hi(man)
  5326. mov.l %d1, FTEMP_LO(%a0) # store new lo(man)
  5327. mov.l %d2, %d0 # return shift amount
  5328. mov.l (%sp)+, %d3 # restore temp regs
  5329. mov.l (%sp)+, %d2
  5330. rts
  5331. norm_lo:
  5332. bfffo %d1{&0:&32}, %d2 # how many places to shift?
  5333. lsl.l %d2, %d1 # shift lo(man)
  5334. add.l &32, %d2 # add 32 to shft amount
  5335. mov.l %d1, FTEMP_HI(%a0) # store hi(man)
  5336. clr.l FTEMP_LO(%a0) # lo(man) is now zero
  5337. mov.l %d2, %d0 # return shift amount
  5338. mov.l (%sp)+, %d3 # restore temp regs
  5339. mov.l (%sp)+, %d2
  5340. rts
  5341. #########################################################################
  5342. # unnorm_fix(): - changes an UNNORM to one of NORM, DENORM, or ZERO #
  5343. # - returns corresponding optype tag #
  5344. # #
  5345. # XDEF **************************************************************** #
  5346. # unnorm_fix() #
  5347. # #
  5348. # XREF **************************************************************** #
  5349. # norm() - normalize the mantissa #
  5350. # #
  5351. # INPUT *************************************************************** #
  5352. # a0 = pointer to unnormalized extended precision number #
  5353. # #
  5354. # OUTPUT ************************************************************** #
  5355. # d0 = optype tag - is corrected to one of NORM, DENORM, or ZERO #
  5356. # a0 = input operand has been converted to a norm, denorm, or #
  5357. # zero; both the exponent and mantissa are changed. #
  5358. # #
  5359. #########################################################################
  5360. global unnorm_fix
  5361. unnorm_fix:
  5362. bfffo FTEMP_HI(%a0){&0:&32}, %d0 # how many shifts are needed?
  5363. bne.b unnorm_shift # hi(man) is not all zeroes
  5364. #
  5365. # hi(man) is all zeroes so see if any bits in lo(man) are set
  5366. #
  5367. unnorm_chk_lo:
  5368. bfffo FTEMP_LO(%a0){&0:&32}, %d0 # is operand really a zero?
  5369. beq.w unnorm_zero # yes
  5370. add.w &32, %d0 # no; fix shift distance
  5371. #
  5372. # d0 = # shifts needed for complete normalization
  5373. #
  5374. unnorm_shift:
  5375. clr.l %d1 # clear top word
  5376. mov.w FTEMP_EX(%a0), %d1 # extract exponent
  5377. and.w &0x7fff, %d1 # strip off sgn
  5378. cmp.w %d0, %d1 # will denorm push exp < 0?
  5379. bgt.b unnorm_nrm_zero # yes; denorm only until exp = 0
  5380. #
  5381. # exponent would not go < 0. therefore, number stays normalized
  5382. #
  5383. sub.w %d0, %d1 # shift exponent value
  5384. mov.w FTEMP_EX(%a0), %d0 # load old exponent
  5385. and.w &0x8000, %d0 # save old sign
  5386. or.w %d0, %d1 # {sgn,new exp}
  5387. mov.w %d1, FTEMP_EX(%a0) # insert new exponent
  5388. bsr.l norm # normalize UNNORM
  5389. mov.b &NORM, %d0 # return new optype tag
  5390. rts
  5391. #
  5392. # exponent would go < 0, so only denormalize until exp = 0
  5393. #
  5394. unnorm_nrm_zero:
  5395. cmp.b %d1, &32 # is exp <= 32?
  5396. bgt.b unnorm_nrm_zero_lrg # no; go handle large exponent
  5397. bfextu FTEMP_HI(%a0){%d1:&32}, %d0 # extract new hi(man)
  5398. mov.l %d0, FTEMP_HI(%a0) # save new hi(man)
  5399. mov.l FTEMP_LO(%a0), %d0 # fetch old lo(man)
  5400. lsl.l %d1, %d0 # extract new lo(man)
  5401. mov.l %d0, FTEMP_LO(%a0) # save new lo(man)
  5402. and.w &0x8000, FTEMP_EX(%a0) # set exp = 0
  5403. mov.b &DENORM, %d0 # return new optype tag
  5404. rts
  5405. #
  5406. # only mantissa bits set are in lo(man)
  5407. #
  5408. unnorm_nrm_zero_lrg:
  5409. sub.w &32, %d1 # adjust shft amt by 32
  5410. mov.l FTEMP_LO(%a0), %d0 # fetch old lo(man)
  5411. lsl.l %d1, %d0 # left shift lo(man)
  5412. mov.l %d0, FTEMP_HI(%a0) # store new hi(man)
  5413. clr.l FTEMP_LO(%a0) # lo(man) = 0
  5414. and.w &0x8000, FTEMP_EX(%a0) # set exp = 0
  5415. mov.b &DENORM, %d0 # return new optype tag
  5416. rts
  5417. #
  5418. # whole mantissa is zero so this UNNORM is actually a zero
  5419. #
  5420. unnorm_zero:
  5421. and.w &0x8000, FTEMP_EX(%a0) # force exponent to zero
  5422. mov.b &ZERO, %d0 # fix optype tag
  5423. rts
  5424. #########################################################################
  5425. # XDEF **************************************************************** #
  5426. # set_tag_x(): return the optype of the input ext fp number #
  5427. # #
  5428. # XREF **************************************************************** #
  5429. # None #
  5430. # #
  5431. # INPUT *************************************************************** #
  5432. # a0 = pointer to extended precision operand #
  5433. # #
  5434. # OUTPUT ************************************************************** #
  5435. # d0 = value of type tag #
  5436. # one of: NORM, INF, QNAN, SNAN, DENORM, UNNORM, ZERO #
  5437. # #
  5438. # ALGORITHM *********************************************************** #
  5439. # Simply test the exponent, j-bit, and mantissa values to #
  5440. # determine the type of operand. #
  5441. # If it's an unnormalized zero, alter the operand and force it #
  5442. # to be a normal zero. #
  5443. # #
  5444. #########################################################################
  5445. global set_tag_x
  5446. set_tag_x:
  5447. mov.w FTEMP_EX(%a0), %d0 # extract exponent
  5448. andi.w &0x7fff, %d0 # strip off sign
  5449. cmpi.w %d0, &0x7fff # is (EXP == MAX)?
  5450. beq.b inf_or_nan_x
  5451. not_inf_or_nan_x:
  5452. btst &0x7,FTEMP_HI(%a0)
  5453. beq.b not_norm_x
  5454. is_norm_x:
  5455. mov.b &NORM, %d0
  5456. rts
  5457. not_norm_x:
  5458. tst.w %d0 # is exponent = 0?
  5459. bne.b is_unnorm_x
  5460. not_unnorm_x:
  5461. tst.l FTEMP_HI(%a0)
  5462. bne.b is_denorm_x
  5463. tst.l FTEMP_LO(%a0)
  5464. bne.b is_denorm_x
  5465. is_zero_x:
  5466. mov.b &ZERO, %d0
  5467. rts
  5468. is_denorm_x:
  5469. mov.b &DENORM, %d0
  5470. rts
  5471. # must distinguish now "Unnormalized zeroes" which we
  5472. # must convert to zero.
  5473. is_unnorm_x:
  5474. tst.l FTEMP_HI(%a0)
  5475. bne.b is_unnorm_reg_x
  5476. tst.l FTEMP_LO(%a0)
  5477. bne.b is_unnorm_reg_x
  5478. # it's an "unnormalized zero". let's convert it to an actual zero...
  5479. andi.w &0x8000,FTEMP_EX(%a0) # clear exponent
  5480. mov.b &ZERO, %d0
  5481. rts
  5482. is_unnorm_reg_x:
  5483. mov.b &UNNORM, %d0
  5484. rts
  5485. inf_or_nan_x:
  5486. tst.l FTEMP_LO(%a0)
  5487. bne.b is_nan_x
  5488. mov.l FTEMP_HI(%a0), %d0
  5489. and.l &0x7fffffff, %d0 # msb is a don't care!
  5490. bne.b is_nan_x
  5491. is_inf_x:
  5492. mov.b &INF, %d0
  5493. rts
  5494. is_nan_x:
  5495. btst &0x6, FTEMP_HI(%a0)
  5496. beq.b is_snan_x
  5497. mov.b &QNAN, %d0
  5498. rts
  5499. is_snan_x:
  5500. mov.b &SNAN, %d0
  5501. rts
  5502. #########################################################################
  5503. # XDEF **************************************************************** #
  5504. # set_tag_d(): return the optype of the input dbl fp number #
  5505. # #
  5506. # XREF **************************************************************** #
  5507. # None #
  5508. # #
  5509. # INPUT *************************************************************** #
  5510. # a0 = points to double precision operand #
  5511. # #
  5512. # OUTPUT ************************************************************** #
  5513. # d0 = value of type tag #
  5514. # one of: NORM, INF, QNAN, SNAN, DENORM, ZERO #
  5515. # #
  5516. # ALGORITHM *********************************************************** #
  5517. # Simply test the exponent, j-bit, and mantissa values to #
  5518. # determine the type of operand. #
  5519. # #
  5520. #########################################################################
  5521. global set_tag_d
  5522. set_tag_d:
  5523. mov.l FTEMP(%a0), %d0
  5524. mov.l %d0, %d1
  5525. andi.l &0x7ff00000, %d0
  5526. beq.b zero_or_denorm_d
  5527. cmpi.l %d0, &0x7ff00000
  5528. beq.b inf_or_nan_d
  5529. is_norm_d:
  5530. mov.b &NORM, %d0
  5531. rts
  5532. zero_or_denorm_d:
  5533. and.l &0x000fffff, %d1
  5534. bne is_denorm_d
  5535. tst.l 4+FTEMP(%a0)
  5536. bne is_denorm_d
  5537. is_zero_d:
  5538. mov.b &ZERO, %d0
  5539. rts
  5540. is_denorm_d:
  5541. mov.b &DENORM, %d0
  5542. rts
  5543. inf_or_nan_d:
  5544. and.l &0x000fffff, %d1
  5545. bne is_nan_d
  5546. tst.l 4+FTEMP(%a0)
  5547. bne is_nan_d
  5548. is_inf_d:
  5549. mov.b &INF, %d0
  5550. rts
  5551. is_nan_d:
  5552. btst &19, %d1
  5553. bne is_qnan_d
  5554. is_snan_d:
  5555. mov.b &SNAN, %d0
  5556. rts
  5557. is_qnan_d:
  5558. mov.b &QNAN, %d0
  5559. rts
  5560. #########################################################################
  5561. # XDEF **************************************************************** #
  5562. # set_tag_s(): return the optype of the input sgl fp number #
  5563. # #
  5564. # XREF **************************************************************** #
  5565. # None #
  5566. # #
  5567. # INPUT *************************************************************** #
  5568. # a0 = pointer to single precision operand #
  5569. # #
  5570. # OUTPUT ************************************************************** #
  5571. # d0 = value of type tag #
  5572. # one of: NORM, INF, QNAN, SNAN, DENORM, ZERO #
  5573. # #
  5574. # ALGORITHM *********************************************************** #
  5575. # Simply test the exponent, j-bit, and mantissa values to #
  5576. # determine the type of operand. #
  5577. # #
  5578. #########################################################################
  5579. global set_tag_s
  5580. set_tag_s:
  5581. mov.l FTEMP(%a0), %d0
  5582. mov.l %d0, %d1
  5583. andi.l &0x7f800000, %d0
  5584. beq.b zero_or_denorm_s
  5585. cmpi.l %d0, &0x7f800000
  5586. beq.b inf_or_nan_s
  5587. is_norm_s:
  5588. mov.b &NORM, %d0
  5589. rts
  5590. zero_or_denorm_s:
  5591. and.l &0x007fffff, %d1
  5592. bne is_denorm_s
  5593. is_zero_s:
  5594. mov.b &ZERO, %d0
  5595. rts
  5596. is_denorm_s:
  5597. mov.b &DENORM, %d0
  5598. rts
  5599. inf_or_nan_s:
  5600. and.l &0x007fffff, %d1
  5601. bne is_nan_s
  5602. is_inf_s:
  5603. mov.b &INF, %d0
  5604. rts
  5605. is_nan_s:
  5606. btst &22, %d1
  5607. bne is_qnan_s
  5608. is_snan_s:
  5609. mov.b &SNAN, %d0
  5610. rts
  5611. is_qnan_s:
  5612. mov.b &QNAN, %d0
  5613. rts
  5614. #########################################################################
  5615. # XDEF **************************************************************** #
  5616. # unf_res(): routine to produce default underflow result of a #
  5617. # scaled extended precision number; this is used by #
  5618. # fadd/fdiv/fmul/etc. emulation routines. #
  5619. # unf_res4(): same as above but for fsglmul/fsgldiv which use #
  5620. # single round prec and extended prec mode. #
  5621. # #
  5622. # XREF **************************************************************** #
  5623. # _denorm() - denormalize according to scale factor #
  5624. # _round() - round denormalized number according to rnd prec #
  5625. # #
  5626. # INPUT *************************************************************** #
  5627. # a0 = pointer to extended precison operand #
  5628. # d0 = scale factor #
  5629. # d1 = rounding precision/mode #
  5630. # #
  5631. # OUTPUT ************************************************************** #
  5632. # a0 = pointer to default underflow result in extended precision #
  5633. # d0.b = result FPSR_cc which caller may or may not want to save #
  5634. # #
  5635. # ALGORITHM *********************************************************** #
  5636. # Convert the input operand to "internal format" which means the #
  5637. # exponent is extended to 16 bits and the sign is stored in the unused #
  5638. # portion of the extended precison operand. Denormalize the number #
  5639. # according to the scale factor passed in d0. Then, round the #
  5640. # denormalized result. #
  5641. # Set the FPSR_exc bits as appropriate but return the cc bits in #
  5642. # d0 in case the caller doesn't want to save them (as is the case for #
  5643. # fmove out). #
  5644. # unf_res4() for fsglmul/fsgldiv forces the denorm to extended #
  5645. # precision and the rounding mode to single. #
  5646. # #
  5647. #########################################################################
  5648. global unf_res
  5649. unf_res:
  5650. mov.l %d1, -(%sp) # save rnd prec,mode on stack
  5651. btst &0x7, FTEMP_EX(%a0) # make "internal" format
  5652. sne FTEMP_SGN(%a0)
  5653. mov.w FTEMP_EX(%a0), %d1 # extract exponent
  5654. and.w &0x7fff, %d1
  5655. sub.w %d0, %d1
  5656. mov.w %d1, FTEMP_EX(%a0) # insert 16 bit exponent
  5657. mov.l %a0, -(%sp) # save operand ptr during calls
  5658. mov.l 0x4(%sp),%d0 # pass rnd prec.
  5659. andi.w &0x00c0,%d0
  5660. lsr.w &0x4,%d0
  5661. bsr.l _denorm # denorm result
  5662. mov.l (%sp),%a0
  5663. mov.w 0x6(%sp),%d1 # load prec:mode into %d1
  5664. andi.w &0xc0,%d1 # extract rnd prec
  5665. lsr.w &0x4,%d1
  5666. swap %d1
  5667. mov.w 0x6(%sp),%d1
  5668. andi.w &0x30,%d1
  5669. lsr.w &0x4,%d1
  5670. bsr.l _round # round the denorm
  5671. mov.l (%sp)+, %a0
  5672. # result is now rounded properly. convert back to normal format
  5673. bclr &0x7, FTEMP_EX(%a0) # clear sgn first; may have residue
  5674. tst.b FTEMP_SGN(%a0) # is "internal result" sign set?
  5675. beq.b unf_res_chkifzero # no; result is positive
  5676. bset &0x7, FTEMP_EX(%a0) # set result sgn
  5677. clr.b FTEMP_SGN(%a0) # clear temp sign
  5678. # the number may have become zero after rounding. set ccodes accordingly.
  5679. unf_res_chkifzero:
  5680. clr.l %d0
  5681. tst.l FTEMP_HI(%a0) # is value now a zero?
  5682. bne.b unf_res_cont # no
  5683. tst.l FTEMP_LO(%a0)
  5684. bne.b unf_res_cont # no
  5685. # bset &z_bit, FPSR_CC(%a6) # yes; set zero ccode bit
  5686. bset &z_bit, %d0 # yes; set zero ccode bit
  5687. unf_res_cont:
  5688. #
  5689. # can inex1 also be set along with unfl and inex2???
  5690. #
  5691. # we know that underflow has occurred. aunfl should be set if INEX2 is also set.
  5692. #
  5693. btst &inex2_bit, FPSR_EXCEPT(%a6) # is INEX2 set?
  5694. beq.b unf_res_end # no
  5695. bset &aunfl_bit, FPSR_AEXCEPT(%a6) # yes; set aunfl
  5696. unf_res_end:
  5697. add.l &0x4, %sp # clear stack
  5698. rts
# unf_res() for fsglmul() and fsgldiv(): identical to unf_res() except
# that the denorm is forced to extended precision and the round to
# single precision (rnd mode still comes from the caller's d1).
	global		unf_res4
unf_res4:
	mov.l		%d1,-(%sp)		# save rnd prec,mode on stack

	btst		&0x7,FTEMP_EX(%a0)	# make "internal" format
	sne		FTEMP_SGN(%a0)		# sign byte = (sign bit set)

	mov.w		FTEMP_EX(%a0),%d1	# extract exponent
	and.w		&0x7fff,%d1		# strip sign
	sub.w		%d0,%d1			# apply scale factor
	mov.w		%d1,FTEMP_EX(%a0)	# insert 16 bit exponent

	mov.l		%a0,-(%sp)		# save operand ptr during calls

	clr.l		%d0			# force rnd prec = ext
	bsr.l		_denorm			# denorm result

	mov.l		(%sp),%a0		# reload operand ptr
	mov.w		&s_mode,%d1		# force rnd prec = sgl
	swap		%d1			# prec goes in hi word of d1
	mov.w		0x6(%sp),%d1		# load rnd mode
	andi.w		&0x30,%d1		# extract rnd mode field
	lsr.w		&0x4,%d1
	bsr.l		_round			# round the denorm

	mov.l		(%sp)+,%a0

# result is now rounded properly. convert back to normal format
	bclr		&0x7,FTEMP_EX(%a0)	# clear sgn first; may have residue
	tst.b		FTEMP_SGN(%a0)		# is "internal result" sign set?
	beq.b		unf_res4_chkifzero	# no; result is positive
	bset		&0x7,FTEMP_EX(%a0)	# set result sgn
	clr.b		FTEMP_SGN(%a0)		# clear temp sign

# the number may have become zero after rounding. set ccodes accordingly.
unf_res4_chkifzero:
	clr.l		%d0
	tst.l		FTEMP_HI(%a0)		# is value now a zero?
	bne.b		unf_res4_cont		# no
	tst.l		FTEMP_LO(%a0)
	bne.b		unf_res4_cont		# no
#	bset		&z_bit,FPSR_CC(%a6)	# yes; set zero ccode bit
	bset		&z_bit,%d0		# yes; set zero ccode bit

unf_res4_cont:
#
# can inex1 also be set along with unfl and inex2???
#
# we know that underflow has occurred. aunfl should be set if INEX2 is also set.
#
	btst		&inex2_bit,FPSR_EXCEPT(%a6) # is INEX2 set?
	beq.b		unf_res4_end		# no
	bset		&aunfl_bit,FPSR_AEXCEPT(%a6) # yes; set aunfl

unf_res4_end:
	add.l		&0x4,%sp		# clear stack
	rts
  5747. #########################################################################
  5748. # XDEF **************************************************************** #
  5749. # ovf_res(): routine to produce the default overflow result of #
  5750. # an overflowing number. #
  5751. # ovf_res2(): same as above but the rnd mode/prec are passed #
  5752. # differently. #
  5753. # #
  5754. # XREF **************************************************************** #
  5755. # none #
  5756. # #
  5757. # INPUT *************************************************************** #
  5758. # d1.b = '-1' => (-); '0' => (+) #
  5759. # ovf_res(): #
  5760. # d0 = rnd mode/prec #
  5761. # ovf_res2(): #
  5762. # hi(d0) = rnd prec #
  5763. # lo(d0) = rnd mode #
  5764. # #
  5765. # OUTPUT ************************************************************** #
  5766. # a0 = points to extended precision result #
  5767. # d0.b = condition code bits #
  5768. # #
  5769. # ALGORITHM *********************************************************** #
  5770. # The default overflow result can be determined by the sign of #
  5771. # the result and the rounding mode/prec in effect. These bits are #
  5772. # concatenated together to create an index into the default result #
  5773. # table. A pointer to the correct result is returned in a0. The #
  5774. # resulting condition codes are returned in d0 in case the caller #
  5775. # doesn't want FPSR_cc altered (as is the case for fmove out). #
  5776. # #
  5777. #########################################################################
  5778. global ovf_res
  5779. ovf_res:
  5780. andi.w &0x10,%d1 # keep result sign
  5781. lsr.b &0x4,%d0 # shift prec/mode
  5782. or.b %d0,%d1 # concat the two
  5783. mov.w %d1,%d0 # make a copy
  5784. lsl.b &0x1,%d1 # multiply d1 by 2
  5785. bra.b ovf_res_load
  5786. global ovf_res2
  5787. ovf_res2:
  5788. and.w &0x10, %d1 # keep result sign
  5789. or.b %d0, %d1 # insert rnd mode
  5790. swap %d0
  5791. or.b %d0, %d1 # insert rnd prec
  5792. mov.w %d1, %d0 # make a copy
  5793. lsl.b &0x1, %d1 # shift left by 1
  5794. #
  5795. # use the rounding mode, precision, and result sign as in index into the
  5796. # two tables below to fetch the default result and the result ccodes.
  5797. #
  5798. ovf_res_load:
  5799. mov.b (tbl_ovfl_cc.b,%pc,%d0.w*1), %d0 # fetch result ccodes
  5800. lea (tbl_ovfl_result.b,%pc,%d1.w*8), %a0 # return result ptr
  5801. rts
  5802. tbl_ovfl_cc:
  5803. byte 0x2, 0x0, 0x0, 0x2
  5804. byte 0x2, 0x0, 0x0, 0x2
  5805. byte 0x2, 0x0, 0x0, 0x2
  5806. byte 0x0, 0x0, 0x0, 0x0
  5807. byte 0x2+0x8, 0x8, 0x2+0x8, 0x8
  5808. byte 0x2+0x8, 0x8, 0x2+0x8, 0x8
  5809. byte 0x2+0x8, 0x8, 0x2+0x8, 0x8
  5810. tbl_ovfl_result:
  5811. long 0x7fff0000,0x00000000,0x00000000,0x00000000 # +INF; RN
  5812. long 0x7ffe0000,0xffffffff,0xffffffff,0x00000000 # +EXT; RZ
  5813. long 0x7ffe0000,0xffffffff,0xffffffff,0x00000000 # +EXT; RM
  5814. long 0x7fff0000,0x00000000,0x00000000,0x00000000 # +INF; RP
  5815. long 0x7fff0000,0x00000000,0x00000000,0x00000000 # +INF; RN
  5816. long 0x407e0000,0xffffff00,0x00000000,0x00000000 # +SGL; RZ
  5817. long 0x407e0000,0xffffff00,0x00000000,0x00000000 # +SGL; RM
  5818. long 0x7fff0000,0x00000000,0x00000000,0x00000000 # +INF; RP
  5819. long 0x7fff0000,0x00000000,0x00000000,0x00000000 # +INF; RN
  5820. long 0x43fe0000,0xffffffff,0xfffff800,0x00000000 # +DBL; RZ
  5821. long 0x43fe0000,0xffffffff,0xfffff800,0x00000000 # +DBL; RM
  5822. long 0x7fff0000,0x00000000,0x00000000,0x00000000 # +INF; RP
  5823. long 0x00000000,0x00000000,0x00000000,0x00000000
  5824. long 0x00000000,0x00000000,0x00000000,0x00000000
  5825. long 0x00000000,0x00000000,0x00000000,0x00000000
  5826. long 0x00000000,0x00000000,0x00000000,0x00000000
  5827. long 0xffff0000,0x00000000,0x00000000,0x00000000 # -INF; RN
  5828. long 0xfffe0000,0xffffffff,0xffffffff,0x00000000 # -EXT; RZ
  5829. long 0xffff0000,0x00000000,0x00000000,0x00000000 # -INF; RM
  5830. long 0xfffe0000,0xffffffff,0xffffffff,0x00000000 # -EXT; RP
  5831. long 0xffff0000,0x00000000,0x00000000,0x00000000 # -INF; RN
  5832. long 0xc07e0000,0xffffff00,0x00000000,0x00000000 # -SGL; RZ
  5833. long 0xffff0000,0x00000000,0x00000000,0x00000000 # -INF; RM
  5834. long 0xc07e0000,0xffffff00,0x00000000,0x00000000 # -SGL; RP
  5835. long 0xffff0000,0x00000000,0x00000000,0x00000000 # -INF; RN
  5836. long 0xc3fe0000,0xffffffff,0xfffff800,0x00000000 # -DBL; RZ
  5837. long 0xffff0000,0x00000000,0x00000000,0x00000000 # -INF; RM
  5838. long 0xc3fe0000,0xffffffff,0xfffff800,0x00000000 # -DBL; RP
  5839. #########################################################################
  5840. # XDEF **************************************************************** #
  5841. # fout(): move from fp register to memory or data register #
  5842. # #
  5843. # XREF **************************************************************** #
  5844. # _round() - needed to create EXOP for sgl/dbl precision #
  5845. # norm() - needed to create EXOP for extended precision #
  5846. # ovf_res() - create default overflow result for sgl/dbl precision#
  5847. # unf_res() - create default underflow result for sgl/dbl prec. #
  5848. # dst_dbl() - create rounded dbl precision result. #
  5849. # dst_sgl() - create rounded sgl precision result. #
  5850. # fetch_dreg() - fetch dynamic k-factor reg for packed. #
  5851. # bindec() - convert FP binary number to packed number. #
  5852. # _mem_write() - write data to memory. #
  5853. # _mem_write2() - write data to memory unless supv mode -(a7) exc.#
  5854. # _dmem_write_{byte,word,long}() - write data to memory. #
  5855. # store_dreg_{b,w,l}() - store data to data register file. #
  5856. # facc_out_{b,w,l,d,x}() - data access error occurred. #
  5857. # #
  5858. # INPUT *************************************************************** #
  5859. # a0 = pointer to extended precision source operand #
  5860. # d0 = round prec,mode #
  5861. # #
  5862. # OUTPUT ************************************************************** #
  5863. # fp0 : intermediate underflow or overflow result if #
  5864. # OVFL/UNFL occurred for a sgl or dbl operand #
  5865. # #
  5866. # ALGORITHM *********************************************************** #
  5867. # This routine is accessed by many handlers that need to do an #
  5868. # opclass three move of an operand out to memory. #
  5869. # Decode an fmove out (opclass 3) instruction to determine if #
  5870. # it's b,w,l,s,d,x, or p in size. b,w,l can be stored to either a data #
  5871. # register or memory. The algorithm uses a standard "fmove" to create #
  5872. # the rounded result. Also, since exceptions are disabled, this also #
# creates the correct OPERR default result if appropriate. #
  5874. # For sgl or dbl precision, overflow or underflow can occur. If #
# either occurs and is enabled, the EXOP is created and returned in fp1. #
  5876. # For extended precision, the stacked <ea> must be fixed along #
  5877. # w/ the address index register as appropriate w/ _calc_ea_fout(). If #
  5878. # the source is a denorm and if underflow is enabled, an EXOP must be #
  5879. # created. #
  5880. # For packed, the k-factor must be fetched from the instruction #
  5881. # word or a data register. The <ea> must be fixed as w/ extended #
  5882. # precision. Then, bindec() is called to create the appropriate #
  5883. # packed result. #
  5884. # If at any time an access error is flagged by one of the move- #
  5885. # to-memory routines, then a special exit must be made so that the #
  5886. # access error can be handled properly. #
  5887. # #
  5888. #########################################################################
global fout
# dispatch on the 3-bit destination format field of the instruction.
fout:
        bfextu EXC_CMDREG(%a6){&3:&3},%d1 # extract dst fmt
        mov.w (tbl_fout.b,%pc,%d1.w*2),%a1 # use as index
        jmp (tbl_fout.b,%pc,%a1) # jump to routine
        swbeg &0x8
tbl_fout:
        short fout_long - tbl_fout # fmt = 0: long word integer
        short fout_sgl - tbl_fout # fmt = 1: single precision
        short fout_ext - tbl_fout # fmt = 2: extended precision
        short fout_pack - tbl_fout # fmt = 3: packed decimal (static k-factor)
        short fout_word - tbl_fout # fmt = 4: word integer
        short fout_dbl - tbl_fout # fmt = 5: double precision
        short fout_byte - tbl_fout # fmt = 6: byte integer
        short fout_pack - tbl_fout # fmt = 7: packed decimal (dynamic k-factor)
  5904. #################################################################
  5905. # fmove.b out ###################################################
  5906. #################################################################
  5907. # Only "Unimplemented Data Type" exceptions enter here. The operand
  5908. # is either a DENORM or a NORM.
# fmove.b out: round the source per d0 (rnd prec,mode) and store the
# byte result either to memory or to an integer data register.
fout_byte:
        tst.b STAG(%a6) # is operand normalized?
        bne.b fout_byte_denorm # no
        fmovm.x SRC(%a0),&0x80 # load value
fout_byte_norm:
        fmov.l %d0,%fpcr # insert rnd prec,mode
        fmov.b %fp0,%d0 # exec move out w/ correct rnd mode
        fmov.l &0x0,%fpcr # clear FPCR
        fmov.l %fpsr,%d1 # fetch FPSR
        or.w %d1,2+USER_FPSR(%a6) # save new exc,accrued bits
        mov.b 1+EXC_OPWORD(%a6),%d1 # extract dst mode
        andi.b &0x38,%d1 # is mode == 0? (Dreg dst)
        beq.b fout_byte_dn # must save to integer regfile
        mov.l EXC_EA(%a6),%a0 # stacked <ea> is correct
        bsr.l _dmem_write_byte # write byte
        tst.l %d1 # did dstore fail?
        bne.l facc_out_b # yes
        rts
fout_byte_dn:
        mov.b 1+EXC_OPWORD(%a6),%d1 # extract Dn
        andi.w &0x7,%d1 # keep 3-bit register number
        bsr.l store_dreg_b # store byte to Dn
        rts
fout_byte_denorm:
# a DENORM is far smaller than the smallest sgl norm, so converting the
# smallest sgl (w/ the DENORM's sign) to a byte yields the same default
# result; substitute it and re-use the normal path above.
        mov.l SRC_EX(%a0),%d1
        andi.l &0x80000000,%d1 # keep DENORM sign
        ori.l &0x00800000,%d1 # make smallest sgl
        fmov.s %d1,%fp0
        bra.b fout_byte_norm
  5938. #################################################################
  5939. # fmove.w out ###################################################
  5940. #################################################################
  5941. # Only "Unimplemented Data Type" exceptions enter here. The operand
  5942. # is either a DENORM or a NORM.
# fmove.w out: round the source per d0 (rnd prec,mode) and store the
# word result either to memory or to an integer data register.
fout_word:
        tst.b STAG(%a6) # is operand normalized?
        bne.b fout_word_denorm # no
        fmovm.x SRC(%a0),&0x80 # load value
fout_word_norm:
        fmov.l %d0,%fpcr # insert rnd prec:mode
        fmov.w %fp0,%d0 # exec move out w/ correct rnd mode
        fmov.l &0x0,%fpcr # clear FPCR
        fmov.l %fpsr,%d1 # fetch FPSR
        or.w %d1,2+USER_FPSR(%a6) # save new exc,accrued bits
        mov.b 1+EXC_OPWORD(%a6),%d1 # extract dst mode
        andi.b &0x38,%d1 # is mode == 0? (Dreg dst)
        beq.b fout_word_dn # must save to integer regfile
        mov.l EXC_EA(%a6),%a0 # stacked <ea> is correct
        bsr.l _dmem_write_word # write word
        tst.l %d1 # did dstore fail?
        bne.l facc_out_w # yes
        rts
fout_word_dn:
        mov.b 1+EXC_OPWORD(%a6),%d1 # extract Dn
        andi.w &0x7,%d1 # keep 3-bit register number
        bsr.l store_dreg_w # store word to Dn
        rts
fout_word_denorm:
# substitute the smallest sgl norm w/ the DENORM's sign; the word
# conversion result is the same (see fout_byte_denorm).
        mov.l SRC_EX(%a0),%d1
        andi.l &0x80000000,%d1 # keep DENORM sign
        ori.l &0x00800000,%d1 # make smallest sgl
        fmov.s %d1,%fp0
        bra.b fout_word_norm
  5972. #################################################################
  5973. # fmove.l out ###################################################
  5974. #################################################################
  5975. # Only "Unimplemented Data Type" exceptions enter here. The operand
  5976. # is either a DENORM or a NORM.
# fmove.l out: round the source per d0 (rnd prec,mode) and store the
# long result either to memory or to an integer data register.
fout_long:
        tst.b STAG(%a6) # is operand normalized?
        bne.b fout_long_denorm # no
        fmovm.x SRC(%a0),&0x80 # load value
fout_long_norm:
        fmov.l %d0,%fpcr # insert rnd prec:mode
        fmov.l %fp0,%d0 # exec move out w/ correct rnd mode
        fmov.l &0x0,%fpcr # clear FPCR
        fmov.l %fpsr,%d1 # fetch FPSR
        or.w %d1,2+USER_FPSR(%a6) # save new exc,accrued bits
fout_long_write:
        mov.b 1+EXC_OPWORD(%a6),%d1 # extract dst mode
        andi.b &0x38,%d1 # is mode == 0? (Dreg dst)
        beq.b fout_long_dn # must save to integer regfile
        mov.l EXC_EA(%a6),%a0 # stacked <ea> is correct
        bsr.l _dmem_write_long # write long
        tst.l %d1 # did dstore fail?
        bne.l facc_out_l # yes
        rts
fout_long_dn:
        mov.b 1+EXC_OPWORD(%a6),%d1 # extract Dn
        andi.w &0x7,%d1 # keep 3-bit register number
        bsr.l store_dreg_l # store long to Dn
        rts
fout_long_denorm:
# substitute the smallest sgl norm w/ the DENORM's sign; the long
# conversion result is the same (see fout_byte_denorm).
        mov.l SRC_EX(%a0),%d1
        andi.l &0x80000000,%d1 # keep DENORM sign
        ori.l &0x00800000,%d1 # make smallest sgl
        fmov.s %d1,%fp0
        bra.b fout_long_norm
  6007. #################################################################
  6008. # fmove.x out ###################################################
  6009. #################################################################
  6010. # Only "Unimplemented Data Type" exceptions enter here. The operand
  6011. # is either a DENORM or a NORM.
  6012. # The DENORM causes an Underflow exception.
# fmove.x out: store the 12-byte extended-precision image to memory.
# A DENORM source additionally raises Underflow and, if UNFL/INEX is
# enabled, returns an EXOP in fp1.
fout_ext:
# we copy the extended precision result to FP_SCR0 so that the reserved
# 16-bit field gets zeroed. we do this since we promise not to disturb
# what's at SRC(a0).
        mov.w SRC_EX(%a0),FP_SCR0_EX(%a6)
        clr.w 2+FP_SCR0_EX(%a6) # clear reserved field
        mov.l SRC_HI(%a0),FP_SCR0_HI(%a6)
        mov.l SRC_LO(%a0),FP_SCR0_LO(%a6)
        fmovm.x SRC(%a0),&0x80 # return result
        bsr.l _calc_ea_fout # fix stacked <ea>; returns dst addr in a0
        mov.l %a0,%a1 # pass: dst addr
        lea FP_SCR0(%a6),%a0 # pass: src addr
        mov.l &0xc,%d0 # pass: opsize is 12 bytes
# we must not yet write the extended precision data to the stack
# in the pre-decrement case from supervisor mode or else we'll corrupt
# the stack frame. so, leave it in FP_SRC for now and deal with it later...
        cmpi.b SPCOND_FLG(%a6),&mda7_flg # -(a7) special case?
        beq.b fout_ext_a7
        bsr.l _dmem_write # write ext prec number to memory
        tst.l %d1 # did dstore fail?
        bne.w fout_ext_err # yes
        tst.b STAG(%a6) # is operand normalized?
        bne.b fout_ext_denorm # no
        rts
# the number is a DENORM. must set the underflow exception bit
fout_ext_denorm:
        bset &unfl_bit,FPSR_EXCEPT(%a6) # set underflow exc bit
        mov.b FPCR_ENABLE(%a6),%d0
        andi.b &0x0a,%d0 # is UNFL or INEX enabled?
        bne.b fout_ext_exc # yes
        rts
# we don't want to do the write if the exception occurred in supervisor mode
# so _mem_write2() handles this for us.
fout_ext_a7:
        bsr.l _mem_write2 # write ext prec number to memory
        tst.l %d1 # did dstore fail?
        bne.w fout_ext_err # yes
        tst.b STAG(%a6) # is operand normalized?
        bne.b fout_ext_denorm # no
        rts
# UNFL or INEX is enabled: build the EXOP by normalizing the DENORM
# mantissa and inserting the new exponent = -(shift count).
fout_ext_exc:
        lea FP_SCR0(%a6),%a0
        bsr.l norm # normalize the mantissa
        neg.w %d0 # new exp = -(shft amt)
        andi.w &0x7fff,%d0
        andi.w &0x8000,FP_SCR0_EX(%a6) # keep only old sign
        or.w %d0,FP_SCR0_EX(%a6) # insert new exponent
        fmovm.x FP_SCR0(%a6),&0x40 # return EXOP in fp1
        rts
fout_ext_err:
        mov.l EXC_A6(%a6),(%a6) # fix stacked a6
        bra.l facc_out_x # report access error
  6065. #########################################################################
  6066. # fmove.s out ###########################################################
  6067. #########################################################################
# fmove.s out: classify the operand against the single-precision
# exponent range, then either store directly (in-range), or go to the
# underflow/overflow handlers.
fout_sgl:
        andi.b &0x30,%d0 # clear rnd prec
        ori.b &s_mode*0x10,%d0 # insert sgl prec
        mov.l %d0,L_SCR3(%a6) # save rnd prec,mode on stack
#
# operand is a normalized number. first, we check to see if the move out
# would cause either an underflow or overflow. these cases are handled
# separately. otherwise, set the FPCR to the proper rounding mode and
# execute the move.
#
        mov.w SRC_EX(%a0),%d0 # extract exponent
        andi.w &0x7fff,%d0 # strip sign
        cmpi.w %d0,&SGL_HI # will operand overflow?
        bgt.w fout_sgl_ovfl # yes; go handle OVFL
        beq.w fout_sgl_may_ovfl # maybe; go handle possible OVFL
        cmpi.w %d0,&SGL_LO # will operand underflow?
        blt.w fout_sgl_unfl # yes; go handle underflow
#
# NORMs(in range) can be stored out by a simple "fmov.s"
# Unnormalized inputs can come through this point.
#
fout_sgl_exg:
        fmovm.x SRC(%a0),&0x80 # fetch fop from stack
        fmov.l L_SCR3(%a6),%fpcr # set FPCR
        fmov.l &0x0,%fpsr # clear FPSR
        fmov.s %fp0,%d0 # store does convert and round
        fmov.l &0x0,%fpcr # clear FPCR
        fmov.l %fpsr,%d1 # save FPSR
        or.w %d1,2+USER_FPSR(%a6) # set possible inex2/ainex
fout_sgl_exg_write:
        mov.b 1+EXC_OPWORD(%a6),%d1 # extract dst mode
        andi.b &0x38,%d1 # is mode == 0? (Dreg dst)
        beq.b fout_sgl_exg_write_dn # must save to integer regfile
        mov.l EXC_EA(%a6),%a0 # stacked <ea> is correct
        bsr.l _dmem_write_long # write long
        tst.l %d1 # did dstore fail?
        bne.l facc_out_l # yes
        rts
fout_sgl_exg_write_dn:
        mov.b 1+EXC_OPWORD(%a6),%d1 # extract Dn
        andi.w &0x7,%d1 # keep 3-bit register number
        bsr.l store_dreg_l # store sgl image to Dn
        rts
  6111. #
  6112. # here, we know that the operand would UNFL if moved out to single prec,
  6113. # so, denorm and round and then use generic store single routine to
  6114. # write the value to memory.
  6115. #
# single-precision move-out underflows: denormalize and round via
# unf_res(), store the sgl image, then build an EXOP if UNFL/INEX is
# enabled. the operand ptr (a0) is kept on the stack for the EXOP path.
fout_sgl_unfl:
        bset &unfl_bit,FPSR_EXCEPT(%a6) # set UNFL
        mov.w SRC_EX(%a0),FP_SCR0_EX(%a6)
        mov.l SRC_HI(%a0),FP_SCR0_HI(%a6)
        mov.l SRC_LO(%a0),FP_SCR0_LO(%a6)
        mov.l %a0,-(%sp) # save operand ptr for EXOP path
        clr.l %d0 # pass: S.F. = 0
        cmpi.b STAG(%a6),&DENORM # fetch src optype tag
        bne.b fout_sgl_unfl_cont # let DENORMs fall through
        lea FP_SCR0(%a6),%a0
        bsr.l norm # normalize the DENORM
fout_sgl_unfl_cont:
        lea FP_SCR0(%a6),%a0 # pass: ptr to operand
        mov.l L_SCR3(%a6),%d1 # pass: rnd prec,mode
        bsr.l unf_res # calc default underflow result
        lea FP_SCR0(%a6),%a0 # pass: ptr to fop
        bsr.l dst_sgl # convert to single prec
        mov.b 1+EXC_OPWORD(%a6),%d1 # extract dst mode
        andi.b &0x38,%d1 # is mode == 0? (Dreg dst)
        beq.b fout_sgl_unfl_dn # must save to integer regfile
        mov.l EXC_EA(%a6),%a0 # stacked <ea> is correct
        bsr.l _dmem_write_long # write long
        tst.l %d1 # did dstore fail?
        bne.l facc_out_l # yes
        bra.b fout_sgl_unfl_chkexc
fout_sgl_unfl_dn:
        mov.b 1+EXC_OPWORD(%a6),%d1 # extract Dn
        andi.w &0x7,%d1 # keep 3-bit register number
        bsr.l store_dreg_l # store sgl image to Dn
fout_sgl_unfl_chkexc:
        mov.b FPCR_ENABLE(%a6),%d1
        andi.b &0x0a,%d1 # is UNFL or INEX enabled?
        bne.w fout_sd_exc_unfl # yes; go build EXOP (pops saved a0)
        addq.l &0x4,%sp # discard saved operand ptr
        rts
  6151. #
  6152. # it's definitely an overflow so call ovf_res to get the correct answer
  6153. #
# single-precision move-out overflows: set OVFL (plus INEX2 if any
# mantissa bits below sgl precision are non-zero), store the default
# overflow result, then build an EXOP if UNFL/INEX is enabled.
fout_sgl_ovfl:
        tst.b 3+SRC_HI(%a0) # is result inexact? (bits below sgl mant)
        bne.b fout_sgl_ovfl_inex2
        tst.l SRC_LO(%a0) # is result inexact?
        bne.b fout_sgl_ovfl_inex2
        ori.w &ovfl_inx_mask,2+USER_FPSR(%a6) # set ovfl/aovfl/ainex
        bra.b fout_sgl_ovfl_cont
fout_sgl_ovfl_inex2:
        ori.w &ovfinx_mask,2+USER_FPSR(%a6) # set ovfl/aovfl/ainex/inex2
fout_sgl_ovfl_cont:
        mov.l %a0,-(%sp) # save operand ptr for EXOP path
# call ovf_res() w/ sgl prec and the correct rnd mode to create the default
# overflow result. DON'T save the returned ccodes from ovf_res() since
# fmove out doesn't alter them.
        tst.b SRC_EX(%a0) # is operand negative?
        smi %d1 # set if so
        mov.l L_SCR3(%a6),%d0 # pass: sgl prec,rnd mode
        bsr.l ovf_res # calc OVFL result
        fmovm.x (%a0),&0x80 # load default overflow result
        fmov.s %fp0,%d0 # store to single
        mov.b 1+EXC_OPWORD(%a6),%d1 # extract dst mode
        andi.b &0x38,%d1 # is mode == 0? (Dreg dst)
        beq.b fout_sgl_ovfl_dn # must save to integer regfile
        mov.l EXC_EA(%a6),%a0 # stacked <ea> is correct
        bsr.l _dmem_write_long # write long
        tst.l %d1 # did dstore fail?
        bne.l facc_out_l # yes
        bra.b fout_sgl_ovfl_chkexc
fout_sgl_ovfl_dn:
        mov.b 1+EXC_OPWORD(%a6),%d1 # extract Dn
        andi.w &0x7,%d1 # keep 3-bit register number
        bsr.l store_dreg_l # store sgl image to Dn
fout_sgl_ovfl_chkexc:
        mov.b FPCR_ENABLE(%a6),%d1
        andi.b &0x0a,%d1 # is UNFL or INEX enabled?
        bne.w fout_sd_exc_ovfl # yes; go build EXOP (pops saved a0)
        addq.l &0x4,%sp # discard saved operand ptr
        rts
  6192. #
  6193. # move out MAY overflow:
  6194. # (1) force the exp to 0x3fff
  6195. # (2) do a move w/ appropriate rnd mode
  6196. # (3) if exp still equals zero, then insert original exponent
  6197. # for the correct result.
  6198. # if exp now equals one, then it overflowed so call ovf_res.
  6199. #
# exponent is exactly SGL_HI: rounding may or may not push the result
# over the sgl range. force a biased exponent of 0x3fff (true exp = 0),
# round at sgl precision, and see whether the magnitude reached 2.0
# (i.e. the exponent incremented).
fout_sgl_may_ovfl:
        mov.w SRC_EX(%a0),%d1 # fetch current sign
        andi.w &0x8000,%d1 # keep it,clear exp
        ori.w &0x3fff,%d1 # insert exp of 0x3fff (true exp = 0)
        mov.w %d1,FP_SCR0_EX(%a6) # insert scaled exp
        mov.l SRC_HI(%a0),FP_SCR0_HI(%a6) # copy hi(man)
        mov.l SRC_LO(%a0),FP_SCR0_LO(%a6) # copy lo(man)
        fmov.l L_SCR3(%a6),%fpcr # set FPCR
        fmov.x FP_SCR0(%a6),%fp0 # force fop to be rounded
        fmov.l &0x0,%fpcr # clear FPCR
        fabs.x %fp0 # need absolute value
        fcmp.b %fp0,&0x2 # did exponent increase?
        fblt.w fout_sgl_exg # no; go finish NORM
        bra.w fout_sgl_ovfl # yes; go handle overflow
  6214. ################
# common EXOP builders for the sgl/dbl underflow and overflow paths.
# on entry the operand ptr saved by the caller is on top of the stack.
# the EXOP is built in FP_SCR0, rounded at the prec/mode saved in
# L_SCR3, and returned in fp1.
fout_sd_exc_unfl:
        mov.l (%sp)+,%a0 # restore saved operand ptr
        mov.w SRC_EX(%a0),FP_SCR0_EX(%a6)
        mov.l SRC_HI(%a0),FP_SCR0_HI(%a6)
        mov.l SRC_LO(%a0),FP_SCR0_LO(%a6)
        cmpi.b STAG(%a6),&DENORM # was src a DENORM?
        bne.b fout_sd_exc_cont # no
# DENORM: normalize and insert exp = -(shift count) into bits 14-0
        lea FP_SCR0(%a6),%a0
        bsr.l norm
        neg.l %d0 # new exp = -(shft amt)
        andi.w &0x7fff,%d0
        bfins %d0,FP_SCR0_EX(%a6){&1:&15} # insert below the sign bit
        bra.b fout_sd_exc_cont
fout_sd_exc:
fout_sd_exc_ovfl:
        mov.l (%sp)+,%a0 # restore a0
        mov.w SRC_EX(%a0),FP_SCR0_EX(%a6)
        mov.l SRC_HI(%a0),FP_SCR0_HI(%a6)
        mov.l SRC_LO(%a0),FP_SCR0_LO(%a6)
fout_sd_exc_cont:
        bclr &0x7,FP_SCR0_EX(%a6) # clear sign bit
        sne.b 2+FP_SCR0_EX(%a6) # set internal sign bit
        lea FP_SCR0(%a6),%a0 # pass: ptr to DENORM
# build the _round() argument in d1 from the fpcr image in L_SCR3:
# hi word = rnd prec bits, lo word = rnd mode bits.
        mov.b 3+L_SCR3(%a6),%d1
        lsr.b &0x4,%d1
        andi.w &0x0c,%d1 # keep rnd prec bits
        swap %d1 # prec into hi word
        mov.b 3+L_SCR3(%a6),%d1
        lsr.b &0x4,%d1
        andi.w &0x03,%d1 # keep rnd mode bits
        clr.l %d0 # pass: zero g,r,s
        bsr.l _round # round the DENORM
        tst.b 2+FP_SCR0_EX(%a6) # is EXOP negative?
        beq.b fout_sd_exc_done # no
        bset &0x7,FP_SCR0_EX(%a6) # yes; restore sign bit
fout_sd_exc_done:
        fmovm.x FP_SCR0(%a6),&0x40 # return EXOP in fp1
        rts
  6253. #################################################################
  6254. # fmove.d out ###################################################
  6255. #################################################################
# fmove.d out: classify the operand against the double-precision
# exponent range, then either store directly (in-range), or go to the
# underflow/overflow handlers.
fout_dbl:
        andi.b &0x30,%d0 # clear rnd prec
        ori.b &d_mode*0x10,%d0 # insert dbl prec
        mov.l %d0,L_SCR3(%a6) # save rnd prec,mode on stack
#
# operand is a normalized number. first, we check to see if the move out
# would cause either an underflow or overflow. these cases are handled
# separately. otherwise, set the FPCR to the proper rounding mode and
# execute the move.
#
        mov.w SRC_EX(%a0),%d0 # extract exponent
        andi.w &0x7fff,%d0 # strip sign
        cmpi.w %d0,&DBL_HI # will operand overflow?
        bgt.w fout_dbl_ovfl # yes; go handle OVFL
        beq.w fout_dbl_may_ovfl # maybe; go handle possible OVFL
        cmpi.w %d0,&DBL_LO # will operand underflow?
        blt.w fout_dbl_unfl # yes; go handle underflow
#
# NORMs(in range) can be stored out by a simple "fmov.d"
# Unnormalized inputs can come through this point.
#
fout_dbl_exg:
        fmovm.x SRC(%a0),&0x80 # fetch fop from stack
        fmov.l L_SCR3(%a6),%fpcr # set FPCR
        fmov.l &0x0,%fpsr # clear FPSR
        fmov.d %fp0,L_SCR1(%a6) # store does convert and round
        fmov.l &0x0,%fpcr # clear FPCR
        fmov.l %fpsr,%d0 # save FPSR
        or.w %d0,2+USER_FPSR(%a6) # set possible inex2/ainex
        mov.l EXC_EA(%a6),%a1 # pass: dst addr
        lea L_SCR1(%a6),%a0 # pass: src addr
        movq.l &0x8,%d0 # pass: opsize is 8 bytes
        bsr.l _dmem_write # store dbl fop to memory
        tst.l %d1 # did dstore fail?
        bne.l facc_out_d # yes
        rts # no; so we're finished
  6292. #
  6293. # here, we know that the operand would UNFL if moved out to double prec,
  6294. # so, denorm and round and then use generic store double routine to
  6295. # write the value to memory.
  6296. #
# double-precision move-out underflows: denormalize and round via
# unf_res(), store the dbl image, then build an EXOP if UNFL/INEX is
# enabled. the operand ptr (a0) is kept on the stack for the EXOP path.
fout_dbl_unfl:
        bset &unfl_bit,FPSR_EXCEPT(%a6) # set UNFL
        mov.w SRC_EX(%a0),FP_SCR0_EX(%a6)
        mov.l SRC_HI(%a0),FP_SCR0_HI(%a6)
        mov.l SRC_LO(%a0),FP_SCR0_LO(%a6)
        mov.l %a0,-(%sp) # save operand ptr for EXOP path
        clr.l %d0 # pass: S.F. = 0
        cmpi.b STAG(%a6),&DENORM # fetch src optype tag
        bne.b fout_dbl_unfl_cont # let DENORMs fall through
        lea FP_SCR0(%a6),%a0
        bsr.l norm # normalize the DENORM
fout_dbl_unfl_cont:
        lea FP_SCR0(%a6),%a0 # pass: ptr to operand
        mov.l L_SCR3(%a6),%d1 # pass: rnd prec,mode
        bsr.l unf_res # calc default underflow result
        lea FP_SCR0(%a6),%a0 # pass: ptr to fop
        bsr.l dst_dbl # convert to double prec
        mov.l %d0,L_SCR1(%a6) # hi(double)
        mov.l %d1,L_SCR2(%a6) # lo(double)
        mov.l EXC_EA(%a6),%a1 # pass: dst addr
        lea L_SCR1(%a6),%a0 # pass: src addr
        movq.l &0x8,%d0 # pass: opsize is 8 bytes
        bsr.l _dmem_write # store dbl fop to memory
        tst.l %d1 # did dstore fail?
        bne.l facc_out_d # yes
        mov.b FPCR_ENABLE(%a6),%d1
        andi.b &0x0a,%d1 # is UNFL or INEX enabled?
        bne.w fout_sd_exc_unfl # yes; go build EXOP (pops saved a0)
        addq.l &0x4,%sp # discard saved operand ptr
        rts
  6327. #
  6328. # it's definitely an overflow so call ovf_res to get the correct answer
  6329. #
# double-precision move-out overflows: set OVFL (plus INEX2 if the low
# 11 mantissa bits -- those dropped by the dbl conversion -- are
# non-zero), store the default overflow result, then build an EXOP if
# UNFL/INEX is enabled.
fout_dbl_ovfl:
        mov.w 2+SRC_LO(%a0),%d0
        andi.w &0x7ff,%d0 # any bits below dbl precision?
        bne.b fout_dbl_ovfl_inex2
        ori.w &ovfl_inx_mask,2+USER_FPSR(%a6) # set ovfl/aovfl/ainex
        bra.b fout_dbl_ovfl_cont
fout_dbl_ovfl_inex2:
        ori.w &ovfinx_mask,2+USER_FPSR(%a6) # set ovfl/aovfl/ainex/inex2
fout_dbl_ovfl_cont:
        mov.l %a0,-(%sp) # save operand ptr for EXOP path
# call ovf_res() w/ dbl prec and the correct rnd mode to create the default
# overflow result. DON'T save the returned ccodes from ovf_res() since
# fmove out doesn't alter them.
        tst.b SRC_EX(%a0) # is operand negative?
        smi %d1 # set if so
        mov.l L_SCR3(%a6),%d0 # pass: dbl prec,rnd mode
        bsr.l ovf_res # calc OVFL result
        fmovm.x (%a0),&0x80 # load default overflow result
        fmov.d %fp0,L_SCR1(%a6) # store to double
        mov.l EXC_EA(%a6),%a1 # pass: dst addr
        lea L_SCR1(%a6),%a0 # pass: src addr
        movq.l &0x8,%d0 # pass: opsize is 8 bytes
        bsr.l _dmem_write # store dbl fop to memory
        tst.l %d1 # did dstore fail?
        bne.l facc_out_d # yes
        mov.b FPCR_ENABLE(%a6),%d1
        andi.b &0x0a,%d1 # is UNFL or INEX enabled?
        bne.w fout_sd_exc_ovfl # yes; go build EXOP (pops saved a0)
        addq.l &0x4,%sp # discard saved operand ptr
        rts
  6360. #
  6361. # move out MAY overflow:
  6362. # (1) force the exp to 0x3fff
  6363. # (2) do a move w/ appropriate rnd mode
  6364. # (3) if exp still equals zero, then insert original exponent
  6365. # for the correct result.
  6366. # if exp now equals one, then it overflowed so call ovf_res.
  6367. #
# exponent is exactly DBL_HI: rounding may or may not push the result
# over the dbl range. force a biased exponent of 0x3fff (true exp = 0),
# round at dbl precision, and see whether the magnitude reached 2.0
# (i.e. the exponent incremented).
fout_dbl_may_ovfl:
        mov.w SRC_EX(%a0),%d1 # fetch current sign
        andi.w &0x8000,%d1 # keep it,clear exp
        ori.w &0x3fff,%d1 # insert exp of 0x3fff (true exp = 0)
        mov.w %d1,FP_SCR0_EX(%a6) # insert scaled exp
        mov.l SRC_HI(%a0),FP_SCR0_HI(%a6) # copy hi(man)
        mov.l SRC_LO(%a0),FP_SCR0_LO(%a6) # copy lo(man)
        fmov.l L_SCR3(%a6),%fpcr # set FPCR
        fmov.x FP_SCR0(%a6),%fp0 # force fop to be rounded
        fmov.l &0x0,%fpcr # clear FPCR
        fabs.x %fp0 # need absolute value
        fcmp.b %fp0,&0x2 # did exponent increase?
        fblt.w fout_dbl_exg # no; go finish NORM
        bra.w fout_dbl_ovfl # yes; go handle overflow
  6382. #########################################################################
  6383. # XDEF **************************************************************** #
  6384. # dst_dbl(): create double precision value from extended prec. #
  6385. # #
  6386. # XREF **************************************************************** #
  6387. # None #
  6388. # #
  6389. # INPUT *************************************************************** #
  6390. # a0 = pointer to source operand in extended precision #
  6391. # #
  6392. # OUTPUT ************************************************************** #
  6393. # d0 = hi(double precision result) #
  6394. # d1 = lo(double precision result) #
  6395. # #
  6396. # ALGORITHM *********************************************************** #
  6397. # #
  6398. # Changes extended precision to double precision. #
  6399. # Note: no attempt is made to round the extended value to double. #
  6400. # dbl_sign = ext_sign #
  6401. # dbl_exp = ext_exp - $3fff(ext bias) + $7ff(dbl bias) #
  6402. # get rid of ext integer bit #
  6403. # dbl_mant = ext_mant{62:12} #
  6404. # #
  6405. # --------------- --------------- --------------- #
  6406. # extended -> |s| exp | |1| ms mant | | ls mant | #
  6407. # --------------- --------------- --------------- #
  6408. # 95 64 63 62 32 31 11 0 #
  6409. # | | #
  6410. # | | #
  6411. # | | #
  6412. # v v #
  6413. # --------------- --------------- #
  6414. # double -> |s|exp| mant | | mant | #
  6415. # --------------- --------------- #
  6416. # 63 51 32 31 0 #
  6417. # #
  6418. #########################################################################
# dst_dbl: repack an extended-precision operand (at a0) as a double.
# no rounding is performed -- fields are re-biased and truncated.
# returns d0 = hi(double), d1 = lo(double). clobbers L_SCR1/L_SCR2.
dst_dbl:
        clr.l %d0 # clear d0
        mov.w FTEMP_EX(%a0),%d0 # get exponent
        subi.w &EXT_BIAS,%d0 # subtract extended precision bias
        addi.w &DBL_BIAS,%d0 # add double precision bias
        tst.b FTEMP_HI(%a0) # is number a denorm? (j-bit clear?)
        bmi.b dst_get_dupper # no
        subq.w &0x1,%d0 # yes; denorm bias = DBL_BIAS - 1
dst_get_dupper:
        swap %d0 # d0 now in upper word
        lsl.l &0x4,%d0 # d0 in proper place for dbl prec exp
        tst.b FTEMP_EX(%a0) # test sign
        bpl.b dst_get_dman # if positive, go process mantissa
        bset &0x1f,%d0 # if negative, set sign
dst_get_dman:
        mov.l FTEMP_HI(%a0),%d1 # get ms mantissa
        bfextu %d1{&1:&20},%d1 # get upper 20 bits of ms (drop j-bit)
        or.l %d1,%d0 # put these bits in ms word of double
        mov.l %d0,L_SCR1(%a6) # put the new exp back on the stack
        mov.l FTEMP_HI(%a0),%d1 # get ms mantissa
        mov.l &21,%d0 # load shift count
        lsl.l %d0,%d1 # put lower 11 bits in upper bits
        mov.l %d1,L_SCR2(%a6) # build lower lword in memory
        mov.l FTEMP_LO(%a0),%d1 # get ls mantissa
        bfextu %d1{&0:&21},%d0 # get ls 21 bits of double
        mov.l L_SCR2(%a6),%d1
        or.l %d0,%d1 # put them in double result
        mov.l L_SCR1(%a6),%d0 # d0 = hi(double)
        rts
  6448. #########################################################################
  6449. # XDEF **************************************************************** #
  6450. # dst_sgl(): create single precision value from extended prec #
  6451. # #
  6452. # XREF **************************************************************** #
  6453. # #
  6454. # INPUT *************************************************************** #
  6455. # a0 = pointer to source operand in extended precision #
  6456. # #
  6457. # OUTPUT ************************************************************** #
  6458. # d0 = single precision result #
  6459. # #
  6460. # ALGORITHM *********************************************************** #
  6461. # #
  6462. # Changes extended precision to single precision. #
  6463. # sgl_sign = ext_sign #
  6464. # sgl_exp = ext_exp - $3fff(ext bias) + $7f(sgl bias) #
  6465. # get rid of ext integer bit #
  6466. # sgl_mant = ext_mant{62:12} #
  6467. # #
  6468. # --------------- --------------- --------------- #
  6469. # extended -> |s| exp | |1| ms mant | | ls mant | #
  6470. # --------------- --------------- --------------- #
  6471. # 95 64 63 62 40 32 31 12 0 #
  6472. # | | #
  6473. # | | #
  6474. # | | #
  6475. # v v #
  6476. # --------------- #
  6477. # single -> |s|exp| mant | #
  6478. # --------------- #
  6479. # 31 22 0 #
  6480. # #
  6481. #########################################################################
# dst_sgl: repack an extended-precision operand (at a0) as a single.
# no rounding is performed -- fields are re-biased and truncated.
# returns d0 = single-precision image.
dst_sgl:
        clr.l %d0
        mov.w FTEMP_EX(%a0),%d0 # get exponent
        subi.w &EXT_BIAS,%d0 # subtract extended precision bias
        addi.w &SGL_BIAS,%d0 # add single precision bias
        tst.b FTEMP_HI(%a0) # is number a denorm? (j-bit clear?)
        bmi.b dst_get_supper # no
        subq.w &0x1,%d0 # yes; denorm bias = SGL_BIAS - 1
dst_get_supper:
        swap %d0 # put exp in upper word of d0
        lsl.l &0x7,%d0 # shift it into single exp bits
        tst.b FTEMP_EX(%a0) # test sign
        bpl.b dst_get_sman # if positive, continue
        bset &0x1f,%d0 # if negative, put in sign first
dst_get_sman:
        mov.l FTEMP_HI(%a0),%d1 # get ms mantissa
        andi.l &0x7fffff00,%d1 # get upper 23 bits of ms (drop j-bit)
        lsr.l &0x8,%d1 # and put them flush right
        or.l %d1,%d0 # put these bits in ms word of single
        rts
  6502. ##############################################################################
# fmove.p out: convert the source to a 12-byte packed-decimal image
# (via bindec() for NORM/DENORM) and write it to memory. the k-factor
# comes from the instruction word (static) or from Dn (dynamic).
fout_pack:
        bsr.l _calc_ea_fout # fetch the <ea>
        mov.l %a0,-(%sp) # save dst addr for the write below
        mov.b STAG(%a6),%d0 # fetch input type
        bne.w fout_pack_not_norm # input is not NORM
fout_pack_norm:
        btst &0x4,EXC_CMDREG(%a6) # static or dynamic k-factor?
        beq.b fout_pack_s # static
fout_pack_d:
        mov.b 1+EXC_CMDREG(%a6),%d1 # fetch dynamic reg
        lsr.b &0x4,%d1
        andi.w &0x7,%d1 # keep 3-bit register number
        bsr.l fetch_dreg # fetch Dn w/ k-factor
        bra.b fout_pack_type
fout_pack_s:
        mov.b 1+EXC_CMDREG(%a6),%d0 # fetch static field
fout_pack_type:
        bfexts %d0{&25:&7},%d0 # extract signed 7-bit k-factor
        mov.l %d0,-(%sp) # save k-factor
        lea FP_SRC(%a6),%a0 # pass: ptr to input
# bindec is currently scrambling FP_SRC for denorm inputs.
# we'll have to change this, but for now, tough luck!!!
        bsr.l bindec # convert xprec to packed
# andi.l &0xcfff000f,FP_SCR0(%a6) # clear unused fields
        andi.l &0xcffff00f,FP_SCR0(%a6) # clear unused fields
        mov.l (%sp)+,%d0 # restore k-factor
        tst.b 3+FP_SCR0_EX(%a6) # is the packed mantissa all zero?
        bne.b fout_pack_set
        tst.l FP_SCR0_HI(%a6)
        bne.b fout_pack_set
        tst.l FP_SCR0_LO(%a6)
        bne.b fout_pack_set
# add the extra condition that only if the k-factor was zero, too, should
# we zero the exponent
        tst.l %d0
        bne.b fout_pack_set
# "mantissa" is all zero which means that the answer is zero. but, the '040
# algorithm allows the exponent to be non-zero. the 881/2 do not. therefore,
# if the mantissa is zero, I will zero the exponent, too.
# the question now is whether the exponents sign bit is allowed to be non-zero
# for a zero, also...
        andi.w &0xf000,FP_SCR0(%a6) # zero the exponent digits
fout_pack_set:
        lea FP_SCR0(%a6),%a0 # pass: src addr
fout_pack_write:
        mov.l (%sp)+,%a1 # pass: dst addr
        mov.l &0xc,%d0 # pass: opsize is 12 bytes
        cmpi.b SPCOND_FLG(%a6),&mda7_flg # -(a7) special case?
        beq.b fout_pack_a7
        bsr.l _dmem_write # write ext prec number to memory
        tst.l %d1 # did dstore fail?
        bne.w fout_ext_err # yes
        rts
# we don't want to do the write if the exception occurred in supervisor mode
# so _mem_write2() handles this for us.
fout_pack_a7:
        bsr.l _mem_write2 # write ext prec number to memory
        tst.l %d1 # did dstore fail?
        bne.w fout_ext_err # yes
        rts
# non-NORM inputs: DENORMs go through the normal conversion; ZERO, INF,
# and NaNs are written out directly from FP_SRC (w/ the reserved 16-bit
# field cleared), SNANs after quieting and flagging SNAN/AIOP.
fout_pack_not_norm:
        cmpi.b %d0,&DENORM # is it a DENORM?
        beq.w fout_pack_norm # yes
        lea FP_SRC(%a6),%a0 # pass: operand image itself
        clr.w 2+FP_SRC_EX(%a6) # clear reserved field
        cmpi.b %d0,&SNAN # is it an SNAN?
        beq.b fout_pack_snan # yes
        bra.b fout_pack_write # no
fout_pack_snan:
        ori.w &snaniop2_mask,FPSR_EXCEPT(%a6) # set SNAN/AIOP
        bset &0x6,FP_SRC_HI(%a6) # set snan bit
        bra.b fout_pack_write
  6575. #########################################################################
  6576. # XDEF **************************************************************** #
  6577. # fmul(): emulates the fmul instruction #
  6578. # fsmul(): emulates the fsmul instruction #
  6579. # fdmul(): emulates the fdmul instruction #
  6580. # #
  6581. # XREF **************************************************************** #
  6582. # scale_to_zero_src() - scale src exponent to zero #
  6583. # scale_to_zero_dst() - scale dst exponent to zero #
  6584. # unf_res() - return default underflow result #
  6585. # ovf_res() - return default overflow result #
  6586. # res_qnan() - return QNAN result #
  6587. # res_snan() - return SNAN result #
  6588. # #
  6589. # INPUT *************************************************************** #
  6590. # a0 = pointer to extended precision source operand #
  6591. # a1 = pointer to extended precision destination operand #
  6592. # d0 rnd prec,mode #
  6593. # #
  6594. # OUTPUT ************************************************************** #
  6595. # fp0 = result #
  6596. # fp1 = EXOP (if exception occurred) #
  6597. # #
  6598. # ALGORITHM *********************************************************** #
  6599. # Handle NANs, infinities, and zeroes as special cases. Divide #
  6600. # norms/denorms into ext/sgl/dbl precision. #
  6601. # For norms/denorms, scale the exponents such that a multiply #
  6602. # instruction won't cause an exception. Use the regular fmul to #
  6603. # compute a result. Check if the regular operands would have taken #
  6604. # an exception. If so, return the default overflow/underflow result #
  6605. # and return the EXOP if exceptions are enabled. Else, scale the #
  6606. # result operand to the proper exponent. #
  6607. # #
  6608. #########################################################################
#
# Scale-factor limit tables for fmul, indexed by rounding precision
# (0 = ext, 1 = sgl, 2 = dbl). After both operands have been scaled so
# their exponents are near 0x3fff, the combined scale factor is compared
# against these entries to predict whether the real (unscaled) result
# would overflow or underflow at the selected precision.
#
	align		0x10
tbl_fmul_ovfl:
	long		0x3fff - 0x7ffe		# ext_max
	long		0x3fff - 0x407e		# sgl_max
	long		0x3fff - 0x43fe		# dbl_max
tbl_fmul_unfl:
	long		0x3fff + 0x0001		# ext_unfl
	long		0x3fff - 0x3f80		# sgl_unfl
	long		0x3fff - 0x3c00		# dbl_unfl
# fsmul/fdmul force the rounding precision to single/double in d0, then
# share the common fmul path below.
	global		fsmul
fsmul:
	andi.b		&0x30,%d0		# clear rnd prec
	ori.b		&s_mode*0x10,%d0	# insert sgl prec
	bra.b		fmul

	global		fdmul
fdmul:
	andi.b		&0x30,%d0		# clear rnd prec
	ori.b		&d_mode*0x10,%d0	# insert dbl prec
						# fall through into fmul

	global		fmul
fmul:
	mov.l		%d0,L_SCR3(%a6)		# store rnd info

# build a 6-bit dispatch index from the operand type tags:
# d1 = (DTAG << 3) | STAG; zero only when both operands are NORMs.
	clr.w		%d1
	mov.b		DTAG(%a6),%d1
	lsl.b		&0x3,%d1
	or.b		STAG(%a6),%d1		# combine src tags
	bne.w		fmul_not_norm		# optimize on non-norm input

# both operands are NORMs (or DENORMs, via the dispatch table).
# copy them to the stack frame scratch area and scale both exponents
# to zero so the hardware multiply below cannot trap.
fmul_norm:
	mov.w		DST_EX(%a1),FP_SCR1_EX(%a6)
	mov.l		DST_HI(%a1),FP_SCR1_HI(%a6)
	mov.l		DST_LO(%a1),FP_SCR1_LO(%a6)

	mov.w		SRC_EX(%a0),FP_SCR0_EX(%a6)
	mov.l		SRC_HI(%a0),FP_SCR0_HI(%a6)
	mov.l		SRC_LO(%a0),FP_SCR0_LO(%a6)

	bsr.l		scale_to_zero_src	# scale src exponent
	mov.l		%d0,-(%sp)		# save scale factor 1

	bsr.l		scale_to_zero_dst	# scale dst exponent

	add.l		%d0,(%sp)		# SCALE_FACTOR = scale1 + scale2

# compare the combined scale factor against the precision-indexed
# limit tables to decide which result path to take.
	mov.w		2+L_SCR3(%a6),%d1	# fetch precision
	lsr.b		&0x6,%d1		# shift to lo bits
	mov.l		(%sp)+,%d0		# load S.F.
	cmp.l		%d0,(tbl_fmul_ovfl.w,%pc,%d1.w*4) # would result ovfl?
	beq.w		fmul_may_ovfl		# result may rnd to overflow
	blt.w		fmul_ovfl		# result will overflow

	cmp.l		%d0,(tbl_fmul_unfl.w,%pc,%d1.w*4) # would result unfl?
	beq.w		fmul_may_unfl		# result may rnd to no unfl
	bgt.w		fmul_unfl		# result will underflow
  6655. #
  6656. # NORMAL:
  6657. # - the result of the multiply operation will neither overflow nor underflow.
  6658. # - do the multiply to the proper precision and rounding mode.
  6659. # - scale the result exponent using the scale factor. if both operands were
  6660. # normalized then we really don't need to go through this scaling. but for now,
  6661. # this will do.
  6662. #
# NORMAL path: multiply at the user's precision/mode, then undo the
# operand scaling by subtracting the scale factor (d0) from the result
# exponent before returning it in fp0.
fmul_normal:
	fmovm.x		FP_SCR1(%a6),&0x80	# load dst operand

	fmov.l		L_SCR3(%a6),%fpcr	# set FPCR
	fmov.l		&0x0,%fpsr		# clear FPSR

	fmul.x		FP_SCR0(%a6),%fp0	# execute multiply

	fmov.l		%fpsr,%d1		# save status
	fmov.l		&0x0,%fpcr		# clear FPCR

	or.l		%d1,USER_FPSR(%a6)	# save INEX2,N

fmul_normal_exit:
	fmovm.x		&0x80,FP_SCR0(%a6)	# store out result
	mov.l		%d2,-(%sp)		# save d2
	mov.w		FP_SCR0_EX(%a6),%d1	# load {sgn,exp}
	mov.l		%d1,%d2			# make a copy
	andi.l		&0x7fff,%d1		# strip sign
	andi.w		&0x8000,%d2		# keep old sign
	sub.l		%d0,%d1			# add scale factor
	or.w		%d2,%d1			# concat old sign,new exp
	mov.w		%d1,FP_SCR0_EX(%a6)	# insert new exponent
	mov.l		(%sp)+,%d2		# restore d2
	fmovm.x		FP_SCR0(%a6),&0x80	# return default result in fp0
	rts
  6684. #
  6685. # OVERFLOW:
  6686. # - the result of the multiply operation is an overflow.
  6687. # - do the multiply to the proper precision and rounding mode in order to
  6688. # set the inexact bits.
  6689. # - calculate the default result and return it in fp0.
  6690. # - if overflow or inexact is enabled, we need a multiply result rounded to
  6691. # extended precision. if the original operation was extended, then we have this
  6692. # result. if the original operation was single or double, we have to do another
  6693. # multiply using extended precision and the correct rounding mode. the result
  6694. # of this operation then has its exponent scaled by -0x6000 to create the
  6695. # exceptional operand.
  6696. #
# OVERFLOW path: do the multiply anyway (on scaled operands it cannot
# trap) so the inexact status bits get set correctly, then build the
# default overflow result and, if OVFL/INEX traps are enabled, the EXOP.
fmul_ovfl:
	fmovm.x		FP_SCR1(%a6),&0x80	# load dst operand

	fmov.l		L_SCR3(%a6),%fpcr	# set FPCR
	fmov.l		&0x0,%fpsr		# clear FPSR

	fmul.x		FP_SCR0(%a6),%fp0	# execute multiply

	fmov.l		%fpsr,%d1		# save status
	fmov.l		&0x0,%fpcr		# clear FPCR

	or.l		%d1,USER_FPSR(%a6)	# save INEX2,N

# save setting this until now because this is where fmul_may_ovfl may jump in
fmul_ovfl_tst:
	or.l		&ovfl_inx_mask,USER_FPSR(%a6) # set ovfl/aovfl/ainex

	mov.b		FPCR_ENABLE(%a6),%d1
	andi.b		&0x13,%d1		# is OVFL or INEX enabled?
	bne.b		fmul_ovfl_ena		# yes

# neither trap is enabled: return the default overflow result
# (INF or largest finite number, depending on rnd mode/sign).
fmul_ovfl_dis:
	btst		&neg_bit,FPSR_CC(%a6)	# is result negative?
	sne		%d1			# set sign param accordingly
	mov.l		L_SCR3(%a6),%d0		# pass rnd prec,mode
	bsr.l		ovf_res			# calculate default result
	or.b		%d0,FPSR_CC(%a6)	# set INF,N if applicable
	fmovm.x		(%a0),&0x80		# return default result in fp0
	rts

#
# OVFL is enabled; Create EXOP:
# - if precision is extended, then we have the EXOP. simply bias the exponent
# with an extra -0x6000. if the precision is single or double, we need to
# calculate a result rounded to extended precision.
#
fmul_ovfl_ena:
	mov.l		L_SCR3(%a6),%d1
	andi.b		&0xc0,%d1		# test the rnd prec
	bne.b		fmul_ovfl_ena_sd	# it's sgl or dbl

fmul_ovfl_ena_cont:
	fmovm.x		&0x80,FP_SCR0(%a6)	# move result to stack

# EXOP exponent = result exponent - scale factor - 0x6000 (ovfl bias),
# with the original sign re-inserted.
	mov.l		%d2,-(%sp)		# save d2
	mov.w		FP_SCR0_EX(%a6),%d1	# fetch {sgn,exp}
	mov.w		%d1,%d2			# make a copy
	andi.l		&0x7fff,%d1		# strip sign
	sub.l		%d0,%d1			# add scale factor
	subi.l		&0x6000,%d1		# subtract bias
	andi.w		&0x7fff,%d1		# clear sign bit
	andi.w		&0x8000,%d2		# keep old sign
	or.w		%d2,%d1			# concat old sign,new exp
	mov.w		%d1,FP_SCR0_EX(%a6)	# insert new exponent
	mov.l		(%sp)+,%d2		# restore d2
	fmovm.x		FP_SCR0(%a6),&0x40	# return EXOP in fp1
	bra.b		fmul_ovfl_dis

# precision was sgl/dbl: redo the multiply rounded to extended precision
# (rnd mode only) to get the EXOP mantissa.
fmul_ovfl_ena_sd:
	fmovm.x		FP_SCR1(%a6),&0x80	# load dst operand

	mov.l		L_SCR3(%a6),%d1
	andi.b		&0x30,%d1		# keep rnd mode only
	fmov.l		%d1,%fpcr		# set FPCR

	fmul.x		FP_SCR0(%a6),%fp0	# execute multiply

	fmov.l		&0x0,%fpcr		# clear FPCR
	bra.b		fmul_ovfl_ena_cont
  6753. #
  6754. # may OVERFLOW:
  6755. # - the result of the multiply operation MAY overflow.
  6756. # - do the multiply to the proper precision and rounding mode in order to
  6757. # set the inexact bits.
  6758. # - calculate the default result and return it in fp0.
  6759. #
# MAY OVERFLOW: the scale factor sits exactly on the overflow threshold,
# so only the rounded result tells us. Multiply, then test |result| >= 2:
# if so the unscaled result overflows and we join the overflow path.
fmul_may_ovfl:
	fmovm.x		FP_SCR1(%a6),&0x80	# load dst op

	fmov.l		L_SCR3(%a6),%fpcr	# set FPCR
	fmov.l		&0x0,%fpsr		# clear FPSR

	fmul.x		FP_SCR0(%a6),%fp0	# execute multiply

	fmov.l		%fpsr,%d1		# save status
	fmov.l		&0x0,%fpcr		# clear FPCR

	or.l		%d1,USER_FPSR(%a6)	# save INEX2,N

	fabs.x		%fp0,%fp1		# make a copy of result
	fcmp.b		%fp1,&0x2		# is |result| >= 2.b?
	fbge.w		fmul_ovfl_tst		# yes; overflow has occurred

# no, it didn't overflow; we have correct result
	bra.w		fmul_normal_exit
  6773. #
  6774. # UNDERFLOW:
  6775. # - the result of the multiply operation is an underflow.
  6776. # - do the multiply to the proper precision and rounding mode in order to
  6777. # set the inexact bits.
  6778. # - calculate the default result and return it in fp0.
  6779. # - if overflow or inexact is enabled, we need a multiply result rounded to
  6780. # extended precision. if the original operation was extended, then we have this
  6781. # result. if the original operation was single or double, we have to do another
  6782. # multiply using extended precision and the correct rounding mode. the result
  6783. # of this operation then has its exponent scaled by -0x6000 to create the
  6784. # exceptional operand.
  6785. #
# UNDERFLOW path: multiply in extended precision, round-to-zero, and
# let unf_res() produce the correctly denormalized/rounded default
# result. If UNFL/INEX traps are enabled, also build the EXOP in fp1.
fmul_unfl:
	bset		&unfl_bit,FPSR_EXCEPT(%a6) # set unfl exc bit

# for fun, let's use only extended precision, round to zero. then, let
# the unf_res() routine figure out all the rest.
	fmovm.x		FP_SCR1(%a6),&0x80	# load dst operand

	fmov.l		&rz_mode*0x10,%fpcr	# set FPCR
	fmov.l		&0x0,%fpsr		# clear FPSR

	fmul.x		FP_SCR0(%a6),%fp0	# execute multiply

	fmov.l		%fpsr,%d1		# save status
	fmov.l		&0x0,%fpcr		# clear FPCR

	or.l		%d1,USER_FPSR(%a6)	# save INEX2,N

	mov.b		FPCR_ENABLE(%a6),%d1
	andi.b		&0x0b,%d1		# is UNFL or INEX enabled?
	bne.b		fmul_unfl_ena		# yes

fmul_unfl_dis:
	fmovm.x		&0x80,FP_SCR0(%a6)	# store out result

	lea		FP_SCR0(%a6),%a0	# pass: result addr
	mov.l		L_SCR3(%a6),%d1		# pass: rnd prec,mode
	bsr.l		unf_res			# calculate default result
	or.b		%d0,FPSR_CC(%a6)	# unf_res2 may have set 'Z'
	fmovm.x		FP_SCR0(%a6),&0x80	# return default result in fp0
	rts

#
# UNFL is enabled.
#
fmul_unfl_ena:
	fmovm.x		FP_SCR1(%a6),&0x40	# load dst op

	mov.l		L_SCR3(%a6),%d1
	andi.b		&0xc0,%d1		# is precision extended?
	bne.b		fmul_unfl_ena_sd	# no, sgl or dbl

# if the rnd mode is anything but RZ, then we have to re-do the above
# multiplication because we used RZ for all.
	fmov.l		L_SCR3(%a6),%fpcr	# set FPCR

fmul_unfl_ena_cont:
	fmov.l		&0x0,%fpsr		# clear FPSR

	fmul.x		FP_SCR0(%a6),%fp1	# execute multiply

	fmov.l		&0x0,%fpcr		# clear FPCR

# EXOP exponent = result exponent - scale factor + 0x6000 (unfl bias),
# with the original sign re-inserted.
	fmovm.x		&0x40,FP_SCR0(%a6)	# save result to stack
	mov.l		%d2,-(%sp)		# save d2
	mov.w		FP_SCR0_EX(%a6),%d1	# fetch {sgn,exp}
	mov.l		%d1,%d2			# make a copy
	andi.l		&0x7fff,%d1		# strip sign
	andi.w		&0x8000,%d2		# keep old sign
	sub.l		%d0,%d1			# add scale factor
	addi.l		&0x6000,%d1		# add bias
	andi.w		&0x7fff,%d1		# clear sign bit
	or.w		%d2,%d1			# concat old sign,new exp
	mov.w		%d1,FP_SCR0_EX(%a6)	# insert new exponent
	mov.l		(%sp)+,%d2		# restore d2
	fmovm.x		FP_SCR0(%a6),&0x40	# return EXOP in fp1
	bra.w		fmul_unfl_dis

# precision was sgl/dbl: EXOP multiply uses extended precision with the
# user's rounding mode only.
fmul_unfl_ena_sd:
	mov.l		L_SCR3(%a6),%d1
	andi.b		&0x30,%d1		# use only rnd mode
	fmov.l		%d1,%fpcr		# set FPCR

	bra.b		fmul_unfl_ena_cont
  6843. # MAY UNDERFLOW:
  6844. # -use the correct rounding mode and precision. this code favors operations
  6845. # that do not underflow.
# MAY UNDERFLOW: the scale factor sits exactly on the underflow
# threshold. Multiply at user precision/mode and compare |result| to 2:
# above 2 => no underflow; below 2 => underflow; exactly 2 requires a
# redo in RZ mode to see the pre-rounded magnitude.
fmul_may_unfl:
	fmovm.x		FP_SCR1(%a6),&0x80	# load dst operand

	fmov.l		L_SCR3(%a6),%fpcr	# set FPCR
	fmov.l		&0x0,%fpsr		# clear FPSR

	fmul.x		FP_SCR0(%a6),%fp0	# execute multiply

	fmov.l		%fpsr,%d1		# save status
	fmov.l		&0x0,%fpcr		# clear FPCR

	or.l		%d1,USER_FPSR(%a6)	# save INEX2,N

	fabs.x		%fp0,%fp1		# make a copy of result
	fcmp.b		%fp1,&0x2		# is |result| > 2.b?
	fbgt.w		fmul_normal_exit	# no; no underflow occurred
	fblt.w		fmul_unfl		# yes; underflow occurred

#
# we still don't know if underflow occurred. result is ~ equal to 2. but,
# we don't know if the result was an underflow that rounded up to a 2 or
# a normalized number that rounded down to a 2. so, redo the entire operation
# using RZ as the rounding mode to see what the pre-rounded result is.
# this case should be relatively rare.
#
	fmovm.x		FP_SCR1(%a6),&0x40	# load dst operand

	mov.l		L_SCR3(%a6),%d1
	andi.b		&0xc0,%d1		# keep rnd prec
	ori.b		&rz_mode*0x10,%d1	# insert RZ

	fmov.l		%d1,%fpcr		# set FPCR
	fmov.l		&0x0,%fpsr		# clear FPSR

	fmul.x		FP_SCR0(%a6),%fp1	# execute multiply

	fmov.l		&0x0,%fpcr		# clear FPCR
	fabs.x		%fp1			# make absolute value
	fcmp.b		%fp1,&0x2		# is |result| < 2.b?
	fbge.w		fmul_normal_exit	# no; no underflow occurred
	bra.w		fmul_unfl		# yes, underflow occurred
  6877. ################################################################################
  6878. #
  6879. # Multiply: inputs are not both normalized; what are they?
  6880. #
# At least one operand is not a NORM. Dispatch through a 48-entry table
# indexed by (DTAG << 3) | STAG, already in d1 (scaled *2 for shorts).
# Rows are DTAG (dst type), columns are STAG (src type):
# NORM, ZERO, INF, QNAN, DENORM, SNAN, <unused>, <unused>.
fmul_not_norm:
	mov.w		(tbl_fmul_op.b,%pc,%d1.w*2),%d1
	jmp		(tbl_fmul_op.b,%pc,%d1.w)

	swbeg		&48
tbl_fmul_op:
	short		fmul_norm	- tbl_fmul_op # NORM x NORM
	short		fmul_zero	- tbl_fmul_op # NORM x ZERO
	short		fmul_inf_src	- tbl_fmul_op # NORM x INF
	short		fmul_res_qnan	- tbl_fmul_op # NORM x QNAN
	short		fmul_norm	- tbl_fmul_op # NORM x DENORM
	short		fmul_res_snan	- tbl_fmul_op # NORM x SNAN
	short		tbl_fmul_op	- tbl_fmul_op #
	short		tbl_fmul_op	- tbl_fmul_op #

	short		fmul_zero	- tbl_fmul_op # ZERO x NORM
	short		fmul_zero	- tbl_fmul_op # ZERO x ZERO
	short		fmul_res_operr	- tbl_fmul_op # ZERO x INF
	short		fmul_res_qnan	- tbl_fmul_op # ZERO x QNAN
	short		fmul_zero	- tbl_fmul_op # ZERO x DENORM
	short		fmul_res_snan	- tbl_fmul_op # ZERO x SNAN
	short		tbl_fmul_op	- tbl_fmul_op #
	short		tbl_fmul_op	- tbl_fmul_op #

	short		fmul_inf_dst	- tbl_fmul_op # INF x NORM
	short		fmul_res_operr	- tbl_fmul_op # INF x ZERO
	short		fmul_inf_dst	- tbl_fmul_op # INF x INF
	short		fmul_res_qnan	- tbl_fmul_op # INF x QNAN
	short		fmul_inf_dst	- tbl_fmul_op # INF x DENORM
	short		fmul_res_snan	- tbl_fmul_op # INF x SNAN
	short		tbl_fmul_op	- tbl_fmul_op #
	short		tbl_fmul_op	- tbl_fmul_op #

	short		fmul_res_qnan	- tbl_fmul_op # QNAN x NORM
	short		fmul_res_qnan	- tbl_fmul_op # QNAN x ZERO
	short		fmul_res_qnan	- tbl_fmul_op # QNAN x INF
	short		fmul_res_qnan	- tbl_fmul_op # QNAN x QNAN
	short		fmul_res_qnan	- tbl_fmul_op # QNAN x DENORM
	short		fmul_res_snan	- tbl_fmul_op # QNAN x SNAN
	short		tbl_fmul_op	- tbl_fmul_op #
	short		tbl_fmul_op	- tbl_fmul_op #

	short		fmul_norm	- tbl_fmul_op # DENORM x NORM
	short		fmul_zero	- tbl_fmul_op # DENORM x ZERO
	short		fmul_inf_src	- tbl_fmul_op # DENORM x INF
	short		fmul_res_qnan	- tbl_fmul_op # DENORM x QNAN
	short		fmul_norm	- tbl_fmul_op # DENORM x DENORM
	short		fmul_res_snan	- tbl_fmul_op # DENORM x SNAN
	short		tbl_fmul_op	- tbl_fmul_op #
	short		tbl_fmul_op	- tbl_fmul_op #

	short		fmul_res_snan	- tbl_fmul_op # SNAN x NORM
	short		fmul_res_snan	- tbl_fmul_op # SNAN x ZERO
	short		fmul_res_snan	- tbl_fmul_op # SNAN x INF
	short		fmul_res_snan	- tbl_fmul_op # SNAN x QNAN
	short		fmul_res_snan	- tbl_fmul_op # SNAN x DENORM
	short		fmul_res_snan	- tbl_fmul_op # SNAN x SNAN
	short		tbl_fmul_op	- tbl_fmul_op #
	short		tbl_fmul_op	- tbl_fmul_op #

# trampolines to the shared special-result routines (bra.l keeps them
# reachable from the short-displacement table above).
fmul_res_operr:
	bra.l		res_operr
fmul_res_snan:
	bra.l		res_snan
fmul_res_qnan:
	bra.l		res_qnan
  6940. #
  6941. # Multiply: (Zero x Zero) || (Zero x norm) || (Zero x denorm)
  6942. #
#
# Multiply: (Zero x Zero) || (Zero x norm) || (Zero x denorm)
# result is a ZERO whose sign is the XOR of the operand signs.
#
	global		fmul_zero	# global for fsglmul
fmul_zero:
	mov.b		SRC_EX(%a0),%d0	# exclusive or the signs
	mov.b		DST_EX(%a1),%d1
	eor.b		%d0,%d1
	bpl.b		fmul_zero_p	# result ZERO is pos.
fmul_zero_n:
	fmov.s		&0x80000000,%fp0 # load -ZERO
	mov.b		&z_bmask+neg_bmask,FPSR_CC(%a6) # set Z/N
	rts
fmul_zero_p:
	fmov.s		&0x00000000,%fp0 # load +ZERO
	mov.b		&z_bmask,FPSR_CC(%a6) # set Z
	rts

#
# Multiply: (inf x inf) || (inf x norm) || (inf x denorm)
# result is an INF whose sign is the XOR of the operand signs.
#
# Note: The j-bit for an infinity is a don't-care. However, to be
# strictly compatible w/ the 68881/882, we make sure to return an
# INF w/ the j-bit set if the input INF j-bit was set. Destination
# INFs take priority.
#
	global		fmul_inf_dst	# global for fsglmul
fmul_inf_dst:
	fmovm.x		DST(%a1),&0x80	# return INF result in fp0
	mov.b		SRC_EX(%a0),%d0	# exclusive or the signs
	mov.b		DST_EX(%a1),%d1
	eor.b		%d0,%d1
	bpl.b		fmul_inf_dst_p	# result INF is pos.
fmul_inf_dst_n:
	fabs.x		%fp0		# clear result sign
	fneg.x		%fp0		# set result sign
	mov.b		&inf_bmask+neg_bmask,FPSR_CC(%a6) # set INF/N
	rts
fmul_inf_dst_p:
	fabs.x		%fp0		# clear result sign
	mov.b		&inf_bmask,FPSR_CC(%a6) # set INF
	rts

# src is the INF: load it, then reuse the sign fix-up code above.
	global		fmul_inf_src	# global for fsglmul
fmul_inf_src:
	fmovm.x		SRC(%a0),&0x80	# return INF result in fp0
	mov.b		SRC_EX(%a0),%d0	# exclusive or the signs
	mov.b		DST_EX(%a1),%d1
	eor.b		%d0,%d1
	bpl.b		fmul_inf_dst_p	# result INF is pos.
	bra.b		fmul_inf_dst_n
  6989. #########################################################################
  6990. # XDEF **************************************************************** #
  6991. # fin(): emulates the fmove instruction #
  6992. # fsin(): emulates the fsmove instruction #
  6993. # fdin(): emulates the fdmove instruction #
  6994. # #
  6995. # XREF **************************************************************** #
  6996. # norm() - normalize mantissa for EXOP on denorm #
  6997. # scale_to_zero_src() - scale src exponent to zero #
  6998. # ovf_res() - return default overflow result #
  6999. # unf_res() - return default underflow result #
  7000. # res_qnan_1op() - return QNAN result #
  7001. # res_snan_1op() - return SNAN result #
  7002. # #
  7003. # INPUT *************************************************************** #
  7004. # a0 = pointer to extended precision source operand #
  7005. # d0 = round prec/mode #
  7006. # #
  7007. # OUTPUT ************************************************************** #
  7008. # fp0 = result #
  7009. # fp1 = EXOP (if exception occurred) #
  7010. # #
  7011. # ALGORITHM *********************************************************** #
  7012. # Handle NANs, infinities, and zeroes as special cases. Divide #
  7013. # norms into extended, single, and double precision. #
  7014. # Norms can be emulated w/ a regular fmove instruction. For #
  7015. # sgl/dbl, must scale exponent and perform an "fmove". Check to see #
  7016. # if the result would have overflowed/underflowed. If so, use unf_res() #
  7017. # or ovf_res() to return the default result. Also return EXOP if #
  7018. # exception is enabled. If no exception, return the default result. #
  7019. # Unnorms don't pass through here. #
  7020. # #
  7021. #########################################################################
# fsin/fdin force the rounding precision to single/double in d0, then
# share the common fin (fmove-in) path below.
	global		fsin
fsin:
	andi.b		&0x30,%d0	# clear rnd prec
	ori.b		&s_mode*0x10,%d0 # insert sgl precision
	bra.b		fin

	global		fdin
fdin:
	andi.b		&0x30,%d0	# clear rnd prec
	ori.b		&d_mode*0x10,%d0 # insert dbl precision
					# fall through into fin

	global		fin
fin:
	mov.l		%d0,L_SCR3(%a6)	# store rnd info

	mov.b		STAG(%a6),%d1	# fetch src optype tag
	bne.w		fin_not_norm	# optimize on non-norm input

#
# FP MOVE IN: NORMs and DENORMs ONLY!
#
fin_norm:
	andi.b		&0xc0,%d0	# is precision extended?
	bne.w		fin_not_ext	# no, so go handle dbl or sgl

#
# precision selected is extended. so...we cannot get an underflow
# or overflow because of rounding to the correct precision. so...
# skip the scaling and unscaling...
#
	tst.b		SRC_EX(%a0)	# is the operand negative?
	bpl.b		fin_norm_done	# no
	bset		&neg_bit,FPSR_CC(%a6) # yes, so set 'N' ccode bit
fin_norm_done:
	fmovm.x		SRC(%a0),&0x80	# return result in fp0
	rts

#
# for an extended precision DENORM, the UNFL exception bit is set
# the accrued bit is NOT set in this instance(no inexactness!)
#
fin_denorm:
	andi.b		&0xc0,%d0	# is precision extended?
	bne.w		fin_not_ext	# no, so go handle dbl or sgl

	bset		&unfl_bit,FPSR_EXCEPT(%a6) # set unfl exc bit

	tst.b		SRC_EX(%a0)	# is the operand negative?
	bpl.b		fin_denorm_done	# no
	bset		&neg_bit,FPSR_CC(%a6) # yes, so set 'N' ccode bit
fin_denorm_done:
	fmovm.x		SRC(%a0),&0x80	# return result in fp0
	btst		&unfl_bit,FPCR_ENABLE(%a6) # is UNFL enabled?
	bne.b		fin_denorm_unfl_ena # yes
	rts

#
# the input is an extended DENORM and underflow is enabled in the FPCR.
# normalize the mantissa and add the bias of 0x6000 to the resulting negative
# exponent and insert back into the operand.
#
fin_denorm_unfl_ena:
	mov.w		SRC_EX(%a0),FP_SCR0_EX(%a6)
	mov.l		SRC_HI(%a0),FP_SCR0_HI(%a6)
	mov.l		SRC_LO(%a0),FP_SCR0_LO(%a6)
	lea		FP_SCR0(%a6),%a0 # pass: ptr to operand
	bsr.l		norm		# normalize result
	neg.w		%d0		# new exponent = -(shft val)
	addi.w		&0x6000,%d0	# add new bias to exponent
	mov.w		FP_SCR0_EX(%a6),%d1 # fetch old sign,exp
	andi.w		&0x8000,%d1	# keep old sign
	andi.w		&0x7fff,%d0	# clear sign position
	or.w		%d1,%d0		# concat new exp,old sign
	mov.w		%d0,FP_SCR0_EX(%a6) # insert new exponent
	fmovm.x		FP_SCR0(%a6),&0x40 # return EXOP in fp1
	rts
  7089. #
  7090. # operand is to be rounded to single or double precision
  7091. #
#
# operand is to be rounded to single or double precision
#
fin_not_ext:
	cmpi.b		%d0,&s_mode*0x10 # separate sgl/dbl prec
	bne.b		fin_dbl

#
# operand is to be rounded to single precision: scale the exponent to
# zero, then classify the scale factor against the sgl unfl/ovfl limits.
#
fin_sgl:
	mov.w		SRC_EX(%a0),FP_SCR0_EX(%a6)
	mov.l		SRC_HI(%a0),FP_SCR0_HI(%a6)
	mov.l		SRC_LO(%a0),FP_SCR0_LO(%a6)
	bsr.l		scale_to_zero_src # calculate scale factor

	cmpi.l		%d0,&0x3fff-0x3f80 # will move in underflow?
	bge.w		fin_sd_unfl	# yes; go handle underflow
	cmpi.l		%d0,&0x3fff-0x407e # will move in overflow?
	beq.w		fin_sd_may_ovfl	# maybe; go check
	blt.w		fin_sd_ovfl	# yes; go handle overflow

#
# operand will NOT overflow or underflow when moved into the fp reg file:
# round at the user's precision/mode, then undo the scaling by
# subtracting the scale factor (d0) from the result exponent.
#
fin_sd_normal:
	fmov.l		&0x0,%fpsr	# clear FPSR
	fmov.l		L_SCR3(%a6),%fpcr # set FPCR

	fmov.x		FP_SCR0(%a6),%fp0 # perform move

	fmov.l		%fpsr,%d1	# save FPSR
	fmov.l		&0x0,%fpcr	# clear FPCR

	or.l		%d1,USER_FPSR(%a6) # save INEX2,N

fin_sd_normal_exit:
	mov.l		%d2,-(%sp)	# save d2
	fmovm.x		&0x80,FP_SCR0(%a6) # store out result
	mov.w		FP_SCR0_EX(%a6),%d1 # load {sgn,exp}
	mov.w		%d1,%d2		# make a copy
	andi.l		&0x7fff,%d1	# strip sign
	sub.l		%d0,%d1		# add scale factor
	andi.w		&0x8000,%d2	# keep old sign
	or.w		%d1,%d2		# concat old sign,new exponent
	mov.w		%d2,FP_SCR0_EX(%a6) # insert new exponent
	mov.l		(%sp)+,%d2	# restore d2
	fmovm.x		FP_SCR0(%a6),&0x80 # return result in fp0
	rts
  7131. #
  7132. # operand is to be rounded to double precision
  7133. #
#
# operand is to be rounded to double precision: same as fin_sgl but
# classified against the dbl unfl/ovfl limits; shares the fin_sd_* paths.
#
fin_dbl:
	mov.w		SRC_EX(%a0),FP_SCR0_EX(%a6)
	mov.l		SRC_HI(%a0),FP_SCR0_HI(%a6)
	mov.l		SRC_LO(%a0),FP_SCR0_LO(%a6)
	bsr.l		scale_to_zero_src # calculate scale factor

	cmpi.l		%d0,&0x3fff-0x3c00 # will move in underflow?
	bge.w		fin_sd_unfl	# yes; go handle underflow
	cmpi.l		%d0,&0x3fff-0x43fe # will move in overflow?
	beq.w		fin_sd_may_ovfl	# maybe; go check
	blt.w		fin_sd_ovfl	# yes; go handle overflow
	bra.w		fin_sd_normal	# no; go handle normalized op

#
# operand WILL underflow when moved in to the fp register file
#
fin_sd_unfl:
	bset		&unfl_bit,FPSR_EXCEPT(%a6) # set unfl exc bit

	tst.b		FP_SCR0_EX(%a6)	# is operand negative?
	bpl.b		fin_sd_unfl_tst
	bset		&neg_bit,FPSR_CC(%a6) # set 'N' ccode bit

# if underflow or inexact is enabled, then go calculate the EXOP first.
fin_sd_unfl_tst:
	mov.b		FPCR_ENABLE(%a6),%d1
	andi.b		&0x0b,%d1	# is UNFL or INEX enabled?
	bne.b		fin_sd_unfl_ena	# yes

# traps disabled: let unf_res() build the default denormalized result.
fin_sd_unfl_dis:
	lea		FP_SCR0(%a6),%a0 # pass: result addr
	mov.l		L_SCR3(%a6),%d1	# pass: rnd prec,mode
	bsr.l		unf_res		# calculate default result
	or.b		%d0,FPSR_CC(%a6) # unf_res may have set 'Z'
	fmovm.x		FP_SCR0(%a6),&0x80 # return default result in fp0
	rts

#
# operand will underflow AND underflow or inexact is enabled.
# therefore, we must return the result rounded to extended precision.
# EXOP exponent = exponent - scale factor + 0x6000 (unfl bias).
#
fin_sd_unfl_ena:
	mov.l		FP_SCR0_HI(%a6),FP_SCR1_HI(%a6)
	mov.l		FP_SCR0_LO(%a6),FP_SCR1_LO(%a6)
	mov.w		FP_SCR0_EX(%a6),%d1 # load current exponent

	mov.l		%d2,-(%sp)	# save d2
	mov.w		%d1,%d2		# make a copy
	andi.l		&0x7fff,%d1	# strip sign
	sub.l		%d0,%d1		# subtract scale factor
	andi.w		&0x8000,%d2	# extract old sign
	addi.l		&0x6000,%d1	# add new bias
	andi.w		&0x7fff,%d1	# clear sign bit
	or.w		%d1,%d2		# concat old sign,new exp
	mov.w		%d2,FP_SCR1_EX(%a6) # insert new exponent
	fmovm.x		FP_SCR1(%a6),&0x40 # return EXOP in fp1
	mov.l		(%sp)+,%d2	# restore d2
	bra.b		fin_sd_unfl_dis
  7185. #
  7186. # operand WILL overflow.
  7187. #
#
# operand WILL overflow: do the move anyway (scaled operand cannot trap)
# so INEX2/N get set, then build the default result and, if enabled, the EXOP.
#
fin_sd_ovfl:
	fmov.l		&0x0,%fpsr	# clear FPSR
	fmov.l		L_SCR3(%a6),%fpcr # set FPCR

	fmov.x		FP_SCR0(%a6),%fp0 # perform move

	fmov.l		&0x0,%fpcr	# clear FPCR
	fmov.l		%fpsr,%d1	# save FPSR

	or.l		%d1,USER_FPSR(%a6) # save INEX2,N

fin_sd_ovfl_tst:
	or.l		&ovfl_inx_mask,USER_FPSR(%a6) # set ovfl/aovfl/ainex

	mov.b		FPCR_ENABLE(%a6),%d1
	andi.b		&0x13,%d1	# is OVFL or INEX enabled?
	bne.b		fin_sd_ovfl_ena	# yes

#
# OVFL is not enabled; therefore, we must create the default result by
# calling ovf_res().
#
fin_sd_ovfl_dis:
	btst		&neg_bit,FPSR_CC(%a6) # is result negative?
	sne		%d1		# set sign param accordingly
	mov.l		L_SCR3(%a6),%d0	# pass: prec,mode
	bsr.l		ovf_res		# calculate default result
	or.b		%d0,FPSR_CC(%a6) # set INF,N if applicable
	fmovm.x		(%a0),&0x80	# return default result in fp0
	rts

#
# OVFL is enabled.
# the INEX2 bit has already been updated by the round to the correct precision.
# now, round to extended(and don't alter the FPSR).
# EXOP exponent = exponent - scale factor - 0x6000 (ovfl bias).
#
fin_sd_ovfl_ena:
	mov.l		%d2,-(%sp)	# save d2
	mov.w		FP_SCR0_EX(%a6),%d1 # fetch {sgn,exp}
	mov.l		%d1,%d2		# make a copy
	andi.l		&0x7fff,%d1	# strip sign
	andi.w		&0x8000,%d2	# keep old sign
	sub.l		%d0,%d1		# add scale factor
	sub.l		&0x6000,%d1	# subtract bias
	andi.w		&0x7fff,%d1	# clear sign bit
	or.w		%d2,%d1		# concat old sign,new exp
	mov.w		%d1,FP_SCR0_EX(%a6) # insert new exponent
	mov.l		(%sp)+,%d2	# restore d2
	fmovm.x		FP_SCR0(%a6),&0x40 # return EXOP in fp1
	bra.b		fin_sd_ovfl_dis

#
# the move in MAY overflow. so... do it and test |result| >= 2; if so,
# the unscaled result overflows and we join the overflow path.
#
fin_sd_may_ovfl:
	fmov.l		&0x0,%fpsr	# clear FPSR
	fmov.l		L_SCR3(%a6),%fpcr # set FPCR

	fmov.x		FP_SCR0(%a6),%fp0 # perform the move

	fmov.l		%fpsr,%d1	# save status
	fmov.l		&0x0,%fpcr	# clear FPCR

	or.l		%d1,USER_FPSR(%a6) # save INEX2,N

	fabs.x		%fp0,%fp1	# make a copy of result
	fcmp.b		%fp1,&0x2	# is |result| >= 2.b?
	fbge.w		fin_sd_ovfl_tst	# yes; overflow has occurred

# no, it didn't overflow; we have correct result
	bra.w		fin_sd_normal_exit
  7246. ##########################################################################
  7247. #
  7248. # operand is not a NORM: check its optype and branch accordingly
  7249. #
#
# operand is not a NORM: check its optype and branch accordingly
#
fin_not_norm:
	cmpi.b		%d1,&DENORM	# weed out DENORM
	beq.w		fin_denorm
	cmpi.b		%d1,&SNAN	# weed out SNANs
	beq.l		res_snan_1op
	cmpi.b		%d1,&QNAN	# weed out QNANs
	beq.l		res_qnan_1op

#
# do the fmove in; at this point, only possible ops are ZERO and INF.
# use fmov to determine ccodes.
# prec:mode should be zero at this point but it won't affect answer anyways.
#
	fmov.x		SRC(%a0),%fp0	# do fmove in
	fmov.l		%fpsr,%d0	# no exceptions possible
	rol.l		&0x8,%d0	# put ccodes in lo byte
	mov.b		%d0,FPSR_CC(%a6) # insert correct ccodes
	rts
  7267. #########################################################################
  7268. # XDEF **************************************************************** #
  7269. # fdiv(): emulates the fdiv instruction #
  7270. # fsdiv(): emulates the fsdiv instruction #
  7271. # fddiv(): emulates the fddiv instruction #
  7272. # #
  7273. # XREF **************************************************************** #
  7274. # scale_to_zero_src() - scale src exponent to zero #
  7275. # scale_to_zero_dst() - scale dst exponent to zero #
  7276. # unf_res() - return default underflow result #
  7277. # ovf_res() - return default overflow result #
  7278. # res_qnan() - return QNAN result #
  7279. # res_snan() - return SNAN result #
  7280. # #
  7281. # INPUT *************************************************************** #
  7282. # a0 = pointer to extended precision source operand #
  7283. # a1 = pointer to extended precision destination operand #
  7284. # d0 rnd prec,mode #
  7285. # #
  7286. # OUTPUT ************************************************************** #
  7287. # fp0 = result #
  7288. # fp1 = EXOP (if exception occurred) #
  7289. # #
  7290. # ALGORITHM *********************************************************** #
  7291. # Handle NANs, infinities, and zeroes as special cases. Divide #
  7292. # norms/denorms into ext/sgl/dbl precision. #
  7293. # For norms/denorms, scale the exponents such that a divide #
  7294. # instruction won't cause an exception. Use the regular fdiv to #
  7295. # compute a result. Check if the regular operands would have taken #
  7296. # an exception. If so, return the default overflow/underflow result #
  7297. # and return the EXOP if exceptions are enabled. Else, scale the #
  7298. # result operand to the proper exponent. #
  7299. # #
  7300. #########################################################################
  7301. align 0x10
  7302. tbl_fdiv_unfl:
  7303. long 0x3fff - 0x0000 # ext_unfl
  7304. long 0x3fff - 0x3f81 # sgl_unfl
  7305. long 0x3fff - 0x3c01 # dbl_unfl
  7306. tbl_fdiv_ovfl:
  7307. long 0x3fff - 0x7ffe # ext overflow exponent
  7308. long 0x3fff - 0x407e # sgl overflow exponent
  7309. long 0x3fff - 0x43fe # dbl overflow exponent
  7310. global fsdiv
  7311. fsdiv:
  7312. andi.b &0x30,%d0 # clear rnd prec
  7313. ori.b &s_mode*0x10,%d0 # insert sgl prec
  7314. bra.b fdiv
  7315. global fddiv
  7316. fddiv:
  7317. andi.b &0x30,%d0 # clear rnd prec
  7318. ori.b &d_mode*0x10,%d0 # insert dbl prec
  7319. global fdiv
  7320. fdiv:
  7321. mov.l %d0,L_SCR3(%a6) # store rnd info
  7322. clr.w %d1
  7323. mov.b DTAG(%a6),%d1
  7324. lsl.b &0x3,%d1
  7325. or.b STAG(%a6),%d1 # combine src tags
  7326. bne.w fdiv_not_norm # optimize on non-norm input
  7327. #
  7328. # DIVIDE: NORMs and DENORMs ONLY!
  7329. #
  7330. fdiv_norm:
  7331. mov.w DST_EX(%a1),FP_SCR1_EX(%a6)
  7332. mov.l DST_HI(%a1),FP_SCR1_HI(%a6)
  7333. mov.l DST_LO(%a1),FP_SCR1_LO(%a6)
  7334. mov.w SRC_EX(%a0),FP_SCR0_EX(%a6)
  7335. mov.l SRC_HI(%a0),FP_SCR0_HI(%a6)
  7336. mov.l SRC_LO(%a0),FP_SCR0_LO(%a6)
  7337. bsr.l scale_to_zero_src # scale src exponent
  7338. mov.l %d0,-(%sp) # save scale factor 1
  7339. bsr.l scale_to_zero_dst # scale dst exponent
  7340. neg.l (%sp) # SCALE FACTOR = scale1 - scale2
  7341. add.l %d0,(%sp)
  7342. mov.w 2+L_SCR3(%a6),%d1 # fetch precision
  7343. lsr.b &0x6,%d1 # shift to lo bits
  7344. mov.l (%sp)+,%d0 # load S.F.
  7345. cmp.l %d0,(tbl_fdiv_ovfl.b,%pc,%d1.w*4) # will result overflow?
  7346. ble.w fdiv_may_ovfl # result will overflow
  7347. cmp.l %d0,(tbl_fdiv_unfl.w,%pc,%d1.w*4) # will result underflow?
  7348. beq.w fdiv_may_unfl # maybe
  7349. bgt.w fdiv_unfl # yes; go handle underflow
  7350. fdiv_normal:
  7351. fmovm.x FP_SCR1(%a6),&0x80 # load dst op
  7352. fmov.l L_SCR3(%a6),%fpcr # save FPCR
  7353. fmov.l &0x0,%fpsr # clear FPSR
  7354. fdiv.x FP_SCR0(%a6),%fp0 # perform divide
  7355. fmov.l %fpsr,%d1 # save FPSR
  7356. fmov.l &0x0,%fpcr # clear FPCR
  7357. or.l %d1,USER_FPSR(%a6) # save INEX2,N
  7358. fdiv_normal_exit:
  7359. fmovm.x &0x80,FP_SCR0(%a6) # store result on stack
  7360. mov.l %d2,-(%sp) # store d2
  7361. mov.w FP_SCR0_EX(%a6),%d1 # load {sgn,exp}
  7362. mov.l %d1,%d2 # make a copy
  7363. andi.l &0x7fff,%d1 # strip sign
  7364. andi.w &0x8000,%d2 # keep old sign
  7365. sub.l %d0,%d1 # add scale factor
  7366. or.w %d2,%d1 # concat old sign,new exp
  7367. mov.w %d1,FP_SCR0_EX(%a6) # insert new exponent
  7368. mov.l (%sp)+,%d2 # restore d2
  7369. fmovm.x FP_SCR0(%a6),&0x80 # return result in fp0
  7370. rts
  7371. tbl_fdiv_ovfl2:
  7372. long 0x7fff
  7373. long 0x407f
  7374. long 0x43ff
  7375. fdiv_no_ovfl:
  7376. mov.l (%sp)+,%d0 # restore scale factor
  7377. bra.b fdiv_normal_exit
  7378. fdiv_may_ovfl:
  7379. mov.l %d0,-(%sp) # save scale factor
  7380. fmovm.x FP_SCR1(%a6),&0x80 # load dst op
  7381. fmov.l L_SCR3(%a6),%fpcr # set FPCR
  7382. fmov.l &0x0,%fpsr # set FPSR
  7383. fdiv.x FP_SCR0(%a6),%fp0 # execute divide
  7384. fmov.l %fpsr,%d0
  7385. fmov.l &0x0,%fpcr
  7386. or.l %d0,USER_FPSR(%a6) # save INEX,N
  7387. fmovm.x &0x01,-(%sp) # save result to stack
  7388. mov.w (%sp),%d0 # fetch new exponent
  7389. add.l &0xc,%sp # clear result from stack
  7390. andi.l &0x7fff,%d0 # strip sign
  7391. sub.l (%sp),%d0 # add scale factor
  7392. cmp.l %d0,(tbl_fdiv_ovfl2.b,%pc,%d1.w*4)
  7393. blt.b fdiv_no_ovfl
  7394. mov.l (%sp)+,%d0
  7395. fdiv_ovfl_tst:
  7396. or.l &ovfl_inx_mask,USER_FPSR(%a6) # set ovfl/aovfl/ainex
  7397. mov.b FPCR_ENABLE(%a6),%d1
  7398. andi.b &0x13,%d1 # is OVFL or INEX enabled?
  7399. bne.b fdiv_ovfl_ena # yes
  7400. fdiv_ovfl_dis:
  7401. btst &neg_bit,FPSR_CC(%a6) # is result negative?
  7402. sne %d1 # set sign param accordingly
  7403. mov.l L_SCR3(%a6),%d0 # pass prec:rnd
  7404. bsr.l ovf_res # calculate default result
  7405. or.b %d0,FPSR_CC(%a6) # set INF if applicable
  7406. fmovm.x (%a0),&0x80 # return default result in fp0
  7407. rts
  7408. fdiv_ovfl_ena:
  7409. mov.l L_SCR3(%a6),%d1
  7410. andi.b &0xc0,%d1 # is precision extended?
  7411. bne.b fdiv_ovfl_ena_sd # no, do sgl or dbl
  7412. fdiv_ovfl_ena_cont:
  7413. fmovm.x &0x80,FP_SCR0(%a6) # move result to stack
  7414. mov.l %d2,-(%sp) # save d2
  7415. mov.w FP_SCR0_EX(%a6),%d1 # fetch {sgn,exp}
  7416. mov.w %d1,%d2 # make a copy
  7417. andi.l &0x7fff,%d1 # strip sign
  7418. sub.l %d0,%d1 # add scale factor
  7419. subi.l &0x6000,%d1 # subtract bias
  7420. andi.w &0x7fff,%d1 # clear sign bit
  7421. andi.w &0x8000,%d2 # keep old sign
  7422. or.w %d2,%d1 # concat old sign,new exp
  7423. mov.w %d1,FP_SCR0_EX(%a6) # insert new exponent
  7424. mov.l (%sp)+,%d2 # restore d2
  7425. fmovm.x FP_SCR0(%a6),&0x40 # return EXOP in fp1
  7426. bra.b fdiv_ovfl_dis
  7427. fdiv_ovfl_ena_sd:
  7428. fmovm.x FP_SCR1(%a6),&0x80 # load dst operand
  7429. mov.l L_SCR3(%a6),%d1
  7430. andi.b &0x30,%d1 # keep rnd mode
  7431. fmov.l %d1,%fpcr # set FPCR
  7432. fdiv.x FP_SCR0(%a6),%fp0 # execute divide
  7433. fmov.l &0x0,%fpcr # clear FPCR
  7434. bra.b fdiv_ovfl_ena_cont
  7435. fdiv_unfl:
  7436. bset &unfl_bit,FPSR_EXCEPT(%a6) # set unfl exc bit
  7437. fmovm.x FP_SCR1(%a6),&0x80 # load dst op
  7438. fmov.l &rz_mode*0x10,%fpcr # set FPCR
  7439. fmov.l &0x0,%fpsr # clear FPSR
  7440. fdiv.x FP_SCR0(%a6),%fp0 # execute divide
  7441. fmov.l %fpsr,%d1 # save status
  7442. fmov.l &0x0,%fpcr # clear FPCR
  7443. or.l %d1,USER_FPSR(%a6) # save INEX2,N
  7444. mov.b FPCR_ENABLE(%a6),%d1
  7445. andi.b &0x0b,%d1 # is UNFL or INEX enabled?
  7446. bne.b fdiv_unfl_ena # yes
  7447. fdiv_unfl_dis:
  7448. fmovm.x &0x80,FP_SCR0(%a6) # store out result
  7449. lea FP_SCR0(%a6),%a0 # pass: result addr
  7450. mov.l L_SCR3(%a6),%d1 # pass: rnd prec,mode
  7451. bsr.l unf_res # calculate default result
  7452. or.b %d0,FPSR_CC(%a6) # 'Z' may have been set
  7453. fmovm.x FP_SCR0(%a6),&0x80 # return default result in fp0
  7454. rts
  7455. #
  7456. # UNFL is enabled.
  7457. #
  7458. fdiv_unfl_ena:
  7459. fmovm.x FP_SCR1(%a6),&0x40 # load dst op
  7460. mov.l L_SCR3(%a6),%d1
  7461. andi.b &0xc0,%d1 # is precision extended?
  7462. bne.b fdiv_unfl_ena_sd # no, sgl or dbl
  7463. fmov.l L_SCR3(%a6),%fpcr # set FPCR
  7464. fdiv_unfl_ena_cont:
  7465. fmov.l &0x0,%fpsr # clear FPSR
  7466. fdiv.x FP_SCR0(%a6),%fp1 # execute divide
  7467. fmov.l &0x0,%fpcr # clear FPCR
  7468. fmovm.x &0x40,FP_SCR0(%a6) # save result to stack
  7469. mov.l %d2,-(%sp) # save d2
  7470. mov.w FP_SCR0_EX(%a6),%d1 # fetch {sgn,exp}
  7471. mov.l %d1,%d2 # make a copy
  7472. andi.l &0x7fff,%d1 # strip sign
  7473. andi.w &0x8000,%d2 # keep old sign
  7474. sub.l %d0,%d1 # add scale factoer
  7475. addi.l &0x6000,%d1 # add bias
  7476. andi.w &0x7fff,%d1
  7477. or.w %d2,%d1 # concat old sign,new exp
  7478. mov.w %d1,FP_SCR0_EX(%a6) # insert new exp
  7479. mov.l (%sp)+,%d2 # restore d2
  7480. fmovm.x FP_SCR0(%a6),&0x40 # return EXOP in fp1
  7481. bra.w fdiv_unfl_dis
  7482. fdiv_unfl_ena_sd:
  7483. mov.l L_SCR3(%a6),%d1
  7484. andi.b &0x30,%d1 # use only rnd mode
  7485. fmov.l %d1,%fpcr # set FPCR
  7486. bra.b fdiv_unfl_ena_cont
  7487. #
  7488. # the divide operation MAY underflow:
  7489. #
  7490. fdiv_may_unfl:
  7491. fmovm.x FP_SCR1(%a6),&0x80 # load dst op
  7492. fmov.l L_SCR3(%a6),%fpcr # set FPCR
  7493. fmov.l &0x0,%fpsr # clear FPSR
  7494. fdiv.x FP_SCR0(%a6),%fp0 # execute divide
  7495. fmov.l %fpsr,%d1 # save status
  7496. fmov.l &0x0,%fpcr # clear FPCR
  7497. or.l %d1,USER_FPSR(%a6) # save INEX2,N
  7498. fabs.x %fp0,%fp1 # make a copy of result
  7499. fcmp.b %fp1,&0x1 # is |result| > 1.b?
  7500. fbgt.w fdiv_normal_exit # no; no underflow occurred
  7501. fblt.w fdiv_unfl # yes; underflow occurred
  7502. #
  7503. # we still don't know if underflow occurred. result is ~ equal to 1. but,
  7504. # we don't know if the result was an underflow that rounded up to a 1
  7505. # or a normalized number that rounded down to a 1. so, redo the entire
  7506. # operation using RZ as the rounding mode to see what the pre-rounded
  7507. # result is. this case should be relatively rare.
  7508. #
  7509. fmovm.x FP_SCR1(%a6),&0x40 # load dst op into fp1
  7510. mov.l L_SCR3(%a6),%d1
  7511. andi.b &0xc0,%d1 # keep rnd prec
  7512. ori.b &rz_mode*0x10,%d1 # insert RZ
  7513. fmov.l %d1,%fpcr # set FPCR
  7514. fmov.l &0x0,%fpsr # clear FPSR
  7515. fdiv.x FP_SCR0(%a6),%fp1 # execute divide
  7516. fmov.l &0x0,%fpcr # clear FPCR
  7517. fabs.x %fp1 # make absolute value
  7518. fcmp.b %fp1,&0x1 # is |result| < 1.b?
  7519. fbge.w fdiv_normal_exit # no; no underflow occurred
  7520. bra.w fdiv_unfl # yes; underflow occurred
  7521. ############################################################################
  7522. #
  7523. # Divide: inputs are not both normalized; what are they?
  7524. #
  7525. fdiv_not_norm:
  7526. mov.w (tbl_fdiv_op.b,%pc,%d1.w*2),%d1
  7527. jmp (tbl_fdiv_op.b,%pc,%d1.w*1)
  7528. swbeg &48
  7529. tbl_fdiv_op:
  7530. short fdiv_norm - tbl_fdiv_op # NORM / NORM
  7531. short fdiv_inf_load - tbl_fdiv_op # NORM / ZERO
  7532. short fdiv_zero_load - tbl_fdiv_op # NORM / INF
  7533. short fdiv_res_qnan - tbl_fdiv_op # NORM / QNAN
  7534. short fdiv_norm - tbl_fdiv_op # NORM / DENORM
  7535. short fdiv_res_snan - tbl_fdiv_op # NORM / SNAN
  7536. short tbl_fdiv_op - tbl_fdiv_op #
  7537. short tbl_fdiv_op - tbl_fdiv_op #
  7538. short fdiv_zero_load - tbl_fdiv_op # ZERO / NORM
  7539. short fdiv_res_operr - tbl_fdiv_op # ZERO / ZERO
  7540. short fdiv_zero_load - tbl_fdiv_op # ZERO / INF
  7541. short fdiv_res_qnan - tbl_fdiv_op # ZERO / QNAN
  7542. short fdiv_zero_load - tbl_fdiv_op # ZERO / DENORM
  7543. short fdiv_res_snan - tbl_fdiv_op # ZERO / SNAN
  7544. short tbl_fdiv_op - tbl_fdiv_op #
  7545. short tbl_fdiv_op - tbl_fdiv_op #
  7546. short fdiv_inf_dst - tbl_fdiv_op # INF / NORM
  7547. short fdiv_inf_dst - tbl_fdiv_op # INF / ZERO
  7548. short fdiv_res_operr - tbl_fdiv_op # INF / INF
  7549. short fdiv_res_qnan - tbl_fdiv_op # INF / QNAN
  7550. short fdiv_inf_dst - tbl_fdiv_op # INF / DENORM
  7551. short fdiv_res_snan - tbl_fdiv_op # INF / SNAN
  7552. short tbl_fdiv_op - tbl_fdiv_op #
  7553. short tbl_fdiv_op - tbl_fdiv_op #
  7554. short fdiv_res_qnan - tbl_fdiv_op # QNAN / NORM
  7555. short fdiv_res_qnan - tbl_fdiv_op # QNAN / ZERO
  7556. short fdiv_res_qnan - tbl_fdiv_op # QNAN / INF
  7557. short fdiv_res_qnan - tbl_fdiv_op # QNAN / QNAN
  7558. short fdiv_res_qnan - tbl_fdiv_op # QNAN / DENORM
  7559. short fdiv_res_snan - tbl_fdiv_op # QNAN / SNAN
  7560. short tbl_fdiv_op - tbl_fdiv_op #
  7561. short tbl_fdiv_op - tbl_fdiv_op #
  7562. short fdiv_norm - tbl_fdiv_op # DENORM / NORM
  7563. short fdiv_inf_load - tbl_fdiv_op # DENORM / ZERO
  7564. short fdiv_zero_load - tbl_fdiv_op # DENORM / INF
  7565. short fdiv_res_qnan - tbl_fdiv_op # DENORM / QNAN
  7566. short fdiv_norm - tbl_fdiv_op # DENORM / DENORM
  7567. short fdiv_res_snan - tbl_fdiv_op # DENORM / SNAN
  7568. short tbl_fdiv_op - tbl_fdiv_op #
  7569. short tbl_fdiv_op - tbl_fdiv_op #
  7570. short fdiv_res_snan - tbl_fdiv_op # SNAN / NORM
  7571. short fdiv_res_snan - tbl_fdiv_op # SNAN / ZERO
  7572. short fdiv_res_snan - tbl_fdiv_op # SNAN / INF
  7573. short fdiv_res_snan - tbl_fdiv_op # SNAN / QNAN
  7574. short fdiv_res_snan - tbl_fdiv_op # SNAN / DENORM
  7575. short fdiv_res_snan - tbl_fdiv_op # SNAN / SNAN
  7576. short tbl_fdiv_op - tbl_fdiv_op #
  7577. short tbl_fdiv_op - tbl_fdiv_op #
  7578. fdiv_res_qnan:
  7579. bra.l res_qnan
  7580. fdiv_res_snan:
  7581. bra.l res_snan
  7582. fdiv_res_operr:
  7583. bra.l res_operr
  7584. global fdiv_zero_load # global for fsgldiv
  7585. fdiv_zero_load:
  7586. mov.b SRC_EX(%a0),%d0 # result sign is exclusive
  7587. mov.b DST_EX(%a1),%d1 # or of input signs.
  7588. eor.b %d0,%d1
  7589. bpl.b fdiv_zero_load_p # result is positive
  7590. fmov.s &0x80000000,%fp0 # load a -ZERO
  7591. mov.b &z_bmask+neg_bmask,FPSR_CC(%a6) # set Z/N
  7592. rts
  7593. fdiv_zero_load_p:
  7594. fmov.s &0x00000000,%fp0 # load a +ZERO
  7595. mov.b &z_bmask,FPSR_CC(%a6) # set Z
  7596. rts
  7597. #
  7598. # The destination was In Range and the source was a ZERO. The result,
  7599. # therefore, is an INF w/ the proper sign.
  7600. # So, determine the sign and return a new INF (w/ the j-bit cleared).
  7601. #
  7602. global fdiv_inf_load # global for fsgldiv
  7603. fdiv_inf_load:
  7604. ori.w &dz_mask+adz_mask,2+USER_FPSR(%a6) # no; set DZ/ADZ
  7605. mov.b SRC_EX(%a0),%d0 # load both signs
  7606. mov.b DST_EX(%a1),%d1
  7607. eor.b %d0,%d1
  7608. bpl.b fdiv_inf_load_p # result is positive
  7609. fmov.s &0xff800000,%fp0 # make result -INF
  7610. mov.b &inf_bmask+neg_bmask,FPSR_CC(%a6) # set INF/N
  7611. rts
  7612. fdiv_inf_load_p:
  7613. fmov.s &0x7f800000,%fp0 # make result +INF
  7614. mov.b &inf_bmask,FPSR_CC(%a6) # set INF
  7615. rts
  7616. #
  7617. # The destination was an INF w/ an In Range or ZERO source, the result is
  7618. # an INF w/ the proper sign.
  7619. # The 68881/882 returns the destination INF w/ the new sign(if the j-bit of the
  7620. # dst INF is set, then then j-bit of the result INF is also set).
  7621. #
  7622. global fdiv_inf_dst # global for fsgldiv
  7623. fdiv_inf_dst:
  7624. mov.b DST_EX(%a1),%d0 # load both signs
  7625. mov.b SRC_EX(%a0),%d1
  7626. eor.b %d0,%d1
  7627. bpl.b fdiv_inf_dst_p # result is positive
  7628. fmovm.x DST(%a1),&0x80 # return result in fp0
  7629. fabs.x %fp0 # clear sign bit
  7630. fneg.x %fp0 # set sign bit
  7631. mov.b &inf_bmask+neg_bmask,FPSR_CC(%a6) # set INF/NEG
  7632. rts
  7633. fdiv_inf_dst_p:
  7634. fmovm.x DST(%a1),&0x80 # return result in fp0
  7635. fabs.x %fp0 # return positive INF
  7636. mov.b &inf_bmask,FPSR_CC(%a6) # set INF
  7637. rts
  7638. #########################################################################
  7639. # XDEF **************************************************************** #
  7640. # fneg(): emulates the fneg instruction #
  7641. # fsneg(): emulates the fsneg instruction #
  7642. # fdneg(): emulates the fdneg instruction #
  7643. # #
  7644. # XREF **************************************************************** #
  7645. # norm() - normalize a denorm to provide EXOP #
  7646. # scale_to_zero_src() - scale sgl/dbl source exponent #
  7647. # ovf_res() - return default overflow result #
  7648. # unf_res() - return default underflow result #
  7649. # res_qnan_1op() - return QNAN result #
  7650. # res_snan_1op() - return SNAN result #
  7651. # #
  7652. # INPUT *************************************************************** #
  7653. # a0 = pointer to extended precision source operand #
  7654. # d0 = rnd prec,mode #
  7655. # #
  7656. # OUTPUT ************************************************************** #
  7657. # fp0 = result #
  7658. # fp1 = EXOP (if exception occurred) #
  7659. # #
  7660. # ALGORITHM *********************************************************** #
  7661. # Handle NANs, zeroes, and infinities as special cases. Separate #
  7662. # norms/denorms into ext/sgl/dbl precisions. Extended precision can be #
  7663. # emulated by simply setting sign bit. Sgl/dbl operands must be scaled #
  7664. # and an actual fneg performed to see if overflow/underflow would have #
  7665. # occurred. If so, return default underflow/overflow result. Else, #
  7666. # scale the result exponent and return result. FPSR gets set based on #
  7667. # the result value. #
  7668. # #
  7669. #########################################################################
  7670. global fsneg
  7671. fsneg:
  7672. andi.b &0x30,%d0 # clear rnd prec
  7673. ori.b &s_mode*0x10,%d0 # insert sgl precision
  7674. bra.b fneg
  7675. global fdneg
  7676. fdneg:
  7677. andi.b &0x30,%d0 # clear rnd prec
  7678. ori.b &d_mode*0x10,%d0 # insert dbl prec
  7679. global fneg
  7680. fneg:
  7681. mov.l %d0,L_SCR3(%a6) # store rnd info
  7682. mov.b STAG(%a6),%d1
  7683. bne.w fneg_not_norm # optimize on non-norm input
  7684. #
  7685. # NEGATE SIGN : norms and denorms ONLY!
  7686. #
  7687. fneg_norm:
  7688. andi.b &0xc0,%d0 # is precision extended?
  7689. bne.w fneg_not_ext # no; go handle sgl or dbl
  7690. #
  7691. # precision selected is extended. so...we can not get an underflow
  7692. # or overflow because of rounding to the correct precision. so...
  7693. # skip the scaling and unscaling...
  7694. #
  7695. mov.l SRC_HI(%a0),FP_SCR0_HI(%a6)
  7696. mov.l SRC_LO(%a0),FP_SCR0_LO(%a6)
  7697. mov.w SRC_EX(%a0),%d0
  7698. eori.w &0x8000,%d0 # negate sign
  7699. bpl.b fneg_norm_load # sign is positive
  7700. mov.b &neg_bmask,FPSR_CC(%a6) # set 'N' ccode bit
  7701. fneg_norm_load:
  7702. mov.w %d0,FP_SCR0_EX(%a6)
  7703. fmovm.x FP_SCR0(%a6),&0x80 # return result in fp0
  7704. rts
  7705. #
  7706. # for an extended precision DENORM, the UNFL exception bit is set
  7707. # the accrued bit is NOT set in this instance(no inexactness!)
  7708. #
  7709. fneg_denorm:
  7710. andi.b &0xc0,%d0 # is precision extended?
  7711. bne.b fneg_not_ext # no; go handle sgl or dbl
  7712. bset &unfl_bit,FPSR_EXCEPT(%a6) # set unfl exc bit
  7713. mov.l SRC_HI(%a0),FP_SCR0_HI(%a6)
  7714. mov.l SRC_LO(%a0),FP_SCR0_LO(%a6)
  7715. mov.w SRC_EX(%a0),%d0
  7716. eori.w &0x8000,%d0 # negate sign
  7717. bpl.b fneg_denorm_done # no
  7718. mov.b &neg_bmask,FPSR_CC(%a6) # yes, set 'N' ccode bit
  7719. fneg_denorm_done:
  7720. mov.w %d0,FP_SCR0_EX(%a6)
  7721. fmovm.x FP_SCR0(%a6),&0x80 # return default result in fp0
  7722. btst &unfl_bit,FPCR_ENABLE(%a6) # is UNFL enabled?
  7723. bne.b fneg_ext_unfl_ena # yes
  7724. rts
  7725. #
  7726. # the input is an extended DENORM and underflow is enabled in the FPCR.
  7727. # normalize the mantissa and add the bias of 0x6000 to the resulting negative
  7728. # exponent and insert back into the operand.
  7729. #
  7730. fneg_ext_unfl_ena:
  7731. lea FP_SCR0(%a6),%a0 # pass: ptr to operand
  7732. bsr.l norm # normalize result
  7733. neg.w %d0 # new exponent = -(shft val)
  7734. addi.w &0x6000,%d0 # add new bias to exponent
  7735. mov.w FP_SCR0_EX(%a6),%d1 # fetch old sign,exp
  7736. andi.w &0x8000,%d1 # keep old sign
  7737. andi.w &0x7fff,%d0 # clear sign position
  7738. or.w %d1,%d0 # concat old sign, new exponent
  7739. mov.w %d0,FP_SCR0_EX(%a6) # insert new exponent
  7740. fmovm.x FP_SCR0(%a6),&0x40 # return EXOP in fp1
  7741. rts
  7742. #
  7743. # operand is either single or double
  7744. #
  7745. fneg_not_ext:
  7746. cmpi.b %d0,&s_mode*0x10 # separate sgl/dbl prec
  7747. bne.b fneg_dbl
  7748. #
  7749. # operand is to be rounded to single precision
  7750. #
  7751. fneg_sgl:
  7752. mov.w SRC_EX(%a0),FP_SCR0_EX(%a6)
  7753. mov.l SRC_HI(%a0),FP_SCR0_HI(%a6)
  7754. mov.l SRC_LO(%a0),FP_SCR0_LO(%a6)
  7755. bsr.l scale_to_zero_src # calculate scale factor
  7756. cmpi.l %d0,&0x3fff-0x3f80 # will move in underflow?
  7757. bge.w fneg_sd_unfl # yes; go handle underflow
  7758. cmpi.l %d0,&0x3fff-0x407e # will move in overflow?
  7759. beq.w fneg_sd_may_ovfl # maybe; go check
  7760. blt.w fneg_sd_ovfl # yes; go handle overflow
  7761. #
  7762. # operand will NOT overflow or underflow when moved in to the fp reg file
  7763. #
  7764. fneg_sd_normal:
  7765. fmov.l &0x0,%fpsr # clear FPSR
  7766. fmov.l L_SCR3(%a6),%fpcr # set FPCR
  7767. fneg.x FP_SCR0(%a6),%fp0 # perform negation
  7768. fmov.l %fpsr,%d1 # save FPSR
  7769. fmov.l &0x0,%fpcr # clear FPCR
  7770. or.l %d1,USER_FPSR(%a6) # save INEX2,N
  7771. fneg_sd_normal_exit:
  7772. mov.l %d2,-(%sp) # save d2
  7773. fmovm.x &0x80,FP_SCR0(%a6) # store out result
  7774. mov.w FP_SCR0_EX(%a6),%d1 # load sgn,exp
  7775. mov.w %d1,%d2 # make a copy
  7776. andi.l &0x7fff,%d1 # strip sign
  7777. sub.l %d0,%d1 # add scale factor
  7778. andi.w &0x8000,%d2 # keep old sign
  7779. or.w %d1,%d2 # concat old sign,new exp
  7780. mov.w %d2,FP_SCR0_EX(%a6) # insert new exponent
  7781. mov.l (%sp)+,%d2 # restore d2
  7782. fmovm.x FP_SCR0(%a6),&0x80 # return result in fp0
  7783. rts
  7784. #
  7785. # operand is to be rounded to double precision
  7786. #
  7787. fneg_dbl:
  7788. mov.w SRC_EX(%a0),FP_SCR0_EX(%a6)
  7789. mov.l SRC_HI(%a0),FP_SCR0_HI(%a6)
  7790. mov.l SRC_LO(%a0),FP_SCR0_LO(%a6)
  7791. bsr.l scale_to_zero_src # calculate scale factor
  7792. cmpi.l %d0,&0x3fff-0x3c00 # will move in underflow?
  7793. bge.b fneg_sd_unfl # yes; go handle underflow
  7794. cmpi.l %d0,&0x3fff-0x43fe # will move in overflow?
  7795. beq.w fneg_sd_may_ovfl # maybe; go check
  7796. blt.w fneg_sd_ovfl # yes; go handle overflow
  7797. bra.w fneg_sd_normal # no; ho handle normalized op
  7798. #
  7799. # operand WILL underflow when moved in to the fp register file
  7800. #
  7801. fneg_sd_unfl:
  7802. bset &unfl_bit,FPSR_EXCEPT(%a6) # set unfl exc bit
  7803. eori.b &0x80,FP_SCR0_EX(%a6) # negate sign
  7804. bpl.b fneg_sd_unfl_tst
  7805. bset &neg_bit,FPSR_CC(%a6) # set 'N' ccode bit
  7806. # if underflow or inexact is enabled, go calculate EXOP first.
  7807. fneg_sd_unfl_tst:
  7808. mov.b FPCR_ENABLE(%a6),%d1
  7809. andi.b &0x0b,%d1 # is UNFL or INEX enabled?
  7810. bne.b fneg_sd_unfl_ena # yes
  7811. fneg_sd_unfl_dis:
  7812. lea FP_SCR0(%a6),%a0 # pass: result addr
  7813. mov.l L_SCR3(%a6),%d1 # pass: rnd prec,mode
  7814. bsr.l unf_res # calculate default result
  7815. or.b %d0,FPSR_CC(%a6) # unf_res may have set 'Z'
  7816. fmovm.x FP_SCR0(%a6),&0x80 # return default result in fp0
  7817. rts
  7818. #
  7819. # operand will underflow AND underflow is enabled.
  7820. # therefore, we must return the result rounded to extended precision.
  7821. #
  7822. fneg_sd_unfl_ena:
  7823. mov.l FP_SCR0_HI(%a6),FP_SCR1_HI(%a6)
  7824. mov.l FP_SCR0_LO(%a6),FP_SCR1_LO(%a6)
  7825. mov.w FP_SCR0_EX(%a6),%d1 # load current exponent
  7826. mov.l %d2,-(%sp) # save d2
  7827. mov.l %d1,%d2 # make a copy
  7828. andi.l &0x7fff,%d1 # strip sign
  7829. andi.w &0x8000,%d2 # keep old sign
  7830. sub.l %d0,%d1 # subtract scale factor
  7831. addi.l &0x6000,%d1 # add new bias
  7832. andi.w &0x7fff,%d1
  7833. or.w %d2,%d1 # concat new sign,new exp
  7834. mov.w %d1,FP_SCR1_EX(%a6) # insert new exp
  7835. fmovm.x FP_SCR1(%a6),&0x40 # return EXOP in fp1
  7836. mov.l (%sp)+,%d2 # restore d2
  7837. bra.b fneg_sd_unfl_dis
  7838. #
  7839. # operand WILL overflow.
  7840. #
  7841. fneg_sd_ovfl:
  7842. fmov.l &0x0,%fpsr # clear FPSR
  7843. fmov.l L_SCR3(%a6),%fpcr # set FPCR
  7844. fneg.x FP_SCR0(%a6),%fp0 # perform negation
  7845. fmov.l &0x0,%fpcr # clear FPCR
  7846. fmov.l %fpsr,%d1 # save FPSR
  7847. or.l %d1,USER_FPSR(%a6) # save INEX2,N
  7848. fneg_sd_ovfl_tst:
  7849. or.l &ovfl_inx_mask,USER_FPSR(%a6) # set ovfl/aovfl/ainex
  7850. mov.b FPCR_ENABLE(%a6),%d1
  7851. andi.b &0x13,%d1 # is OVFL or INEX enabled?
  7852. bne.b fneg_sd_ovfl_ena # yes
  7853. #
  7854. # OVFL is not enabled; therefore, we must create the default result by
  7855. # calling ovf_res().
  7856. #
  7857. fneg_sd_ovfl_dis:
  7858. btst &neg_bit,FPSR_CC(%a6) # is result negative?
  7859. sne %d1 # set sign param accordingly
  7860. mov.l L_SCR3(%a6),%d0 # pass: prec,mode
  7861. bsr.l ovf_res # calculate default result
  7862. or.b %d0,FPSR_CC(%a6) # set INF,N if applicable
  7863. fmovm.x (%a0),&0x80 # return default result in fp0
  7864. rts
  7865. #
  7866. # OVFL is enabled.
  7867. # the INEX2 bit has already been updated by the round to the correct precision.
  7868. # now, round to extended(and don't alter the FPSR).
  7869. #
  7870. fneg_sd_ovfl_ena:
  7871. mov.l %d2,-(%sp) # save d2
  7872. mov.w FP_SCR0_EX(%a6),%d1 # fetch {sgn,exp}
  7873. mov.l %d1,%d2 # make a copy
  7874. andi.l &0x7fff,%d1 # strip sign
  7875. andi.w &0x8000,%d2 # keep old sign
  7876. sub.l %d0,%d1 # add scale factor
  7877. subi.l &0x6000,%d1 # subtract bias
  7878. andi.w &0x7fff,%d1
  7879. or.w %d2,%d1 # concat sign,exp
  7880. mov.w %d1,FP_SCR0_EX(%a6) # insert new exponent
  7881. fmovm.x FP_SCR0(%a6),&0x40 # return EXOP in fp1
  7882. mov.l (%sp)+,%d2 # restore d2
  7883. bra.b fneg_sd_ovfl_dis
  7884. #
  7885. # the move in MAY underflow. so...
  7886. #
  7887. fneg_sd_may_ovfl:
  7888. fmov.l &0x0,%fpsr # clear FPSR
  7889. fmov.l L_SCR3(%a6),%fpcr # set FPCR
  7890. fneg.x FP_SCR0(%a6),%fp0 # perform negation
  7891. fmov.l %fpsr,%d1 # save status
  7892. fmov.l &0x0,%fpcr # clear FPCR
  7893. or.l %d1,USER_FPSR(%a6) # save INEX2,N
  7894. fabs.x %fp0,%fp1 # make a copy of result
  7895. fcmp.b %fp1,&0x2 # is |result| >= 2.b?
  7896. fbge.w fneg_sd_ovfl_tst # yes; overflow has occurred
  7897. # no, it didn't overflow; we have correct result
  7898. bra.w fneg_sd_normal_exit
  7899. ##########################################################################
  7900. #
  7901. # input is not normalized; what is it?
  7902. #
  7903. fneg_not_norm:
  7904. cmpi.b %d1,&DENORM # weed out DENORM
  7905. beq.w fneg_denorm
  7906. cmpi.b %d1,&SNAN # weed out SNAN
  7907. beq.l res_snan_1op
  7908. cmpi.b %d1,&QNAN # weed out QNAN
  7909. beq.l res_qnan_1op
  7910. #
  7911. # do the fneg; at this point, only possible ops are ZERO and INF.
  7912. # use fneg to determine ccodes.
  7913. # prec:mode should be zero at this point but it won't affect answer anyways.
  7914. #
  7915. fneg.x SRC_EX(%a0),%fp0 # do fneg
  7916. fmov.l %fpsr,%d0
  7917. rol.l &0x8,%d0 # put ccodes in lo byte
  7918. mov.b %d0,FPSR_CC(%a6) # insert correct ccodes
  7919. rts
  7920. #########################################################################
  7921. # XDEF **************************************************************** #
  7922. # ftst(): emulates the ftest instruction #
  7923. # #
  7924. # XREF **************************************************************** #
  7925. # res{s,q}nan_1op() - set NAN result for monadic instruction #
  7926. # #
  7927. # INPUT *************************************************************** #
  7928. # a0 = pointer to extended precision source operand #
  7929. # #
  7930. # OUTPUT ************************************************************** #
  7931. # none #
  7932. # #
# ALGORITHM *********************************************************** #
#	Check the source operand tag (STAG) and set the FPSR condition	#
# codes according to the operand type and sign.				#
  7936. # #
  7937. #########################################################################
  7938. global ftst
  7939. ftst:
  7940. mov.b STAG(%a6),%d1
  7941. bne.b ftst_not_norm # optimize on non-norm input
  7942. #
  7943. # Norm:
  7944. #
  7945. ftst_norm:
  7946. tst.b SRC_EX(%a0) # is operand negative?
  7947. bmi.b ftst_norm_m # yes
  7948. rts
  7949. ftst_norm_m:
  7950. mov.b &neg_bmask,FPSR_CC(%a6) # set 'N' ccode bit
  7951. rts
  7952. #
  7953. # input is not normalized; what is it?
  7954. #
  7955. ftst_not_norm:
  7956. cmpi.b %d1,&ZERO # weed out ZERO
  7957. beq.b ftst_zero
  7958. cmpi.b %d1,&INF # weed out INF
  7959. beq.b ftst_inf
  7960. cmpi.b %d1,&SNAN # weed out SNAN
  7961. beq.l res_snan_1op
  7962. cmpi.b %d1,&QNAN # weed out QNAN
  7963. beq.l res_qnan_1op
  7964. #
  7965. # Denorm:
  7966. #
  7967. ftst_denorm:
  7968. tst.b SRC_EX(%a0) # is operand negative?
  7969. bmi.b ftst_denorm_m # yes
  7970. rts
  7971. ftst_denorm_m:
  7972. mov.b &neg_bmask,FPSR_CC(%a6) # set 'N' ccode bit
  7973. rts
  7974. #
  7975. # Infinity:
  7976. #
  7977. ftst_inf:
  7978. tst.b SRC_EX(%a0) # is operand negative?
  7979. bmi.b ftst_inf_m # yes
  7980. ftst_inf_p:
  7981. mov.b &inf_bmask,FPSR_CC(%a6) # set 'I' ccode bit
  7982. rts
  7983. ftst_inf_m:
  7984. mov.b &inf_bmask+neg_bmask,FPSR_CC(%a6) # set 'I','N' ccode bits
  7985. rts
  7986. #
  7987. # Zero:
  7988. #
  7989. ftst_zero:
  7990. tst.b SRC_EX(%a0) # is operand negative?
  7991. bmi.b ftst_zero_m # yes
  7992. ftst_zero_p:
  7993. mov.b &z_bmask,FPSR_CC(%a6) # set 'N' ccode bit
  7994. rts
  7995. ftst_zero_m:
  7996. mov.b &z_bmask+neg_bmask,FPSR_CC(%a6) # set 'Z','N' ccode bits
  7997. rts
  7998. #########################################################################
  7999. # XDEF **************************************************************** #
  8000. # fint(): emulates the fint instruction #
  8001. # #
  8002. # XREF **************************************************************** #
  8003. # res_{s,q}nan_1op() - set NAN result for monadic operation #
  8004. # #
  8005. # INPUT *************************************************************** #
  8006. # a0 = pointer to extended precision source operand #
  8007. # d0 = round precision/mode #
  8008. # #
  8009. # OUTPUT ************************************************************** #
  8010. # fp0 = result #
  8011. # #
  8012. # ALGORITHM *********************************************************** #
  8013. # Separate according to operand type. Unnorms don't pass through #
  8014. # here. For norms, load the rounding mode/prec, execute a "fint", then #
  8015. # store the resulting FPSR bits. #
  8016. # For denorms, force the j-bit to a one and do the same as for #
  8017. # norms. Denorms are so low that the answer will either be a zero or a #
  8018. # one. #
  8019. # For zeroes/infs/NANs, return the same while setting the FPSR #
  8020. # as appropriate. #
  8021. # #
  8022. #########################################################################
global fint
fint:
mov.b STAG(%a6),%d1 # fetch source operand type tag
bne.b fint_not_norm # optimize on non-norm input
#
# Norm: load caller's rnd mode (prec forced to ext), do the real fint,
# then accumulate the resulting FPSR exception bits.
#
fint_norm:
andi.b &0x30,%d0 # keep rnd mode; force prec = ext
fmov.l %d0,%fpcr # set FPCR
fmov.l &0x0,%fpsr # clear FPSR
fint.x SRC(%a0),%fp0 # execute fint
fmov.l &0x0,%fpcr # clear FPCR
fmov.l %fpsr,%d0 # save FPSR
or.l %d0,USER_FPSR(%a6) # set exception bits
rts
#
# input is not normalized; what is it?
#
fint_not_norm:
cmpi.b %d1,&ZERO # weed out ZERO
beq.b fint_zero
cmpi.b %d1,&INF # weed out INF
beq.b fint_inf
cmpi.b %d1,&DENORM # weed out DENORM
beq.b fint_denorm
cmpi.b %d1,&SNAN # weed out SNAN
beq.l res_snan_1op
bra.l res_qnan_1op # weed out QNAN
#
# Denorm:
#
# for DENORMs, the result will be either (+/-)ZERO or (+/-)1.
# also, the INEX2 and AINEX exception bits will be set.
# so, we could either set these manually or force the DENORM
# to a very small NORM and ship it to the NORM routine.
# I do the latter.
#
fint_denorm:
mov.w SRC_EX(%a0),FP_SCR0_EX(%a6) # copy sign; denorm exp is already zero
mov.b &0x80,FP_SCR0_HI(%a6) # set j-bit: DENORM ==> smallest NORM
lea FP_SCR0(%a6),%a0 # pass: ptr to fabricated small norm
bra.b fint_norm
#
# Zero: result is the same signed zero; set ccodes accordingly.
#
fint_zero:
tst.b SRC_EX(%a0) # is ZERO negative?
bmi.b fint_zero_m # yes
fint_zero_p:
fmov.s &0x00000000,%fp0 # return +ZERO in fp0
mov.b &z_bmask,FPSR_CC(%a6) # set 'Z' ccode bit
rts
fint_zero_m:
fmov.s &0x80000000,%fp0 # return -ZERO in fp0
mov.b &z_bmask+neg_bmask,FPSR_CC(%a6) # set 'Z','N' ccode bits
rts
#
# Infinity: result is the same signed infinity; set ccodes accordingly.
#
fint_inf:
fmovm.x SRC(%a0),&0x80 # return result in fp0
tst.b SRC_EX(%a0) # is INF negative?
bmi.b fint_inf_m # yes
fint_inf_p:
mov.b &inf_bmask,FPSR_CC(%a6) # set 'I' ccode bit
rts
fint_inf_m:
mov.b &inf_bmask+neg_bmask,FPSR_CC(%a6) # set 'N','I' ccode bits
rts
  8093. #########################################################################
  8094. # XDEF **************************************************************** #
  8095. # fintrz(): emulates the fintrz instruction #
  8096. # #
  8097. # XREF **************************************************************** #
  8098. # res_{s,q}nan_1op() - set NAN result for monadic operation #
  8099. # #
  8100. # INPUT *************************************************************** #
  8101. # a0 = pointer to extended precision source operand #
  8102. # d0 = round precision/mode #
  8103. # #
  8104. # OUTPUT ************************************************************** #
  8105. # fp0 = result #
  8106. # #
  8107. # ALGORITHM *********************************************************** #
  8108. # Separate according to operand type. Unnorms don't pass through #
  8109. # here. For norms, load the rounding mode/prec, execute a "fintrz", #
  8110. # then store the resulting FPSR bits. #
  8111. # For denorms, force the j-bit to a one and do the same as for #
  8112. # norms. Denorms are so low that the answer will either be a zero or a #
  8113. # one. #
  8114. # For zeroes/infs/NANs, return the same while setting the FPSR #
  8115. # as appropriate. #
  8116. # #
  8117. #########################################################################
global fintrz
fintrz:
mov.b STAG(%a6),%d1 # fetch source operand type tag
bne.b fintrz_not_norm # optimize on non-norm input
#
# Norm:
#
fintrz_norm:
# NOTE(review): FPCR is not loaded here -- fintrz always chops toward zero
# regardless of rnd mode; this assumes FPCR is already clear on entry
# (confirm against the package's dispatch code).
fmov.l &0x0,%fpsr # clear FPSR
fintrz.x SRC(%a0),%fp0 # execute fintrz
fmov.l %fpsr,%d0 # save FPSR
or.l %d0,USER_FPSR(%a6) # set exception bits
rts
#
# input is not normalized; what is it?
#
fintrz_not_norm:
cmpi.b %d1,&ZERO # weed out ZERO
beq.b fintrz_zero
cmpi.b %d1,&INF # weed out INF
beq.b fintrz_inf
cmpi.b %d1,&DENORM # weed out DENORM
beq.b fintrz_denorm
cmpi.b %d1,&SNAN # weed out SNAN
beq.l res_snan_1op
bra.l res_qnan_1op # weed out QNAN
#
# Denorm:
#
# for DENORMs, the result will be (+/-)ZERO.
# also, the INEX2 and AINEX exception bits will be set.
# so, we could either set these manually or force the DENORM
# to a very small NORM and ship it to the NORM routine.
# I do the latter.
#
fintrz_denorm:
mov.w SRC_EX(%a0),FP_SCR0_EX(%a6) # copy sign; denorm exp is already zero
mov.b &0x80,FP_SCR0_HI(%a6) # set j-bit: DENORM ==> smallest NORM
lea FP_SCR0(%a6),%a0 # pass: ptr to fabricated small norm
bra.b fintrz_norm
#
# Zero: result is the same signed zero; set ccodes accordingly.
#
fintrz_zero:
tst.b SRC_EX(%a0) # is ZERO negative?
bmi.b fintrz_zero_m # yes
fintrz_zero_p:
fmov.s &0x00000000,%fp0 # return +ZERO in fp0
mov.b &z_bmask,FPSR_CC(%a6) # set 'Z' ccode bit
rts
fintrz_zero_m:
fmov.s &0x80000000,%fp0 # return -ZERO in fp0
mov.b &z_bmask+neg_bmask,FPSR_CC(%a6) # set 'Z','N' ccode bits
rts
#
# Infinity: result is the same signed infinity; set ccodes accordingly.
#
fintrz_inf:
fmovm.x SRC(%a0),&0x80 # return result in fp0
tst.b SRC_EX(%a0) # is INF negative?
bmi.b fintrz_inf_m # yes
fintrz_inf_p:
mov.b &inf_bmask,FPSR_CC(%a6) # set 'I' ccode bit
rts
fintrz_inf_m:
mov.b &inf_bmask+neg_bmask,FPSR_CC(%a6) # set 'N','I' ccode bits
rts
  8185. #########################################################################
  8186. # XDEF **************************************************************** #
  8187. # fabs(): emulates the fabs instruction #
  8188. # fsabs(): emulates the fsabs instruction #
  8189. # fdabs(): emulates the fdabs instruction #
  8190. # #
  8191. # XREF **************************************************************** #
  8192. # norm() - normalize denorm mantissa to provide EXOP #
# scale_to_zero_src() - make exponent = 0; get scale factor #
  8194. # unf_res() - calculate underflow result #
  8195. # ovf_res() - calculate overflow result #
  8196. # res_{s,q}nan_1op() - set NAN result for monadic operation #
  8197. # #
  8198. # INPUT *************************************************************** #
  8199. # a0 = pointer to extended precision source operand #
  8200. # d0 = rnd precision/mode #
  8201. # #
  8202. # OUTPUT ************************************************************** #
  8203. # fp0 = result #
  8204. # fp1 = EXOP (if exception occurred) #
  8205. # #
  8206. # ALGORITHM *********************************************************** #
  8207. # Handle NANs, infinities, and zeroes as special cases. Divide #
  8208. # norms into extended, single, and double precision. #
  8209. # Simply clear sign for extended precision norm. Ext prec denorm #
  8210. # gets an EXOP created for it since it's an underflow. #
  8211. # Double and single precision can overflow and underflow. First, #
  8212. # scale the operand such that the exponent is zero. Perform an "fabs" #
  8213. # using the correct rnd mode/prec. Check to see if the original #
  8214. # exponent would take an exception. If so, use unf_res() or ovf_res() #
  8215. # to calculate the default result. Also, create the EXOP for the #
  8216. # exceptional case. If no exception should occur, insert the correct #
  8217. # result exponent and return. #
  8218. # Unnorms don't pass through here. #
  8219. # #
  8220. #########################################################################
global fsabs
fsabs:
andi.b &0x30,%d0 # clear rnd prec
ori.b &s_mode*0x10,%d0 # insert sgl precision
bra.b fabs
global fdabs
fdabs:
andi.b &0x30,%d0 # clear rnd prec
ori.b &d_mode*0x10,%d0 # insert dbl precision
# falls through into fabs (no bra needed)
global fabs
fabs:
mov.l %d0,L_SCR3(%a6) # store rnd info
mov.b STAG(%a6),%d1 # fetch source operand type tag
bne.w fabs_not_norm # optimize on non-norm input
#
# ABSOLUTE VALUE: norms and denorms ONLY!
#
fabs_norm:
andi.b &0xc0,%d0 # is precision extended?
bne.b fabs_not_ext # no; go handle sgl or dbl
#
# precision selected is extended. so...we can not get an underflow
# or overflow because of rounding to the correct precision. so...
# skip the scaling and unscaling...
#
mov.l SRC_HI(%a0),FP_SCR0_HI(%a6)
mov.l SRC_LO(%a0),FP_SCR0_LO(%a6)
mov.w SRC_EX(%a0),%d1
bclr &15,%d1 # clear sign bit: force absolute value
mov.w %d1,FP_SCR0_EX(%a6) # insert exponent
fmovm.x FP_SCR0(%a6),&0x80 # return result in fp0
rts
#
# for an extended precision DENORM, the UNFL exception bit is set
# the accrued bit is NOT set in this instance(no inexactness!)
#
fabs_denorm:
andi.b &0xc0,%d0 # is precision extended?
bne.b fabs_not_ext # no
bset &unfl_bit,FPSR_EXCEPT(%a6) # set unfl exc bit
mov.l SRC_HI(%a0),FP_SCR0_HI(%a6)
mov.l SRC_LO(%a0),FP_SCR0_LO(%a6)
mov.w SRC_EX(%a0),%d0
bclr &15,%d0 # clear sign
mov.w %d0,FP_SCR0_EX(%a6) # insert exponent
fmovm.x FP_SCR0(%a6),&0x80 # return default result in fp0
btst &unfl_bit,FPCR_ENABLE(%a6) # is UNFL enabled?
bne.b fabs_ext_unfl_ena # yes; must also build EXOP
rts
#
# the input is an extended DENORM and underflow is enabled in the FPCR.
# normalize the mantissa and add the bias of 0x6000 to the resulting negative
# exponent and insert back into the operand.
#
fabs_ext_unfl_ena:
lea FP_SCR0(%a6),%a0 # pass: ptr to operand
bsr.l norm # normalize result
neg.w %d0 # new exponent = -(shft val)
addi.w &0x6000,%d0 # add new bias to exponent
mov.w FP_SCR0_EX(%a6),%d1 # fetch old sign,exp
andi.w &0x8000,%d1 # keep old sign
andi.w &0x7fff,%d0 # clear sign position
or.w %d1,%d0 # concat old sign, new exponent
mov.w %d0,FP_SCR0_EX(%a6) # insert new exponent
fmovm.x FP_SCR0(%a6),&0x40 # return EXOP in fp1
rts
#
# operand is either single or double
#
fabs_not_ext:
cmpi.b %d0,&s_mode*0x10 # separate sgl/dbl prec
bne.b fabs_dbl
#
# operand is to be rounded to single precision
#
fabs_sgl:
mov.w SRC_EX(%a0),FP_SCR0_EX(%a6)
mov.l SRC_HI(%a0),FP_SCR0_HI(%a6)
mov.l SRC_LO(%a0),FP_SCR0_LO(%a6)
bsr.l scale_to_zero_src # calculate scale factor
cmpi.l %d0,&0x3fff-0x3f80 # will move in underflow?
bge.w fabs_sd_unfl # yes; go handle underflow
cmpi.l %d0,&0x3fff-0x407e # will move in overflow?
beq.w fabs_sd_may_ovfl # maybe; go check
blt.w fabs_sd_ovfl # yes; go handle overflow
#
# operand will NOT overflow or underflow when moved in to the fp reg file
#
fabs_sd_normal:
fmov.l &0x0,%fpsr # clear FPSR
fmov.l L_SCR3(%a6),%fpcr # set FPCR
fabs.x FP_SCR0(%a6),%fp0 # perform absolute
fmov.l %fpsr,%d1 # save FPSR
fmov.l &0x0,%fpcr # clear FPCR
or.l %d1,USER_FPSR(%a6) # save INEX2,N
fabs_sd_normal_exit:
mov.l %d2,-(%sp) # save d2
fmovm.x &0x80,FP_SCR0(%a6) # store out result
mov.w FP_SCR0_EX(%a6),%d1 # load sgn,exp
mov.l %d1,%d2 # make a copy
andi.l &0x7fff,%d1 # strip sign
sub.l %d0,%d1 # subtract scale factor (restore true exp)
andi.w &0x8000,%d2 # keep old sign
or.w %d1,%d2 # concat old sign,new exp
mov.w %d2,FP_SCR0_EX(%a6) # insert new exponent
mov.l (%sp)+,%d2 # restore d2
fmovm.x FP_SCR0(%a6),&0x80 # return result in fp0
rts
#
# operand is to be rounded to double precision
#
fabs_dbl:
mov.w SRC_EX(%a0),FP_SCR0_EX(%a6)
mov.l SRC_HI(%a0),FP_SCR0_HI(%a6)
mov.l SRC_LO(%a0),FP_SCR0_LO(%a6)
bsr.l scale_to_zero_src # calculate scale factor
cmpi.l %d0,&0x3fff-0x3c00 # will move in underflow?
bge.b fabs_sd_unfl # yes; go handle underflow
cmpi.l %d0,&0x3fff-0x43fe # will move in overflow?
beq.w fabs_sd_may_ovfl # maybe; go check
blt.w fabs_sd_ovfl # yes; go handle overflow
bra.w fabs_sd_normal # no; go handle normalized op
#
# operand WILL underflow when moved in to the fp register file
#
fabs_sd_unfl:
bset &unfl_bit,FPSR_EXCEPT(%a6) # set unfl exc bit
bclr &0x7,FP_SCR0_EX(%a6) # clear sign bit: force absolute value
# if underflow or inexact is enabled, go calculate EXOP first.
mov.b FPCR_ENABLE(%a6),%d1
andi.b &0x0b,%d1 # is UNFL or INEX enabled?
bne.b fabs_sd_unfl_ena # yes
fabs_sd_unfl_dis:
lea FP_SCR0(%a6),%a0 # pass: result addr
mov.l L_SCR3(%a6),%d1 # pass: rnd prec,mode
bsr.l unf_res # calculate default result
or.b %d0,FPSR_CC(%a6) # set possible 'Z' ccode
fmovm.x FP_SCR0(%a6),&0x80 # return default result in fp0
rts
#
# operand will underflow AND underflow is enabled.
# therefore, we must return the result rounded to extended precision.
#
fabs_sd_unfl_ena:
mov.l FP_SCR0_HI(%a6),FP_SCR1_HI(%a6)
mov.l FP_SCR0_LO(%a6),FP_SCR1_LO(%a6)
mov.w FP_SCR0_EX(%a6),%d1 # load current exponent
mov.l %d2,-(%sp) # save d2
mov.l %d1,%d2 # make a copy
andi.l &0x7fff,%d1 # strip sign
andi.w &0x8000,%d2 # keep old sign
sub.l %d0,%d1 # subtract scale factor
addi.l &0x6000,%d1 # add new bias
andi.w &0x7fff,%d1
or.w %d2,%d1 # concat new sign,new exp
mov.w %d1,FP_SCR1_EX(%a6) # insert new exp
fmovm.x FP_SCR1(%a6),&0x40 # return EXOP in fp1
mov.l (%sp)+,%d2 # restore d2
bra.b fabs_sd_unfl_dis
#
# operand WILL overflow.
#
fabs_sd_ovfl:
fmov.l &0x0,%fpsr # clear FPSR
fmov.l L_SCR3(%a6),%fpcr # set FPCR
fabs.x FP_SCR0(%a6),%fp0 # perform absolute
fmov.l &0x0,%fpcr # clear FPCR
fmov.l %fpsr,%d1 # save FPSR
or.l %d1,USER_FPSR(%a6) # save INEX2,N
fabs_sd_ovfl_tst:
or.l &ovfl_inx_mask,USER_FPSR(%a6) # set ovfl/aovfl/ainex
mov.b FPCR_ENABLE(%a6),%d1
andi.b &0x13,%d1 # is OVFL or INEX enabled?
bne.b fabs_sd_ovfl_ena # yes
#
# OVFL is not enabled; therefore, we must create the default result by
# calling ovf_res().
#
fabs_sd_ovfl_dis:
btst &neg_bit,FPSR_CC(%a6) # is result negative?
sne %d1 # set sign param accordingly
mov.l L_SCR3(%a6),%d0 # pass: prec,mode
bsr.l ovf_res # calculate default result
or.b %d0,FPSR_CC(%a6) # set INF,N if applicable
fmovm.x (%a0),&0x80 # return default result in fp0
rts
#
# OVFL is enabled.
# the INEX2 bit has already been updated by the round to the correct precision.
# now, round to extended(and don't alter the FPSR).
#
fabs_sd_ovfl_ena:
mov.l %d2,-(%sp) # save d2
mov.w FP_SCR0_EX(%a6),%d1 # fetch {sgn,exp}
mov.l %d1,%d2 # make a copy
andi.l &0x7fff,%d1 # strip sign
andi.w &0x8000,%d2 # keep old sign
sub.l %d0,%d1 # subtract scale factor
subi.l &0x6000,%d1 # subtract bias
andi.w &0x7fff,%d1
or.w %d2,%d1 # concat sign,exp
mov.w %d1,FP_SCR0_EX(%a6) # insert new exponent
fmovm.x FP_SCR0(%a6),&0x40 # return EXOP in fp1
mov.l (%sp)+,%d2 # restore d2
bra.b fabs_sd_ovfl_dis
#
# the move in MAY overflow. so...
#
fabs_sd_may_ovfl:
fmov.l &0x0,%fpsr # clear FPSR
fmov.l L_SCR3(%a6),%fpcr # set FPCR
fabs.x FP_SCR0(%a6),%fp0 # perform absolute
fmov.l %fpsr,%d1 # save status
fmov.l &0x0,%fpcr # clear FPCR
or.l %d1,USER_FPSR(%a6) # save INEX2,N
fabs.x %fp0,%fp1 # make a copy of result
fcmp.b %fp1,&0x2 # is |result| >= 2.b?
fbge.w fabs_sd_ovfl_tst # yes; overflow has occurred
# no, it didn't overflow; we have correct result
bra.w fabs_sd_normal_exit
##########################################################################
#
# input is not normalized; what is it?
#
fabs_not_norm:
cmpi.b %d1,&DENORM # weed out DENORM
beq.w fabs_denorm
cmpi.b %d1,&SNAN # weed out SNAN
beq.l res_snan_1op
cmpi.b %d1,&QNAN # weed out QNAN
beq.l res_qnan_1op
fabs.x SRC(%a0),%fp0 # force absolute value
cmpi.b %d1,&INF # weed out INF
beq.b fabs_inf
fabs_zero:
mov.b &z_bmask,FPSR_CC(%a6) # set 'Z' ccode bit
rts
fabs_inf:
mov.b &inf_bmask,FPSR_CC(%a6) # set 'I' ccode bit
rts
  8461. #########################################################################
  8462. # XDEF **************************************************************** #
  8463. # fcmp(): fp compare op routine #
  8464. # #
  8465. # XREF **************************************************************** #
  8466. # res_qnan() - return QNAN result #
  8467. # res_snan() - return SNAN result #
  8468. # #
  8469. # INPUT *************************************************************** #
  8470. # a0 = pointer to extended precision source operand #
  8471. # a1 = pointer to extended precision destination operand #
  8472. # d0 = round prec/mode #
  8473. # #
  8474. # OUTPUT ************************************************************** #
  8475. # None #
  8476. # #
  8477. # ALGORITHM *********************************************************** #
  8478. # Handle NANs and denorms as special cases. For everything else, #
  8479. # just use the actual fcmp instruction to produce the correct condition #
  8480. # codes. #
  8481. # #
  8482. #########################################################################
global fcmp
fcmp:
clr.w %d1
mov.b DTAG(%a6),%d1 # build jump-table index:
lsl.b &0x3,%d1 # %d1 = (dst tag << 3) | src tag
or.b STAG(%a6),%d1
bne.b fcmp_not_norm # optimize on non-norm input
#
# COMPARE FP OPs : NORMs, ZEROs, INFs, and "corrected" DENORMs
#
fcmp_norm:
fmovm.x DST(%a1),&0x80 # load dst op
fcmp.x %fp0,SRC(%a0) # do compare
fmov.l %fpsr,%d0 # save FPSR
rol.l &0x8,%d0 # extract ccode bits
mov.b %d0,FPSR_CC(%a6) # set ccode bits(no exc bits are set)
rts
#
# fcmp: inputs are not both normalized; what are they?
#
fcmp_not_norm:
mov.w (tbl_fcmp_op.b,%pc,%d1.w*2),%d1
jmp (tbl_fcmp_op.b,%pc,%d1.w*1)
swbeg &48
tbl_fcmp_op:
short fcmp_norm - tbl_fcmp_op # NORM - NORM
short fcmp_norm - tbl_fcmp_op # NORM - ZERO
short fcmp_norm - tbl_fcmp_op # NORM - INF
short fcmp_res_qnan - tbl_fcmp_op # NORM - QNAN
short fcmp_nrm_dnrm - tbl_fcmp_op # NORM - DENORM
short fcmp_res_snan - tbl_fcmp_op # NORM - SNAN
short tbl_fcmp_op - tbl_fcmp_op #
short tbl_fcmp_op - tbl_fcmp_op #
short fcmp_norm - tbl_fcmp_op # ZERO - NORM
short fcmp_norm - tbl_fcmp_op # ZERO - ZERO
short fcmp_norm - tbl_fcmp_op # ZERO - INF
short fcmp_res_qnan - tbl_fcmp_op # ZERO - QNAN
short fcmp_dnrm_s - tbl_fcmp_op # ZERO - DENORM
short fcmp_res_snan - tbl_fcmp_op # ZERO - SNAN
short tbl_fcmp_op - tbl_fcmp_op #
short tbl_fcmp_op - tbl_fcmp_op #
short fcmp_norm - tbl_fcmp_op # INF - NORM
short fcmp_norm - tbl_fcmp_op # INF - ZERO
short fcmp_norm - tbl_fcmp_op # INF - INF
short fcmp_res_qnan - tbl_fcmp_op # INF - QNAN
short fcmp_dnrm_s - tbl_fcmp_op # INF - DENORM
short fcmp_res_snan - tbl_fcmp_op # INF - SNAN
short tbl_fcmp_op - tbl_fcmp_op #
short tbl_fcmp_op - tbl_fcmp_op #
short fcmp_res_qnan - tbl_fcmp_op # QNAN - NORM
short fcmp_res_qnan - tbl_fcmp_op # QNAN - ZERO
short fcmp_res_qnan - tbl_fcmp_op # QNAN - INF
short fcmp_res_qnan - tbl_fcmp_op # QNAN - QNAN
short fcmp_res_qnan - tbl_fcmp_op # QNAN - DENORM
short fcmp_res_snan - tbl_fcmp_op # QNAN - SNAN
short tbl_fcmp_op - tbl_fcmp_op #
short tbl_fcmp_op - tbl_fcmp_op #
short fcmp_dnrm_nrm - tbl_fcmp_op # DENORM - NORM
short fcmp_dnrm_d - tbl_fcmp_op # DENORM - ZERO
short fcmp_dnrm_d - tbl_fcmp_op # DENORM - INF
short fcmp_res_qnan - tbl_fcmp_op # DENORM - QNAN
short fcmp_dnrm_sd - tbl_fcmp_op # DENORM - DENORM
short fcmp_res_snan - tbl_fcmp_op # DENORM - SNAN
short tbl_fcmp_op - tbl_fcmp_op #
short tbl_fcmp_op - tbl_fcmp_op #
short fcmp_res_snan - tbl_fcmp_op # SNAN - NORM
short fcmp_res_snan - tbl_fcmp_op # SNAN - ZERO
short fcmp_res_snan - tbl_fcmp_op # SNAN - INF
short fcmp_res_snan - tbl_fcmp_op # SNAN - QNAN
short fcmp_res_snan - tbl_fcmp_op # SNAN - DENORM
short fcmp_res_snan - tbl_fcmp_op # SNAN - SNAN
short tbl_fcmp_op - tbl_fcmp_op #
short tbl_fcmp_op - tbl_fcmp_op #
# unlike all other functions for QNAN and SNAN, fcmp does NOT set the
# 'N' bit for a negative QNAN or SNAN input so we must squelch it here.
fcmp_res_qnan:
bsr.l res_qnan
andi.b &0xf7,FPSR_CC(%a6) # clear 'N' ccode bit
rts
fcmp_res_snan:
bsr.l res_snan
andi.b &0xf7,FPSR_CC(%a6) # clear 'N' ccode bit
rts
#
# DENORMs are a little more difficult.
# If you have 2 DENORMs, then you can just force the j-bit to a one
# and use the fcmp_norm routine.
# If you have a DENORM and an INF or ZERO, just force the DENORM's j-bit to a one
# and use the fcmp_norm routine.
# If you have a DENORM and a NORM with opposite signs, then use fcmp_norm, also.
# But with a DENORM and a NORM of the same sign, the neg bit is set if the
# (1) signs are (+) and the DENORM is the dst or
# (2) signs are (-) and the DENORM is the src
#
fcmp_dnrm_s:
mov.w SRC_EX(%a0),FP_SCR0_EX(%a6)
mov.l SRC_HI(%a0),%d0
bset &31,%d0 # DENORM src; make into small norm
mov.l %d0,FP_SCR0_HI(%a6)
mov.l SRC_LO(%a0),FP_SCR0_LO(%a6)
lea FP_SCR0(%a6),%a0
bra.w fcmp_norm
fcmp_dnrm_d:
# NOTE(review): .l write here (vs .w in fcmp_dnrm_s) also fills the pad
# word after the exponent -- appears harmless; confirm FP_SCR0 layout.
mov.l DST_EX(%a1),FP_SCR0_EX(%a6)
mov.l DST_HI(%a1),%d0
bset &31,%d0 # DENORM dst; make into small norm
mov.l %d0,FP_SCR0_HI(%a6)
mov.l DST_LO(%a1),FP_SCR0_LO(%a6)
lea FP_SCR0(%a6),%a1
bra.w fcmp_norm
fcmp_dnrm_sd:
mov.w DST_EX(%a1),FP_SCR1_EX(%a6)
mov.w SRC_EX(%a0),FP_SCR0_EX(%a6)
mov.l DST_HI(%a1),%d0
bset &31,%d0 # DENORM dst; make into small norm
mov.l %d0,FP_SCR1_HI(%a6)
mov.l SRC_HI(%a0),%d0
bset &31,%d0 # DENORM src; make into small norm
mov.l %d0,FP_SCR0_HI(%a6)
mov.l DST_LO(%a1),FP_SCR1_LO(%a6)
mov.l SRC_LO(%a0),FP_SCR0_LO(%a6)
lea FP_SCR1(%a6),%a1
lea FP_SCR0(%a6),%a0
bra.w fcmp_norm
fcmp_nrm_dnrm:
mov.b SRC_EX(%a0),%d0 # determine if like signs
mov.b DST_EX(%a1),%d1
eor.b %d0,%d1
bmi.w fcmp_dnrm_s # signs differ; real compare decides
# signs are the same, so must determine the answer ourselves.
tst.b %d0 # is src op negative?
bmi.b fcmp_nrm_dnrm_m # yes
rts
fcmp_nrm_dnrm_m:
mov.b &neg_bmask,FPSR_CC(%a6) # set 'N' ccode bit
rts
fcmp_dnrm_nrm:
mov.b SRC_EX(%a0),%d0 # determine if like signs
mov.b DST_EX(%a1),%d1
eor.b %d0,%d1
bmi.w fcmp_dnrm_d # signs differ; real compare decides
# signs are the same, so must determine the answer ourselves.
tst.b %d0 # is src op negative?
bpl.b fcmp_dnrm_nrm_m # no
rts
fcmp_dnrm_nrm_m:
mov.b &neg_bmask,FPSR_CC(%a6) # set 'N' ccode bit
rts
  8631. #########################################################################
  8632. # XDEF **************************************************************** #
  8633. # fsglmul(): emulates the fsglmul instruction #
  8634. # #
  8635. # XREF **************************************************************** #
  8636. # scale_to_zero_src() - scale src exponent to zero #
  8637. # scale_to_zero_dst() - scale dst exponent to zero #
  8638. # unf_res4() - return default underflow result for sglop #
  8639. # ovf_res() - return default overflow result #
  8640. # res_qnan() - return QNAN result #
  8641. # res_snan() - return SNAN result #
  8642. # #
  8643. # INPUT *************************************************************** #
  8644. # a0 = pointer to extended precision source operand #
  8645. # a1 = pointer to extended precision destination operand #
# d0 = rnd prec,mode #
  8647. # #
  8648. # OUTPUT ************************************************************** #
  8649. # fp0 = result #
  8650. # fp1 = EXOP (if exception occurred) #
  8651. # #
  8652. # ALGORITHM *********************************************************** #
  8653. # Handle NANs, infinities, and zeroes as special cases. Divide #
  8654. # norms/denorms into ext/sgl/dbl precision. #
  8655. # For norms/denorms, scale the exponents such that a multiply #
  8656. # instruction won't cause an exception. Use the regular fsglmul to #
  8657. # compute a result. Check if the regular operands would have taken #
  8658. # an exception. If so, return the default overflow/underflow result #
  8659. # and return the EXOP if exceptions are enabled. Else, scale the #
  8660. # result operand to the proper exponent. #
  8661. # #
  8662. #########################################################################
  8663. global fsglmul
  8664. fsglmul:
  8665. mov.l %d0,L_SCR3(%a6) # store rnd info
  8666. clr.w %d1
  8667. mov.b DTAG(%a6),%d1
  8668. lsl.b &0x3,%d1
  8669. or.b STAG(%a6),%d1
  8670. bne.w fsglmul_not_norm # optimize on non-norm input
  8671. fsglmul_norm:
  8672. mov.w DST_EX(%a1),FP_SCR1_EX(%a6)
  8673. mov.l DST_HI(%a1),FP_SCR1_HI(%a6)
  8674. mov.l DST_LO(%a1),FP_SCR1_LO(%a6)
  8675. mov.w SRC_EX(%a0),FP_SCR0_EX(%a6)
  8676. mov.l SRC_HI(%a0),FP_SCR0_HI(%a6)
  8677. mov.l SRC_LO(%a0),FP_SCR0_LO(%a6)
  8678. bsr.l scale_to_zero_src # scale exponent
  8679. mov.l %d0,-(%sp) # save scale factor 1
  8680. bsr.l scale_to_zero_dst # scale dst exponent
  8681. add.l (%sp)+,%d0 # SCALE_FACTOR = scale1 + scale2
  8682. cmpi.l %d0,&0x3fff-0x7ffe # would result ovfl?
  8683. beq.w fsglmul_may_ovfl # result may rnd to overflow
  8684. blt.w fsglmul_ovfl # result will overflow
  8685. cmpi.l %d0,&0x3fff+0x0001 # would result unfl?
  8686. beq.w fsglmul_may_unfl # result may rnd to no unfl
  8687. bgt.w fsglmul_unfl # result will underflow
  8688. fsglmul_normal:
  8689. fmovm.x FP_SCR1(%a6),&0x80 # load dst op
  8690. fmov.l L_SCR3(%a6),%fpcr # set FPCR
  8691. fmov.l &0x0,%fpsr # clear FPSR
  8692. fsglmul.x FP_SCR0(%a6),%fp0 # execute sgl multiply
  8693. fmov.l %fpsr,%d1 # save status
  8694. fmov.l &0x0,%fpcr # clear FPCR
  8695. or.l %d1,USER_FPSR(%a6) # save INEX2,N
  8696. fsglmul_normal_exit:
  8697. fmovm.x &0x80,FP_SCR0(%a6) # store out result
  8698. mov.l %d2,-(%sp) # save d2
  8699. mov.w FP_SCR0_EX(%a6),%d1 # load {sgn,exp}
  8700. mov.l %d1,%d2 # make a copy
  8701. andi.l &0x7fff,%d1 # strip sign
  8702. andi.w &0x8000,%d2 # keep old sign
  8703. sub.l %d0,%d1 # add scale factor
  8704. or.w %d2,%d1 # concat old sign,new exp
  8705. mov.w %d1,FP_SCR0_EX(%a6) # insert new exponent
  8706. mov.l (%sp)+,%d2 # restore d2
  8707. fmovm.x FP_SCR0(%a6),&0x80 # return result in fp0
  8708. rts
  8709. fsglmul_ovfl:
  8710. fmovm.x FP_SCR1(%a6),&0x80 # load dst op
  8711. fmov.l L_SCR3(%a6),%fpcr # set FPCR
  8712. fmov.l &0x0,%fpsr # clear FPSR
  8713. fsglmul.x FP_SCR0(%a6),%fp0 # execute sgl multiply
  8714. fmov.l %fpsr,%d1 # save status
  8715. fmov.l &0x0,%fpcr # clear FPCR
  8716. or.l %d1,USER_FPSR(%a6) # save INEX2,N
  8717. fsglmul_ovfl_tst:
  8718. # save setting this until now because this is where fsglmul_may_ovfl may jump in
  8719. or.l &ovfl_inx_mask, USER_FPSR(%a6) # set ovfl/aovfl/ainex
  8720. mov.b FPCR_ENABLE(%a6),%d1
  8721. andi.b &0x13,%d1 # is OVFL or INEX enabled?
  8722. bne.b fsglmul_ovfl_ena # yes
  8723. fsglmul_ovfl_dis:
  8724. btst &neg_bit,FPSR_CC(%a6) # is result negative?
  8725. sne %d1 # set sign param accordingly
  8726. mov.l L_SCR3(%a6),%d0 # pass prec:rnd
  8727. andi.b &0x30,%d0 # force prec = ext
  8728. bsr.l ovf_res # calculate default result
  8729. or.b %d0,FPSR_CC(%a6) # set INF,N if applicable
  8730. fmovm.x (%a0),&0x80 # return default result in fp0
  8731. rts
  8732. fsglmul_ovfl_ena:
  8733. fmovm.x &0x80,FP_SCR0(%a6) # move result to stack
  8734. mov.l %d2,-(%sp) # save d2
  8735. mov.w FP_SCR0_EX(%a6),%d1 # fetch {sgn,exp}
  8736. mov.l %d1,%d2 # make a copy
  8737. andi.l &0x7fff,%d1 # strip sign
  8738. sub.l %d0,%d1 # add scale factor
  8739. subi.l &0x6000,%d1 # subtract bias
  8740. andi.w &0x7fff,%d1
  8741. andi.w &0x8000,%d2 # keep old sign
  8742. or.w %d2,%d1 # concat old sign,new exp
  8743. mov.w %d1,FP_SCR0_EX(%a6) # insert new exponent
  8744. mov.l (%sp)+,%d2 # restore d2
  8745. fmovm.x FP_SCR0(%a6),&0x40 # return EXOP in fp1
  8746. bra.b fsglmul_ovfl_dis
  8747. fsglmul_may_ovfl:
  8748. fmovm.x FP_SCR1(%a6),&0x80 # load dst op
  8749. fmov.l L_SCR3(%a6),%fpcr # set FPCR
  8750. fmov.l &0x0,%fpsr # clear FPSR
  8751. fsglmul.x FP_SCR0(%a6),%fp0 # execute sgl multiply
  8752. fmov.l %fpsr,%d1 # save status
  8753. fmov.l &0x0,%fpcr # clear FPCR
  8754. or.l %d1,USER_FPSR(%a6) # save INEX2,N
  8755. fabs.x %fp0,%fp1 # make a copy of result
  8756. fcmp.b %fp1,&0x2 # is |result| >= 2.b?
  8757. fbge.w fsglmul_ovfl_tst # yes; overflow has occurred
  8758. # no, it didn't overflow; we have correct result
  8759. bra.w fsglmul_normal_exit
  8760. fsglmul_unfl:
  8761. bset &unfl_bit,FPSR_EXCEPT(%a6) # set unfl exc bit
  8762. fmovm.x FP_SCR1(%a6),&0x80 # load dst op
  8763. fmov.l &rz_mode*0x10,%fpcr # set FPCR
  8764. fmov.l &0x0,%fpsr # clear FPSR
  8765. fsglmul.x FP_SCR0(%a6),%fp0 # execute sgl multiply
  8766. fmov.l %fpsr,%d1 # save status
  8767. fmov.l &0x0,%fpcr # clear FPCR
  8768. or.l %d1,USER_FPSR(%a6) # save INEX2,N
  8769. mov.b FPCR_ENABLE(%a6),%d1
  8770. andi.b &0x0b,%d1 # is UNFL or INEX enabled?
  8771. bne.b fsglmul_unfl_ena # yes
  8772. fsglmul_unfl_dis:
  8773. fmovm.x &0x80,FP_SCR0(%a6) # store out result
  8774. lea FP_SCR0(%a6),%a0 # pass: result addr
  8775. mov.l L_SCR3(%a6),%d1 # pass: rnd prec,mode
  8776. bsr.l unf_res4 # calculate default result
  8777. or.b %d0,FPSR_CC(%a6) # 'Z' bit may have been set
  8778. fmovm.x FP_SCR0(%a6),&0x80 # return default result in fp0
  8779. rts
  8780. #
  8781. # UNFL is enabled.
  8782. #
  8783. fsglmul_unfl_ena:
  8784. fmovm.x FP_SCR1(%a6),&0x40 # load dst op
  8785. fmov.l L_SCR3(%a6),%fpcr # set FPCR
  8786. fmov.l &0x0,%fpsr # clear FPSR
  8787. fsglmul.x FP_SCR0(%a6),%fp1 # execute sgl multiply
  8788. fmov.l &0x0,%fpcr # clear FPCR
  8789. fmovm.x &0x40,FP_SCR0(%a6) # save result to stack
  8790. mov.l %d2,-(%sp) # save d2
  8791. mov.w FP_SCR0_EX(%a6),%d1 # fetch {sgn,exp}
  8792. mov.l %d1,%d2 # make a copy
  8793. andi.l &0x7fff,%d1 # strip sign
  8794. andi.w &0x8000,%d2 # keep old sign
  8795. sub.l %d0,%d1 # add scale factor
  8796. addi.l &0x6000,%d1 # add bias
  8797. andi.w &0x7fff,%d1
  8798. or.w %d2,%d1 # concat old sign,new exp
  8799. mov.w %d1,FP_SCR0_EX(%a6) # insert new exponent
  8800. mov.l (%sp)+,%d2 # restore d2
  8801. fmovm.x FP_SCR0(%a6),&0x40 # return EXOP in fp1
  8802. bra.w fsglmul_unfl_dis
  8803. fsglmul_may_unfl:
# May-underflow path: the operands were scaled so the borderline result
# lands exactly at 2.0; compare |result| against 2.0 to decide whether a
# true underflow occurred.
  8804. fmovm.x FP_SCR1(%a6),&0x80 # load dst op
  8805. fmov.l L_SCR3(%a6),%fpcr # set FPCR
  8806. fmov.l &0x0,%fpsr # clear FPSR
  8807. fsglmul.x FP_SCR0(%a6),%fp0 # execute sgl multiply
  8808. fmov.l %fpsr,%d1 # save status
  8809. fmov.l &0x0,%fpcr # clear FPCR
  8810. or.l %d1,USER_FPSR(%a6) # save INEX2,N
  8811. fabs.x %fp0,%fp1 # make a copy of result
  8812. fcmp.b %fp1,&0x2 # is |result| > 2.b?
  8813. fbgt.w fsglmul_normal_exit # no; no underflow occurred
  8814. fblt.w fsglmul_unfl # yes; underflow occurred
  8815. #
  8816. # we still don't know if underflow occurred. result is ~ equal to 2. but,
  8817. # we don't know if the result was an underflow that rounded up to a 2 or
  8818. # a normalized number that rounded down to a 2. so, redo the entire operation
  8819. # using RZ as the rounding mode to see what the pre-rounded result is.
  8820. # this case should be relatively rare.
  8821. #
  8822. fmovm.x FP_SCR1(%a6),&0x40 # load dst op into fp1
  8823. mov.l L_SCR3(%a6),%d1 # fetch rnd prec,mode
  8824. andi.b &0xc0,%d1 # keep rnd prec
  8825. ori.b &rz_mode*0x10,%d1 # insert RZ
  8826. fmov.l %d1,%fpcr # set FPCR
  8827. fmov.l &0x0,%fpsr # clear FPSR
  8828. fsglmul.x FP_SCR0(%a6),%fp1 # execute sgl multiply
  8829. fmov.l &0x0,%fpcr # clear FPCR
  8830. fabs.x %fp1 # make absolute value
  8831. fcmp.b %fp1,&0x2 # is |result| < 2.b?
  8832. fbge.w fsglmul_normal_exit # no; no underflow occurred
  8833. bra.w fsglmul_unfl # yes, underflow occurred
  8834. ##############################################################################
  8835. #
  8836. # Single Precision Multiply: inputs are not both normalized; what are they?
  8837. #
  8838. fsglmul_not_norm:
# Dispatch on d1 = (DTAG << 3) | STAG. Each row of eight table entries is
# one destination-operand class; zero offsets (tbl - tbl) mark unused slots.
  8839. mov.w (tbl_fsglmul_op.b,%pc,%d1.w*2),%d1 # fetch table offset
  8840. jmp (tbl_fsglmul_op.b,%pc,%d1.w*1) # jump to handler
  8841. swbeg &48
  8842. tbl_fsglmul_op:
  8843. short fsglmul_norm - tbl_fsglmul_op # NORM x NORM
  8844. short fsglmul_zero - tbl_fsglmul_op # NORM x ZERO
  8845. short fsglmul_inf_src - tbl_fsglmul_op # NORM x INF
  8846. short fsglmul_res_qnan - tbl_fsglmul_op # NORM x QNAN
  8847. short fsglmul_norm - tbl_fsglmul_op # NORM x DENORM
  8848. short fsglmul_res_snan - tbl_fsglmul_op # NORM x SNAN
  8849. short tbl_fsglmul_op - tbl_fsglmul_op #
  8850. short tbl_fsglmul_op - tbl_fsglmul_op #
  8851. short fsglmul_zero - tbl_fsglmul_op # ZERO x NORM
  8852. short fsglmul_zero - tbl_fsglmul_op # ZERO x ZERO
  8853. short fsglmul_res_operr - tbl_fsglmul_op # ZERO x INF
  8854. short fsglmul_res_qnan - tbl_fsglmul_op # ZERO x QNAN
  8855. short fsglmul_zero - tbl_fsglmul_op # ZERO x DENORM
  8856. short fsglmul_res_snan - tbl_fsglmul_op # ZERO x SNAN
  8857. short tbl_fsglmul_op - tbl_fsglmul_op #
  8858. short tbl_fsglmul_op - tbl_fsglmul_op #
  8859. short fsglmul_inf_dst - tbl_fsglmul_op # INF x NORM
  8860. short fsglmul_res_operr - tbl_fsglmul_op # INF x ZERO
  8861. short fsglmul_inf_dst - tbl_fsglmul_op # INF x INF
  8862. short fsglmul_res_qnan - tbl_fsglmul_op # INF x QNAN
  8863. short fsglmul_inf_dst - tbl_fsglmul_op # INF x DENORM
  8864. short fsglmul_res_snan - tbl_fsglmul_op # INF x SNAN
  8865. short tbl_fsglmul_op - tbl_fsglmul_op #
  8866. short tbl_fsglmul_op - tbl_fsglmul_op #
  8867. short fsglmul_res_qnan - tbl_fsglmul_op # QNAN x NORM
  8868. short fsglmul_res_qnan - tbl_fsglmul_op # QNAN x ZERO
  8869. short fsglmul_res_qnan - tbl_fsglmul_op # QNAN x INF
  8870. short fsglmul_res_qnan - tbl_fsglmul_op # QNAN x QNAN
  8871. short fsglmul_res_qnan - tbl_fsglmul_op # QNAN x DENORM
  8872. short fsglmul_res_snan - tbl_fsglmul_op # QNAN x SNAN
  8873. short tbl_fsglmul_op - tbl_fsglmul_op #
  8874. short tbl_fsglmul_op - tbl_fsglmul_op #
  8875. short fsglmul_norm - tbl_fsglmul_op # DENORM x NORM
  8876. short fsglmul_zero - tbl_fsglmul_op # DENORM x ZERO
  8877. short fsglmul_inf_src - tbl_fsglmul_op # DENORM x INF
  8878. short fsglmul_res_qnan - tbl_fsglmul_op # DENORM x QNAN
  8879. short fsglmul_norm - tbl_fsglmul_op # DENORM x DENORM
  8880. short fsglmul_res_snan - tbl_fsglmul_op # DENORM x SNAN
  8881. short tbl_fsglmul_op - tbl_fsglmul_op #
  8882. short tbl_fsglmul_op - tbl_fsglmul_op #
  8883. short fsglmul_res_snan - tbl_fsglmul_op # SNAN x NORM
  8884. short fsglmul_res_snan - tbl_fsglmul_op # SNAN x ZERO
  8885. short fsglmul_res_snan - tbl_fsglmul_op # SNAN x INF
  8886. short fsglmul_res_snan - tbl_fsglmul_op # SNAN x QNAN
  8887. short fsglmul_res_snan - tbl_fsglmul_op # SNAN x DENORM
  8888. short fsglmul_res_snan - tbl_fsglmul_op # SNAN x SNAN
  8889. short tbl_fsglmul_op - tbl_fsglmul_op #
  8890. short tbl_fsglmul_op - tbl_fsglmul_op #
# Branch islands: long branches to the handlers shared with fmul.
  8891. fsglmul_res_operr:
  8892. bra.l res_operr
  8893. fsglmul_res_snan:
  8894. bra.l res_snan
  8895. fsglmul_res_qnan:
  8896. bra.l res_qnan
  8897. fsglmul_zero:
  8898. bra.l fmul_zero
  8899. fsglmul_inf_src:
  8900. bra.l fmul_inf_src
  8901. fsglmul_inf_dst:
  8902. bra.l fmul_inf_dst
  8903. #########################################################################
  8904. # XDEF **************************************************************** #
  8905. # fsgldiv(): emulates the fsgldiv instruction #
  8906. # #
  8907. # XREF **************************************************************** #
  8908. # scale_to_zero_src() - scale src exponent to zero #
  8909. # scale_to_zero_dst() - scale dst exponent to zero #
  8910. # unf_res4() - return default underflow result for sglop #
  8911. # ovf_res() - return default overflow result #
  8912. # res_qnan() - return QNAN result #
  8913. # res_snan() - return SNAN result #
  8914. # #
  8915. # INPUT *************************************************************** #
  8916. # a0 = pointer to extended precision source operand #
  8917. # a1 = pointer to extended precision destination operand #
  8918. # d0 rnd prec,mode #
  8919. # #
  8920. # OUTPUT ************************************************************** #
  8921. # fp0 = result #
  8922. # fp1 = EXOP (if exception occurred) #
  8923. # #
  8924. # ALGORITHM *********************************************************** #
  8925. # Handle NANs, infinities, and zeroes as special cases. Divide #
  8926. # norms/denorms into ext/sgl/dbl precision. #
  8927. # For norms/denorms, scale the exponents such that a divide #
  8928. # instruction won't cause an exception. Use the regular fsgldiv to #
  8929. # compute a result. Check if the regular operands would have taken #
  8930. # an exception. If so, return the default overflow/underflow result #
  8931. # and return the EXOP if exceptions are enabled. Else, scale the #
  8932. # result operand to the proper exponent. #
  8933. # #
  8934. #########################################################################
  8935. global fsgldiv
  8936. fsgldiv:
  8937. mov.l %d0,L_SCR3(%a6) # store rnd info
  8938. clr.w %d1 # clear scratch
  8939. mov.b DTAG(%a6),%d1
  8940. lsl.b &0x3,%d1 # shift dtag into bits 3-5
  8941. or.b STAG(%a6),%d1 # combine src tags
  8942. bne.w fsgldiv_not_norm # optimize on non-norm input
  8943. #
  8944. # DIVIDE: NORMs and DENORMs ONLY!
  8945. #
  8946. fsgldiv_norm:
  8947. mov.w DST_EX(%a1),FP_SCR1_EX(%a6)
  8948. mov.l DST_HI(%a1),FP_SCR1_HI(%a6)
  8949. mov.l DST_LO(%a1),FP_SCR1_LO(%a6)
  8950. mov.w SRC_EX(%a0),FP_SCR0_EX(%a6)
  8951. mov.l SRC_HI(%a0),FP_SCR0_HI(%a6)
  8952. mov.l SRC_LO(%a0),FP_SCR0_LO(%a6)
  8953. bsr.l scale_to_zero_src # calculate scale factor 1
  8954. mov.l %d0,-(%sp) # save scale factor 1
  8955. bsr.l scale_to_zero_dst # calculate scale factor 2
  8956. neg.l (%sp) # S.F. = scale1 - scale2
  8957. add.l %d0,(%sp) # (%sp) = scale2 - scale1
  8958. mov.w 2+L_SCR3(%a6),%d1 # fetch precision,mode
  8959. lsr.b &0x6,%d1 # shift prec to lo bits
  8960. mov.l (%sp)+,%d0
  8961. cmpi.l %d0,&0x3fff-0x7ffe # will result overflow?
  8962. ble.w fsgldiv_may_ovfl
  8963. cmpi.l %d0,&0x3fff-0x0000 # will result underflow?
  8964. beq.w fsgldiv_may_unfl # maybe
  8965. bgt.w fsgldiv_unfl # yes; go handle underflow
  8966. fsgldiv_normal:
  8967. fmovm.x FP_SCR1(%a6),&0x80 # load dst op
  8968. fmov.l L_SCR3(%a6),%fpcr # set FPCR
  8969. fmov.l &0x0,%fpsr # clear FPSR
  8970. fsgldiv.x FP_SCR0(%a6),%fp0 # perform sgl divide
  8971. fmov.l %fpsr,%d1 # save FPSR
  8972. fmov.l &0x0,%fpcr # clear FPCR
  8973. or.l %d1,USER_FPSR(%a6) # save INEX2,N
  8974. fsgldiv_normal_exit:
  8975. fmovm.x &0x80,FP_SCR0(%a6) # store result on stack
  8976. mov.l %d2,-(%sp) # save d2
  8977. mov.w FP_SCR0_EX(%a6),%d1 # load {sgn,exp}
  8978. mov.l %d1,%d2 # make a copy
  8979. andi.l &0x7fff,%d1 # strip sign
  8980. andi.w &0x8000,%d2 # keep old sign
  8981. sub.l %d0,%d1 # add scale factor
  8982. or.w %d2,%d1 # concat old sign,new exp
  8983. mov.w %d1,FP_SCR0_EX(%a6) # insert new exponent
  8984. mov.l (%sp)+,%d2 # restore d2
  8985. fmovm.x FP_SCR0(%a6),&0x80 # return result in fp0
  8986. rts
  8987. fsgldiv_may_ovfl:
  8988. fmovm.x FP_SCR1(%a6),&0x80 # load dst op
  8989. fmov.l L_SCR3(%a6),%fpcr # set FPCR
  8990. fmov.l &0x0,%fpsr # clear FPSR
  8991. fsgldiv.x FP_SCR0(%a6),%fp0 # execute divide
  8992. fmov.l %fpsr,%d1 # save status
  8993. fmov.l &0x0,%fpcr # clear FPCR
  8994. or.l %d1,USER_FPSR(%a6) # save INEX,N
  8995. fmovm.x &0x01,-(%sp) # save result to stack
  8996. mov.w (%sp),%d1 # fetch new exponent
  8997. add.l &0xc,%sp # discard result from stack
  8998. andi.l &0x7fff,%d1 # strip sign
  8999. sub.l %d0,%d1 # add scale factor
  9000. cmp.l %d1,&0x7fff # did divide overflow?
  9001. blt.b fsgldiv_normal_exit # no; exit normally
  9002. fsgldiv_ovfl_tst:
  9003. or.w &ovfl_inx_mask,2+USER_FPSR(%a6) # set ovfl/aovfl/ainex
  9004. mov.b FPCR_ENABLE(%a6),%d1
  9005. andi.b &0x13,%d1 # is OVFL or INEX enabled?
  9006. bne.b fsgldiv_ovfl_ena # yes
  9007. fsgldiv_ovfl_dis:
  9008. btst &neg_bit,FPSR_CC(%a6) # is result negative
  9009. sne %d1 # set sign param accordingly
  9010. mov.l L_SCR3(%a6),%d0 # pass prec:rnd
  9011. andi.b &0x30,%d0 # kill precision
  9012. bsr.l ovf_res # calculate default result
  9013. or.b %d0,FPSR_CC(%a6) # set INF if applicable
  9014. fmovm.x (%a0),&0x80 # return default result in fp0
  9015. rts
  9016. fsgldiv_ovfl_ena:
  9017. fmovm.x &0x80,FP_SCR0(%a6) # move result to stack
  9018. mov.l %d2,-(%sp) # save d2
  9019. mov.w FP_SCR0_EX(%a6),%d1 # fetch {sgn,exp}
  9020. mov.l %d1,%d2 # make a copy
  9021. andi.l &0x7fff,%d1 # strip sign
  9022. andi.w &0x8000,%d2 # keep old sign
  9023. sub.l %d0,%d1 # add scale factor
  9024. subi.l &0x6000,%d1 # subtract new bias
  9025. andi.w &0x7fff,%d1 # clear ms bit
  9026. or.w %d2,%d1 # concat old sign,new exp
  9027. mov.w %d1,FP_SCR0_EX(%a6) # insert new exponent
  9028. mov.l (%sp)+,%d2 # restore d2
  9029. fmovm.x FP_SCR0(%a6),&0x40 # return EXOP in fp1
  9030. bra.b fsgldiv_ovfl_dis
  9031. fsgldiv_unfl:
# Underflow is certain: redo the divide with RZ rounding so unf_res4 can
# produce the correctly-rounded default result from the truncated value.
  9032. bset &unfl_bit,FPSR_EXCEPT(%a6) # set unfl exc bit
  9033. fmovm.x FP_SCR1(%a6),&0x80 # load dst op
  9034. fmov.l &rz_mode*0x10,%fpcr # set FPCR: RZ rounding
  9035. fmov.l &0x0,%fpsr # clear FPSR
  9036. fsgldiv.x FP_SCR0(%a6),%fp0 # execute sgl divide
  9037. fmov.l %fpsr,%d1 # save status
  9038. fmov.l &0x0,%fpcr # clear FPCR
  9039. or.l %d1,USER_FPSR(%a6) # save INEX2,N
  9040. mov.b FPCR_ENABLE(%a6),%d1 # fetch exception enables
  9041. andi.b &0x0b,%d1 # is UNFL or INEX enabled?
  9042. bne.b fsgldiv_unfl_ena # yes
  9043. fsgldiv_unfl_dis:
  9044. fmovm.x &0x80,FP_SCR0(%a6) # store out result
  9045. lea FP_SCR0(%a6),%a0 # pass: result addr
  9046. mov.l L_SCR3(%a6),%d1 # pass: rnd prec,mode
  9047. bsr.l unf_res4 # calculate default result
  9048. or.b %d0,FPSR_CC(%a6) # 'Z' bit may have been set
  9049. fmovm.x FP_SCR0(%a6),&0x80 # return default result in fp0
  9050. rts
  9051. #
  9052. # UNFL is enabled.
  9053. #
  9054. fsgldiv_unfl_ena:
  9055. fmovm.x FP_SCR1(%a6),&0x40 # load dst op
  9056. fmov.l L_SCR3(%a6),%fpcr # set FPCR
  9057. fmov.l &0x0,%fpsr # clear FPSR
  9058. fsgldiv.x FP_SCR0(%a6),%fp1 # execute sgl divide
  9059. fmov.l &0x0,%fpcr # clear FPCR
  9060. fmovm.x &0x40,FP_SCR0(%a6) # save result to stack
  9061. mov.l %d2,-(%sp) # save d2
  9062. mov.w FP_SCR0_EX(%a6),%d1 # fetch {sgn,exp}
  9063. mov.l %d1,%d2 # make a copy
  9064. andi.l &0x7fff,%d1 # strip sign
  9065. andi.w &0x8000,%d2 # keep old sign
  9066. sub.l %d0,%d1 # add scale factor
  9067. addi.l &0x6000,%d1 # add bias
  9068. andi.w &0x7fff,%d1 # clear top bit
  9069. or.w %d2,%d1 # concat old sign, new exp
  9070. mov.w %d1,FP_SCR0_EX(%a6) # insert new exponent
  9071. mov.l (%sp)+,%d2 # restore d2
  9072. fmovm.x FP_SCR0(%a6),&0x40 # return EXOP in fp1
  9073. bra.b fsgldiv_unfl_dis
  9074. #
  9075. # the divide operation MAY underflow:
  9076. #
  9077. fsgldiv_may_unfl:
# May-underflow path: the scaled exponents put the borderline result at
# exactly 1.0; compare |result| against 1.0 to decide.
  9078. fmovm.x FP_SCR1(%a6),&0x80 # load dst op
  9079. fmov.l L_SCR3(%a6),%fpcr # set FPCR
  9080. fmov.l &0x0,%fpsr # clear FPSR
  9081. fsgldiv.x FP_SCR0(%a6),%fp0 # execute sgl divide
  9082. fmov.l %fpsr,%d1 # save status
  9083. fmov.l &0x0,%fpcr # clear FPCR
  9084. or.l %d1,USER_FPSR(%a6) # save INEX2,N
  9085. fabs.x %fp0,%fp1 # make a copy of result
  9086. fcmp.b %fp1,&0x1 # is |result| > 1.b?
  9087. fbgt.w fsgldiv_normal_exit # no; no underflow occurred
  9088. fblt.w fsgldiv_unfl # yes; underflow occurred
  9089. #
  9090. # we still don't know if underflow occurred. result is ~ equal to 1. but,
  9091. # we don't know if the result was an underflow that rounded up to a 1
  9092. # or a normalized number that rounded down to a 1. so, redo the entire
  9093. # operation using RZ as the rounding mode to see what the pre-rounded
  9094. # result is. this case should be relatively rare.
  9095. #
  9096. fmovm.x FP_SCR1(%a6),&0x40 # load dst op into %fp1
  9097. clr.l %d1 # clear scratch register
  9098. ori.b &rz_mode*0x10,%d1 # force RZ rnd mode
  9099. fmov.l %d1,%fpcr # set FPCR
  9100. fmov.l &0x0,%fpsr # clear FPSR
  9101. fsgldiv.x FP_SCR0(%a6),%fp1 # execute sgl divide
  9102. fmov.l &0x0,%fpcr # clear FPCR
  9103. fabs.x %fp1 # make absolute value
  9104. fcmp.b %fp1,&0x1 # is |result| < 1.b?
  9105. fbge.w fsgldiv_normal_exit # no; no underflow occurred
  9106. bra.w fsgldiv_unfl # yes; underflow occurred
  9107. ############################################################################
  9108. #
  9109. # Divide: inputs are not both normalized; what are they?
  9110. #
  9111. fsgldiv_not_norm:
# Dispatch on d1 = (DTAG << 3) | STAG. Each row of eight table entries is
# one destination-operand class; zero offsets (tbl - tbl) mark unused slots.
  9112. mov.w (tbl_fsgldiv_op.b,%pc,%d1.w*2),%d1 # fetch table offset
  9113. jmp (tbl_fsgldiv_op.b,%pc,%d1.w*1) # jump to handler
  9114. swbeg &48
  9115. tbl_fsgldiv_op:
  9116. short fsgldiv_norm - tbl_fsgldiv_op # NORM / NORM
  9117. short fsgldiv_inf_load - tbl_fsgldiv_op # NORM / ZERO
  9118. short fsgldiv_zero_load - tbl_fsgldiv_op # NORM / INF
  9119. short fsgldiv_res_qnan - tbl_fsgldiv_op # NORM / QNAN
  9120. short fsgldiv_norm - tbl_fsgldiv_op # NORM / DENORM
  9121. short fsgldiv_res_snan - tbl_fsgldiv_op # NORM / SNAN
  9122. short tbl_fsgldiv_op - tbl_fsgldiv_op #
  9123. short tbl_fsgldiv_op - tbl_fsgldiv_op #
  9124. short fsgldiv_zero_load - tbl_fsgldiv_op # ZERO / NORM
  9125. short fsgldiv_res_operr - tbl_fsgldiv_op # ZERO / ZERO
  9126. short fsgldiv_zero_load - tbl_fsgldiv_op # ZERO / INF
  9127. short fsgldiv_res_qnan - tbl_fsgldiv_op # ZERO / QNAN
  9128. short fsgldiv_zero_load - tbl_fsgldiv_op # ZERO / DENORM
  9129. short fsgldiv_res_snan - tbl_fsgldiv_op # ZERO / SNAN
  9130. short tbl_fsgldiv_op - tbl_fsgldiv_op #
  9131. short tbl_fsgldiv_op - tbl_fsgldiv_op #
  9132. short fsgldiv_inf_dst - tbl_fsgldiv_op # INF / NORM
  9133. short fsgldiv_inf_dst - tbl_fsgldiv_op # INF / ZERO
  9134. short fsgldiv_res_operr - tbl_fsgldiv_op # INF / INF
  9135. short fsgldiv_res_qnan - tbl_fsgldiv_op # INF / QNAN
  9136. short fsgldiv_inf_dst - tbl_fsgldiv_op # INF / DENORM
  9137. short fsgldiv_res_snan - tbl_fsgldiv_op # INF / SNAN
  9138. short tbl_fsgldiv_op - tbl_fsgldiv_op #
  9139. short tbl_fsgldiv_op - tbl_fsgldiv_op #
  9140. short fsgldiv_res_qnan - tbl_fsgldiv_op # QNAN / NORM
  9141. short fsgldiv_res_qnan - tbl_fsgldiv_op # QNAN / ZERO
  9142. short fsgldiv_res_qnan - tbl_fsgldiv_op # QNAN / INF
  9143. short fsgldiv_res_qnan - tbl_fsgldiv_op # QNAN / QNAN
  9144. short fsgldiv_res_qnan - tbl_fsgldiv_op # QNAN / DENORM
  9145. short fsgldiv_res_snan - tbl_fsgldiv_op # QNAN / SNAN
  9146. short tbl_fsgldiv_op - tbl_fsgldiv_op #
  9147. short tbl_fsgldiv_op - tbl_fsgldiv_op #
  9148. short fsgldiv_norm - tbl_fsgldiv_op # DENORM / NORM
  9149. short fsgldiv_inf_load - tbl_fsgldiv_op # DENORM / ZERO
  9150. short fsgldiv_zero_load - tbl_fsgldiv_op # DENORM / INF
  9151. short fsgldiv_res_qnan - tbl_fsgldiv_op # DENORM / QNAN
  9152. short fsgldiv_norm - tbl_fsgldiv_op # DENORM / DENORM
  9153. short fsgldiv_res_snan - tbl_fsgldiv_op # DENORM / SNAN
  9154. short tbl_fsgldiv_op - tbl_fsgldiv_op #
  9155. short tbl_fsgldiv_op - tbl_fsgldiv_op #
  9156. short fsgldiv_res_snan - tbl_fsgldiv_op # SNAN / NORM
  9157. short fsgldiv_res_snan - tbl_fsgldiv_op # SNAN / ZERO
  9158. short fsgldiv_res_snan - tbl_fsgldiv_op # SNAN / INF
  9159. short fsgldiv_res_snan - tbl_fsgldiv_op # SNAN / QNAN
  9160. short fsgldiv_res_snan - tbl_fsgldiv_op # SNAN / DENORM
  9161. short fsgldiv_res_snan - tbl_fsgldiv_op # SNAN / SNAN
  9162. short tbl_fsgldiv_op - tbl_fsgldiv_op #
  9163. short tbl_fsgldiv_op - tbl_fsgldiv_op #
# Branch islands: long branches to the handlers shared with fdiv.
  9164. fsgldiv_res_qnan:
  9165. bra.l res_qnan
  9166. fsgldiv_res_snan:
  9167. bra.l res_snan
  9168. fsgldiv_res_operr:
  9169. bra.l res_operr
  9170. fsgldiv_inf_load:
  9171. bra.l fdiv_inf_load
  9172. fsgldiv_zero_load:
  9173. bra.l fdiv_zero_load
  9174. fsgldiv_inf_dst:
  9175. bra.l fdiv_inf_dst
  9176. #########################################################################
  9177. # XDEF **************************************************************** #
  9178. # fadd(): emulates the fadd instruction #
  9179. # fsadd(): emulates the fsadd instruction #
  9180. # fdadd(): emulates the fdadd instruction #
  9181. # #
  9182. # XREF **************************************************************** #
  9183. # addsub_scaler2() - scale the operands so they won't take exc #
  9184. # ovf_res() - return default overflow result #
  9185. # unf_res() - return default underflow result #
  9186. # res_qnan() - set QNAN result #
  9187. # res_snan() - set SNAN result #
  9188. # res_operr() - set OPERR result #
  9189. # scale_to_zero_src() - set src operand exponent equal to zero #
  9190. # scale_to_zero_dst() - set dst operand exponent equal to zero #
  9191. # #
  9192. # INPUT *************************************************************** #
  9193. # a0 = pointer to extended precision source operand #
  9194. # a1 = pointer to extended precision destination operand #
  9195. # #
  9196. # OUTPUT ************************************************************** #
  9197. # fp0 = result #
  9198. # fp1 = EXOP (if exception occurred) #
  9199. # #
  9200. # ALGORITHM *********************************************************** #
  9201. # Handle NANs, infinities, and zeroes as special cases. Divide #
  9202. # norms into extended, single, and double precision. #
  9203. # Do addition after scaling exponents such that exception won't #
  9204. # occur. Then, check result exponent to see if exception would have #
  9205. # occurred. If so, return default result and maybe EXOP. Else, insert #
  9206. # the correct result exponent and return. Set FPSR bits as appropriate. #
  9207. # #
  9208. #########################################################################
  9209. global fsadd
# fsadd/fdadd: force the rounding precision to single/double, then use the
# common fadd path.
  9210. fsadd:
  9211. andi.b &0x30,%d0 # clear rnd prec
  9212. ori.b &s_mode*0x10,%d0 # insert sgl prec
  9213. bra.b fadd
  9214. global fdadd
  9215. fdadd:
  9216. andi.b &0x30,%d0 # clear rnd prec
  9217. ori.b &d_mode*0x10,%d0 # insert dbl prec; falls through to fadd
  9218. global fadd
  9219. fadd:
  9220. mov.l %d0,L_SCR3(%a6) # store rnd info
  9221. clr.w %d1 # clear scratch
  9222. mov.b DTAG(%a6),%d1
  9223. lsl.b &0x3,%d1 # shift dtag into bits 3-5
  9224. or.b STAG(%a6),%d1 # combine src tags
  9225. bne.w fadd_not_norm # optimize on non-norm input
  9226. #
  9227. # ADD: norms and denorms
  9228. #
  9229. fadd_norm:
  9230. bsr.l addsub_scaler2 # scale exponents
  9231. fadd_zero_entry:
  9232. fmovm.x FP_SCR1(%a6),&0x80 # load dst op
  9233. fmov.l &0x0,%fpsr # clear FPSR
  9234. fmov.l L_SCR3(%a6),%fpcr # set FPCR
  9235. fadd.x FP_SCR0(%a6),%fp0 # execute add
  9236. fmov.l &0x0,%fpcr # clear FPCR
  9237. fmov.l %fpsr,%d1 # fetch INEX2,N,Z
  9238. or.l %d1,USER_FPSR(%a6) # save exc and ccode bits
  9239. fbeq.w fadd_zero_exit # if result is zero, end now
  9240. mov.l %d2,-(%sp) # save d2
  9241. fmovm.x &0x01,-(%sp) # save result to stack
  9242. mov.w 2+L_SCR3(%a6),%d1 # fetch rnd prec,mode
  9243. lsr.b &0x6,%d1 # shift prec to lo bits
  9244. mov.w (%sp),%d2 # fetch new sign, exp
  9245. andi.l &0x7fff,%d2 # strip sign
  9246. sub.l %d0,%d2 # add scale factor
  9247. cmp.l %d2,(tbl_fadd_ovfl.b,%pc,%d1.w*4) # is it an overflow?
  9248. bge.b fadd_ovfl # yes
  9249. cmp.l %d2,(tbl_fadd_unfl.b,%pc,%d1.w*4) # is it an underflow?
  9250. blt.w fadd_unfl # yes
  9251. beq.w fadd_may_unfl # maybe; go find out
  9252. fadd_normal:
  9253. mov.w (%sp),%d1 # fetch sign,exp word
  9254. andi.w &0x8000,%d1 # keep sign
  9255. or.w %d2,%d1 # concat sign,new exp
  9256. mov.w %d1,(%sp) # insert new exponent
  9257. fmovm.x (%sp)+,&0x80 # return result in fp0
  9258. mov.l (%sp)+,%d2 # restore d2
  9259. rts
  9260. fadd_zero_exit:
# fp0 already holds the zero result of the fadd above; nothing to load.
  9261. # fmov.s &0x00000000,%fp0 # return zero in fp0
  9262. rts
# Per-precision exponent thresholds, indexed by rnd prec (ext/sgl/dbl).
  9263. tbl_fadd_ovfl:
  9264. long 0x7fff # ext ovfl
  9265. long 0x407f # sgl ovfl
  9266. long 0x43ff # dbl ovfl
  9267. tbl_fadd_unfl:
  9268. long 0x0000 # ext unfl
  9269. long 0x3f81 # sgl unfl
  9270. long 0x3c01 # dbl unfl
  9271. fadd_ovfl:
  9272. or.l &ovfl_inx_mask,USER_FPSR(%a6) # set ovfl/aovfl/ainex
  9273. mov.b FPCR_ENABLE(%a6),%d1
  9274. andi.b &0x13,%d1 # is OVFL or INEX enabled?
  9275. bne.b fadd_ovfl_ena # yes
  9276. add.l &0xc,%sp # discard result from stack
  9277. fadd_ovfl_dis:
  9278. btst &neg_bit,FPSR_CC(%a6) # is result negative?
  9279. sne %d1 # set sign param accordingly
  9280. mov.l L_SCR3(%a6),%d0 # pass prec:rnd
  9281. bsr.l ovf_res # calculate default result
  9282. or.b %d0,FPSR_CC(%a6) # set INF,N if applicable
  9283. fmovm.x (%a0),&0x80 # return default result in fp0
  9284. mov.l (%sp)+,%d2 # restore d2
  9285. rts
  9286. fadd_ovfl_ena:
  9287. mov.b L_SCR3(%a6),%d1
  9288. andi.b &0xc0,%d1 # is precision extended?
  9289. bne.b fadd_ovfl_ena_sd # no; prec = sgl or dbl
  9290. fadd_ovfl_ena_cont:
  9291. mov.w (%sp),%d1 # fetch sign,exp word
  9292. andi.w &0x8000,%d1 # keep sign
  9293. subi.l &0x6000,%d2 # add extra bias
  9294. andi.w &0x7fff,%d2
  9295. or.w %d2,%d1 # concat sign,new exp
  9296. mov.w %d1,(%sp) # insert new exponent
  9297. fmovm.x (%sp)+,&0x40 # return EXOP in fp1
  9298. bra.b fadd_ovfl_dis
  9299. fadd_ovfl_ena_sd:
# sgl/dbl precision: redo the add with the rnd mode only (ext prec) so the
# EXOP mantissa is not rounded to the shorter precision.
  9300. fmovm.x FP_SCR1(%a6),&0x80 # load dst op
  9301. mov.l L_SCR3(%a6),%d1
  9302. andi.b &0x30,%d1 # keep rnd mode
  9303. fmov.l %d1,%fpcr # set FPCR
  9304. fadd.x FP_SCR0(%a6),%fp0 # execute add
  9305. fmov.l &0x0,%fpcr # clear FPCR
  9306. add.l &0xc,%sp # discard old result
  9307. fmovm.x &0x01,-(%sp) # save new result to stack
  9308. bra.b fadd_ovfl_ena_cont
  9309. fadd_unfl:
  9310. bset &unfl_bit,FPSR_EXCEPT(%a6) # set unfl exc bit
  9311. add.l &0xc,%sp # discard result from stack
  9312. fmovm.x FP_SCR1(%a6),&0x80 # load dst op
  9313. fmov.l &rz_mode*0x10,%fpcr # set FPCR: RZ rounding
  9314. fmov.l &0x0,%fpsr # clear FPSR
  9315. fadd.x FP_SCR0(%a6),%fp0 # execute add
  9316. fmov.l &0x0,%fpcr # clear FPCR
  9317. fmov.l %fpsr,%d1 # save status
  9318. or.l %d1,USER_FPSR(%a6) # save INEX,N
  9319. mov.b FPCR_ENABLE(%a6),%d1
  9320. andi.b &0x0b,%d1 # is UNFL or INEX enabled?
  9321. bne.b fadd_unfl_ena # yes
  9322. fadd_unfl_dis:
  9323. fmovm.x &0x80,FP_SCR0(%a6) # store out result
  9324. lea FP_SCR0(%a6),%a0 # pass: result addr
  9325. mov.l L_SCR3(%a6),%d1 # pass: rnd prec,mode
  9326. bsr.l unf_res # calculate default result
  9327. or.b %d0,FPSR_CC(%a6) # 'Z' bit may have been set
  9328. fmovm.x FP_SCR0(%a6),&0x80 # return default result in fp0
  9329. mov.l (%sp)+,%d2 # restore d2
  9330. rts
  9331. fadd_unfl_ena:
  9332. fmovm.x FP_SCR1(%a6),&0x40 # load dst op
  9333. mov.l L_SCR3(%a6),%d1
  9334. andi.b &0xc0,%d1 # is precision extended?
  9335. bne.b fadd_unfl_ena_sd # no; sgl or dbl
  9336. fmov.l L_SCR3(%a6),%fpcr # set FPCR
  9337. fadd_unfl_ena_cont:
  9338. fmov.l &0x0,%fpsr # clear FPSR
  9339. fadd.x FP_SCR0(%a6),%fp1 # execute add
  9340. fmov.l &0x0,%fpcr # clear FPCR
  9341. fmovm.x &0x40,FP_SCR0(%a6) # save result to stack
  9342. mov.w FP_SCR0_EX(%a6),%d1 # fetch {sgn,exp}
  9343. mov.l %d1,%d2 # make a copy
  9344. andi.l &0x7fff,%d1 # strip sign
  9345. andi.w &0x8000,%d2 # keep old sign
  9346. sub.l %d0,%d1 # add scale factor
  9347. addi.l &0x6000,%d1 # add new bias
  9348. andi.w &0x7fff,%d1 # clear top bit
  9349. or.w %d2,%d1 # concat sign,new exp
  9350. mov.w %d1,FP_SCR0_EX(%a6) # insert new exponent
  9351. fmovm.x FP_SCR0(%a6),&0x40 # return EXOP in fp1
  9352. bra.w fadd_unfl_dis
  9353. fadd_unfl_ena_sd:
  9354. mov.l L_SCR3(%a6),%d1
  9355. andi.b &0x30,%d1 # use only rnd mode
  9356. fmov.l %d1,%fpcr # set FPCR
  9357. bra.b fadd_unfl_ena_cont
  9358. #
  9359. # result is equal to the smallest normalized number in the selected precision
  9360. # if the precision is extended, this result could not have come from an
  9361. # underflow that rounded up.
  9362. #
  9363. fadd_may_unfl:
  9364. mov.l L_SCR3(%a6),%d1
  9365. andi.b &0xc0,%d1 # is precision extended?
  9366. beq.w fadd_normal # yes; no underflow occurred
  9367. mov.l 0x4(%sp),%d1 # extract hi(man)
  9368. cmpi.l %d1,&0x80000000 # is hi(man) = 0x80000000?
  9369. bne.w fadd_normal # no; no underflow occurred
  9370. tst.l 0x8(%sp) # is lo(man) = 0x0?
  9371. bne.w fadd_normal # no; no underflow occurred
  9372. btst &inex2_bit,FPSR_EXCEPT(%a6) # is INEX2 set?
  9373. beq.w fadd_normal # no; no underflow occurred
  9374. #
  9375. # ok, so now the result has a exponent equal to the smallest normalized
  9376. # exponent for the selected precision. also, the mantissa is equal to
  9377. # 0x8000000000000000 and this mantissa is the result of rounding non-zero
  9378. # g,r,s.
  9379. # now, we must determine whether the pre-rounded result was an underflow
  9380. # rounded "up" or a normalized number rounded "down".
  9381. # so, we do this be re-executing the add using RZ as the rounding mode and
  9382. # seeing if the new result is smaller or equal to the current result.
  9383. #
  9384. fmovm.x FP_SCR1(%a6),&0x40 # load dst op into fp1
  9385. mov.l L_SCR3(%a6),%d1
  9386. andi.b &0xc0,%d1 # keep rnd prec
  9387. ori.b &rz_mode*0x10,%d1 # insert rnd mode
  9388. fmov.l %d1,%fpcr # set FPCR
  9389. fmov.l &0x0,%fpsr # clear FPSR
  9390. fadd.x FP_SCR0(%a6),%fp1 # execute add
  9391. fmov.l &0x0,%fpcr # clear FPCR
  9392. fabs.x %fp0 # compare absolute values
  9393. fabs.x %fp1
  9394. fcmp.x %fp0,%fp1 # is first result > second?
  9395. fbgt.w fadd_unfl # yes; it's an underflow
  9396. bra.w fadd_normal # no; it's not an underflow
  9397. ##########################################################################
  9398. #
  9399. # Add: inputs are not both normalized; what are they?
  9400. #
  9401. fadd_not_norm:
# Dispatch on d1 = (DTAG << 3) | STAG. Each row of eight table entries is
# one destination-operand class; zero offsets (tbl - tbl) mark unused slots.
  9402. mov.w (tbl_fadd_op.b,%pc,%d1.w*2),%d1 # fetch table offset
  9403. jmp (tbl_fadd_op.b,%pc,%d1.w*1) # jump to handler
  9404. swbeg &48
  9405. tbl_fadd_op:
  9406. short fadd_norm - tbl_fadd_op # NORM + NORM
  9407. short fadd_zero_src - tbl_fadd_op # NORM + ZERO
  9408. short fadd_inf_src - tbl_fadd_op # NORM + INF
  9409. short fadd_res_qnan - tbl_fadd_op # NORM + QNAN
  9410. short fadd_norm - tbl_fadd_op # NORM + DENORM
  9411. short fadd_res_snan - tbl_fadd_op # NORM + SNAN
  9412. short tbl_fadd_op - tbl_fadd_op #
  9413. short tbl_fadd_op - tbl_fadd_op #
  9414. short fadd_zero_dst - tbl_fadd_op # ZERO + NORM
  9415. short fadd_zero_2 - tbl_fadd_op # ZERO + ZERO
  9416. short fadd_inf_src - tbl_fadd_op # ZERO + INF
  9417. short fadd_res_qnan - tbl_fadd_op # ZERO + QNAN
  9418. short fadd_zero_dst - tbl_fadd_op # ZERO + DENORM
  9419. short fadd_res_snan - tbl_fadd_op # ZERO + SNAN
  9420. short tbl_fadd_op - tbl_fadd_op #
  9421. short tbl_fadd_op - tbl_fadd_op #
  9422. short fadd_inf_dst - tbl_fadd_op # INF + NORM
  9423. short fadd_inf_dst - tbl_fadd_op # INF + ZERO
  9424. short fadd_inf_2 - tbl_fadd_op # INF + INF
  9425. short fadd_res_qnan - tbl_fadd_op # INF + QNAN
  9426. short fadd_inf_dst - tbl_fadd_op # INF + DENORM
  9427. short fadd_res_snan - tbl_fadd_op # INF + SNAN
  9428. short tbl_fadd_op - tbl_fadd_op #
  9429. short tbl_fadd_op - tbl_fadd_op #
  9430. short fadd_res_qnan - tbl_fadd_op # QNAN + NORM
  9431. short fadd_res_qnan - tbl_fadd_op # QNAN + ZERO
  9432. short fadd_res_qnan - tbl_fadd_op # QNAN + INF
  9433. short fadd_res_qnan - tbl_fadd_op # QNAN + QNAN
  9434. short fadd_res_qnan - tbl_fadd_op # QNAN + DENORM
  9435. short fadd_res_snan - tbl_fadd_op # QNAN + SNAN
  9436. short tbl_fadd_op - tbl_fadd_op #
  9437. short tbl_fadd_op - tbl_fadd_op #
  9438. short fadd_norm - tbl_fadd_op # DENORM + NORM
  9439. short fadd_zero_src - tbl_fadd_op # DENORM + ZERO
  9440. short fadd_inf_src - tbl_fadd_op # DENORM + INF
  9441. short fadd_res_qnan - tbl_fadd_op # DENORM + QNAN
  9442. short fadd_norm - tbl_fadd_op # DENORM + DENORM
  9443. short fadd_res_snan - tbl_fadd_op # DENORM + SNAN
  9444. short tbl_fadd_op - tbl_fadd_op #
  9445. short tbl_fadd_op - tbl_fadd_op #
  9446. short fadd_res_snan - tbl_fadd_op # SNAN + NORM
  9447. short fadd_res_snan - tbl_fadd_op # SNAN + ZERO
  9448. short fadd_res_snan - tbl_fadd_op # SNAN + INF
  9449. short fadd_res_snan - tbl_fadd_op # SNAN + QNAN
  9450. short fadd_res_snan - tbl_fadd_op # SNAN + DENORM
  9451. short fadd_res_snan - tbl_fadd_op # SNAN + SNAN
  9452. short tbl_fadd_op - tbl_fadd_op #
  9453. short tbl_fadd_op - tbl_fadd_op #
# Branch islands: long branches to the shared NAN handlers.
  9454. fadd_res_qnan:
  9455. bra.l res_qnan
  9456. fadd_res_snan:
  9457. bra.l res_snan
  9458. #
  9459. # both operands are ZEROes
  9460. #
  9461. fadd_zero_2:
  9462. mov.b SRC_EX(%a0),%d0 # are the signs opposite?
  9463. mov.b DST_EX(%a1),%d1 # fetch dst sign
  9464. eor.b %d0,%d1 # N set if signs differ
  9465. bmi.w fadd_zero_2_chk_rm # weed out (-ZERO)+(+ZERO)
  9466. # the signs are the same. so determine whether they are positive or negative
  9467. # and return the appropriately signed zero.
  9468. tst.b %d0 # are ZEROes positive or negative?
  9469. bmi.b fadd_zero_rm # negative
  9470. fmov.s &0x00000000,%fp0 # return +ZERO
  9471. mov.b &z_bmask,FPSR_CC(%a6) # set Z
  9472. rts
  9473. #
  9474. # the ZEROes have opposite signs:
  9475. # - therefore, we return +ZERO if the rounding modes are RN,RZ, or RP.
  9476. # - -ZERO is returned in the case of RM.
  9477. #
  9478. fadd_zero_2_chk_rm:
  9479. mov.b 3+L_SCR3(%a6),%d1
  9480. andi.b &0x30,%d1 # extract rnd mode
  9481. cmpi.b %d1,&rm_mode*0x10 # is rnd mode == RM?
  9482. beq.b fadd_zero_rm # yes
  9483. fmov.s &0x00000000,%fp0 # return +ZERO
  9484. mov.b &z_bmask,FPSR_CC(%a6) # set Z
  9485. rts
  9486. fadd_zero_rm:
  9487. fmov.s &0x80000000,%fp0 # return -ZERO
  9488. mov.b &neg_bmask+z_bmask,FPSR_CC(%a6) # set NEG/Z
  9489. rts
  9490. #
  9491. # one operand is a ZERO and the other is a DENORM or NORM. scale
  9492. # the DENORM or NORM and jump to the regular fadd routine.
  9493. #
  9494. fadd_zero_dst:
  9495. mov.w SRC_EX(%a0),FP_SCR0_EX(%a6)
  9496. mov.l SRC_HI(%a0),FP_SCR0_HI(%a6)
  9497. mov.l SRC_LO(%a0),FP_SCR0_LO(%a6)
  9498. bsr.l scale_to_zero_src # scale the operand
  9499. clr.w FP_SCR1_EX(%a6) # set dst operand = +0.0
  9500. clr.l FP_SCR1_HI(%a6)
  9501. clr.l FP_SCR1_LO(%a6)
  9502. bra.w fadd_zero_entry # go execute fadd
  9503. fadd_zero_src:
  9504. mov.w DST_EX(%a1),FP_SCR1_EX(%a6)
  9505. mov.l DST_HI(%a1),FP_SCR1_HI(%a6)
  9506. mov.l DST_LO(%a1),FP_SCR1_LO(%a6)
  9507. bsr.l scale_to_zero_dst # scale the operand
  9508. clr.w FP_SCR0_EX(%a6) # set src operand = +0.0
  9509. clr.l FP_SCR0_HI(%a6)
  9510. clr.l FP_SCR0_LO(%a6)
  9511. bra.w fadd_zero_entry # go execute fadd
  9512. #
  9513. # both operands are INFs. an OPERR will result if the INFs have
  9514. # different signs. else, an INF of the same sign is returned
  9515. #
  9516. fadd_inf_2:
  9517. mov.b SRC_EX(%a0),%d0 # exclusive or the signs
  9518. mov.b DST_EX(%a1),%d1
  9519. eor.b %d1,%d0 # N set if signs differ
  9520. bmi.l res_operr # weed out (-INF)+(+INF)
  9521. # ok, so it's not an OPERR. but, we do have to remember to return the
  9522. # src INF since that's where the 881/882 gets the j-bit from...
  9523. #
  9524. # operands are INF and one of {ZERO, INF, DENORM, NORM}
  9525. #
  9526. fadd_inf_src:
  9527. fmovm.x SRC(%a0),&0x80 # return src INF
  9528. tst.b SRC_EX(%a0) # is INF positive?
  9529. bpl.b fadd_inf_done # yes; we're done
  9530. mov.b &neg_bmask+inf_bmask,FPSR_CC(%a6) # set INF/NEG
  9531. rts
  9532. #
  9533. # operands are INF and one of {ZERO, INF, DENORM, NORM}
  9534. #
  9535. fadd_inf_dst:
  9536. fmovm.x DST(%a1),&0x80 # return dst INF
  9537. tst.b DST_EX(%a1) # is INF positive?
  9538. bpl.b fadd_inf_done # yes; we're done
  9539. mov.b &neg_bmask+inf_bmask,FPSR_CC(%a6) # set INF/NEG
  9540. rts
  9541. fadd_inf_done:
  9542. mov.b &inf_bmask,FPSR_CC(%a6) # set INF
  9543. rts
  9544. #########################################################################
  9545. # XDEF **************************************************************** #
  9546. # fsub(): emulates the fsub instruction #
  9547. # fssub(): emulates the fssub instruction #
  9548. # fdsub(): emulates the fdsub instruction #
  9549. # #
  9550. # XREF **************************************************************** #
  9551. # addsub_scaler2() - scale the operands so they won't take exc #
  9552. # ovf_res() - return default overflow result #
  9553. # unf_res() - return default underflow result #
  9554. # res_qnan() - set QNAN result #
  9555. # res_snan() - set SNAN result #
  9556. # res_operr() - set OPERR result #
  9557. # scale_to_zero_src() - set src operand exponent equal to zero #
  9558. # scale_to_zero_dst() - set dst operand exponent equal to zero #
  9559. # #
  9560. # INPUT *************************************************************** #
  9561. # a0 = pointer to extended precision source operand #
  9562. # a1 = pointer to extended precision destination operand #
  9563. # #
  9564. # OUTPUT ************************************************************** #
  9565. # fp0 = result #
  9566. # fp1 = EXOP (if exception occurred) #
  9567. # #
  9568. # ALGORITHM *********************************************************** #
  9569. # Handle NANs, infinities, and zeroes as special cases. Divide #
  9570. # norms into extended, single, and double precision. #
  9571. # Do subtraction after scaling exponents such that exception won't#
  9572. # occur. Then, check result exponent to see if exception would have #
  9573. # occurred. If so, return default result and maybe EXOP. Else, insert #
  9574. # the correct result exponent and return. Set FPSR bits as appropriate. #
  9575. # #
  9576. #########################################################################
  9577. global fssub
  9578. fssub:
  9579. andi.b &0x30,%d0 # clear rnd prec
  9580. ori.b &s_mode*0x10,%d0 # insert sgl prec
  9581. bra.b fsub
  9582. global fdsub
  9583. fdsub:
  9584. andi.b &0x30,%d0 # clear rnd prec
  9585. ori.b &d_mode*0x10,%d0 # insert dbl prec
  9586. global fsub
  9587. fsub:
  9588. mov.l %d0,L_SCR3(%a6) # store rnd info
  9589. clr.w %d1
  9590. mov.b DTAG(%a6),%d1
  9591. lsl.b &0x3,%d1
  9592. or.b STAG(%a6),%d1 # combine src tags
  9593. bne.w fsub_not_norm # optimize on non-norm input
  9594. #
  9595. # SUB: norms and denorms
  9596. #
  9597. fsub_norm:
  9598. bsr.l addsub_scaler2 # scale exponents
  9599. fsub_zero_entry:
  9600. fmovm.x FP_SCR1(%a6),&0x80 # load dst op
  9601. fmov.l &0x0,%fpsr # clear FPSR
  9602. fmov.l L_SCR3(%a6),%fpcr # set FPCR
  9603. fsub.x FP_SCR0(%a6),%fp0 # execute subtract
  9604. fmov.l &0x0,%fpcr # clear FPCR
  9605. fmov.l %fpsr,%d1 # fetch INEX2, N, Z
  9606. or.l %d1,USER_FPSR(%a6) # save exc and ccode bits
  9607. fbeq.w fsub_zero_exit # if result zero, end now
  9608. mov.l %d2,-(%sp) # save d2
  9609. fmovm.x &0x01,-(%sp) # save result to stack
  9610. mov.w 2+L_SCR3(%a6),%d1
  9611. lsr.b &0x6,%d1
  9612. mov.w (%sp),%d2 # fetch new exponent
  9613. andi.l &0x7fff,%d2 # strip sign
  9614. sub.l %d0,%d2 # add scale factor
  9615. cmp.l %d2,(tbl_fsub_ovfl.b,%pc,%d1.w*4) # is it an overflow?
  9616. bge.b fsub_ovfl # yes
  9617. cmp.l %d2,(tbl_fsub_unfl.b,%pc,%d1.w*4) # is it an underflow?
  9618. blt.w fsub_unfl # yes
  9619. beq.w fsub_may_unfl # maybe; go find out
  9620. fsub_normal:
  9621. mov.w (%sp),%d1
  9622. andi.w &0x8000,%d1 # keep sign
  9623. or.w %d2,%d1 # insert new exponent
  9624. mov.w %d1,(%sp) # insert new exponent
  9625. fmovm.x (%sp)+,&0x80 # return result in fp0
  9626. mov.l (%sp)+,%d2 # restore d2
  9627. rts
  9628. fsub_zero_exit:
  9629. # fmov.s &0x00000000,%fp0 # return zero in fp0
  9630. rts
  9631. tbl_fsub_ovfl:
  9632. long 0x7fff # ext ovfl
  9633. long 0x407f # sgl ovfl
  9634. long 0x43ff # dbl ovfl
  9635. tbl_fsub_unfl:
  9636. long 0x0000 # ext unfl
  9637. long 0x3f81 # sgl unfl
  9638. long 0x3c01 # dbl unfl
  9639. fsub_ovfl:
  9640. or.l &ovfl_inx_mask,USER_FPSR(%a6) # set ovfl/aovfl/ainex
  9641. mov.b FPCR_ENABLE(%a6),%d1
  9642. andi.b &0x13,%d1 # is OVFL or INEX enabled?
  9643. bne.b fsub_ovfl_ena # yes
  9644. add.l &0xc,%sp
  9645. fsub_ovfl_dis:
  9646. btst &neg_bit,FPSR_CC(%a6) # is result negative?
  9647. sne %d1 # set sign param accordingly
  9648. mov.l L_SCR3(%a6),%d0 # pass prec:rnd
  9649. bsr.l ovf_res # calculate default result
  9650. or.b %d0,FPSR_CC(%a6) # set INF,N if applicable
  9651. fmovm.x (%a0),&0x80 # return default result in fp0
  9652. mov.l (%sp)+,%d2 # restore d2
  9653. rts
  9654. fsub_ovfl_ena:
  9655. mov.b L_SCR3(%a6),%d1
  9656. andi.b &0xc0,%d1 # is precision extended?
  9657. bne.b fsub_ovfl_ena_sd # no
  9658. fsub_ovfl_ena_cont:
  9659. mov.w (%sp),%d1 # fetch {sgn,exp}
  9660. andi.w &0x8000,%d1 # keep sign
  9661. subi.l &0x6000,%d2 # subtract new bias
  9662. andi.w &0x7fff,%d2 # clear top bit
  9663. or.w %d2,%d1 # concat sign,exp
  9664. mov.w %d1,(%sp) # insert new exponent
  9665. fmovm.x (%sp)+,&0x40 # return EXOP in fp1
  9666. bra.b fsub_ovfl_dis
  9667. fsub_ovfl_ena_sd:
  9668. fmovm.x FP_SCR1(%a6),&0x80 # load dst op
  9669. mov.l L_SCR3(%a6),%d1
  9670. andi.b &0x30,%d1 # clear rnd prec
  9671. fmov.l %d1,%fpcr # set FPCR
  9672. fsub.x FP_SCR0(%a6),%fp0 # execute subtract
  9673. fmov.l &0x0,%fpcr # clear FPCR
  9674. add.l &0xc,%sp
  9675. fmovm.x &0x01,-(%sp)
  9676. bra.b fsub_ovfl_ena_cont
  9677. fsub_unfl:
  9678. bset &unfl_bit,FPSR_EXCEPT(%a6) # set unfl exc bit
  9679. add.l &0xc,%sp
  9680. fmovm.x FP_SCR1(%a6),&0x80 # load dst op
  9681. fmov.l &rz_mode*0x10,%fpcr # set FPCR
  9682. fmov.l &0x0,%fpsr # clear FPSR
  9683. fsub.x FP_SCR0(%a6),%fp0 # execute subtract
  9684. fmov.l &0x0,%fpcr # clear FPCR
  9685. fmov.l %fpsr,%d1 # save status
  9686. or.l %d1,USER_FPSR(%a6)
  9687. mov.b FPCR_ENABLE(%a6),%d1
  9688. andi.b &0x0b,%d1 # is UNFL or INEX enabled?
  9689. bne.b fsub_unfl_ena # yes
  9690. fsub_unfl_dis:
  9691. fmovm.x &0x80,FP_SCR0(%a6) # store out result
  9692. lea FP_SCR0(%a6),%a0 # pass: result addr
  9693. mov.l L_SCR3(%a6),%d1 # pass: rnd prec,mode
  9694. bsr.l unf_res # calculate default result
  9695. or.b %d0,FPSR_CC(%a6) # 'Z' may have been set
  9696. fmovm.x FP_SCR0(%a6),&0x80 # return default result in fp0
  9697. mov.l (%sp)+,%d2 # restore d2
  9698. rts
  9699. fsub_unfl_ena:
  9700. fmovm.x FP_SCR1(%a6),&0x40
  9701. mov.l L_SCR3(%a6),%d1
  9702. andi.b &0xc0,%d1 # is precision extended?
  9703. bne.b fsub_unfl_ena_sd # no
  9704. fmov.l L_SCR3(%a6),%fpcr # set FPCR
  9705. fsub_unfl_ena_cont:
  9706. fmov.l &0x0,%fpsr # clear FPSR
  9707. fsub.x FP_SCR0(%a6),%fp1 # execute subtract
  9708. fmov.l &0x0,%fpcr # clear FPCR
  9709. fmovm.x &0x40,FP_SCR0(%a6) # store result to stack
  9710. mov.w FP_SCR0_EX(%a6),%d1 # fetch {sgn,exp}
  9711. mov.l %d1,%d2 # make a copy
  9712. andi.l &0x7fff,%d1 # strip sign
  9713. andi.w &0x8000,%d2 # keep old sign
  9714. sub.l %d0,%d1 # add scale factor
  9715. addi.l &0x6000,%d1 # subtract new bias
  9716. andi.w &0x7fff,%d1 # clear top bit
  9717. or.w %d2,%d1 # concat sgn,exp
  9718. mov.w %d1,FP_SCR0_EX(%a6) # insert new exponent
  9719. fmovm.x FP_SCR0(%a6),&0x40 # return EXOP in fp1
  9720. bra.w fsub_unfl_dis
  9721. fsub_unfl_ena_sd:
  9722. mov.l L_SCR3(%a6),%d1
  9723. andi.b &0x30,%d1 # clear rnd prec
  9724. fmov.l %d1,%fpcr # set FPCR
  9725. bra.b fsub_unfl_ena_cont
  9726. #
  9727. # result is equal to the smallest normalized number in the selected precision
  9728. # if the precision is extended, this result could not have come from an
  9729. # underflow that rounded up.
  9730. #
  9731. fsub_may_unfl:
  9732. mov.l L_SCR3(%a6),%d1
  9733. andi.b &0xc0,%d1 # fetch rnd prec
  9734. beq.w fsub_normal # yes; no underflow occurred
  9735. mov.l 0x4(%sp),%d1
  9736. cmpi.l %d1,&0x80000000 # is hi(man) = 0x80000000?
  9737. bne.w fsub_normal # no; no underflow occurred
  9738. tst.l 0x8(%sp) # is lo(man) = 0x0?
  9739. bne.w fsub_normal # no; no underflow occurred
  9740. btst &inex2_bit,FPSR_EXCEPT(%a6) # is INEX2 set?
  9741. beq.w fsub_normal # no; no underflow occurred
  9742. #
  9743. # ok, so now the result has a exponent equal to the smallest normalized
  9744. # exponent for the selected precision. also, the mantissa is equal to
  9745. # 0x8000000000000000 and this mantissa is the result of rounding non-zero
  9746. # g,r,s.
  9747. # now, we must determine whether the pre-rounded result was an underflow
  9748. # rounded "up" or a normalized number rounded "down".
  9749. # so, we do this be re-executing the add using RZ as the rounding mode and
  9750. # seeing if the new result is smaller or equal to the current result.
  9751. #
  9752. fmovm.x FP_SCR1(%a6),&0x40 # load dst op into fp1
  9753. mov.l L_SCR3(%a6),%d1
  9754. andi.b &0xc0,%d1 # keep rnd prec
  9755. ori.b &rz_mode*0x10,%d1 # insert rnd mode
  9756. fmov.l %d1,%fpcr # set FPCR
  9757. fmov.l &0x0,%fpsr # clear FPSR
  9758. fsub.x FP_SCR0(%a6),%fp1 # execute subtract
  9759. fmov.l &0x0,%fpcr # clear FPCR
  9760. fabs.x %fp0 # compare absolute values
  9761. fabs.x %fp1
  9762. fcmp.x %fp0,%fp1 # is first result > second?
  9763. fbgt.w fsub_unfl # yes; it's an underflow
  9764. bra.w fsub_normal # no; it's not an underflow
  9765. ##########################################################################
  9766. #
  9767. # Sub: inputs are not both normalized; what are they?
  9768. #
  9769. fsub_not_norm:
  9770. mov.w (tbl_fsub_op.b,%pc,%d1.w*2),%d1
  9771. jmp (tbl_fsub_op.b,%pc,%d1.w*1)
  9772. swbeg &48
  9773. tbl_fsub_op:
  9774. short fsub_norm - tbl_fsub_op # NORM - NORM
  9775. short fsub_zero_src - tbl_fsub_op # NORM - ZERO
  9776. short fsub_inf_src - tbl_fsub_op # NORM - INF
  9777. short fsub_res_qnan - tbl_fsub_op # NORM - QNAN
  9778. short fsub_norm - tbl_fsub_op # NORM - DENORM
  9779. short fsub_res_snan - tbl_fsub_op # NORM - SNAN
  9780. short tbl_fsub_op - tbl_fsub_op #
  9781. short tbl_fsub_op - tbl_fsub_op #
  9782. short fsub_zero_dst - tbl_fsub_op # ZERO - NORM
  9783. short fsub_zero_2 - tbl_fsub_op # ZERO - ZERO
  9784. short fsub_inf_src - tbl_fsub_op # ZERO - INF
  9785. short fsub_res_qnan - tbl_fsub_op # NORM - QNAN
  9786. short fsub_zero_dst - tbl_fsub_op # ZERO - DENORM
  9787. short fsub_res_snan - tbl_fsub_op # NORM - SNAN
  9788. short tbl_fsub_op - tbl_fsub_op #
  9789. short tbl_fsub_op - tbl_fsub_op #
  9790. short fsub_inf_dst - tbl_fsub_op # INF - NORM
  9791. short fsub_inf_dst - tbl_fsub_op # INF - ZERO
  9792. short fsub_inf_2 - tbl_fsub_op # INF - INF
  9793. short fsub_res_qnan - tbl_fsub_op # NORM - QNAN
  9794. short fsub_inf_dst - tbl_fsub_op # INF - DENORM
  9795. short fsub_res_snan - tbl_fsub_op # NORM - SNAN
  9796. short tbl_fsub_op - tbl_fsub_op #
  9797. short tbl_fsub_op - tbl_fsub_op #
  9798. short fsub_res_qnan - tbl_fsub_op # QNAN - NORM
  9799. short fsub_res_qnan - tbl_fsub_op # QNAN - ZERO
  9800. short fsub_res_qnan - tbl_fsub_op # QNAN - INF
  9801. short fsub_res_qnan - tbl_fsub_op # QNAN - QNAN
  9802. short fsub_res_qnan - tbl_fsub_op # QNAN - DENORM
  9803. short fsub_res_snan - tbl_fsub_op # QNAN - SNAN
  9804. short tbl_fsub_op - tbl_fsub_op #
  9805. short tbl_fsub_op - tbl_fsub_op #
  9806. short fsub_norm - tbl_fsub_op # DENORM - NORM
  9807. short fsub_zero_src - tbl_fsub_op # DENORM - ZERO
  9808. short fsub_inf_src - tbl_fsub_op # DENORM - INF
  9809. short fsub_res_qnan - tbl_fsub_op # NORM - QNAN
  9810. short fsub_norm - tbl_fsub_op # DENORM - DENORM
  9811. short fsub_res_snan - tbl_fsub_op # NORM - SNAN
  9812. short tbl_fsub_op - tbl_fsub_op #
  9813. short tbl_fsub_op - tbl_fsub_op #
  9814. short fsub_res_snan - tbl_fsub_op # SNAN - NORM
  9815. short fsub_res_snan - tbl_fsub_op # SNAN - ZERO
  9816. short fsub_res_snan - tbl_fsub_op # SNAN - INF
  9817. short fsub_res_snan - tbl_fsub_op # SNAN - QNAN
  9818. short fsub_res_snan - tbl_fsub_op # SNAN - DENORM
  9819. short fsub_res_snan - tbl_fsub_op # SNAN - SNAN
  9820. short tbl_fsub_op - tbl_fsub_op #
  9821. short tbl_fsub_op - tbl_fsub_op #
  9822. fsub_res_qnan:
  9823. bra.l res_qnan
  9824. fsub_res_snan:
  9825. bra.l res_snan
  9826. #
  9827. # both operands are ZEROes
  9828. #
  9829. fsub_zero_2:
  9830. mov.b SRC_EX(%a0),%d0
  9831. mov.b DST_EX(%a1),%d1
  9832. eor.b %d1,%d0
  9833. bpl.b fsub_zero_2_chk_rm
  9834. # the signs are opposite, so, return a ZERO w/ the sign of the dst ZERO
  9835. tst.b %d0 # is dst negative?
  9836. bmi.b fsub_zero_2_rm # yes
  9837. fmov.s &0x00000000,%fp0 # no; return +ZERO
  9838. mov.b &z_bmask,FPSR_CC(%a6) # set Z
  9839. rts
  9840. #
  9841. # the ZEROes have the same signs:
  9842. # - therefore, we return +ZERO if the rounding mode is RN,RZ, or RP
  9843. # - -ZERO is returned in the case of RM.
  9844. #
  9845. fsub_zero_2_chk_rm:
  9846. mov.b 3+L_SCR3(%a6),%d1
  9847. andi.b &0x30,%d1 # extract rnd mode
  9848. cmpi.b %d1,&rm_mode*0x10 # is rnd mode = RM?
  9849. beq.b fsub_zero_2_rm # yes
  9850. fmov.s &0x00000000,%fp0 # no; return +ZERO
  9851. mov.b &z_bmask,FPSR_CC(%a6) # set Z
  9852. rts
  9853. fsub_zero_2_rm:
  9854. fmov.s &0x80000000,%fp0 # return -ZERO
  9855. mov.b &z_bmask+neg_bmask,FPSR_CC(%a6) # set Z/NEG
  9856. rts
  9857. #
  9858. # one operand is a ZERO and the other is a DENORM or a NORM.
  9859. # scale the DENORM or NORM and jump to the regular fsub routine.
  9860. #
  9861. fsub_zero_dst:
  9862. mov.w SRC_EX(%a0),FP_SCR0_EX(%a6)
  9863. mov.l SRC_HI(%a0),FP_SCR0_HI(%a6)
  9864. mov.l SRC_LO(%a0),FP_SCR0_LO(%a6)
  9865. bsr.l scale_to_zero_src # scale the operand
  9866. clr.w FP_SCR1_EX(%a6)
  9867. clr.l FP_SCR1_HI(%a6)
  9868. clr.l FP_SCR1_LO(%a6)
  9869. bra.w fsub_zero_entry # go execute fsub
  9870. fsub_zero_src:
  9871. mov.w DST_EX(%a1),FP_SCR1_EX(%a6)
  9872. mov.l DST_HI(%a1),FP_SCR1_HI(%a6)
  9873. mov.l DST_LO(%a1),FP_SCR1_LO(%a6)
  9874. bsr.l scale_to_zero_dst # scale the operand
  9875. clr.w FP_SCR0_EX(%a6)
  9876. clr.l FP_SCR0_HI(%a6)
  9877. clr.l FP_SCR0_LO(%a6)
  9878. bra.w fsub_zero_entry # go execute fsub
  9879. #
  9880. # both operands are INFs. an OPERR will result if the INFs have the
  9881. # same signs. else,
  9882. #
  9883. fsub_inf_2:
  9884. mov.b SRC_EX(%a0),%d0 # exclusive or the signs
  9885. mov.b DST_EX(%a1),%d1
  9886. eor.b %d1,%d0
  9887. bpl.l res_operr # weed out (-INF)+(+INF)
  9888. # ok, so it's not an OPERR. but we do have to remember to return
  9889. # the src INF since that's where the 881/882 gets the j-bit.
  9890. fsub_inf_src:
  9891. fmovm.x SRC(%a0),&0x80 # return src INF
  9892. fneg.x %fp0 # invert sign
  9893. fbge.w fsub_inf_done # sign is now positive
  9894. mov.b &neg_bmask+inf_bmask,FPSR_CC(%a6) # set INF/NEG
  9895. rts
  9896. fsub_inf_dst:
  9897. fmovm.x DST(%a1),&0x80 # return dst INF
  9898. tst.b DST_EX(%a1) # is INF negative?
  9899. bpl.b fsub_inf_done # no
  9900. mov.b &neg_bmask+inf_bmask,FPSR_CC(%a6) # set INF/NEG
  9901. rts
  9902. fsub_inf_done:
  9903. mov.b &inf_bmask,FPSR_CC(%a6) # set INF
  9904. rts
  9905. #########################################################################
  9906. # XDEF **************************************************************** #
  9907. # fsqrt(): emulates the fsqrt instruction #
  9908. # fssqrt(): emulates the fssqrt instruction #
  9909. # fdsqrt(): emulates the fdsqrt instruction #
  9910. # #
  9911. # XREF **************************************************************** #
  9912. # scale_sqrt() - scale the source operand #
  9913. # unf_res() - return default underflow result #
  9914. # ovf_res() - return default overflow result #
  9915. # res_qnan_1op() - return QNAN result #
  9916. # res_snan_1op() - return SNAN result #
  9917. # #
  9918. # INPUT *************************************************************** #
  9919. # a0 = pointer to extended precision source operand #
  9920. # d0 rnd prec,mode #
  9921. # #
  9922. # OUTPUT ************************************************************** #
  9923. # fp0 = result #
  9924. # fp1 = EXOP (if exception occurred) #
  9925. # #
  9926. # ALGORITHM *********************************************************** #
  9927. # Handle NANs, infinities, and zeroes as special cases. Divide #
  9928. # norms/denorms into ext/sgl/dbl precision. #
  9929. # For norms/denorms, scale the exponents such that a sqrt #
  9930. # instruction won't cause an exception. Use the regular fsqrt to #
  9931. # compute a result. Check if the regular operands would have taken #
  9932. # an exception. If so, return the default overflow/underflow result #
  9933. # and return the EXOP if exceptions are enabled. Else, scale the #
  9934. # result operand to the proper exponent. #
  9935. # #
  9936. #########################################################################
  9937. global fssqrt
  9938. fssqrt:
  9939. andi.b &0x30,%d0 # clear rnd prec
  9940. ori.b &s_mode*0x10,%d0 # insert sgl precision
  9941. bra.b fsqrt
  9942. global fdsqrt
  9943. fdsqrt:
  9944. andi.b &0x30,%d0 # clear rnd prec
  9945. ori.b &d_mode*0x10,%d0 # insert dbl precision
  9946. global fsqrt
  9947. fsqrt:
  9948. mov.l %d0,L_SCR3(%a6) # store rnd info
  9949. clr.w %d1
  9950. mov.b STAG(%a6),%d1
  9951. bne.w fsqrt_not_norm # optimize on non-norm input
  9952. #
  9953. # SQUARE ROOT: norms and denorms ONLY!
  9954. #
  9955. fsqrt_norm:
  9956. tst.b SRC_EX(%a0) # is operand negative?
  9957. bmi.l res_operr # yes
  9958. andi.b &0xc0,%d0 # is precision extended?
  9959. bne.b fsqrt_not_ext # no; go handle sgl or dbl
  9960. fmov.l L_SCR3(%a6),%fpcr # set FPCR
  9961. fmov.l &0x0,%fpsr # clear FPSR
  9962. fsqrt.x (%a0),%fp0 # execute square root
  9963. fmov.l %fpsr,%d1
  9964. or.l %d1,USER_FPSR(%a6) # set N,INEX
  9965. rts
  9966. fsqrt_denorm:
  9967. tst.b SRC_EX(%a0) # is operand negative?
  9968. bmi.l res_operr # yes
  9969. andi.b &0xc0,%d0 # is precision extended?
  9970. bne.b fsqrt_not_ext # no; go handle sgl or dbl
  9971. mov.w SRC_EX(%a0),FP_SCR0_EX(%a6)
  9972. mov.l SRC_HI(%a0),FP_SCR0_HI(%a6)
  9973. mov.l SRC_LO(%a0),FP_SCR0_LO(%a6)
  9974. bsr.l scale_sqrt # calculate scale factor
  9975. bra.w fsqrt_sd_normal
  9976. #
  9977. # operand is either single or double
  9978. #
  9979. fsqrt_not_ext:
  9980. cmpi.b %d0,&s_mode*0x10 # separate sgl/dbl prec
  9981. bne.w fsqrt_dbl
  9982. #
  9983. # operand is to be rounded to single precision
  9984. #
  9985. fsqrt_sgl:
  9986. mov.w SRC_EX(%a0),FP_SCR0_EX(%a6)
  9987. mov.l SRC_HI(%a0),FP_SCR0_HI(%a6)
  9988. mov.l SRC_LO(%a0),FP_SCR0_LO(%a6)
  9989. bsr.l scale_sqrt # calculate scale factor
  9990. cmpi.l %d0,&0x3fff-0x3f81 # will move in underflow?
  9991. beq.w fsqrt_sd_may_unfl
  9992. bgt.w fsqrt_sd_unfl # yes; go handle underflow
  9993. cmpi.l %d0,&0x3fff-0x407f # will move in overflow?
  9994. beq.w fsqrt_sd_may_ovfl # maybe; go check
  9995. blt.w fsqrt_sd_ovfl # yes; go handle overflow
  9996. #
  9997. # operand will NOT overflow or underflow when moved in to the fp reg file
  9998. #
  9999. fsqrt_sd_normal:
  10000. fmov.l &0x0,%fpsr # clear FPSR
  10001. fmov.l L_SCR3(%a6),%fpcr # set FPCR
  10002. fsqrt.x FP_SCR0(%a6),%fp0 # perform absolute
  10003. fmov.l %fpsr,%d1 # save FPSR
  10004. fmov.l &0x0,%fpcr # clear FPCR
  10005. or.l %d1,USER_FPSR(%a6) # save INEX2,N
  10006. fsqrt_sd_normal_exit:
  10007. mov.l %d2,-(%sp) # save d2
  10008. fmovm.x &0x80,FP_SCR0(%a6) # store out result
  10009. mov.w FP_SCR0_EX(%a6),%d1 # load sgn,exp
  10010. mov.l %d1,%d2 # make a copy
  10011. andi.l &0x7fff,%d1 # strip sign
  10012. sub.l %d0,%d1 # add scale factor
  10013. andi.w &0x8000,%d2 # keep old sign
  10014. or.w %d1,%d2 # concat old sign,new exp
  10015. mov.w %d2,FP_SCR0_EX(%a6) # insert new exponent
  10016. mov.l (%sp)+,%d2 # restore d2
  10017. fmovm.x FP_SCR0(%a6),&0x80 # return result in fp0
  10018. rts
  10019. #
  10020. # operand is to be rounded to double precision
  10021. #
  10022. fsqrt_dbl:
  10023. mov.w SRC_EX(%a0),FP_SCR0_EX(%a6)
  10024. mov.l SRC_HI(%a0),FP_SCR0_HI(%a6)
  10025. mov.l SRC_LO(%a0),FP_SCR0_LO(%a6)
  10026. bsr.l scale_sqrt # calculate scale factor
  10027. cmpi.l %d0,&0x3fff-0x3c01 # will move in underflow?
  10028. beq.w fsqrt_sd_may_unfl
  10029. bgt.b fsqrt_sd_unfl # yes; go handle underflow
  10030. cmpi.l %d0,&0x3fff-0x43ff # will move in overflow?
  10031. beq.w fsqrt_sd_may_ovfl # maybe; go check
  10032. blt.w fsqrt_sd_ovfl # yes; go handle overflow
  10033. bra.w fsqrt_sd_normal # no; ho handle normalized op
  10034. # we're on the line here and the distinguising characteristic is whether
  10035. # the exponent is 3fff or 3ffe. if it's 3ffe, then it's a safe number
  10036. # elsewise fall through to underflow.
  10037. fsqrt_sd_may_unfl:
  10038. btst &0x0,1+FP_SCR0_EX(%a6) # is exponent 0x3fff?
  10039. bne.w fsqrt_sd_normal # yes, so no underflow
  10040. #
  10041. # operand WILL underflow when moved in to the fp register file
  10042. #
  10043. fsqrt_sd_unfl:
  10044. bset &unfl_bit,FPSR_EXCEPT(%a6) # set unfl exc bit
  10045. fmov.l &rz_mode*0x10,%fpcr # set FPCR
  10046. fmov.l &0x0,%fpsr # clear FPSR
  10047. fsqrt.x FP_SCR0(%a6),%fp0 # execute square root
  10048. fmov.l %fpsr,%d1 # save status
  10049. fmov.l &0x0,%fpcr # clear FPCR
  10050. or.l %d1,USER_FPSR(%a6) # save INEX2,N
  10051. # if underflow or inexact is enabled, go calculate EXOP first.
  10052. mov.b FPCR_ENABLE(%a6),%d1
  10053. andi.b &0x0b,%d1 # is UNFL or INEX enabled?
  10054. bne.b fsqrt_sd_unfl_ena # yes
  10055. fsqrt_sd_unfl_dis:
  10056. fmovm.x &0x80,FP_SCR0(%a6) # store out result
  10057. lea FP_SCR0(%a6),%a0 # pass: result addr
  10058. mov.l L_SCR3(%a6),%d1 # pass: rnd prec,mode
  10059. bsr.l unf_res # calculate default result
  10060. or.b %d0,FPSR_CC(%a6) # set possible 'Z' ccode
  10061. fmovm.x FP_SCR0(%a6),&0x80 # return default result in fp0
  10062. rts
  10063. #
  10064. # operand will underflow AND underflow is enabled.
  10065. # therefore, we must return the result rounded to extended precision.
  10066. #
  10067. fsqrt_sd_unfl_ena:
  10068. mov.l FP_SCR0_HI(%a6),FP_SCR1_HI(%a6)
  10069. mov.l FP_SCR0_LO(%a6),FP_SCR1_LO(%a6)
  10070. mov.w FP_SCR0_EX(%a6),%d1 # load current exponent
  10071. mov.l %d2,-(%sp) # save d2
  10072. mov.l %d1,%d2 # make a copy
  10073. andi.l &0x7fff,%d1 # strip sign
  10074. andi.w &0x8000,%d2 # keep old sign
  10075. sub.l %d0,%d1 # subtract scale factor
  10076. addi.l &0x6000,%d1 # add new bias
  10077. andi.w &0x7fff,%d1
  10078. or.w %d2,%d1 # concat new sign,new exp
  10079. mov.w %d1,FP_SCR1_EX(%a6) # insert new exp
  10080. fmovm.x FP_SCR1(%a6),&0x40 # return EXOP in fp1
  10081. mov.l (%sp)+,%d2 # restore d2
  10082. bra.b fsqrt_sd_unfl_dis
  10083. #
  10084. # operand WILL overflow.
  10085. #
  10086. fsqrt_sd_ovfl:
  10087. fmov.l &0x0,%fpsr # clear FPSR
  10088. fmov.l L_SCR3(%a6),%fpcr # set FPCR
  10089. fsqrt.x FP_SCR0(%a6),%fp0 # perform square root
  10090. fmov.l &0x0,%fpcr # clear FPCR
  10091. fmov.l %fpsr,%d1 # save FPSR
  10092. or.l %d1,USER_FPSR(%a6) # save INEX2,N
  10093. fsqrt_sd_ovfl_tst:
  10094. or.l &ovfl_inx_mask,USER_FPSR(%a6) # set ovfl/aovfl/ainex
  10095. mov.b FPCR_ENABLE(%a6),%d1
  10096. andi.b &0x13,%d1 # is OVFL or INEX enabled?
  10097. bne.b fsqrt_sd_ovfl_ena # yes
  10098. #
  10099. # OVFL is not enabled; therefore, we must create the default result by
  10100. # calling ovf_res().
  10101. #
  10102. fsqrt_sd_ovfl_dis:
  10103. btst &neg_bit,FPSR_CC(%a6) # is result negative?
  10104. sne %d1 # set sign param accordingly
  10105. mov.l L_SCR3(%a6),%d0 # pass: prec,mode
  10106. bsr.l ovf_res # calculate default result
  10107. or.b %d0,FPSR_CC(%a6) # set INF,N if applicable
  10108. fmovm.x (%a0),&0x80 # return default result in fp0
  10109. rts
  10110. #
  10111. # OVFL is enabled.
  10112. # the INEX2 bit has already been updated by the round to the correct precision.
  10113. # now, round to extended(and don't alter the FPSR).
  10114. #
  10115. fsqrt_sd_ovfl_ena:
  10116. mov.l %d2,-(%sp) # save d2
  10117. mov.w FP_SCR0_EX(%a6),%d1 # fetch {sgn,exp}
  10118. mov.l %d1,%d2 # make a copy
  10119. andi.l &0x7fff,%d1 # strip sign
  10120. andi.w &0x8000,%d2 # keep old sign
  10121. sub.l %d0,%d1 # add scale factor
  10122. subi.l &0x6000,%d1 # subtract bias
  10123. andi.w &0x7fff,%d1
  10124. or.w %d2,%d1 # concat sign,exp
  10125. mov.w %d1,FP_SCR0_EX(%a6) # insert new exponent
  10126. fmovm.x FP_SCR0(%a6),&0x40 # return EXOP in fp1
  10127. mov.l (%sp)+,%d2 # restore d2
  10128. bra.b fsqrt_sd_ovfl_dis
  10129. #
  10130. # the move in MAY underflow. so...
  10131. #
  10132. fsqrt_sd_may_ovfl:
  10133. btst &0x0,1+FP_SCR0_EX(%a6) # is exponent 0x3fff?
  10134. bne.w fsqrt_sd_ovfl # yes, so overflow
  10135. fmov.l &0x0,%fpsr # clear FPSR
  10136. fmov.l L_SCR3(%a6),%fpcr # set FPCR
  10137. fsqrt.x FP_SCR0(%a6),%fp0 # perform absolute
  10138. fmov.l %fpsr,%d1 # save status
  10139. fmov.l &0x0,%fpcr # clear FPCR
  10140. or.l %d1,USER_FPSR(%a6) # save INEX2,N
  10141. fmov.x %fp0,%fp1 # make a copy of result
  10142. fcmp.b %fp1,&0x1 # is |result| >= 1.b?
  10143. fbge.w fsqrt_sd_ovfl_tst # yes; overflow has occurred
  10144. # no, it didn't overflow; we have correct result
  10145. bra.w fsqrt_sd_normal_exit
  10146. ##########################################################################
  10147. #
  10148. # input is not normalized; what is it?
  10149. #
  10150. fsqrt_not_norm:
  10151. cmpi.b %d1,&DENORM # weed out DENORM
  10152. beq.w fsqrt_denorm
  10153. cmpi.b %d1,&ZERO # weed out ZERO
  10154. beq.b fsqrt_zero
  10155. cmpi.b %d1,&INF # weed out INF
  10156. beq.b fsqrt_inf
  10157. cmpi.b %d1,&SNAN # weed out SNAN
  10158. beq.l res_snan_1op
  10159. bra.l res_qnan_1op
  10160. #
  10161. # fsqrt(+0) = +0
  10162. # fsqrt(-0) = -0
  10163. # fsqrt(+INF) = +INF
  10164. # fsqrt(-INF) = OPERR
  10165. #
  10166. fsqrt_zero:
  10167. tst.b SRC_EX(%a0) # is ZERO positive or negative?
  10168. bmi.b fsqrt_zero_m # negative
  10169. fsqrt_zero_p:
  10170. fmov.s &0x00000000,%fp0 # return +ZERO
  10171. mov.b &z_bmask,FPSR_CC(%a6) # set 'Z' ccode bit
  10172. rts
  10173. fsqrt_zero_m:
  10174. fmov.s &0x80000000,%fp0 # return -ZERO
  10175. mov.b &z_bmask+neg_bmask,FPSR_CC(%a6) # set 'Z','N' ccode bits
  10176. rts
  10177. fsqrt_inf:
  10178. tst.b SRC_EX(%a0) # is INF positive or negative?
  10179. bmi.l res_operr # negative
  10180. fsqrt_inf_p:
  10181. fmovm.x SRC(%a0),&0x80 # return +INF in fp0
  10182. mov.b &inf_bmask,FPSR_CC(%a6) # set 'I' ccode bit
  10183. rts
  10184. #########################################################################
  10185. # XDEF **************************************************************** #
  10186. # fetch_dreg(): fetch register according to index in d1 #
  10187. # #
  10188. # XREF **************************************************************** #
  10189. # None #
  10190. # #
  10191. # INPUT *************************************************************** #
  10192. # d1 = index of register to fetch from #
  10193. # #
  10194. # OUTPUT ************************************************************** #
  10195. # d0 = value of register fetched #
  10196. # #
  10197. # ALGORITHM *********************************************************** #
  10198. # According to the index value in d1 which can range from zero #
  10199. # to fifteen, load the corresponding register file value (where #
  10200. # address register indexes start at 8). D0/D1/A0/A1/A6/A7 are on the #
  10201. # stack. The rest should still be in their original places. #
  10202. # #
  10203. #########################################################################
  10204. # this routine leaves d1 intact for subsequent store_dreg calls.
  10205. global fetch_dreg
  10206. fetch_dreg:
  10207. mov.w (tbl_fdreg.b,%pc,%d1.w*2),%d0
  10208. jmp (tbl_fdreg.b,%pc,%d0.w*1)
  10209. tbl_fdreg:
  10210. short fdreg0 - tbl_fdreg
  10211. short fdreg1 - tbl_fdreg
  10212. short fdreg2 - tbl_fdreg
  10213. short fdreg3 - tbl_fdreg
  10214. short fdreg4 - tbl_fdreg
  10215. short fdreg5 - tbl_fdreg
  10216. short fdreg6 - tbl_fdreg
  10217. short fdreg7 - tbl_fdreg
  10218. short fdreg8 - tbl_fdreg
  10219. short fdreg9 - tbl_fdreg
  10220. short fdrega - tbl_fdreg
  10221. short fdregb - tbl_fdreg
  10222. short fdregc - tbl_fdreg
  10223. short fdregd - tbl_fdreg
  10224. short fdrege - tbl_fdreg
  10225. short fdregf - tbl_fdreg
  10226. fdreg0:
  10227. mov.l EXC_DREGS+0x0(%a6),%d0
  10228. rts
  10229. fdreg1:
  10230. mov.l EXC_DREGS+0x4(%a6),%d0
  10231. rts
  10232. fdreg2:
  10233. mov.l %d2,%d0
  10234. rts
  10235. fdreg3:
  10236. mov.l %d3,%d0
  10237. rts
  10238. fdreg4:
  10239. mov.l %d4,%d0
  10240. rts
  10241. fdreg5:
  10242. mov.l %d5,%d0
  10243. rts
  10244. fdreg6:
  10245. mov.l %d6,%d0
  10246. rts
  10247. fdreg7:
  10248. mov.l %d7,%d0
  10249. rts
  10250. fdreg8:
  10251. mov.l EXC_DREGS+0x8(%a6),%d0
  10252. rts
  10253. fdreg9:
  10254. mov.l EXC_DREGS+0xc(%a6),%d0
  10255. rts
  10256. fdrega:
  10257. mov.l %a2,%d0
  10258. rts
  10259. fdregb:
  10260. mov.l %a3,%d0
  10261. rts
  10262. fdregc:
  10263. mov.l %a4,%d0
  10264. rts
  10265. fdregd:
  10266. mov.l %a5,%d0
  10267. rts
  10268. fdrege:
  10269. mov.l (%a6),%d0
  10270. rts
  10271. fdregf:
  10272. mov.l EXC_A7(%a6),%d0
  10273. rts
  10274. #########################################################################
  10275. # XDEF **************************************************************** #
  10276. # store_dreg_l(): store longword to data register specified by d1 #
  10277. # #
  10278. # XREF **************************************************************** #
  10279. # None #
  10280. # #
  10281. # INPUT *************************************************************** #
  10282. # d0 = longowrd value to store #
  10283. # d1 = index of register to fetch from #
  10284. # #
  10285. # OUTPUT ************************************************************** #
  10286. # (data register is updated) #
  10287. # #
  10288. # ALGORITHM *********************************************************** #
  10289. # According to the index value in d1, store the longword value #
  10290. # in d0 to the corresponding data register. D0/D1 are on the stack #
  10291. # while the rest are in their initial places. #
  10292. # #
  10293. #########################################################################
  10294. global store_dreg_l
  10295. store_dreg_l:
  10296. mov.w (tbl_sdregl.b,%pc,%d1.w*2),%d1
  10297. jmp (tbl_sdregl.b,%pc,%d1.w*1)
  10298. tbl_sdregl:
  10299. short sdregl0 - tbl_sdregl
  10300. short sdregl1 - tbl_sdregl
  10301. short sdregl2 - tbl_sdregl
  10302. short sdregl3 - tbl_sdregl
  10303. short sdregl4 - tbl_sdregl
  10304. short sdregl5 - tbl_sdregl
  10305. short sdregl6 - tbl_sdregl
  10306. short sdregl7 - tbl_sdregl
  10307. sdregl0:
  10308. mov.l %d0,EXC_DREGS+0x0(%a6)
  10309. rts
  10310. sdregl1:
  10311. mov.l %d0,EXC_DREGS+0x4(%a6)
  10312. rts
  10313. sdregl2:
  10314. mov.l %d0,%d2
  10315. rts
  10316. sdregl3:
  10317. mov.l %d0,%d3
  10318. rts
  10319. sdregl4:
  10320. mov.l %d0,%d4
  10321. rts
  10322. sdregl5:
  10323. mov.l %d0,%d5
  10324. rts
  10325. sdregl6:
  10326. mov.l %d0,%d6
  10327. rts
  10328. sdregl7:
  10329. mov.l %d0,%d7
  10330. rts
  10331. #########################################################################
  10332. # XDEF **************************************************************** #
  10333. # store_dreg_w(): store word to data register specified by d1 #
  10334. # #
  10335. # XREF **************************************************************** #
  10336. # None #
  10337. # #
  10338. # INPUT *************************************************************** #
  10339. # d0 = word value to store #
  10340. # d1 = index of register to fetch from #
  10341. # #
  10342. # OUTPUT ************************************************************** #
  10343. # (data register is updated) #
  10344. # #
  10345. # ALGORITHM *********************************************************** #
  10346. # According to the index value in d1, store the word value #
  10347. # in d0 to the corresponding data register. D0/D1 are on the stack #
  10348. # while the rest are in their initial places. #
  10349. # #
  10350. #########################################################################
	global		store_dreg_w
store_dreg_w:
	mov.w		(tbl_sdregw.b,%pc,%d1.w*2),%d1	# fetch word offset for register # in d1
	jmp		(tbl_sdregw.b,%pc,%d1.w*1)	# dispatch to per-register store routine
# Jump table: word displacements from tbl_sdregw to each handler.
tbl_sdregw:
	short		sdregw0 - tbl_sdregw
	short		sdregw1 - tbl_sdregw
	short		sdregw2 - tbl_sdregw
	short		sdregw3 - tbl_sdregw
	short		sdregw4 - tbl_sdregw
	short		sdregw5 - tbl_sdregw
	short		sdregw6 - tbl_sdregw
	short		sdregw7 - tbl_sdregw
# d0/d1 live in the exception frame; a word store must land in the
# LOW word of the 4-byte save slot, hence the extra +2 displacement.
sdregw0:
	mov.w		%d0,2+EXC_DREGS+0x0(%a6)	# low word of saved d0
	rts
sdregw1:
	mov.w		%d0,2+EXC_DREGS+0x4(%a6)	# low word of saved d1
	rts
sdregw2:
	mov.w		%d0,%d2
	rts
sdregw3:
	mov.w		%d0,%d3
	rts
sdregw4:
	mov.w		%d0,%d4
	rts
sdregw5:
	mov.w		%d0,%d5
	rts
sdregw6:
	mov.w		%d0,%d6
	rts
sdregw7:
	mov.w		%d0,%d7
	rts
  10388. #########################################################################
  10389. # XDEF **************************************************************** #
  10390. # store_dreg_b(): store byte to data register specified by d1 #
  10391. # #
  10392. # XREF **************************************************************** #
  10393. # None #
  10394. # #
  10395. # INPUT *************************************************************** #
  10396. # d0 = byte value to store #
# d1 = index of register to store to #
  10398. # #
  10399. # OUTPUT ************************************************************** #
  10400. # (data register is updated) #
  10401. # #
  10402. # ALGORITHM *********************************************************** #
  10403. # According to the index value in d1, store the byte value #
  10404. # in d0 to the corresponding data register. D0/D1 are on the stack #
  10405. # while the rest are in their initial places. #
  10406. # #
  10407. #########################################################################
	global		store_dreg_b
store_dreg_b:
	mov.w		(tbl_sdregb.b,%pc,%d1.w*2),%d1	# fetch word offset for register # in d1
	jmp		(tbl_sdregb.b,%pc,%d1.w*1)	# dispatch to per-register store routine
# Jump table: word displacements from tbl_sdregb to each handler.
tbl_sdregb:
	short		sdregb0 - tbl_sdregb
	short		sdregb1 - tbl_sdregb
	short		sdregb2 - tbl_sdregb
	short		sdregb3 - tbl_sdregb
	short		sdregb4 - tbl_sdregb
	short		sdregb5 - tbl_sdregb
	short		sdregb6 - tbl_sdregb
	short		sdregb7 - tbl_sdregb
# d0/d1 live in the exception frame; a byte store must land in the
# LOWEST byte of the 4-byte save slot, hence the extra +3 displacement.
sdregb0:
	mov.b		%d0,3+EXC_DREGS+0x0(%a6)	# low byte of saved d0
	rts
sdregb1:
	mov.b		%d0,3+EXC_DREGS+0x4(%a6)	# low byte of saved d1
	rts
sdregb2:
	mov.b		%d0,%d2
	rts
sdregb3:
	mov.b		%d0,%d3
	rts
sdregb4:
	mov.b		%d0,%d4
	rts
sdregb5:
	mov.b		%d0,%d5
	rts
sdregb6:
	mov.b		%d0,%d6
	rts
sdregb7:
	mov.b		%d0,%d7
	rts
  10445. #########################################################################
  10446. # XDEF **************************************************************** #
  10447. # inc_areg(): increment an address register by the value in d0 #
  10448. # #
  10449. # XREF **************************************************************** #
  10450. # None #
  10451. # #
  10452. # INPUT *************************************************************** #
  10453. # d0 = amount to increment by #
  10454. # d1 = index of address register to increment #
  10455. # #
  10456. # OUTPUT ************************************************************** #
  10457. # (address register is updated) #
  10458. # #
  10459. # ALGORITHM *********************************************************** #
  10460. # Typically used for an instruction w/ a post-increment <ea>, #
  10461. # this routine adds the increment value in d0 to the address register #
  10462. # specified by d1. A0/A1/A6/A7 reside on the stack. The rest reside #
  10463. # in their original places. #
  10464. # For a7, if the increment amount is one, then we have to #
  10465. # increment by two. For any a7 update, set the mia7_flag so that if #
  10466. # an access error exception occurs later in emulation, this address #
  10467. # register update can be undone. #
  10468. # #
  10469. #########################################################################
	global		inc_areg
inc_areg:
	mov.w		(tbl_iareg.b,%pc,%d1.w*2),%d1	# fetch word offset for register # in d1
	jmp		(tbl_iareg.b,%pc,%d1.w*1)	# dispatch to per-register routine
# Jump table: word displacements from tbl_iareg to each handler.
tbl_iareg:
	short		iareg0 - tbl_iareg
	short		iareg1 - tbl_iareg
	short		iareg2 - tbl_iareg
	short		iareg3 - tbl_iareg
	short		iareg4 - tbl_iareg
	short		iareg5 - tbl_iareg
	short		iareg6 - tbl_iareg
	short		iareg7 - tbl_iareg
# a0/a1 saved copies sit in the frame after the d-registers (+0x8/+0xc);
# a2-a5 are still live; a6/a7 have frame copies (a6 at 0(%a6) from link).
iareg0:	add.l		%d0,EXC_DREGS+0x8(%a6)	# frame copy of a0
	rts
iareg1:	add.l		%d0,EXC_DREGS+0xc(%a6)	# frame copy of a1
	rts
iareg2:	add.l		%d0,%a2
	rts
iareg3:	add.l		%d0,%a3
	rts
iareg4:	add.l		%d0,%a4
	rts
iareg5:	add.l		%d0,%a5
	rts
iareg6:	add.l		%d0,(%a6)		# saved a6 at base of frame
	rts
# a7: record the update so a later access error can undo it; a byte
# increment of the stack pointer is bumped to two to keep a7 word-aligned.
iareg7:	mov.b		&mia7_flg,SPCOND_FLG(%a6)	# flag "(a7)+ modified"
	cmpi.b		%d0,&0x1		# incrementing by one byte?
	beq.b		iareg7b			# yes: use two instead
	add.l		%d0,EXC_A7(%a6)		# normal increment
	rts
iareg7b:
	addq.l		&0x2,EXC_A7(%a6)	# byte op on a7 bumps by two
	rts
  10505. #########################################################################
  10506. # XDEF **************************************************************** #
  10507. # dec_areg(): decrement an address register by the value in d0 #
  10508. # #
  10509. # XREF **************************************************************** #
  10510. # None #
  10511. # #
  10512. # INPUT *************************************************************** #
  10513. # d0 = amount to decrement by #
  10514. # d1 = index of address register to decrement #
  10515. # #
  10516. # OUTPUT ************************************************************** #
  10517. # (address register is updated) #
  10518. # #
  10519. # ALGORITHM *********************************************************** #
  10520. # Typically used for an instruction w/ a pre-decrement <ea>, #
# this routine subtracts the decrement value in d0 from the address register #
  10522. # specified by d1. A0/A1/A6/A7 reside on the stack. The rest reside #
  10523. # in their original places. #
  10524. # For a7, if the decrement amount is one, then we have to #
  10525. # decrement by two. For any a7 update, set the mda7_flag so that if #
  10526. # an access error exception occurs later in emulation, this address #
  10527. # register update can be undone. #
  10528. # #
  10529. #########################################################################
	global		dec_areg
dec_areg:
	mov.w		(tbl_dareg.b,%pc,%d1.w*2),%d1	# fetch word offset for register # in d1
	jmp		(tbl_dareg.b,%pc,%d1.w*1)	# dispatch to per-register routine
# Jump table: word displacements from tbl_dareg to each handler.
tbl_dareg:
	short		dareg0 - tbl_dareg
	short		dareg1 - tbl_dareg
	short		dareg2 - tbl_dareg
	short		dareg3 - tbl_dareg
	short		dareg4 - tbl_dareg
	short		dareg5 - tbl_dareg
	short		dareg6 - tbl_dareg
	short		dareg7 - tbl_dareg
# a0/a1 saved copies sit in the frame after the d-registers (+0x8/+0xc);
# a2-a5 are still live; a6/a7 have frame copies (a6 at 0(%a6) from link).
dareg0:	sub.l		%d0,EXC_DREGS+0x8(%a6)	# frame copy of a0
	rts
dareg1:	sub.l		%d0,EXC_DREGS+0xc(%a6)	# frame copy of a1
	rts
dareg2:	sub.l		%d0,%a2
	rts
dareg3:	sub.l		%d0,%a3
	rts
dareg4:	sub.l		%d0,%a4
	rts
dareg5:	sub.l		%d0,%a5
	rts
dareg6:	sub.l		%d0,(%a6)		# saved a6 at base of frame
	rts
# a7: record the update so a later access error can undo it; a byte
# decrement of the stack pointer is bumped to two to keep a7 word-aligned.
dareg7:	mov.b		&mda7_flg,SPCOND_FLG(%a6)	# flag "-(a7) modified"
	cmpi.b		%d0,&0x1		# decrementing by one byte?
	beq.b		dareg7b			# yes: use two instead
	sub.l		%d0,EXC_A7(%a6)		# normal decrement
	rts
dareg7b:
	subq.l		&0x2,EXC_A7(%a6)	# byte op on a7 bumps by two
	rts
  10565. ##############################################################################
  10566. #########################################################################
  10567. # XDEF **************************************************************** #
  10568. # load_fpn1(): load FP register value into FP_SRC(a6). #
  10569. # #
  10570. # XREF **************************************************************** #
  10571. # None #
  10572. # #
  10573. # INPUT *************************************************************** #
  10574. # d0 = index of FP register to load #
  10575. # #
  10576. # OUTPUT ************************************************************** #
  10577. # FP_SRC(a6) = value loaded from FP register file #
  10578. # #
  10579. # ALGORITHM *********************************************************** #
  10580. # Using the index in d0, load FP_SRC(a6) with a number from the #
  10581. # FP register file. #
  10582. # #
  10583. #########################################################################
	global		load_fpn1
load_fpn1:
	mov.w		(tbl_load_fpn1.b,%pc,%d0.w*2), %d0	# fetch word offset for fp reg # in d0
	jmp		(tbl_load_fpn1.b,%pc,%d0.w*1)	# dispatch to per-register routine
# Jump table: word displacements from tbl_load_fpn1 to each handler.
tbl_load_fpn1:
	short		load_fpn1_0 - tbl_load_fpn1
	short		load_fpn1_1 - tbl_load_fpn1
	short		load_fpn1_2 - tbl_load_fpn1
	short		load_fpn1_3 - tbl_load_fpn1
	short		load_fpn1_4 - tbl_load_fpn1
	short		load_fpn1_5 - tbl_load_fpn1
	short		load_fpn1_6 - tbl_load_fpn1
	short		load_fpn1_7 - tbl_load_fpn1
# fp0/fp1 were saved to the frame on entry, so copy their 12-byte
# extended images from EXC_FP0/EXC_FP1; fp2-fp7 are dumped via fmovm
# (static mask: fp2=0x20 ... fp7=0x01).  a0 returns ptr to FP_SRC.
load_fpn1_0:
	mov.l		0+EXC_FP0(%a6), 0+FP_SRC(%a6)
	mov.l		4+EXC_FP0(%a6), 4+FP_SRC(%a6)
	mov.l		8+EXC_FP0(%a6), 8+FP_SRC(%a6)
	lea		FP_SRC(%a6), %a0
	rts
load_fpn1_1:
	mov.l		0+EXC_FP1(%a6), 0+FP_SRC(%a6)
	mov.l		4+EXC_FP1(%a6), 4+FP_SRC(%a6)
	mov.l		8+EXC_FP1(%a6), 8+FP_SRC(%a6)
	lea		FP_SRC(%a6), %a0
	rts
load_fpn1_2:
	fmovm.x		&0x20, FP_SRC(%a6)	# dump fp2
	lea		FP_SRC(%a6), %a0
	rts
load_fpn1_3:
	fmovm.x		&0x10, FP_SRC(%a6)	# dump fp3
	lea		FP_SRC(%a6), %a0
	rts
load_fpn1_4:
	fmovm.x		&0x08, FP_SRC(%a6)	# dump fp4
	lea		FP_SRC(%a6), %a0
	rts
load_fpn1_5:
	fmovm.x		&0x04, FP_SRC(%a6)	# dump fp5
	lea		FP_SRC(%a6), %a0
	rts
load_fpn1_6:
	fmovm.x		&0x02, FP_SRC(%a6)	# dump fp6
	lea		FP_SRC(%a6), %a0
	rts
load_fpn1_7:
	fmovm.x		&0x01, FP_SRC(%a6)	# dump fp7
	lea		FP_SRC(%a6), %a0
	rts
  10633. #############################################################################
  10634. #########################################################################
  10635. # XDEF **************************************************************** #
  10636. # load_fpn2(): load FP register value into FP_DST(a6). #
  10637. # #
  10638. # XREF **************************************************************** #
  10639. # None #
  10640. # #
  10641. # INPUT *************************************************************** #
  10642. # d0 = index of FP register to load #
  10643. # #
  10644. # OUTPUT ************************************************************** #
  10645. # FP_DST(a6) = value loaded from FP register file #
  10646. # #
  10647. # ALGORITHM *********************************************************** #
  10648. # Using the index in d0, load FP_DST(a6) with a number from the #
  10649. # FP register file. #
  10650. # #
  10651. #########################################################################
	global		load_fpn2
load_fpn2:
	mov.w		(tbl_load_fpn2.b,%pc,%d0.w*2), %d0	# fetch word offset for fp reg # in d0
	jmp		(tbl_load_fpn2.b,%pc,%d0.w*1)	# dispatch to per-register routine
# Jump table: word displacements from tbl_load_fpn2 to each handler.
tbl_load_fpn2:
	short		load_fpn2_0 - tbl_load_fpn2
	short		load_fpn2_1 - tbl_load_fpn2
	short		load_fpn2_2 - tbl_load_fpn2
	short		load_fpn2_3 - tbl_load_fpn2
	short		load_fpn2_4 - tbl_load_fpn2
	short		load_fpn2_5 - tbl_load_fpn2
	short		load_fpn2_6 - tbl_load_fpn2
	short		load_fpn2_7 - tbl_load_fpn2
# Same scheme as load_fpn1 but the destination is FP_DST: fp0/fp1 come
# from their frame images, fp2-fp7 via fmovm (static mask fp2=0x20 ...
# fp7=0x01).  a0 returns ptr to FP_DST.
load_fpn2_0:
	mov.l		0+EXC_FP0(%a6), 0+FP_DST(%a6)
	mov.l		4+EXC_FP0(%a6), 4+FP_DST(%a6)
	mov.l		8+EXC_FP0(%a6), 8+FP_DST(%a6)
	lea		FP_DST(%a6), %a0
	rts
load_fpn2_1:
	mov.l		0+EXC_FP1(%a6), 0+FP_DST(%a6)
	mov.l		4+EXC_FP1(%a6), 4+FP_DST(%a6)
	mov.l		8+EXC_FP1(%a6), 8+FP_DST(%a6)
	lea		FP_DST(%a6), %a0
	rts
load_fpn2_2:
	fmovm.x		&0x20, FP_DST(%a6)	# dump fp2
	lea		FP_DST(%a6), %a0
	rts
load_fpn2_3:
	fmovm.x		&0x10, FP_DST(%a6)	# dump fp3
	lea		FP_DST(%a6), %a0
	rts
load_fpn2_4:
	fmovm.x		&0x08, FP_DST(%a6)	# dump fp4
	lea		FP_DST(%a6), %a0
	rts
load_fpn2_5:
	fmovm.x		&0x04, FP_DST(%a6)	# dump fp5
	lea		FP_DST(%a6), %a0
	rts
load_fpn2_6:
	fmovm.x		&0x02, FP_DST(%a6)	# dump fp6
	lea		FP_DST(%a6), %a0
	rts
load_fpn2_7:
	fmovm.x		&0x01, FP_DST(%a6)	# dump fp7
	lea		FP_DST(%a6), %a0
	rts
  10701. #############################################################################
  10702. #########################################################################
  10703. # XDEF **************************************************************** #
  10704. # store_fpreg(): store an fp value to the fpreg designated d0. #
  10705. # #
  10706. # XREF **************************************************************** #
  10707. # None #
  10708. # #
  10709. # INPUT *************************************************************** #
  10710. # fp0 = extended precision value to store #
  10711. # d0 = index of floating-point register #
  10712. # #
  10713. # OUTPUT ************************************************************** #
  10714. # None #
  10715. # #
  10716. # ALGORITHM *********************************************************** #
  10717. # Store the value in fp0 to the FP register designated by the #
  10718. # value in d0. The FP number can be DENORM or SNAN so we have to be #
  10719. # careful that we don't take an exception here. #
  10720. # #
  10721. #########################################################################
	global		store_fpreg
store_fpreg:
	mov.w		(tbl_store_fpreg.b,%pc,%d0.w*2), %d0	# fetch word offset for fp reg # in d0
	jmp		(tbl_store_fpreg.b,%pc,%d0.w*1)	# dispatch to per-register routine
# Jump table: word displacements from tbl_store_fpreg to each handler.
tbl_store_fpreg:
	short		store_fpreg_0 - tbl_store_fpreg
	short		store_fpreg_1 - tbl_store_fpreg
	short		store_fpreg_2 - tbl_store_fpreg
	short		store_fpreg_3 - tbl_store_fpreg
	short		store_fpreg_4 - tbl_store_fpreg
	short		store_fpreg_5 - tbl_store_fpreg
	short		store_fpreg_6 - tbl_store_fpreg
	short		store_fpreg_7 - tbl_store_fpreg
# fmovm is used throughout since it moves DENORM/SNAN bit patterns
# without arithmetic exceptions.  fp0/fp1 go to their frame images;
# for fp2-fp7, bounce fp0 through the stack into the target register
# (fmovm mem-to-reg mask: fp2=0x20 ... fp7=0x01).
store_fpreg_0:
	fmovm.x		&0x80, EXC_FP0(%a6)	# fp0 -> frame copy of fp0
	rts
store_fpreg_1:
	fmovm.x		&0x80, EXC_FP1(%a6)	# fp0 -> frame copy of fp1
	rts
store_fpreg_2:
	fmovm.x		&0x01, -(%sp)		# push fp0 image
	fmovm.x		(%sp)+, &0x20		# pop into fp2
	rts
store_fpreg_3:
	fmovm.x		&0x01, -(%sp)		# push fp0 image
	fmovm.x		(%sp)+, &0x10		# pop into fp3
	rts
store_fpreg_4:
	fmovm.x		&0x01, -(%sp)		# push fp0 image
	fmovm.x		(%sp)+, &0x08		# pop into fp4
	rts
store_fpreg_5:
	fmovm.x		&0x01, -(%sp)		# push fp0 image
	fmovm.x		(%sp)+, &0x04		# pop into fp5
	rts
store_fpreg_6:
	fmovm.x		&0x01, -(%sp)		# push fp0 image
	fmovm.x		(%sp)+, &0x02		# pop into fp6
	rts
store_fpreg_7:
	fmovm.x		&0x01, -(%sp)		# push fp0 image
	fmovm.x		(%sp)+, &0x01		# pop into fp7
	rts
  10765. #########################################################################
  10766. # XDEF **************************************************************** #
  10767. # get_packed(): fetch a packed operand from memory and then #
  10768. # convert it to a floating-point binary number. #
  10769. # #
  10770. # XREF **************************************************************** #
  10771. # _dcalc_ea() - calculate the correct <ea> #
  10772. # _mem_read() - fetch the packed operand from memory #
  10773. # facc_in_x() - the fetch failed so jump to special exit code #
  10774. # decbin() - convert packed to binary extended precision #
  10775. # #
  10776. # INPUT *************************************************************** #
  10777. # None #
  10778. # #
  10779. # OUTPUT ************************************************************** #
  10780. # If no failure on _mem_read(): #
  10781. # FP_SRC(a6) = packed operand now as a binary FP number #
  10782. # #
  10783. # ALGORITHM *********************************************************** #
# Get the correct <ea> which is the value on the exception stack #
  10785. # frame w/ maybe a correction factor if the <ea> is -(an) or (an)+. #
  10786. # Then, fetch the operand from memory. If the fetch fails, exit #
  10787. # through facc_in_x(). #
  10788. # If the packed operand is a ZERO,NAN, or INF, convert it to #
  10789. # its binary representation here. Else, call decbin() which will #
  10790. # convert the packed value to an extended precision binary value. #
  10791. # #
  10792. #########################################################################
  10793. # the stacked <ea> for packed is correct except for -(An).
  10794. # the base reg must be updated for both -(An) and (An)+.
	global		get_packed
get_packed:
	mov.l		&0xc,%d0		# packed is 12 bytes
	bsr.l		_dcalc_ea		# fetch <ea>; correct An
	lea		FP_SRC(%a6),%a1		# pass: ptr to super dst
	mov.l		&0xc,%d0		# pass: 12 bytes
	bsr.l		_dmem_read		# read packed operand
	tst.l		%d1			# did dfetch fail?
	bne.l		facc_in_x		# yes; exit through access-error path
# The packed operand is an INF or a NAN if the exponent field is all ones.
	bfextu		FP_SRC(%a6){&1:&15},%d0	# get exp
	cmpi.w		%d0,&0x7fff		# INF or NAN?
	bne.b		gp_try_zero		# no
	rts					# operand is an INF or NAN;
						# packed bits are usable as-is
# The packed operand is a zero if the mantissa is all zero, else it's
# a normal packed op.
gp_try_zero:
	mov.b		3+FP_SRC(%a6),%d0	# get byte 4 (integer-digit nybble)
	andi.b		&0x0f,%d0		# clear all but last nybble
	bne.b		gp_not_spec		# not a zero
	tst.l		FP_SRC_HI(%a6)		# is lw 2 zero?
	bne.b		gp_not_spec		# not a zero
	tst.l		FP_SRC_LO(%a6)		# is lw 3 zero?
	bne.b		gp_not_spec		# not a zero
	rts					# operand is a ZERO
# Normal packed operand: convert to extended precision in fp0 and
# store it back as the source operand.
gp_not_spec:
	lea		FP_SRC(%a6),%a0		# pass: ptr to packed op
	bsr.l		decbin			# convert to extended
	fmovm.x		&0x80,FP_SRC(%a6)	# make this the srcop
	rts
  10825. #########################################################################
  10826. # decbin(): Converts normalized packed bcd value pointed to by register #
  10827. # a0 to extended-precision value in fp0. #
  10828. # #
  10829. # INPUT *************************************************************** #
  10830. # a0 = pointer to normalized packed bcd value #
  10831. # #
  10832. # OUTPUT ************************************************************** #
  10833. # fp0 = exact fp representation of the packed bcd value. #
  10834. # #
  10835. # ALGORITHM *********************************************************** #
  10836. # Expected is a normal bcd (i.e. non-exceptional; all inf, zero, #
  10837. # and NaN operands are dispatched without entering this routine) #
  10838. # value in 68881/882 format at location (a0). #
  10839. # #
  10840. # A1. Convert the bcd exponent to binary by successive adds and #
  10841. # muls. Set the sign according to SE. Subtract 16 to compensate #
  10842. # for the mantissa which is to be interpreted as 17 integer #
  10843. # digits, rather than 1 integer and 16 fraction digits. #
  10844. # Note: this operation can never overflow. #
  10845. # #
  10846. # A2. Convert the bcd mantissa to binary by successive #
  10847. # adds and muls in FP0. Set the sign according to SM. #
  10848. # The mantissa digits will be converted with the decimal point #
  10849. # assumed following the least-significant digit. #
  10850. # Note: this operation can never overflow. #
  10851. # #
  10852. # A3. Count the number of leading/trailing zeros in the #
  10853. # bcd string. If SE is positive, count the leading zeros; #
  10854. # if negative, count the trailing zeros. Set the adjusted #
  10855. # exponent equal to the exponent from A1 and the zero count #
  10856. # added if SM = 1 and subtracted if SM = 0. Scale the #
  10857. # mantissa the equivalent of forcing in the bcd value: #
  10858. # #
  10859. # SM = 0 a non-zero digit in the integer position #
  10860. # SM = 1 a non-zero digit in Mant0, lsd of the fraction #
  10861. # #
  10862. # this will insure that any value, regardless of its #
  10863. # representation (ex. 0.1E2, 1E1, 10E0, 100E-1), is converted #
  10864. # consistently. #
  10865. # #
  10866. # A4. Calculate the factor 10^exp in FP1 using a table of #
  10867. # 10^(2^n) values. To reduce the error in forming factors #
  10868. # greater than 10^27, a directed rounding scheme is used with #
  10869. # tables rounded to RN, RM, and RP, according to the table #
  10870. # in the comments of the pwrten section. #
  10871. # #
  10872. # A5. Form the final binary number by scaling the mantissa by #
  10873. # the exponent factor. This is done by multiplying the #
  10874. # mantissa in FP0 by the factor in FP1 if the adjusted #
  10875. # exponent sign is positive, and dividing FP0 by FP1 if #
  10876. # it is negative. #
  10877. # #
  10878. # Clean up and return. Check if the final mul or div was inexact. #
  10879. # If so, set INEX1 in USER_FPSR. #
  10880. # #
  10881. #########################################################################
  10882. #
  10883. # PTENRN, PTENRM, and PTENRP are arrays of powers of 10 rounded
  10884. # to nearest, minus, and plus, respectively. The tables include
  10885. # 10**{1,2,4,8,16,32,64,128,256,512,1024,2048,4096}. No rounding
  10886. # is required until the power is greater than 27, however, all
  10887. # tables include the first 5 for ease of indexing.
  10888. #
  10889. RTABLE:
  10890. byte 0,0,0,0
  10891. byte 2,3,2,3
  10892. byte 2,3,3,2
  10893. byte 3,2,2,3
  10894. set FNIBS,7
  10895. set FSTRT,0
  10896. set ESTRT,4
  10897. set EDIGITS,2
  10898. global decbin
  10899. decbin:
  10900. mov.l 0x0(%a0),FP_SCR0_EX(%a6) # make a copy of input
  10901. mov.l 0x4(%a0),FP_SCR0_HI(%a6) # so we don't alter it
  10902. mov.l 0x8(%a0),FP_SCR0_LO(%a6)
  10903. lea FP_SCR0(%a6),%a0
  10904. movm.l &0x3c00,-(%sp) # save d2-d5
  10905. fmovm.x &0x1,-(%sp) # save fp1
  10906. #
  10907. # Calculate exponent:
  10908. # 1. Copy bcd value in memory for use as a working copy.
  10909. # 2. Calculate absolute value of exponent in d1 by mul and add.
  10910. # 3. Correct for exponent sign.
  10911. # 4. Subtract 16 to compensate for interpreting the mant as all integer digits.
  10912. # (i.e., all digits assumed left of the decimal point.)
  10913. #
  10914. # Register usage:
  10915. #
  10916. # calc_e:
  10917. # (*) d0: temp digit storage
  10918. # (*) d1: accumulator for binary exponent
  10919. # (*) d2: digit count
  10920. # (*) d3: offset pointer
  10921. # ( ) d4: first word of bcd
  10922. # ( ) a0: pointer to working bcd value
  10923. # ( ) a6: pointer to original bcd value
  10924. # (*) FP_SCR1: working copy of original bcd value
  10925. # (*) L_SCR1: copy of original exponent word
  10926. #
  10927. calc_e:
  10928. mov.l &EDIGITS,%d2 # # of nibbles (digits) in fraction part
  10929. mov.l &ESTRT,%d3 # counter to pick up digits
  10930. mov.l (%a0),%d4 # get first word of bcd
  10931. clr.l %d1 # zero d1 for accumulator
  10932. e_gd:
  10933. mulu.l &0xa,%d1 # mul partial product by one digit place
  10934. bfextu %d4{%d3:&4},%d0 # get the digit and zero extend into d0
  10935. add.l %d0,%d1 # d1 = d1 + d0
  10936. addq.b &4,%d3 # advance d3 to the next digit
  10937. dbf.w %d2,e_gd # if we have used all 3 digits, exit loop
  10938. btst &30,%d4 # get SE
  10939. beq.b e_pos # don't negate if pos
  10940. neg.l %d1 # negate before subtracting
  10941. e_pos:
  10942. sub.l &16,%d1 # sub to compensate for shift of mant
  10943. bge.b e_save # if still pos, do not neg
  10944. neg.l %d1 # now negative, make pos and set SE
  10945. or.l &0x40000000,%d4 # set SE in d4,
  10946. or.l &0x40000000,(%a0) # and in working bcd
  10947. e_save:
  10948. mov.l %d1,-(%sp) # save exp on stack
  10949. #
  10950. #
  10951. # Calculate mantissa:
  10952. # 1. Calculate absolute value of mantissa in fp0 by mul and add.
  10953. # 2. Correct for mantissa sign.
  10954. # (i.e., all digits assumed left of the decimal point.)
  10955. #
  10956. # Register usage:
  10957. #
  10958. # calc_m:
  10959. # (*) d0: temp digit storage
  10960. # (*) d1: lword counter
  10961. # (*) d2: digit count
  10962. # (*) d3: offset pointer
  10963. # ( ) d4: words 2 and 3 of bcd
  10964. # ( ) a0: pointer to working bcd value
  10965. # ( ) a6: pointer to original bcd value
  10966. # (*) fp0: mantissa accumulator
  10967. # ( ) FP_SCR1: working copy of original bcd value
  10968. # ( ) L_SCR1: copy of original exponent word
  10969. #
  10970. calc_m:
  10971. mov.l &1,%d1 # word counter, init to 1
  10972. fmov.s &0x00000000,%fp0 # accumulator
  10973. #
  10974. #
  10975. # Since the packed number has a long word between the first & second parts,
  10976. # get the integer digit then skip down & get the rest of the
  10977. # mantissa. We will unroll the loop once.
  10978. #
  10979. bfextu (%a0){&28:&4},%d0 # integer part is ls digit in long word
  10980. fadd.b %d0,%fp0 # add digit to sum in fp0
  10981. #
  10982. #
  10983. # Get the rest of the mantissa.
  10984. #
  10985. loadlw:
  10986. mov.l (%a0,%d1.L*4),%d4 # load mantissa lonqword into d4
  10987. mov.l &FSTRT,%d3 # counter to pick up digits
  10988. mov.l &FNIBS,%d2 # reset number of digits per a0 ptr
  10989. md2b:
  10990. fmul.s &0x41200000,%fp0 # fp0 = fp0 * 10
  10991. bfextu %d4{%d3:&4},%d0 # get the digit and zero extend
  10992. fadd.b %d0,%fp0 # fp0 = fp0 + digit
  10993. #
  10994. #
  10995. # If all the digits (8) in that long word have been converted (d2=0),
  10996. # then inc d1 (=2) to point to the next long word and reset d3 to 0
  10997. # to initialize the digit offset, and set d2 to 7 for the digit count;
  10998. # else continue with this long word.
  10999. #
  11000. addq.b &4,%d3 # advance d3 to the next digit
  11001. dbf.w %d2,md2b # check for last digit in this lw
  11002. nextlw:
  11003. addq.l &1,%d1 # inc lw pointer in mantissa
  11004. cmp.l %d1,&2 # test for last lw
  11005. ble.b loadlw # if not, get last one
  11006. #
  11007. # Check the sign of the mant and make the value in fp0 the same sign.
  11008. #
  11009. m_sign:
  11010. btst &31,(%a0) # test sign of the mantissa
  11011. beq.b ap_st_z # if clear, go to append/strip zeros
  11012. fneg.x %fp0 # if set, negate fp0
  11013. #
  11014. # Append/strip zeros:
  11015. #
  11016. # For adjusted exponents which have an absolute value greater than 27*,
  11017. # this routine calculates the amount needed to normalize the mantissa
  11018. # for the adjusted exponent. That number is subtracted from the exp
  11019. # if the exp was positive, and added if it was negative. The purpose
  11020. # of this is to reduce the value of the exponent and the possibility
  11021. # of error in calculation of pwrten.
  11022. #
  11023. # 1. Branch on the sign of the adjusted exponent.
  11024. # 2p.(positive exp)
# 2. Check M16 and the digits in lwords 2 and 3 in descending order.
  11026. # 3. Add one for each zero encountered until a non-zero digit.
  11027. # 4. Subtract the count from the exp.
  11028. # 5. Check if the exp has crossed zero in #3 above; make the exp abs
  11029. # and set SE.
  11030. # 6. Multiply the mantissa by 10**count.
  11031. # 2n.(negative exp)
# 2. Check the digits in lwords 3 and 2 in descending order.
  11033. # 3. Add one for each zero encountered until a non-zero digit.
  11034. # 4. Add the count to the exp.
  11035. # 5. Check if the exp has crossed zero in #3 above; clear SE.
  11036. # 6. Divide the mantissa by 10**count.
  11037. #
# *Why 27? If the adjusted exponent is within -28 < expA < 28, then
# any adjustment due to append/strip zeros will drive the resultant
  11040. # exponent towards zero. Since all pwrten constants with a power
  11041. # of 27 or less are exact, there is no need to use this routine to
  11042. # attempt to lessen the resultant exponent.
  11043. #
  11044. # Register usage:
  11045. #
  11046. # ap_st_z:
  11047. # (*) d0: temp digit storage
  11048. # (*) d1: zero count
  11049. # (*) d2: digit count
  11050. # (*) d3: offset pointer
  11051. # ( ) d4: first word of bcd
  11052. # (*) d5: lword counter
  11053. # ( ) a0: pointer to working bcd value
  11054. # ( ) FP_SCR1: working copy of original bcd value
  11055. # ( ) L_SCR1: copy of original exponent word
  11056. #
  11057. #
  11058. # First check the absolute value of the exponent to see if this
  11059. # routine is necessary. If so, then check the sign of the exponent
  11060. # and do append (+) or strip (-) zeros accordingly.
  11061. # This section handles a positive adjusted exponent.
  11062. #
  11063. ap_st_z:
  11064. mov.l (%sp),%d1 # load expA for range test
  11065. cmp.l %d1,&27 # test is with 27
  11066. ble.w pwrten # if abs(expA) <28, skip ap/st zeros
  11067. btst &30,(%a0) # check sign of exp
  11068. bne.b ap_st_n # if neg, go to neg side
  11069. clr.l %d1 # zero count reg
  11070. mov.l (%a0),%d4 # load lword 1 to d4
  11071. bfextu %d4{&28:&4},%d0 # get M16 in d0
  11072. bne.b ap_p_fx # if M16 is non-zero, go fix exp
  11073. addq.l &1,%d1 # inc zero count
  11074. mov.l &1,%d5 # init lword counter
  11075. mov.l (%a0,%d5.L*4),%d4 # get lword 2 to d4
  11076. bne.b ap_p_cl # if lw 2 is zero, skip it
  11077. addq.l &8,%d1 # and inc count by 8
  11078. addq.l &1,%d5 # inc lword counter
  11079. mov.l (%a0,%d5.L*4),%d4 # get lword 3 to d4
  11080. ap_p_cl:
  11081. clr.l %d3 # init offset reg
  11082. mov.l &7,%d2 # init digit counter
  11083. ap_p_gd:
  11084. bfextu %d4{%d3:&4},%d0 # get digit
  11085. bne.b ap_p_fx # if non-zero, go to fix exp
  11086. addq.l &4,%d3 # point to next digit
  11087. addq.l &1,%d1 # inc digit counter
  11088. dbf.w %d2,ap_p_gd # get next digit
  11089. ap_p_fx:
  11090. mov.l %d1,%d0 # copy counter to d2
  11091. mov.l (%sp),%d1 # get adjusted exp from memory
  11092. sub.l %d0,%d1 # subtract count from exp
  11093. bge.b ap_p_fm # if still pos, go to pwrten
  11094. neg.l %d1 # now its neg; get abs
  11095. mov.l (%a0),%d4 # load lword 1 to d4
  11096. or.l &0x40000000,%d4 # and set SE in d4
  11097. or.l &0x40000000,(%a0) # and in memory
  11098. #
# Calculate the mantissa multiplier to compensate for the stripping of
  11100. # zeros from the mantissa.
  11101. #
  11102. ap_p_fm:
  11103. lea.l PTENRN(%pc),%a1 # get address of power-of-ten table
  11104. clr.l %d3 # init table index
  11105. fmov.s &0x3f800000,%fp1 # init fp1 to 1
  11106. mov.l &3,%d2 # init d2 to count bits in counter
  11107. ap_p_el:
  11108. asr.l &1,%d0 # shift lsb into carry
  11109. bcc.b ap_p_en # if 1, mul fp1 by pwrten factor
  11110. fmul.x (%a1,%d3),%fp1 # mul by 10**(d3_bit_no)
  11111. ap_p_en:
  11112. add.l &12,%d3 # inc d3 to next rtable entry
  11113. tst.l %d0 # check if d0 is zero
  11114. bne.b ap_p_el # if not, get next bit
  11115. fmul.x %fp1,%fp0 # mul mantissa by 10**(no_bits_shifted)
  11116. bra.b pwrten # go calc pwrten
  11117. #
  11118. # This section handles a negative adjusted exponent.
  11119. #
  11120. ap_st_n:
# Negative adjusted exponent: strip trailing zero bcd digits from the
# mantissa (lwords 3 then 2), counting them in d1, then fold the count
# into the adjusted exponent saved on the stack.
  11121. clr.l %d1 # clear stripped-zero-digit counter
  11122. mov.l &2,%d5 # set up d5 to point to lword 3
  11123. mov.l (%a0,%d5.L*4),%d4 # get lword 3
  11124. bne.b ap_n_cl # if not zero, check digits
  11125. sub.l &1,%d5 # dec d5 to point to lword 2
  11126. addq.l &8,%d1 # whole lword was zero: inc counter by 8
  11127. mov.l (%a0,%d5.L*4),%d4 # get lword 2
  11128. ap_n_cl:
  11129. mov.l &28,%d3 # bit offset 28 = least significant digit
  11130. mov.l &7,%d2 # init digit counter (8 digits per lword)
  11131. ap_n_gd:
  11132. bfextu %d4{%d3:&4},%d0 # extract one 4-bit bcd digit
  11133. bne.b ap_n_fx # if non-zero, go to exp fix
  11134. subq.l &4,%d3 # point to previous digit
  11135. addq.l &1,%d1 # inc digit counter
  11136. dbf.w %d2,ap_n_gd # get next digit
  11137. ap_n_fx:
  11138. mov.l %d1,%d0 # copy counter to d0
  11139. mov.l (%sp),%d1 # get adjusted exp from memory
  11140. sub.l %d0,%d1 # subtract count from exp
  11141. bgt.b ap_n_fm # if still pos, go fix mantissa
  11142. neg.l %d1 # exp went non-positive: take abs of exp and clr SE
  11143. mov.l (%a0),%d4 # load lword 1 to d4
  11144. and.l &0xbfffffff,%d4 # and clr SE (sign-of-exponent) in d4
  11145. and.l &0xbfffffff,(%a0) # and in memory
  11146. #
  11147. # Calculate the mantissa multiplier to compensate for the appending of
  11148. # zeros to the mantissa.
  11149. #
  11150. ap_n_fm:
# Build 10**(count) in fp1 by binary decomposition of d0.  PTENRN entries
# are 12-byte extended values 10^1, 10^2, 10^4, ... (round-to-nearest table).
  11151. lea.l PTENRN(%pc),%a1 # get address of power-of-ten table
  11152. clr.l %d3 # init table index
  11153. fmov.s &0x3f800000,%fp1 # init fp1 to 1.0
  11154. mov.l &3,%d2 # init d2 to count bits in counter
  11155. ap_n_el:
  11156. asr.l &1,%d0 # shift lsb into carry
  11157. bcc.b ap_n_en # bit was 0: skip the multiply
  11158. fmul.x (%a1,%d3),%fp1 # mul by 10**(2**d3_bit_no)
  11159. ap_n_en:
  11160. add.l &12,%d3 # advance to next 12-byte pwrten entry
  11161. tst.l %d0 # check if d0 is zero
  11162. bne.b ap_n_el # if not, get next bit
  11163. fdiv.x %fp1,%fp0 # div mantissa by 10**(no_digits_stripped)
  11164. #
  11165. #
  11166. # Calculate power-of-ten factor from adjusted and shifted exponent.
  11167. #
  11168. # Register usage:
  11169. #
  11170. # pwrten:
  11171. # (*) d0: temp
  11172. # ( ) d1: exponent
  11173. # (*) d2: {FPCR[6:5],SM,SE} as index in RTABLE; temp
  11174. # (*) d3: FPCR work copy
  11175. # ( ) d4: first word of bcd
  11176. # (*) a1: RTABLE pointer
  11177. # calc_p:
  11178. # (*) d0: temp
  11179. # ( ) d1: exponent
  11180. # (*) d3: PWRTxx table index
  11181. # ( ) a0: pointer to working copy of bcd
  11182. # (*) a1: PWRTxx pointer
  11183. # (*) fp1: power-of-ten accumulator
  11184. #
  11185. # Pwrten calculates the exponent factor in the selected rounding mode
  11186. # according to the following table:
  11187. #
  11188. # Sign of Mant Sign of Exp Rounding Mode PWRTEN Rounding Mode
  11189. #
  11190. # ANY ANY RN RN
  11191. #
  11192. # + + RP RP
  11193. # - + RP RM
  11194. # + - RP RM
  11195. # - - RP RP
  11196. #
  11197. # + + RM RM
  11198. # - + RM RP
  11199. # + - RM RP
  11200. # - - RM RM
  11201. #
  11202. # + + RZ RM
  11203. # - + RZ RM
  11204. # + - RZ RP
  11205. # - - RZ RP
  11206. #
  11207. #
  11208. pwrten:
# Select the PTENxx table matching the effective rounding mode (per the
# RTABLE lookup described in the header above), compute 10**|exp| into
# fp1, then scale fp0 by it and clean up the decbin stack frame.
  11209. mov.l USER_FPCR(%a6),%d3 # get user's FPCR
  11210. bfextu %d3{&26:&2},%d2 # isolate rounding mode bits
  11211. mov.l (%a0),%d4 # reload 1st bcd word to d4
  11212. asl.l &2,%d2 # format d2 to be
  11213. bfextu %d4{&0:&2},%d0 # {FPCR[6],FPCR[5],SM,SE}
  11214. add.l %d0,%d2 # in d2 as index into RTABLE
  11215. lea.l RTABLE(%pc),%a1 # load rtable base
  11216. mov.b (%a1,%d2),%d0 # load new rounding bits from table
  11217. clr.l %d3 # clear d3 to force no exc and extended
  11218. bfins %d0,%d3{&26:&2} # stuff new rounding bits in FPCR
  11219. fmov.l %d3,%fpcr # write new FPCR
  11220. asr.l &1,%d0 # shift rounding bit 0 into carry
  11221. bcc.b not_rp # carry clear: mode is not RP
  11222. lea.l PTENRP(%pc),%a1 # it is RP
  11223. bra.b calc_p # go to init section
  11224. not_rp:
  11225. asr.l &1,%d0 # check remaining rounding bit
  11226. bcc.b not_rm # clear: mode is RN
  11227. lea.l PTENRM(%pc),%a1 # it is RM
  11228. bra.b calc_p # go to init section
  11229. not_rm:
  11230. lea.l PTENRN(%pc),%a1 # it is RN
  11231. calc_p:
  11232. mov.l %d1,%d0 # copy exp to d0;use d0
  11233. bpl.b no_neg # if exp is negative,
  11234. neg.l %d0 # invert it
  11235. or.l &0x40000000,(%a0) # and set SE bit
  11236. no_neg:
# Binary decomposition of |exp|: multiply in 10^(2^k) for each set bit.
  11237. clr.l %d3 # table index
  11238. fmov.s &0x3f800000,%fp1 # init fp1 to 1.0
  11239. e_loop:
  11240. asr.l &1,%d0 # shift next bit into carry
  11241. bcc.b e_next # if zero, skip the mul
  11242. fmul.x (%a1,%d3),%fp1 # mul by 10**(2**d3_bit_no)
  11243. e_next:
  11244. add.l &12,%d3 # advance to next 12-byte pwrten entry
  11245. tst.l %d0 # check if d0 is zero
  11246. bne.b e_loop # not zero, continue shifting
  11247. #
  11248. #
  11249. # Check the sign of the adjusted exp and make the value in fp0 the
  11250. # same sign. If the exp was pos then multiply fp1*fp0;
  11251. # else divide fp0/fp1.
  11252. #
  11253. # Register Usage:
  11254. # norm:
  11255. # ( ) a0: pointer to working bcd value
  11256. # (*) fp0: mantissa accumulator
  11257. # ( ) fp1: scaling factor - 10**(abs(exp))
  11258. #
  11259. pnorm:
  11260. btst &30,(%a0) # test SE, the sign of the exponent
  11261. beq.b mul # if clear, go to multiply
  11262. div:
  11263. fdiv.x %fp1,%fp0 # exp is negative, so divide mant by 10**|exp|
  11264. bra.b end_dec
  11265. mul:
  11266. fmul.x %fp1,%fp0 # exp is positive, so multiply by 10**exp
  11267. #
  11268. #
  11269. # Clean up and return with result in fp0.
  11270. #
  11271. # If the final mul/div in decbin incurred an inex exception,
  11272. # it will be inex2, but will be reported as inex1 by get_op.
  11273. #
  11274. end_dec:
  11275. fmov.l %fpsr,%d0 # get status register
  11276. bclr &inex2_bit+8,%d0 # test for inex2 and clear it
  11277. beq.b no_exc # skip this if no exc
  11278. ori.w &inx1a_mask,2+USER_FPSR(%a6) # set INEX1/AINEX
  11279. no_exc:
# Undo the frame built at decbin entry: pop the saved exponent lword,
# restore fp1 and d2-d5, and leave the FPU control/status clean.
  11280. add.l &0x4,%sp # clear 1 lw param (the saved exponent)
  11281. fmovm.x (%sp)+,&0x40 # restore fp1
  11282. movm.l (%sp)+,&0x3c # restore d2-d5
  11283. fmov.l &0x0,%fpcr # reset rounding control for caller
  11284. fmov.l &0x0,%fpsr # clear status for caller
  11285. rts
  11286. #########################################################################
  11287. # bindec(): Converts an input in extended precision format to bcd format#
  11288. # #
  11289. # INPUT *************************************************************** #
  11290. # a0 = pointer to the input extended precision value in memory. #
  11291. # the input may be either normalized, unnormalized, or #
  11292. # denormalized. #
  11293. # d0 = contains the k-factor sign-extended to 32-bits. #
  11294. # #
  11295. # OUTPUT ************************************************************** #
  11296. # FP_SCR0(a6) = bcd format result on the stack. #
  11297. # #
  11298. # ALGORITHM *********************************************************** #
  11299. # #
  11300. # A1. Set RM and size ext; Set SIGMA = sign of input. #
  11301. # The k-factor is saved for use in d7. Clear the #
  11302. # BINDEC_FLG for separating normalized/denormalized #
  11303. # input. If input is unnormalized or denormalized, #
  11304. # normalize it. #
  11305. # #
  11306. # A2. Set X = abs(input). #
  11307. # #
  11308. # A3. Compute ILOG. #
  11309. # ILOG is the log base 10 of the input value. It is #
  11310. # approximated by adding e + 0.f when the original #
  11311. # value is viewed as 2^^e * 1.f in extended precision. #
  11312. # This value is stored in d6. #
  11313. # #
  11314. # A4. Clr INEX bit. #
  11315. # The operation in A3 above may have set INEX2. #
  11316. # #
  11317. # A5. Set ICTR = 0; #
  11318. # ICTR is a flag used in A13. It must be set before the #
  11319. # loop entry A6. #
  11320. # #
  11321. # A6. Calculate LEN. #
  11322. # LEN is the number of digits to be displayed. The #
  11323. # k-factor can dictate either the total number of digits, #
  11324. # if it is a positive number, or the number of digits #
  11325. # after the decimal point which are to be included as #
  11326. # significant. See the 68882 manual for examples. #
  11327. # If LEN is computed to be greater than 17, set OPERR in #
  11328. # USER_FPSR. LEN is stored in d4. #
  11329. # #
  11330. # A7. Calculate SCALE. #
  11331. # SCALE is equal to 10^ISCALE, where ISCALE is the number #
  11332. # of decimal places needed to insure LEN integer digits #
  11333. # in the output before conversion to bcd. LAMBDA is the #
  11334. # sign of ISCALE, used in A9. Fp1 contains #
  11335. # 10^^(abs(ISCALE)) using a rounding mode which is a #
  11336. # function of the original rounding mode and the signs #
  11337. # of ISCALE and X. A table is given in the code. #
  11338. # #
  11339. # A8. Clr INEX; Force RZ. #
  11340. # The operation in A3 above may have set INEX2. #
  11341. # RZ mode is forced for the scaling operation to insure #
  11342. # only one rounding error. The grs bits are collected in #
  11343. # the INEX flag for use in A10. #
  11344. # #
  11345. # A9. Scale X -> Y. #
  11346. # The mantissa is scaled to the desired number of #
  11347. # significant digits. The excess digits are collected #
  11348. # in INEX2. #
  11349. # #
  11350. # A10. Or in INEX. #
  11351. # If INEX is set, round error occurred. This is #
  11352. # compensated for by 'or-ing' in the INEX2 flag to #
  11353. # the lsb of Y. #
  11354. # #
  11355. # A11. Restore original FPCR; set size ext. #
  11356. # Perform FINT operation in the user's rounding mode. #
  11357. # Keep the size to extended. #
  11358. # #
  11359. # A12. Calculate YINT = FINT(Y) according to user's rounding #
  11360. # mode. The FPSP routine sintd0 is used. The output #
  11361. # is in fp0. #
  11362. # #
  11363. # A13. Check for LEN digits. #
  11364. # If the int operation results in more than LEN digits, #
  11365. # or less than LEN -1 digits, adjust ILOG and repeat from #
  11366. # A6. This test occurs only on the first pass. If the #
  11367. # result is exactly 10^LEN, decrement ILOG and divide #
  11368. # the mantissa by 10. #
  11369. # #
  11370. # A14. Convert the mantissa to bcd. #
  11371. # The binstr routine is used to convert the LEN digit #
  11372. # mantissa to bcd in memory. The input to binstr is #
  11373. # to be a fraction; i.e. (mantissa)/10^LEN and adjusted #
  11374. # such that the decimal point is to the left of bit 63. #
  11375. # The bcd digits are stored in the correct position in #
  11376. # the final string area in memory. #
  11377. # #
  11378. # A15. Convert the exponent to bcd. #
  11379. # As in A14 above, the exp is converted to bcd and the #
  11380. # digits are stored in the final string. #
  11381. # Test the length of the final exponent string. If the #
  11382. # length is 4, set operr. #
  11383. # #
  11384. # A16. Write sign bits to final string. #
  11385. # #
  11386. #########################################################################
  11387. set BINDEC_FLG, EXC_TEMP # DENORM flag (aliases the EXC_TEMP slot)
  11388. # Constants in extended precision
  11389. PLOG2:
# log10(2) ~= 0.30103 (biased exp 0x3FFD => 2^-2 scaling of the mantissa)
  11390. long 0x3FFD0000,0x9A209A84,0xFBCFF798,0x00000000
  11391. PLOG2UP1:
# log10(2) with the mantissa lsb bumped by one (rounded-up variant,
# used for the negative result path in step A3)
  11392. long 0x3FFD0000,0x9A209A84,0xFBCFF799,0x00000000
  11393. # Constants in single precision
# (each padded to 16 bytes so all table entries are uniformly sized)
  11394. FONE:
# 1.0
  11395. long 0x3F800000,0x00000000,0x00000000,0x00000000
  11396. FTWO:
# 2.0
  11397. long 0x40000000,0x00000000,0x00000000,0x00000000
  11398. FTEN:
# 10.0
  11399. long 0x41200000,0x00000000,0x00000000,0x00000000
  11400. F4933:
# 4933.0 (magnitude of the ILOG value forced for denormalized input; cf. A3)
  11401. long 0x459A2800,0x00000000,0x00000000,0x00000000
  11402. RBDTBL:
# Rounding-mode remap table for step A7, indexed by {FPCR[6:5],LAMBDA,sign(X)};
# entry values: 0 = RN, 2 = RM, 3 = RP (see the A7 table in the comments above)
  11403. byte 0,0,0,0
  11404. byte 3,3,2,2
  11405. byte 3,2,2,3
  11406. byte 2,3,3,2
  11407. # Implementation Notes:
  11408. #
  11409. # The registers are used as follows:
  11410. #
  11411. # d0: scratch; LEN input to binstr
  11412. # d1: scratch
  11413. # d2: upper 32-bits of mantissa for binstr
  11414. # d3: scratch;lower 32-bits of mantissa for binstr
  11415. # d4: LEN
  11416. # d5: LAMBDA/ICTR
  11417. # d6: ILOG
  11418. # d7: k-factor
  11419. # a0: ptr for original operand/final result
  11420. # a1: scratch pointer
  11421. # a2: pointer to FP_X; abs(original value) in ext
  11422. # fp0: scratch
  11423. # fp1: scratch
  11424. # fp2: scratch
  11425. # F_SCR1:
  11426. # F_SCR2:
  11427. # L_SCR1:
  11428. # L_SCR2:
  11429. global bindec
  11430. bindec:
  11431. movm.l &0x3f20,-(%sp) # {%d2-%d7/%a2}
  11432. fmovm.x &0x7,-(%sp) # {%fp0-%fp2}
  11433. # A1. Set RM and size ext. Set SIGMA = sign input;
  11434. # The k-factor is saved for use in d7. Clear BINDEC_FLG for
  11435. # separating normalized/denormalized input. If the input
  11436. # is a denormalized number, set the BINDEC_FLG memory word
  11437. # to signal denorm. If the input is unnormalized, normalize
  11438. # the input and test for denormalized result.
  11439. #
  11440. fmov.l &rm_mode*0x10,%fpcr # set RM and ext
  11441. mov.l (%a0),L_SCR2(%a6) # save exponent for sign check
  11442. mov.l %d0,%d7 # move k-factor to d7
  11443. clr.b BINDEC_FLG(%a6) # clr norm/denorm flag
  11444. cmpi.b STAG(%a6),&DENORM # is input a DENORM?
  11445. bne.w A2_str # no; input is a NORM
  11446. #
  11447. # Normalize the denorm
  11448. #
  11449. un_de_norm:
  11450. mov.w (%a0),%d0
  11451. and.w &0x7fff,%d0 # strip sign of normalized exp
  11452. mov.l 4(%a0),%d1
  11453. mov.l 8(%a0),%d2
  11454. norm_loop:
  11455. sub.w &1,%d0
  11456. lsl.l &1,%d2
  11457. roxl.l &1,%d1
  11458. tst.l %d1
  11459. bge.b norm_loop
  11460. #
  11461. # Test if the normalized input is denormalized
  11462. #
  11463. tst.w %d0
  11464. bgt.b pos_exp # if greater than zero, it is a norm
  11465. st BINDEC_FLG(%a6) # set flag for denorm
  11466. pos_exp:
  11467. and.w &0x7fff,%d0 # strip sign of normalized exp
  11468. mov.w %d0,(%a0)
  11469. mov.l %d1,4(%a0)
  11470. mov.l %d2,8(%a0)
  11471. # A2. Set X = abs(input).
  11472. #
  11473. A2_str:
  11474. mov.l (%a0),FP_SCR1(%a6) # move input to work space
  11475. mov.l 4(%a0),FP_SCR1+4(%a6) # move input to work space
  11476. mov.l 8(%a0),FP_SCR1+8(%a6) # move input to work space
  11477. and.l &0x7fffffff,FP_SCR1(%a6) # create abs(X)
  11478. # A3. Compute ILOG.
  11479. # ILOG is the log base 10 of the input value. It is approx-
  11480. # imated by adding e + 0.f when the original value is viewed
  11481. # as 2^^e * 1.f in extended precision. This value is stored
  11482. # in d6.
  11483. #
  11484. # Register usage:
  11485. # Input/Output
  11486. # d0: k-factor/exponent
  11487. # d2: x/x
  11488. # d3: x/x
  11489. # d4: x/x
  11490. # d5: x/x
  11491. # d6: x/ILOG
  11492. # d7: k-factor/Unchanged
  11493. # a0: ptr for original operand/final result
  11494. # a1: x/x
  11495. # a2: x/x
  11496. # fp0: x/float(ILOG)
  11497. # fp1: x/x
  11498. # fp2: x/x
  11499. # F_SCR1:x/x
  11500. # F_SCR2:Abs(X)/Abs(X) with $3fff exponent
  11501. # L_SCR1:x/x
  11502. # L_SCR2:first word of X packed/Unchanged
  11503. tst.b BINDEC_FLG(%a6) # check for denorm
  11504. beq.b A3_cont # if clr, continue with norm
  11505. mov.l &-4933,%d6 # force ILOG = -4933
  11506. bra.b A4_str
  11507. A3_cont:
  11508. mov.w FP_SCR1(%a6),%d0 # move exp to d0
  11509. mov.w &0x3fff,FP_SCR1(%a6) # replace exponent with 0x3fff
  11510. fmov.x FP_SCR1(%a6),%fp0 # now fp0 has 1.f
  11511. sub.w &0x3fff,%d0 # strip off bias
  11512. fadd.w %d0,%fp0 # add in exp
  11513. fsub.s FONE(%pc),%fp0 # subtract off 1.0
  11514. fbge.w pos_res # if pos, branch
  11515. fmul.x PLOG2UP1(%pc),%fp0 # if neg, mul by LOG2UP1
  11516. fmov.l %fp0,%d6 # put ILOG in d6 as a lword
  11517. bra.b A4_str # go move out ILOG
  11518. pos_res:
  11519. fmul.x PLOG2(%pc),%fp0 # if pos, mul by LOG2
  11520. fmov.l %fp0,%d6 # put ILOG in d6 as a lword
  11521. # A4. Clr INEX bit.
  11522. # The operation in A3 above may have set INEX2.
  11523. A4_str:
  11524. fmov.l &0,%fpsr # zero all of fpsr - nothing needed
  11525. # A5. Set ICTR = 0;
  11526. # ICTR is a flag used in A13. It must be set before the
  11527. # loop entry A6. The lower word of d5 is used for ICTR.
  11528. clr.w %d5 # clear ICTR
  11529. # A6. Calculate LEN.
  11530. # LEN is the number of digits to be displayed. The k-factor
  11531. # can dictate either the total number of digits, if it is
  11532. # a positive number, or the number of digits after the
  11533. # original decimal point which are to be included as
  11534. # significant. See the 68882 manual for examples.
  11535. # If LEN is computed to be greater than 17, set OPERR in
  11536. # USER_FPSR. LEN is stored in d4.
  11537. #
  11538. # Register usage:
  11539. # Input/Output
  11540. # d0: exponent/Unchanged
  11541. # d2: x/x/scratch
  11542. # d3: x/x
  11543. # d4: exc picture/LEN
  11544. # d5: ICTR/Unchanged
  11545. # d6: ILOG/Unchanged
  11546. # d7: k-factor/Unchanged
  11547. # a0: ptr for original operand/final result
  11548. # a1: x/x
  11549. # a2: x/x
  11550. # fp0: float(ILOG)/Unchanged
  11551. # fp1: x/x
  11552. # fp2: x/x
  11553. # F_SCR1:x/x
  11554. # F_SCR2:Abs(X) with $3fff exponent/Unchanged
  11555. # L_SCR1:x/x
  11556. # L_SCR2:first word of X packed/Unchanged
  11557. A6_str:
  11558. tst.l %d7 # branch on sign of k
  11559. ble.b k_neg # if k <= 0, LEN = ILOG + 1 - k
  11560. mov.l %d7,%d4 # if k > 0, LEN = k
  11561. bra.b len_ck # skip to LEN check
  11562. k_neg:
  11563. mov.l %d6,%d4 # first load ILOG to d4
  11564. sub.l %d7,%d4 # subtract off k
  11565. addq.l &1,%d4 # add in the 1
  11566. len_ck:
  11567. tst.l %d4 # LEN check: branch on sign of LEN
  11568. ble.b LEN_ng # if neg, set LEN = 1
  11569. cmp.l %d4,&17 # test if LEN > 17
  11570. ble.b A7_str # if not, forget it
  11571. mov.l &17,%d4 # set max LEN = 17
  11572. tst.l %d7 # if negative, never set OPERR
  11573. ble.b A7_str # if positive, continue
  11574. or.l &opaop_mask,USER_FPSR(%a6) # set OPERR & AIOP in USER_FPSR
  11575. bra.b A7_str # finished here
  11576. LEN_ng:
  11577. mov.l &1,%d4 # min LEN is 1
  11578. # A7. Calculate SCALE.
  11579. # SCALE is equal to 10^ISCALE, where ISCALE is the number
  11580. # of decimal places needed to insure LEN integer digits
  11581. # in the output before conversion to bcd. LAMBDA is the sign
  11582. # of ISCALE, used in A9. Fp1 contains 10^^(abs(ISCALE)) using
  11583. # the rounding mode as given in the following table (see
  11584. # Coonen, p. 7.23 as ref.; however, the SCALE variable is
  11585. # of opposite sign in bindec.sa from Coonen).
  11586. #
  11587. # Initial USE
  11588. # FPCR[6:5] LAMBDA SIGN(X) FPCR[6:5]
  11589. # ----------------------------------------------
  11590. # RN 00 0 0 00/0 RN
  11591. # RN 00 0 1 00/0 RN
  11592. # RN 00 1 0 00/0 RN
  11593. # RN 00 1 1 00/0 RN
  11594. # RZ 01 0 0 11/3 RP
  11595. # RZ 01 0 1 11/3 RP
  11596. # RZ 01 1 0 10/2 RM
  11597. # RZ 01 1 1 10/2 RM
  11598. # RM 10 0 0 11/3 RP
  11599. # RM 10 0 1 10/2 RM
  11600. # RM 10 1 0 10/2 RM
  11601. # RM 10 1 1 11/3 RP
  11602. # RP 11 0 0 10/2 RM
  11603. # RP 11 0 1 11/3 RP
  11604. # RP 11 1 0 11/3 RP
  11605. # RP 11 1 1 10/2 RM
  11606. #
  11607. # Register usage:
  11608. # Input/Output
  11609. # d0: exponent/scratch - final is 0
  11610. # d2: x/0 or 24 for A9
  11611. # d3: x/scratch - offset ptr into PTENRM array
  11612. # d4: LEN/Unchanged
  11613. # d5: 0/ICTR:LAMBDA
  11614. # d6: ILOG/ILOG or k if ((k<=0)&(ILOG<k))
  11615. # d7: k-factor/Unchanged
  11616. # a0: ptr for original operand/final result
  11617. # a1: x/ptr to PTENRM array
  11618. # a2: x/x
  11619. # fp0: float(ILOG)/Unchanged
  11620. # fp1: x/10^ISCALE
  11621. # fp2: x/x
  11622. # F_SCR1:x/x
  11623. # F_SCR2:Abs(X) with $3fff exponent/Unchanged
  11624. # L_SCR1:x/x
  11625. # L_SCR2:first word of X packed/Unchanged
  11626. A7_str:
  11627. tst.l %d7 # test sign of k
  11628. bgt.b k_pos # if pos and > 0, skip this
  11629. cmp.l %d7,%d6 # test k - ILOG
  11630. blt.b k_pos # if ILOG >= k, skip this
  11631. mov.l %d7,%d6 # if ((k<0) & (ILOG < k)) ILOG = k
  11632. k_pos:
  11633. mov.l %d6,%d0 # calc ILOG + 1 - LEN in d0
  11634. addq.l &1,%d0 # add the 1
  11635. sub.l %d4,%d0 # sub off LEN
  11636. swap %d5 # use upper word of d5 for LAMBDA
  11637. clr.w %d5 # set it zero initially
  11638. clr.w %d2 # set up d2 for very small case
  11639. tst.l %d0 # test sign of ISCALE
  11640. bge.b iscale # if pos, skip next inst
  11641. addq.w &1,%d5 # if neg, set LAMBDA true
  11642. cmp.l %d0,&0xffffecd4 # test iscale <= -4908
  11643. bgt.b no_inf # if false, skip rest
  11644. add.l &24,%d0 # add in 24 to iscale
  11645. mov.l &24,%d2 # put 24 in d2 for A9
  11646. no_inf:
  11647. neg.l %d0 # and take abs of ISCALE
  11648. iscale:
  11649. fmov.s FONE(%pc),%fp1 # init fp1 to 1
  11650. bfextu USER_FPCR(%a6){&26:&2},%d1 # get initial rmode bits
  11651. lsl.w &1,%d1 # put them in bits 2:1
  11652. add.w %d5,%d1 # add in LAMBDA
  11653. lsl.w &1,%d1 # put them in bits 3:1
  11654. tst.l L_SCR2(%a6) # test sign of original x
  11655. bge.b x_pos # if pos, don't set bit 0
  11656. addq.l &1,%d1 # if neg, set bit 0
  11657. x_pos:
  11658. lea.l RBDTBL(%pc),%a2 # load rbdtbl base
  11659. mov.b (%a2,%d1),%d3 # load d3 with new rmode
  11660. lsl.l &4,%d3 # put bits in proper position
  11661. fmov.l %d3,%fpcr # load bits into fpu
  11662. lsr.l &4,%d3 # put bits in proper position
  11663. tst.b %d3 # decode new rmode for pten table
  11664. bne.b not_rn # if zero, it is RN
  11665. lea.l PTENRN(%pc),%a1 # load a1 with RN table base
  11666. bra.b rmode # exit decode
  11667. not_rn:
  11668. lsr.b &1,%d3 # get lsb in carry
  11669. bcc.b not_rp2 # if carry clear, it is RM
  11670. lea.l PTENRP(%pc),%a1 # load a1 with RP table base
  11671. bra.b rmode # exit decode
  11672. not_rp2:
  11673. lea.l PTENRM(%pc),%a1 # load a1 with RM table base
  11674. rmode:
  11675. clr.l %d3 # clr table index
  11676. e_loop2:
  11677. lsr.l &1,%d0 # shift next bit into carry
  11678. bcc.b e_next2 # if zero, skip the mul
  11679. fmul.x (%a1,%d3),%fp1 # mul by 10**(d3_bit_no)
  11680. e_next2:
  11681. add.l &12,%d3 # inc d3 to next pwrten table entry
  11682. tst.l %d0 # test if ISCALE is zero
  11683. bne.b e_loop2 # if not, loop
  11684. # A8. Clr INEX; Force RZ.
  11685. # The operation in A3 above may have set INEX2.
  11686. # RZ mode is forced for the scaling operation to insure
  11687. # only one rounding error. The grs bits are collected in
  11688. # the INEX flag for use in A10.
  11689. #
  11690. # Register usage:
  11691. # Input/Output
  11692. fmov.l &0,%fpsr # clr INEX
  11693. fmov.l &rz_mode*0x10,%fpcr # set RZ rounding mode
  11694. # A9. Scale X -> Y.
  11695. # The mantissa is scaled to the desired number of significant
  11696. # digits. The excess digits are collected in INEX2. If mul,
  11697. # Check d2 for excess 10 exponential value. If not zero,
  11698. # the iscale value would have caused the pwrten calculation
  11699. # to overflow. Only a negative iscale can cause this, so
  11700. # multiply by 10^(d2), which is now only allowed to be 24,
  11701. # with a multiply by 10^8 and 10^16, which is exact since
  11702. # 10^24 is exact. If the input was denormalized, we must
  11703. # create a busy stack frame with the mul command and the
  11704. # two operands, and allow the fpu to complete the multiply.
  11705. #
  11706. # Register usage:
  11707. # Input/Output
  11708. # d0: FPCR with RZ mode/Unchanged
  11709. # d2: 0 or 24/unchanged
  11710. # d3: x/x
  11711. # d4: LEN/Unchanged
  11712. # d5: ICTR:LAMBDA
  11713. # d6: ILOG/Unchanged
  11714. # d7: k-factor/Unchanged
  11715. # a0: ptr for original operand/final result
  11716. # a1: ptr to PTENRM array/Unchanged
  11717. # a2: x/x
  11718. # fp0: float(ILOG)/X adjusted for SCALE (Y)
  11719. # fp1: 10^ISCALE/Unchanged
  11720. # fp2: x/x
  11721. # F_SCR1:x/x
  11722. # F_SCR2:Abs(X) with $3fff exponent/Unchanged
  11723. # L_SCR1:x/x
  11724. # L_SCR2:first word of X packed/Unchanged
  11725. A9_str:
  11726. fmov.x (%a0),%fp0 # load X from memory
  11727. fabs.x %fp0 # use abs(X)
  11728. tst.w %d5 # LAMBDA is in lower word of d5
  11729. bne.b sc_mul # if neg (LAMBDA = 1), scale by mul
  11730. fdiv.x %fp1,%fp0 # calculate X / SCALE -> Y to fp0
  11731. bra.w A10_st # branch to A10
  11732. sc_mul:
  11733. tst.b BINDEC_FLG(%a6) # check for denorm
  11734. beq.w A9_norm # if norm, continue with mul
  11735. # for DENORM, we must calculate:
  11736. # fp0 = input_op * 10^ISCALE * 10^24
  11737. # since the input operand is a DENORM, we can't multiply it directly.
  11738. # so, we do the multiplication of the exponents and mantissas separately.
  11739. # in this way, we avoid underflow on intermediate stages of the
  11740. # multiplication and guarantee a result without exception.
  11741. fmovm.x &0x2,-(%sp) # save 10^ISCALE to stack
  11742. mov.w (%sp),%d3 # grab exponent
  11743. andi.w &0x7fff,%d3 # clear sign
  11744. ori.w &0x8000,(%a0) # make DENORM exp negative
  11745. add.w (%a0),%d3 # add DENORM exp to 10^ISCALE exp
  11746. subi.w &0x3fff,%d3 # subtract BIAS
  11747. add.w 36(%a1),%d3
  11748. subi.w &0x3fff,%d3 # subtract BIAS
  11749. add.w 48(%a1),%d3
  11750. subi.w &0x3fff,%d3 # subtract BIAS
  11751. bmi.w sc_mul_err # is result is DENORM, punt!!!
  11752. andi.w &0x8000,(%sp) # keep sign
  11753. or.w %d3,(%sp) # insert new exponent
  11754. andi.w &0x7fff,(%a0) # clear sign bit on DENORM again
  11755. mov.l 0x8(%a0),-(%sp) # put input op mantissa on stk
  11756. mov.l 0x4(%a0),-(%sp)
  11757. mov.l &0x3fff0000,-(%sp) # force exp to zero
  11758. fmovm.x (%sp)+,&0x80 # load normalized DENORM into fp0
  11759. fmul.x (%sp)+,%fp0
  11760. # fmul.x 36(%a1),%fp0 # multiply fp0 by 10^8
  11761. # fmul.x 48(%a1),%fp0 # multiply fp0 by 10^16
  11762. mov.l 36+8(%a1),-(%sp) # get 10^8 mantissa
  11763. mov.l 36+4(%a1),-(%sp)
  11764. mov.l &0x3fff0000,-(%sp) # force exp to zero
  11765. mov.l 48+8(%a1),-(%sp) # get 10^16 mantissa
  11766. mov.l 48+4(%a1),-(%sp)
  11767. mov.l &0x3fff0000,-(%sp)# force exp to zero
  11768. fmul.x (%sp)+,%fp0 # multiply fp0 by 10^8
  11769. fmul.x (%sp)+,%fp0 # multiply fp0 by 10^16
  11770. bra.b A10_st
  11771. sc_mul_err:
  11772. bra.b sc_mul_err
  11773. A9_norm:
  11774. tst.w %d2 # test for small exp case
  11775. beq.b A9_con # if zero, continue as normal
  11776. fmul.x 36(%a1),%fp0 # multiply fp0 by 10^8
  11777. fmul.x 48(%a1),%fp0 # multiply fp0 by 10^16
  11778. A9_con:
  11779. fmul.x %fp1,%fp0 # calculate X * SCALE -> Y to fp0
  11780. # A10. Or in INEX.
  11781. # If INEX is set, round error occurred. This is compensated
  11782. # for by 'or-ing' in the INEX2 flag to the lsb of Y.
  11783. #
  11784. # Register usage:
  11785. # Input/Output
  11786. # d0: FPCR with RZ mode/FPSR with INEX2 isolated
  11787. # d2: x/x
  11788. # d3: x/x
  11789. # d4: LEN/Unchanged
  11790. # d5: ICTR:LAMBDA
  11791. # d6: ILOG/Unchanged
  11792. # d7: k-factor/Unchanged
  11793. # a0: ptr for original operand/final result
  11794. # a1: ptr to PTENxx array/Unchanged
  11795. # a2: x/ptr to FP_SCR1(a6)
  11796. # fp0: Y/Y with lsb adjusted
  11797. # fp1: 10^ISCALE/Unchanged
  11798. # fp2: x/x
  11799. A10_st:
  11800. fmov.l %fpsr,%d0 # get FPSR
  11801. fmov.x %fp0,FP_SCR1(%a6) # move Y to memory
  11802. lea.l FP_SCR1(%a6),%a2 # load a2 with ptr to FP_SCR1
  11803. btst &9,%d0 # check if INEX2 set
  11804. beq.b A11_st # if clear, skip rest
  11805. or.l &1,8(%a2) # or in 1 to lsb of mantissa
  11806. fmov.x FP_SCR1(%a6),%fp0 # write adjusted Y back to fpu
  11807. # A11. Restore original FPCR; set size ext.
  11808. # Perform FINT operation in the user's rounding mode. Keep
  11809. # the size to extended. The sintdo entry point in the sint
  11810. # routine expects the FPCR value to be in USER_FPCR for
  11811. # mode and precision. The original FPCR is saved in L_SCR1.
  11812. A11_st:
  11813. mov.l USER_FPCR(%a6),L_SCR1(%a6) # save it for later
  11814. and.l &0x00000030,USER_FPCR(%a6) # set size to ext,
  11815. # ;block exceptions
  11816. # A12. Calculate YINT = FINT(Y) according to user's rounding mode.
  11817. # The FPSP routine sintd0 is used. The output is in fp0.
  11818. #
  11819. # Register usage:
  11820. # Input/Output
  11821. # d0: FPSR with AINEX cleared/FPCR with size set to ext
  11822. # d2: x/x/scratch
  11823. # d3: x/x
  11824. # d4: LEN/Unchanged
  11825. # d5: ICTR:LAMBDA/Unchanged
  11826. # d6: ILOG/Unchanged
  11827. # d7: k-factor/Unchanged
  11828. # a0: ptr for original operand/src ptr for sintdo
  11829. # a1: ptr to PTENxx array/Unchanged
  11830. # a2: ptr to FP_SCR1(a6)/Unchanged
  11831. # a6: temp pointer to FP_SCR1(a6) - orig value saved and restored
  11832. # fp0: Y/YINT
  11833. # fp1: 10^ISCALE/Unchanged
  11834. # fp2: x/x
  11835. # F_SCR1:x/x
  11836. # F_SCR2:Y adjusted for inex/Y with original exponent
  11837. # L_SCR1:x/original USER_FPCR
  11838. # L_SCR2:first word of X packed/Unchanged
  11839. A12_st:
  11840. movm.l &0xc0c0,-(%sp) # save regs used by sintd0 {%d0-%d1/%a0-%a1}
  11841. mov.l L_SCR1(%a6),-(%sp)
  11842. mov.l L_SCR2(%a6),-(%sp)
  11843. lea.l FP_SCR1(%a6),%a0 # a0 is ptr to FP_SCR1(a6)
  11844. fmov.x %fp0,(%a0) # move Y to memory at FP_SCR1(a6)
  11845. tst.l L_SCR2(%a6) # test sign of original operand
  11846. bge.b do_fint12 # if pos, use Y
  11847. or.l &0x80000000,(%a0) # if neg, use -Y
  11848. do_fint12:
  11849. mov.l USER_FPSR(%a6),-(%sp)
  11850. # bsr sintdo # sint routine returns int in fp0
  11851. fmov.l USER_FPCR(%a6),%fpcr
  11852. fmov.l &0x0,%fpsr # clear the AEXC bits!!!
  11853. ## mov.l USER_FPCR(%a6),%d0 # ext prec/keep rnd mode
  11854. ## andi.l &0x00000030,%d0
  11855. ## fmov.l %d0,%fpcr
  11856. fint.x FP_SCR1(%a6),%fp0 # do fint()
  11857. fmov.l %fpsr,%d0
  11858. or.w %d0,FPSR_EXCEPT(%a6)
  11859. ## fmov.l &0x0,%fpcr
  11860. ## fmov.l %fpsr,%d0 # don't keep ccodes
  11861. ## or.w %d0,FPSR_EXCEPT(%a6)
  11862. mov.b (%sp),USER_FPSR(%a6)
  11863. add.l &4,%sp
  11864. mov.l (%sp)+,L_SCR2(%a6)
  11865. mov.l (%sp)+,L_SCR1(%a6)
  11866. movm.l (%sp)+,&0x303 # restore regs used by sint {%d0-%d1/%a0-%a1}
  11867. mov.l L_SCR2(%a6),FP_SCR1(%a6) # restore original exponent
  11868. mov.l L_SCR1(%a6),USER_FPCR(%a6) # restore user's FPCR
  11869. # A13. Check for LEN digits.
  11870. # If the int operation results in more than LEN digits,
  11871. # or less than LEN -1 digits, adjust ILOG and repeat from
  11872. # A6. This test occurs only on the first pass. If the
  11873. # result is exactly 10^LEN, decrement ILOG and divide
  11874. # the mantissa by 10. The calculation of 10^LEN cannot
  11875. # be inexact, since all powers of ten up to 10^27 are exact
  11876. # in extended precision, so the use of a previous power-of-ten
  11877. # table will introduce no error.
  11878. #
  11879. #
  11880. # Register usage:
  11881. # Input/Output
  11882. # d0: FPCR with size set to ext/scratch final = 0
  11883. # d2: x/x
  11884. # d3: x/scratch final = x
  11885. # d4: LEN/LEN adjusted
  11886. # d5: ICTR:LAMBDA/LAMBDA:ICTR
  11887. # d6: ILOG/ILOG adjusted
  11888. # d7: k-factor/Unchanged
  11889. # a0: pointer into memory for packed bcd string formation
  11890. # a1: ptr to PTENxx array/Unchanged
  11891. # a2: ptr to FP_SCR1(a6)/Unchanged
  11892. # fp0: int portion of Y/abs(YINT) adjusted
  11893. # fp1: 10^ISCALE/Unchanged
  11894. # fp2: x/10^LEN
  11895. # F_SCR1:x/x
  11896. # F_SCR2:Y with original exponent/Unchanged
  11897. # L_SCR1:original USER_FPCR/Unchanged
  11898. # L_SCR2:first word of X packed/Unchanged
  11899. A13_st:
  11900. swap %d5 # put ICTR in lower word of d5
  11901. tst.w %d5 # check if ICTR = 0
  11902. bne not_zr # if non-zero (second pass), only the 10^LEN check applies
  11903. #
  11904. # Compute 10^(LEN-1) by binary decomposition of the exponent:
  11905. # for each set bit of (LEN-1), multiply in the matching 12-byte entry
  11906. # of the power-of-ten table pointed to by a1.
  11907. #
  11906. fmov.s FONE(%pc),%fp2 # init fp2 to 1.0
  11907. mov.l %d4,%d0 # put LEN in d0
  11908. subq.l &1,%d0 # d0 = LEN -1
  11909. clr.l %d3 # clr table index
  11910. l_loop:
  11911. lsr.l &1,%d0 # shift next bit into carry
  11912. bcc.b l_next # if zero, skip the mul
  11913. fmul.x (%a1,%d3),%fp2 # mul by 10**(d3_bit_no)
  11914. l_next:
  11915. add.l &12,%d3 # inc d3 to next pwrten table entry
  11916. tst.l %d0 # test if any exponent bits remain
  11917. bne.b l_loop # if not, loop
  11918. #
  11919. # 10^(LEN-1) is computed for this test and A14. If the input was
  11920. # denormalized, check only the case in which YINT > 10^LEN.
  11921. #
  11922. tst.b BINDEC_FLG(%a6) # check if input was norm
  11923. beq.b A13_con # if norm, continue with checking
  11924. fabs.x %fp0 # take abs of YINT
  11925. bra test_2
  11926. #
  11927. # Compare abs(YINT) to 10^(LEN-1) and 10^LEN
  11928. #
  11929. A13_con:
  11930. fabs.x %fp0 # take abs of YINT
  11931. fcmp.x %fp0,%fp2 # compare abs(YINT) with 10^(LEN-1)
  11932. fbge.w test_2 # if greater, do next test
  11933. subq.l &1,%d6 # subtract 1 from ILOG
  11934. mov.w &1,%d5 # set ICTR
  11935. fmov.l &rm_mode*0x10,%fpcr # set rmode to RM
  11936. fmul.s FTEN(%pc),%fp2 # compute 10^LEN
  11937. bra.w A6_str # return to A6 and recompute YINT
  11938. test_2:
  11939. fmul.s FTEN(%pc),%fp2 # compute 10^LEN
  11940. fcmp.x %fp0,%fp2 # compare abs(YINT) with 10^LEN
  11941. fblt.w A14_st # if less, all is ok, go to A14
  11942. fbgt.w fix_ex # if greater, fix and redo
  11943. fdiv.s FTEN(%pc),%fp0 # if equal, divide by 10
  11944. addq.l &1,%d6 # and inc ILOG
  11945. bra.b A14_st # and continue elsewhere
  11946. fix_ex:
  11947. addq.l &1,%d6 # increment ILOG by 1
  11948. mov.w &1,%d5 # set ICTR
  11949. fmov.l &rm_mode*0x10,%fpcr # set rmode to RM
  11950. bra.w A6_str # return to A6 and recompute YINT
  11951. #
  11952. # Since ICTR <> 0, we have already been through one adjustment,
  11953. # and shouldn't have another; this is to check if abs(YINT) = 10^LEN
  11954. # 10^LEN is again computed using whatever table is in a1 since the
  11955. # value calculated cannot be inexact.
  11956. #
  11957. not_zr:
  11958. fmov.s FONE(%pc),%fp2 # init fp2 to 1.0
  11959. mov.l %d4,%d0 # put LEN in d0
  11960. clr.l %d3 # clr table index
  11961. z_loop:
  11962. lsr.l &1,%d0 # shift next bit into carry
  11963. bcc.b z_next # if zero, skip the mul
  11964. fmul.x (%a1,%d3),%fp2 # mul by 10**(d3_bit_no)
  11965. z_next:
  11966. add.l &12,%d3 # inc d3 to next pwrten table entry
  11967. tst.l %d0 # test if any exponent bits remain
  11968. bne.b z_loop # if not, loop
  11969. fabs.x %fp0 # get abs(YINT)
  11970. fcmp.x %fp0,%fp2 # check if abs(YINT) = 10^LEN
  11971. fbneq.w A14_st # if not, skip this
  11972. fdiv.s FTEN(%pc),%fp0 # divide abs(YINT) by 10
  11973. addq.l &1,%d6 # and inc ILOG by 1
  11974. addq.l &1,%d4 # and inc LEN
  11975. fmul.s FTEN(%pc),%fp2 # LEN was incremented, so recompute 10^LEN for A14
  11976. # A14. Convert the mantissa to bcd.
  11977. # The binstr routine is used to convert the LEN digit
  11978. # mantissa to bcd in memory. The input to binstr is
  11979. # to be a fraction; i.e. (mantissa)/10^LEN and adjusted
  11980. # such that the decimal point is to the left of bit 63.
  11981. # The bcd digits are stored in the correct position in
  11982. # the final string area in memory.
  11983. #
  11984. #
  11985. # Register usage:
  11986. # Input/Output
  11987. # d0: x/LEN call to binstr - final is 0
  11988. # d1: x/0
  11989. # d2: x/ms 32-bits of mant of abs(YINT)
  11990. # d3: x/ls 32-bits of mant of abs(YINT)
  11991. # d4: LEN/Unchanged
  11992. # d5: ICTR:LAMBDA/LAMBDA:ICTR
  11993. # d6: ILOG
  11994. # d7: k-factor/Unchanged
  11995. # a0: pointer into memory for packed bcd string formation
  11996. # /ptr to first mantissa byte in result string
  11997. # a1: ptr to PTENxx array/Unchanged
  11998. # a2: ptr to FP_SCR1(a6)/Unchanged
  11999. # fp0: int portion of Y/abs(YINT) adjusted
  12000. # fp1: 10^ISCALE/Unchanged
  12001. # fp2: 10^LEN/Unchanged
  12002. # F_SCR1:x/Work area for final result
  12003. # F_SCR2:Y with original exponent/Unchanged
  12004. # L_SCR1:original USER_FPCR/Unchanged
  12005. # L_SCR2:first word of X packed/Unchanged
  12006. A14_st:
  12007. fmov.l &rz_mode*0x10,%fpcr # force rz for conversion
  12008. fdiv.x %fp2,%fp0 # divide abs(YINT) by 10^LEN
  12009. lea.l FP_SCR0(%a6),%a0 # a0 = ptr to result work area
  12010. fmov.x %fp0,(%a0) # move abs(YINT)/10^LEN to memory
  12011. mov.l 4(%a0),%d2 # move 2nd word of FP_RES to d2
  12012. mov.l 8(%a0),%d3 # move 3rd word of FP_RES to d3
  12013. clr.l 4(%a0) # zero word 2 of FP_RES
  12014. clr.l 8(%a0) # zero word 3 of FP_RES
  12015. mov.l (%a0),%d0 # move exponent to d0
  12016. swap %d0 # put exponent in lower word
  12017. beq.b no_sft # if zero, don't shift
  12018. sub.l &0x3ffd,%d0 # sub bias less 2 to make fract
  12019. tst.l %d0 # check sign of adjusted exp
  12020. bgt.b no_sft # if positive, don't shift
  12021. neg.l %d0 # make exp positive
  12022. m_loop:
  12023. lsr.l &1,%d2 # shift d2:d3 right, add 0s
  12024. roxr.l &1,%d3 # the number of places
  12025. dbf.w %d0,m_loop # given in d0
  12026. no_sft:
  12027. tst.l %d2 # check for mantissa of zero
  12028. bne.b no_zr # if not, go on
  12029. tst.l %d3 # continue zero check
  12030. beq.b zer_m # if zero, go directly to binstr
  12031. no_zr:
  12032. clr.l %d1 # put zero in d1 for addx
  12033. add.l &0x00000080,%d3 # round: inc at bit 7
  12034. addx.l %d1,%d2 # continue inc
  12035. and.l &0xffffff80,%d3 # strip off lsb not used by 882
  12036. zer_m:
  12037. mov.l %d4,%d0 # put LEN in d0 for binstr call
  12038. addq.l &3,%a0 # a0 points to M16 byte in result
  12039. bsr binstr # call binstr to convert mant
  12040. # A15. Convert the exponent to bcd.
  12041. # As in A14 above, the exp is converted to bcd and the
  12042. # digits are stored in the final string.
  12043. #
  12044. # Digits are stored in L_SCR1(a6) on return from binstr as:
  12045. #
  12046. # 32 16 15 0
  12047. # -----------------------------------------
  12048. # | 0 | e3 | e2 | e1 | e4 | X | X | X |
  12049. # -----------------------------------------
  12050. #
  12051. # And are moved into their proper places in FP_SCR0. If digit e4
  12052. # is non-zero, OPERR is signaled. In all cases, all 4 digits are
  12053. # written as specified in the 881/882 manual for packed decimal.
  12054. #
  12055. # Register usage:
  12056. # Input/Output
  12057. # d0: x/LEN call to binstr - final is 0
  12058. # d1: x/scratch (0);shift count for final exponent packing
  12059. # d2: x/ms 32-bits of exp fraction/scratch
  12060. # d3: x/ls 32-bits of exp fraction
  12061. # d4: LEN/Unchanged
  12062. # d5: ICTR:LAMBDA/LAMBDA:ICTR
  12063. # d6: ILOG
  12064. # d7: k-factor/Unchanged
  12065. # a0: ptr to result string/ptr to L_SCR1(a6)
  12066. # a1: ptr to PTENxx array/Unchanged
  12067. # a2: ptr to FP_SCR1(a6)/Unchanged
  12068. # fp0: abs(YINT) adjusted/float(ILOG)
  12069. # fp1: 10^ISCALE/Unchanged
  12070. # fp2: 10^LEN/Unchanged
  12071. # F_SCR1:Work area for final result/BCD result
  12072. # F_SCR2:Y with original exponent/ILOG/10^4
  12073. # L_SCR1:original USER_FPCR/Exponent digits on return from binstr
  12074. # L_SCR2:first word of X packed/Unchanged
  12075. A15_st:
  12076. tst.b BINDEC_FLG(%a6) # check for denorm
  12077. beq.b not_denorm # if norm, skip the denorm special cases
  12078. ftest.x %fp0 # test for zero
  12079. fbeq.w den_zero # if zero, use k-factor or 4933
  12080. fmov.l %d6,%fp0 # float ILOG
  12081. fabs.x %fp0 # get abs of ILOG
  12082. bra.b convrt
  12083. den_zero:
  12084. tst.l %d7 # check sign of the k-factor
  12085. blt.b use_ilog # if negative, use ILOG
  12086. fmov.s F4933(%pc),%fp0 # force exponent to 4933
  12087. bra.b convrt # do it
  12088. use_ilog:
  12089. fmov.l %d6,%fp0 # float ILOG
  12090. fabs.x %fp0 # get abs of ILOG
  12091. bra.b convrt
  12092. not_denorm:
  12093. ftest.x %fp0 # test for zero
  12094. fbneq.w not_zero # if zero, force exponent
  12095. fmov.s FONE(%pc),%fp0 # force exponent to 1
  12096. bra.b convrt # do it
  12097. not_zero:
  12098. fmov.l %d6,%fp0 # float ILOG
  12099. fabs.x %fp0 # get abs of ILOG
  12100. convrt:
  12101. fdiv.x 24(%a1),%fp0 # compute ILOG/10^4
  12102. fmov.x %fp0,FP_SCR1(%a6) # store fp0 in memory
  12103. mov.l 4(%a2),%d2 # move word 2 to d2
  12104. mov.l 8(%a2),%d3 # move word 3 to d3
  12105. mov.w (%a2),%d0 # move exp to d0
  12106. beq.b x_loop_fin # if zero, skip the shift
  12107. sub.w &0x3ffd,%d0 # subtract off bias
  12108. neg.w %d0 # make exp positive
  12109. x_loop:
  12110. lsr.l &1,%d2 # shift d2:d3 right
  12111. roxr.l &1,%d3 # the number of places
  12112. dbf.w %d0,x_loop # given in d0
  12113. x_loop_fin:
  12114. clr.l %d1 # put zero in d1 for addx
  12115. add.l &0x00000080,%d3 # round: inc at bit 7 (0x80), as in A14
  12116. addx.l %d1,%d2 # continue inc
  12117. and.l &0xffffff80,%d3 # strip off lsb not used by 882
  12118. mov.l &4,%d0 # put 4 in d0 for binstr call
  12119. lea.l L_SCR1(%a6),%a0 # a0 is ptr to L_SCR1 for exp digits
  12120. bsr binstr # call binstr to convert exp
  12121. mov.l L_SCR1(%a6),%d0 # load L_SCR1 lword to d0
  12122. mov.l &12,%d1 # use d1 for shift count
  12123. lsr.l %d1,%d0 # shift d0 right by 12
  12124. bfins %d0,FP_SCR0(%a6){&4:&12} # put e3:e2:e1 in FP_SCR0
  12125. lsr.l %d1,%d0 # shift d0 right by 12
  12126. bfins %d0,FP_SCR0(%a6){&16:&4} # put e4 in FP_SCR0
  12127. tst.b %d0 # check if e4 is zero
  12128. beq.b A16_st # if zero, skip rest
  12129. or.l &opaop_mask,USER_FPSR(%a6) # set OPERR & AIOP in USER_FPSR
  12130. # A16. Write sign bits to final string.
  12131. # Sigma is bit 31 of initial value; RHO is bit 31 of d6 (ILOG).
  12132. #
  12133. # Register usage:
  12134. # Input/Output
  12135. # d0: x/scratch - final is x
  12136. # d2: x/x
  12137. # d3: x/x
  12138. # d4: LEN/Unchanged
  12139. # d5: ICTR:LAMBDA/LAMBDA:ICTR
  12140. # d6: ILOG/ILOG adjusted
  12141. # d7: k-factor/Unchanged
  12142. # a0: ptr to L_SCR1(a6)/Unchanged
  12143. # a1: ptr to PTENxx array/Unchanged
  12144. # a2: ptr to FP_SCR1(a6)/Unchanged
  12145. # fp0: float(ILOG)/Unchanged
  12146. # fp1: 10^ISCALE/Unchanged
  12147. # fp2: 10^LEN/Unchanged
  12148. # F_SCR1:BCD result with correct signs
  12149. # F_SCR2:ILOG/10^4
  12150. # L_SCR1:Exponent digits on return from binstr
  12151. # L_SCR2:first word of X packed/Unchanged
  12152. A16_st:
  12153. clr.l %d0 # clr d0 for collection of signs
  12154. and.b &0x0f,FP_SCR0(%a6) # clear first nibble of FP_SCR0
  12155. tst.l L_SCR2(%a6) # check sign of original mantissa
  12156. bge.b mant_p # if pos, don't set SM
  12157. mov.l &2,%d0 # set bit 1 in d0 for SM
  12158. mant_p:
  12159. tst.l %d6 # check sign of ILOG
  12160. bge.b wr_sgn # if pos, don't set SE
  12161. addq.l &1,%d0 # set bit 0 in d0 for SE
  12162. wr_sgn:
  12163. bfins %d0,FP_SCR0(%a6){&0:&2} # insert SM and SE into FP_SCR0
  12164. # Clean up and restore all registers used.
  12165. fmov.l &0,%fpsr # clear possible inex2/ainex bits
  12166. fmovm.x (%sp)+,&0xe0 # {%fp0-%fp2}
  12167. movm.l (%sp)+,&0x4fc # {%d2-%d7/%a2}
  12168. rts
  12169. global PTENRN
# Extended-precision powers of ten 10^(2^n), n = 0..12, one 12-byte entry each.
# PTENRN: mantissas rounded to nearest (the three tables differ only in the
# final mantissa bits of the inexact entries).
  12170. PTENRN:
  12171. long 0x40020000,0xA0000000,0x00000000 # 10 ^ 1
  12172. long 0x40050000,0xC8000000,0x00000000 # 10 ^ 2
  12173. long 0x400C0000,0x9C400000,0x00000000 # 10 ^ 4
  12174. long 0x40190000,0xBEBC2000,0x00000000 # 10 ^ 8
  12175. long 0x40340000,0x8E1BC9BF,0x04000000 # 10 ^ 16
  12176. long 0x40690000,0x9DC5ADA8,0x2B70B59E # 10 ^ 32
  12177. long 0x40D30000,0xC2781F49,0xFFCFA6D5 # 10 ^ 64
  12178. long 0x41A80000,0x93BA47C9,0x80E98CE0 # 10 ^ 128
  12179. long 0x43510000,0xAA7EEBFB,0x9DF9DE8E # 10 ^ 256
  12180. long 0x46A30000,0xE319A0AE,0xA60E91C7 # 10 ^ 512
  12181. long 0x4D480000,0xC9767586,0x81750C17 # 10 ^ 1024
  12182. long 0x5A920000,0x9E8B3B5D,0xC53D5DE5 # 10 ^ 2048
  12183. long 0x75250000,0xC4605202,0x8A20979B # 10 ^ 4096
  12184. global PTENRP
# PTENRP: same powers with mantissas rounded toward plus infinity.
  12185. PTENRP:
  12186. long 0x40020000,0xA0000000,0x00000000 # 10 ^ 1
  12187. long 0x40050000,0xC8000000,0x00000000 # 10 ^ 2
  12188. long 0x400C0000,0x9C400000,0x00000000 # 10 ^ 4
  12189. long 0x40190000,0xBEBC2000,0x00000000 # 10 ^ 8
  12190. long 0x40340000,0x8E1BC9BF,0x04000000 # 10 ^ 16
  12191. long 0x40690000,0x9DC5ADA8,0x2B70B59E # 10 ^ 32
  12192. long 0x40D30000,0xC2781F49,0xFFCFA6D6 # 10 ^ 64
  12193. long 0x41A80000,0x93BA47C9,0x80E98CE0 # 10 ^ 128
  12194. long 0x43510000,0xAA7EEBFB,0x9DF9DE8E # 10 ^ 256
  12195. long 0x46A30000,0xE319A0AE,0xA60E91C7 # 10 ^ 512
  12196. long 0x4D480000,0xC9767586,0x81750C18 # 10 ^ 1024
  12197. long 0x5A920000,0x9E8B3B5D,0xC53D5DE5 # 10 ^ 2048
  12198. long 0x75250000,0xC4605202,0x8A20979B # 10 ^ 4096
  12199. global PTENRM
# PTENRM: same powers with mantissas rounded toward minus infinity.
  12200. PTENRM:
  12201. long 0x40020000,0xA0000000,0x00000000 # 10 ^ 1
  12202. long 0x40050000,0xC8000000,0x00000000 # 10 ^ 2
  12203. long 0x400C0000,0x9C400000,0x00000000 # 10 ^ 4
  12204. long 0x40190000,0xBEBC2000,0x00000000 # 10 ^ 8
  12205. long 0x40340000,0x8E1BC9BF,0x04000000 # 10 ^ 16
  12206. long 0x40690000,0x9DC5ADA8,0x2B70B59D # 10 ^ 32
  12207. long 0x40D30000,0xC2781F49,0xFFCFA6D5 # 10 ^ 64
  12208. long 0x41A80000,0x93BA47C9,0x80E98CDF # 10 ^ 128
  12209. long 0x43510000,0xAA7EEBFB,0x9DF9DE8D # 10 ^ 256
  12210. long 0x46A30000,0xE319A0AE,0xA60E91C6 # 10 ^ 512
  12211. long 0x4D480000,0xC9767586,0x81750C17 # 10 ^ 1024
  12212. long 0x5A920000,0x9E8B3B5D,0xC53D5DE4 # 10 ^ 2048
  12213. long 0x75250000,0xC4605202,0x8A20979A # 10 ^ 4096
  12214. #########################################################################
  12215. # binstr(): Converts a 64-bit binary integer to bcd. #
  12216. # #
  12217. # INPUT *************************************************************** #
  12218. # d2:d3 = 64-bit binary integer #
  12219. # d0 = desired length (LEN) #
  12220. # a0 = pointer to start in memory for bcd characters #
  12221. # (This pointer must point to byte 4 of the first #
  12222. # lword of the packed decimal memory string.) #
  12223. # #
  12224. # OUTPUT ************************************************************** #
  12225. # a0 = pointer to LEN bcd digits representing the 64-bit integer. #
  12226. # #
  12227. # ALGORITHM *********************************************************** #
  12228. # The 64-bit binary is assumed to have a decimal point before #
  12229. # bit 63. The fraction is multiplied by 10 using a mul by 2 #
  12230. # shift and a mul by 8 shift. The bits shifted out of the #
  12231. # msb form a decimal digit. This process is iterated until #
  12232. # LEN digits are formed. #
  12233. # #
  12234. # A1. Init d7 to 1. D7 is the byte digit counter, and if 1, the #
  12235. # digit formed will be assumed the least significant. This is #
  12236. # to force the first byte formed to have a 0 in the upper 4 bits. #
  12237. # #
  12238. # A2. Beginning of the loop: #
  12239. # Copy the fraction in d2:d3 to d4:d5. #
  12240. # #
  12241. # A3. Multiply the fraction in d2:d3 by 8 using bit-field #
  12242. # extracts and shifts. The three msbs from d2 will go into d1. #
  12243. # #
  12244. # A4. Multiply the fraction in d4:d5 by 2 using shifts. The msb #
  12245. # will be collected by the carry. #
  12246. # #
  12247. # A5. Add using the carry the 64-bit quantities in d2:d3 and d4:d5 #
  12248. # into d2:d3. D1 will contain the bcd digit formed. #
  12249. # #
  12250. # A6. Test d7. If zero, the digit formed is the ms digit. If non- #
  12251. # zero, it is the ls digit. Put the digit in its place in the #
  12252. # upper word of d0. If it is the ls digit, write the word #
  12253. # from d0 to memory. #
  12254. # #
  12255. # A7. Decrement d6 (LEN counter) and repeat the loop until zero. #
  12256. # #
  12257. #########################################################################
  12258. # Implementation Notes:
  12259. #
  12260. # The registers are used as follows:
  12261. #
  12262. # d0: LEN counter
  12263. # d1: temp used to form the digit
  12264. # d2: upper 32-bits of fraction for mul by 8
  12265. # d3: lower 32-bits of fraction for mul by 8
  12266. # d4: upper 32-bits of fraction for mul by 2
  12267. # d5: lower 32-bits of fraction for mul by 2
  12268. # d6: temp for bit-field extracts
  12269. # d7: byte digit formation word;digit count {0,1}
  12270. # a0: pointer into memory for packed bcd string formation
  12271. #
  12272. global binstr
  12273. binstr:
  12274. movm.l &0xff00,-(%sp) # {%d0-%d7}
  12275. #
  12276. # A1: Init d7 (d7a = digit count flag, d7b = byte being formed)
  12277. #
  12278. mov.l &1,%d7 # init d7 for second digit
  12279. subq.l &1,%d0 # for dbf d0 would have LEN+1 passes
  12280. #
  12281. # A2. Copy d2:d3 to d4:d5. Start loop.
  12282. #
  12283. loop:
  12284. mov.l %d2,%d4 # copy the fraction before muls
  12285. mov.l %d3,%d5 # to d4:d5
  12286. #
  12287. # A3. Multiply d2:d3 by 8; extract msbs into d1.
  12288. #
  12289. bfextu %d2{&0:&3},%d1 # copy 3 msbs of d2 into d1
  12290. asl.l &3,%d2 # shift d2 left by 3 places
  12291. bfextu %d3{&0:&3},%d6 # copy 3 msbs of d3 into d6
  12292. asl.l &3,%d3 # shift d3 left by 3 places
  12293. or.l %d6,%d2 # or in msbs from d3 into d2
  12294. #
  12295. # A4. Multiply d4:d5 by 2; add carry out to d1.
  12296. #
  12297. asl.l &1,%d5 # mul d5 by 2
  12298. roxl.l &1,%d4 # mul d4 by 2
  12299. swap %d6 # put 0 in d6 lower word
  12300. addx.w %d6,%d1 # add in extend from mul by 2
  12301. #
  12302. # A5. Add mul by 8 to mul by 2. D1 contains the digit formed.
  12303. #
  12304. add.l %d5,%d3 # add lower 32 bits
  12305. nop # ERRATA FIX #13 (Rev. 1.2 6/6/90)
  12306. addx.l %d4,%d2 # add with extend upper 32 bits
  12307. nop # ERRATA FIX #13 (Rev. 1.2 6/6/90)
  12308. addx.w %d6,%d1 # add in extend from add to d1
  12309. swap %d6 # with d6 = 0; put 0 in upper word
  12310. #
  12311. # A6. Test d7 and branch.
  12312. #
  12313. tst.w %d7 # check digit-pending flag in d7a
  12314. beq.b first_d # if zero, this digit is the first of a byte pair
  12315. sec_d:
  12316. swap %d7 # bring first digit to word d7b
  12317. asl.w &4,%d7 # first digit in upper 4 bits d7b
  12318. add.w %d1,%d7 # add in ls digit to d7b
  12319. mov.b %d7,(%a0)+ # store d7b byte in memory
  12320. swap %d7 # put LEN counter in word d7a
  12321. clr.w %d7 # set d7a to signal no digits done
  12322. dbf.w %d0,loop # do loop some more!
  12323. bra.b end_bstr # finished, so exit
  12324. first_d:
  12325. swap %d7 # put digit word in d7b
  12326. mov.w %d1,%d7 # put new digit in d7b
  12327. swap %d7 # put LEN counter in word d7a
  12328. addq.w &1,%d7 # set d7a to signal first digit done
  12329. dbf.w %d0,loop # do loop some more!
  12330. swap %d7 # odd digit count: flush the pending digit
  12331. lsl.w &4,%d7 # move it to upper 4 bits
  12332. mov.b %d7,(%a0)+ # store it in memory string
  12333. #
  12334. # Clean up and return with result in fp0.
  12335. #
  12336. end_bstr:
  12337. movm.l (%sp)+,&0xff # {%d0-%d7}
  12338. rts
  12339. #########################################################################
  12340. # XDEF **************************************************************** #
  12341. # facc_in_b(): dmem_read_byte failed #
  12342. # facc_in_w(): dmem_read_word failed #
  12343. # facc_in_l(): dmem_read_long failed #
  12344. # facc_in_d(): dmem_read of dbl prec failed #
  12345. # facc_in_x(): dmem_read of ext prec failed #
  12346. # #
  12347. # facc_out_b(): dmem_write_byte failed #
  12348. # facc_out_w(): dmem_write_word failed #
  12349. # facc_out_l(): dmem_write_long failed #
  12350. # facc_out_d(): dmem_write of dbl prec failed #
  12351. # facc_out_x(): dmem_write of ext prec failed #
  12352. # #
  12353. # XREF **************************************************************** #
  12354. # _real_access() - exit through access error handler #
  12355. # #
  12356. # INPUT *************************************************************** #
  12357. # None #
  12358. # #
  12359. # OUTPUT ************************************************************** #
  12360. # None #
  12361. # #
  12362. # ALGORITHM *********************************************************** #
  12363. # Flow jumps here when an FP data fetch call gets an error #
  12364. # result. This means the operating system wants an access error frame #
  12365. # made out of the current exception stack frame. #
  12366. # So, we first call restore() which makes sure that any updated #
  12367. # -(an)+ register gets returned to its pre-exception value and then #
  12368. # we change the stack to an access error stack frame. #
  12369. # #
  12370. #########################################################################
  12371. facc_in_b:
  12372. movq.l &0x1,%d0 # one byte
  12373. bsr.w restore # fix An
  12374. mov.w &0x0121,EXC_VOFF(%a6) # set FSLW
  12375. bra.w facc_finish
  12376. facc_in_w:
  12377. movq.l &0x2,%d0 # two bytes
  12378. bsr.w restore # fix An
  12379. mov.w &0x0141,EXC_VOFF(%a6) # set FSLW
  12380. bra.b facc_finish
  12381. facc_in_l:
  12382. movq.l &0x4,%d0 # four bytes
  12383. bsr.w restore # fix An
  12384. mov.w &0x0101,EXC_VOFF(%a6) # set FSLW
  12385. bra.b facc_finish
  12386. facc_in_d:
  12387. movq.l &0x8,%d0 # eight bytes
  12388. bsr.w restore # fix An
  12389. mov.w &0x0161,EXC_VOFF(%a6) # set FSLW
  12390. bra.b facc_finish
  12391. facc_in_x:
  12392. movq.l &0xc,%d0 # twelve bytes
  12393. bsr.w restore # fix An
  12394. mov.w &0x0161,EXC_VOFF(%a6) # set FSLW (NOTE(review): same value as dbl-prec case — confirm intended)
  12395. bra.b facc_finish
  12396. ################################################################
  12397. facc_out_b:
  12398. movq.l &0x1,%d0 # one byte
  12399. bsr.w restore # restore An
  12400. mov.w &0x00a1,EXC_VOFF(%a6) # set FSLW
  12401. bra.b facc_finish
  12402. facc_out_w:
  12403. movq.l &0x2,%d0 # two bytes
  12404. bsr.w restore # restore An
  12405. mov.w &0x00c1,EXC_VOFF(%a6) # set FSLW
  12406. bra.b facc_finish
  12407. facc_out_l:
  12408. movq.l &0x4,%d0 # four bytes
  12409. bsr.w restore # restore An
  12410. mov.w &0x0081,EXC_VOFF(%a6) # set FSLW
  12411. bra.b facc_finish
  12412. facc_out_d:
  12413. movq.l &0x8,%d0 # eight bytes
  12414. bsr.w restore # restore An
  12415. mov.w &0x00e1,EXC_VOFF(%a6) # set FSLW
  12416. bra.b facc_finish
  12417. facc_out_x:
  12418. mov.l &0xc,%d0 # twelve bytes
  12419. bsr.w restore # restore An
  12420. mov.w &0x00e1,EXC_VOFF(%a6) # set FSLW
  12421. # here's where we actually create the access error frame from the
  12422. # current exception stack frame.
  12423. facc_finish:
  12424. mov.l USER_FPIAR(%a6),EXC_PC(%a6) # store current PC
  12425. fmovm.x EXC_FPREGS(%a6),&0xc0 # restore fp0-fp1
  12426. fmovm.l USER_FPCR(%a6),%fpcr,%fpsr,%fpiar # restore ctrl regs
  12427. movm.l EXC_DREGS(%a6),&0x0303 # restore d0-d1/a0-a1
  12428. unlk %a6
  12429. mov.l (%sp),-(%sp) # store SR, hi(PC)
  12430. mov.l 0x8(%sp),0x4(%sp) # store lo(PC)
  12431. mov.l 0xc(%sp),0x8(%sp) # store EA
  12432. mov.l &0x00000001,0xc(%sp) # store FSLW
  12433. mov.w 0x6(%sp),0xc(%sp) # fix FSLW (size)
  12434. mov.w &0x4008,0x6(%sp) # store voff
  12435. btst &0x5,(%sp) # supervisor or user mode?
  12436. beq.b facc_out2 # user
  12437. bset &0x2,0xd(%sp) # set supervisor TM bit
  12438. facc_out2:
  12439. bra.l _real_access
  12440. ##################################################################
  12441. # if the effective addressing mode was predecrement or postincrement,
  12442. # the emulation has already changed its value to the correct post-
  12443. # instruction value. but since we're exiting to the access error
  12444. # handler, then AN must be returned to its pre-instruction value.
  12445. # we do that here.
  12446. restore:
  12447. mov.b EXC_OPWORD+0x1(%a6),%d1 # fetch low byte of stacked opword
  12448. andi.b &0x38,%d1 # extract opmode
  12449. cmpi.b %d1,&0x18 # postinc?
  12450. beq.w rest_inc
  12451. cmpi.b %d1,&0x20 # predec?
  12452. beq.w rest_dec
  12453. rts
  12454. rest_inc:
  12455. mov.b EXC_OPWORD+0x1(%a6),%d1
  12456. andi.w &0x0007,%d1 # fetch An
  12457. mov.w (tbl_rest_inc.b,%pc,%d1.w*2),%d1 # load table offset for An
  12458. jmp (tbl_rest_inc.b,%pc,%d1.w*1) # dispatch to the ri_aN handler
  12459. tbl_rest_inc:
  12460. short ri_a0 - tbl_rest_inc
  12461. short ri_a1 - tbl_rest_inc
  12462. short ri_a2 - tbl_rest_inc
  12463. short ri_a3 - tbl_rest_inc
  12464. short ri_a4 - tbl_rest_inc
  12465. short ri_a5 - tbl_rest_inc
  12466. short ri_a6 - tbl_rest_inc
  12467. short ri_a7 - tbl_rest_inc
  12468. ri_a0:
  12469. sub.l %d0,EXC_DREGS+0x8(%a6) # fix stacked a0
  12470. rts
  12471. ri_a1:
  12472. sub.l %d0,EXC_DREGS+0xc(%a6) # fix stacked a1
  12473. rts
  12474. ri_a2:
  12475. sub.l %d0,%a2 # fix a2
  12476. rts
  12477. ri_a3:
  12478. sub.l %d0,%a3 # fix a3
  12479. rts
  12480. ri_a4:
  12481. sub.l %d0,%a4 # fix a4
  12482. rts
  12483. ri_a5:
  12484. sub.l %d0,%a5 # fix a5
  12485. rts
  12486. ri_a6:
  12487. sub.l %d0,(%a6) # fix stacked a6
  12488. rts
  12489. # if it's a fmove out instruction, we don't have to fix a7
  12490. # because we hadn't changed it yet. if it's an opclass two
  12491. # instruction (data moved in) and the exception was in supervisor
  12492. # mode, then a7 also wasn't updated. if it was user mode, then
  12493. # restore the correct a7 which is in the USP currently.
  12494. ri_a7:
  12495. cmpi.b EXC_VOFF(%a6),&0x30 # move in or out?
  12496. bne.b ri_a7_done # out
  12497. btst &0x5,EXC_SR(%a6) # user or supervisor?
  12498. bne.b ri_a7_done # supervisor
  12499. movc %usp,%a0 # restore USP
  12500. sub.l %d0,%a0
  12501. movc %a0,%usp
  12502. ri_a7_done:
  12503. rts
  12504. # need to invert adjustment value if the <ea> was predec
  12505. rest_dec:
  12506. neg.l %d0
  12507. bra.b rest_inc