/security/nss/lib/freebl/mpi/mpi_x86_os2.s

https://github.com/rillian/firefox · Assembly · 538 lines · 526 code · 12 blank · 0 comment · 17 complexity · c22308c40d07d8189de3c3b3eb4c5a38 MD5 · raw file

  1. #
  2. # This Source Code Form is subject to the terms of the Mozilla Public
  3. # License, v. 2.0. If a copy of the MPL was not distributed with this
  4. # file, You can obtain one at http://mozilla.org/MPL/2.0/.
  .data
  .align 4
  #
  # is_sse caches which code path the entry points in this file take:
  #   negative (initially -1) : not decided yet; each entry point calls
  #                             _s_mpi_is_sse2 and stores the result here
  #   zero                    : use the plain x86 instruction paths
  #   positive                : use the SSE2 (pmuludq) paths
  #
  is_sse: .long -1
  .type is_sse,@object
  .size is_sse,4
  #
  # GET and PUT wrap every access to is_sse so that the addressing mode can
  # be switched in one place.
  #
  # The original note here said to default to PIC because the file this was
  # derived from was Linux-only (solaris uses mpi_i86pc.s and windows uses
  # mpi_x86_asm.c).  In this file the PIC (GOT-relative, via %ebx) variants
  # are commented out and the absolute-address variants below are used
  # unconditionally.
  #
  #.ifndef NO_PIC
  #.macro GET var,reg
  # movl \var@GOTOFF(%ebx),\reg
  #.endm
  #.macro PUT reg,var
  # movl \reg,\var@GOTOFF(%ebx)
  #.endm
  #.else
  #
  # PUT reg,var : store register reg into the variable var
  .macro PUT reg,var
        movl    \reg,\var
  .endm
  #
  # GET var,reg : load the variable var into register reg
  .macro GET var,reg
        movl    \var,\reg
  .endm
  #.endif
  .text
  #
  # _s_mpv_mul_d(a, a_len, b, c)
  #
  # Multiplies the a_len-word vector at a by the single word b (words are
  # four bytes; both pointers advance by 4 per step):
  #
  #     carry = 0
  #     for i in [0, a_len):   carry:c[i] = a[i] * b + carry
  #     c[a_len] = carry
  #
  # so c receives a_len + 1 words.  With a_len == 0 only c[0] = 0 is stored.
  #
  # Entry dispatch on is_sse: positive jumps to _s_mpv_mul_d_sse2, zero goes
  # to the x86 loop below, negative calls _s_mpi_is_sse2, stores its result
  # in is_sse and then picks the path from that result.
  #
  # Frame of the x86 path:
  #   ebp - 12 : caller's ebx
  #   ebp -  8 : caller's esi
  #   ebp -  4 : caller's edi
  #   ebp +  0 : caller's ebp
  #   ebp +  4 : return address
  #   ebp +  8 : a argument
  #   ebp + 12 : a_len argument
  #   ebp + 16 : b argument
  #   ebp + 20 : c argument
  # Registers:
  #   eax/edx : product of the current step
  #   ebx     : carry
  #   ecx     : words left
  #   esi     : a ptr
  #   edi     : c ptr
  # ebx, esi, edi and ebp are restored; eax, ecx and edx are clobbered and
  # no return value is set.
  #
  .globl _s_mpv_mul_d
  .type _s_mpv_mul_d,@function
  _s_mpv_mul_d:
        GET     is_sse,%eax
        test    %eax,%eax
        je      _s_mpv_mul_d_x86
        jg      _s_mpv_mul_d_sse2
        call    _s_mpi_is_sse2          # not probed yet: ask once ...
        PUT     %eax,is_sse             # ... and remember the answer
        test    %eax,%eax
        jg      _s_mpv_mul_d_sse2
  _s_mpv_mul_d_x86:
        push    %ebp
        mov     %esp,%ebp
        push    %edi
        push    %esi
        push    %ebx
        xor     %ebx,%ebx               # carry = 0
        mov     12(%ebp),%ecx           # ecx = a_len
        mov     20(%ebp),%edi           # edi = c
        test    %ecx,%ecx
        je      2f                      # jmp if a_len == 0
        mov     8(%ebp),%esi            # esi = a
  1:
        mov     (%esi),%eax             # eax = *a++
        lea     4(%esi),%esi
        mull    16(%ebp)                # edx:eax = Phi:Plo = a_i * b
        add     %ebx,%eax               # add carry (%ebx) to edx:eax
        adc     $0,%edx
        mov     %edx,%ebx               # high half of product becomes next carry
        mov     %eax,(%edi)             # *c++ = low half
        lea     4(%edi),%edi
        dec     %ecx                    # --a_len
        jnz     1b                      # jmp if a_len != 0
  2:
        mov     %ebx,(%edi)             # *c = carry
        pop     %ebx
        pop     %esi
        pop     %edi
        leave
        ret
        nop
  #
  # _s_mpv_mul_d_sse2: pmuludq variant of _s_mpv_mul_d, reached by a jump
  # from that entry point, so the stack still holds the return address and
  # the arguments a, a_len, b, c exactly as at function entry.
  #
  #     carry = 0
  #     for i in [0, a_len):   carry:c[i] = a[i] * b + carry
  #     c[a_len] = carry
  #
  # Frame:
  #   ebp -  8 : caller's esi
  #   ebp -  4 : caller's edi
  #   ebp +  8 : a,  ebp + 12 : a_len,  ebp + 16 : b,  ebp + 20 : c
  # Registers:
  #   mm0 : current product        mm1 : b
  #   mm2 : running sum; after each store its upper half becomes the carry
  #   ecx : words left   esi : a ptr   edi : c ptr
  # mm0-mm2 and ecx are clobbered; emms resets the MMX state before return.
  #
  _s_mpv_mul_d_sse2:
        push    %ebp
        mov     %esp,%ebp
        push    %edi
        push    %esi
        pxor    %mm2,%mm2               # carry = 0
        mov     12(%ebp),%ecx           # ecx = a_len
        movd    16(%ebp),%mm1           # mm1 = b
        mov     20(%ebp),%edi           # edi = c
        test    %ecx,%ecx
        je      6f                      # jmp if a_len == 0
        mov     8(%ebp),%esi            # esi = a
  5:
        movd    (%esi),%mm0             # mm0 = *a++
        add     $4,%esi
        pmuludq %mm1,%mm0               # mm0 = b * a_i (64-bit product)
        paddq   %mm0,%mm2               # product + carry
        movd    %mm2,(%edi)             # *c++ = low half
        add     $4,%edi
        psrlq   $32,%mm2                # high half is the next carry
        dec     %ecx                    # --a_len
        jnz     5b                      # jmp if a_len != 0
  6:
        movd    %mm2,(%edi)             # *c = carry
        emms
        pop     %esi
        pop     %edi
        leave
        ret
        nop
  #
  # _s_mpv_mul_d_add(a, a_len, b, c)
  #
  # Multiply-accumulate of the a_len-word vector at a by the word b into c
  # (words are four bytes):
  #
  #     carry = 0
  #     for i in [0, a_len):   carry:c[i] = a[i] * b + c[i] + carry
  #     c[a_len] = carry
  #
  # The final carry is stored into c[a_len], not added to it.
  #
  # Entry dispatch on is_sse: positive jumps to _s_mpv_mul_d_add_sse2, zero
  # goes to the x86 loop below, negative calls _s_mpi_is_sse2, stores its
  # result in is_sse and then picks the path from that result.
  #
  # Frame of the x86 path:
  #   ebp - 12 : caller's ebx
  #   ebp -  8 : caller's esi
  #   ebp -  4 : caller's edi
  #   ebp +  0 : caller's ebp
  #   ebp +  4 : return address
  #   ebp +  8 : a argument
  #   ebp + 12 : a_len argument
  #   ebp + 16 : b argument
  #   ebp + 20 : c argument
  # Registers:
  #   eax/edx : product of the current step
  #   ebx     : carry
  #   ecx     : words left
  #   esi     : a ptr
  #   edi     : c ptr
  # ebx, esi, edi and ebp are restored; eax, ecx and edx are clobbered and
  # no return value is set.
  #
  .globl _s_mpv_mul_d_add
  .type _s_mpv_mul_d_add,@function
  _s_mpv_mul_d_add:
        GET     is_sse,%eax
        test    %eax,%eax
        je      _s_mpv_mul_d_add_x86
        jg      _s_mpv_mul_d_add_sse2
        call    _s_mpi_is_sse2          # not probed yet: ask once ...
        PUT     %eax,is_sse             # ... and remember the answer
        test    %eax,%eax
        jg      _s_mpv_mul_d_add_sse2
  _s_mpv_mul_d_add_x86:
        push    %ebp
        mov     %esp,%ebp
        push    %edi
        push    %esi
        push    %ebx
        xor     %ebx,%ebx               # carry = 0
        mov     12(%ebp),%ecx           # ecx = a_len
        mov     20(%ebp),%edi           # edi = c
        test    %ecx,%ecx
        je      11f                     # jmp if a_len == 0
        mov     8(%ebp),%esi            # esi = a
  10:
        mov     (%esi),%eax             # eax = *a++
        lea     4(%esi),%esi
        mull    16(%ebp)                # edx:eax = Phi:Plo = a_i * b
        add     %ebx,%eax               # add carry (%ebx) to edx:eax
        adc     $0,%edx
        add     (%edi),%eax             # add in current word from *c
        adc     $0,%edx
        mov     %edx,%ebx               # high half becomes next carry
        mov     %eax,(%edi)             # *c++ = low half
        lea     4(%edi),%edi
        dec     %ecx                    # --a_len
        jnz     10b                     # jmp if a_len != 0
  11:
        mov     %ebx,(%edi)             # *c = carry
        pop     %ebx
        pop     %esi
        pop     %edi
        leave
        ret
        nop
  #
  # _s_mpv_mul_d_add_sse2: pmuludq variant of _s_mpv_mul_d_add, reached by a
  # jump from that entry point, so the stack still holds the return address
  # and the arguments a, a_len, b, c exactly as at function entry.
  #
  #     carry = 0
  #     for i in [0, a_len):   carry:c[i] = a[i] * b + c[i] + carry
  #     c[a_len] = carry            (stored, not added)
  #
  # Frame:
  #   ebp -  8 : caller's esi
  #   ebp -  4 : caller's edi
  #   ebp +  8 : a,  ebp + 12 : a_len,  ebp + 16 : b,  ebp + 20 : c
  # Registers:
  #   mm0 : current product, then the current word of c      mm1 : b
  #   mm2 : running sum; after each store its upper half becomes the carry
  #   ecx : words left   esi : a ptr   edi : c ptr
  # mm0-mm2 and ecx are clobbered; emms resets the MMX state before return.
  #
  _s_mpv_mul_d_add_sse2:
        push    %ebp
        mov     %esp,%ebp
        push    %edi
        push    %esi
        pxor    %mm2,%mm2               # carry = 0
        mov     12(%ebp),%ecx           # ecx = a_len
        movd    16(%ebp),%mm1           # mm1 = b
        mov     20(%ebp),%edi           # edi = c
        test    %ecx,%ecx
        je      16f                     # jmp if a_len == 0
        mov     8(%ebp),%esi            # esi = a
  15:
        movd    (%esi),%mm0             # mm0 = *a++
        add     $4,%esi
        pmuludq %mm1,%mm0               # mm0 = b * a_i (64-bit product)
        paddq   %mm0,%mm2               # product + carry
        movd    (%edi),%mm0             # mm0 = current word of c
        paddq   %mm0,%mm2               # ... + c_i
        movd    %mm2,(%edi)             # *c++ = low half
        add     $4,%edi
        psrlq   $32,%mm2                # high half is the next carry
        dec     %ecx                    # --a_len
        jnz     15b                     # jmp if a_len != 0
  16:
        movd    %mm2,(%edi)             # *c = carry
        emms
        pop     %esi
        pop     %edi
        leave
        ret
        nop
  #
  # _s_mpv_mul_d_add_prop(a, a_len, b, c)
  #
  # Multiply-accumulate with carry propagation (words are four bytes):
  #
  #     carry = 0
  #     for i in [0, a_len):   carry:c[i] = a[i] * b + c[i] + carry
  #     if carry != 0: add it to c[a_len] and keep adding the carry-out to
  #                    the following words of c until an add does not carry
  #
  # The propagation loop has no length limit of its own; it stops only at
  # the first word that does not overflow.
  #
  # Entry dispatch on is_sse: positive jumps to _s_mpv_mul_d_add_prop_sse2,
  # zero goes to the x86 loop below, negative calls _s_mpi_is_sse2, stores
  # its result in is_sse and then picks the path from that result.
  #
  # Frame of the x86 path:
  #   ebp - 12 : caller's ebx
  #   ebp -  8 : caller's esi
  #   ebp -  4 : caller's edi
  #   ebp +  0 : caller's ebp
  #   ebp +  4 : return address
  #   ebp +  8 : a argument
  #   ebp + 12 : a_len argument
  #   ebp + 16 : b argument
  #   ebp + 20 : c argument
  # Registers:
  #   eax/edx : product of the current step
  #   ebx     : carry
  #   ecx     : words left
  #   esi     : a ptr
  #   edi     : c ptr
  # ebx, esi, edi and ebp are restored; eax, ecx and edx are clobbered and
  # no return value is set.
  #
  .globl _s_mpv_mul_d_add_prop
  .type _s_mpv_mul_d_add_prop,@function
  _s_mpv_mul_d_add_prop:
        GET     is_sse,%eax
        test    %eax,%eax
        je      _s_mpv_mul_d_add_prop_x86
        jg      _s_mpv_mul_d_add_prop_sse2
        call    _s_mpi_is_sse2          # not probed yet: ask once ...
        PUT     %eax,is_sse             # ... and remember the answer
        test    %eax,%eax
        jg      _s_mpv_mul_d_add_prop_sse2
  _s_mpv_mul_d_add_prop_x86:
        push    %ebp
        mov     %esp,%ebp
        push    %edi
        push    %esi
        push    %ebx
        xor     %ebx,%ebx               # carry = 0
        mov     12(%ebp),%ecx           # ecx = a_len
        mov     20(%ebp),%edi           # edi = c
        test    %ecx,%ecx
        je      21f                     # jmp if a_len == 0
        mov     8(%ebp),%esi            # esi = a
  20:
        mov     (%esi),%eax             # eax = *a++
        lea     4(%esi),%esi
        mull    16(%ebp)                # edx:eax = Phi:Plo = a_i * b
        add     %ebx,%eax               # add carry (%ebx) to edx:eax
        adc     $0,%edx
        add     (%edi),%eax             # add in current word from *c
        adc     $0,%edx
        mov     %edx,%ebx               # high half becomes next carry
        mov     %eax,(%edi)             # *c++ = low half
        lea     4(%edi),%edi
        dec     %ecx                    # --a_len
        jnz     20b                     # jmp if a_len != 0
  21:
        test    %ebx,%ebx               # is carry zero?
        jz      23f
        add     %ebx,(%edi)             # *c++ += carry
        lea     4(%edi),%edi            # lea keeps CF from the add intact
        jnc     23f
  22:
        adcl    $0,(%edi)               # *c++ += 1 (CF is set on entry here)
        lea     4(%edi),%edi            # lea keeps CF intact for the jc
        jc      22b
  23:
        pop     %ebx
        pop     %esi
        pop     %edi
        leave
        ret
        nop
  #
  # _s_mpv_mul_d_add_prop_sse2: pmuludq variant of _s_mpv_mul_d_add_prop,
  # reached by a jump from that entry point, so the stack still holds the
  # return address and the arguments a, a_len, b, c as at function entry.
  #
  #     carry = 0
  #     for i in [0, a_len):   carry:c[i] = a[i] * b + c[i] + carry
  #     if carry != 0: add it to c[a_len] and keep adding the carry-out to
  #                    the following words of c until an add does not carry
  #
  # Frame:
  #   ebp - 12 : caller's ebx
  #   ebp -  8 : caller's esi
  #   ebp -  4 : caller's edi
  #   ebp +  8 : a,  ebp + 12 : a_len,  ebp + 16 : b,  ebp + 20 : c
  # Registers:
  #   mm0 : current product     mm1 : b     mm3 : current word of c
  #   mm2 : running sum; after each store its upper half becomes the carry
  #   ebx : final carry for the propagation tail
  #   ecx : words left   esi : a ptr   edi : c ptr
  # mm0-mm3 and ecx are clobbered; emms resets the MMX state before return.
  #
  _s_mpv_mul_d_add_prop_sse2:
        push    %ebp
        mov     %esp,%ebp
        push    %edi
        push    %esi
        push    %ebx
        pxor    %mm2,%mm2               # carry = 0
        mov     12(%ebp),%ecx           # ecx = a_len
        movd    16(%ebp),%mm1           # mm1 = b
        mov     20(%ebp),%edi           # edi = c
        test    %ecx,%ecx
        je      26f                     # jmp if a_len == 0
        mov     8(%ebp),%esi            # esi = a
  25:
        movd    (%esi),%mm0             # mm0 = *a++
        movd    (%edi),%mm3             # fetch the sum
        add     $4,%esi
        pmuludq %mm1,%mm0               # mm0 = b * a_i (64-bit product)
        paddq   %mm0,%mm2               # add the carry
        paddq   %mm3,%mm2               # add *c
        movd    %mm2,(%edi)             # *c++ = low half
        add     $4,%edi
        psrlq   $32,%mm2                # high half is the next carry
        dec     %ecx                    # --a_len
        jnz     25b                     # jmp if a_len != 0
  26:
        movd    %mm2,%ebx
        test    %ebx,%ebx               # is carry zero?
        jz      28f
        add     %ebx,(%edi)             # *c++ += carry
        lea     4(%edi),%edi            # lea keeps CF from the add intact
        jnc     28f
  27:
        adcl    $0,(%edi)               # *c++ += 1 (CF is set on entry here)
        lea     4(%edi),%edi            # lea keeps CF intact for the jc
        jc      27b
  28:
        emms
        pop     %ebx
        pop     %esi
        pop     %edi
        leave
        ret
        nop
  #
  # _s_mpv_sqr_add_prop(pa, a_len, ps)
  #
  # Adds the square of each word of pa into the matching word pair of ps
  # and propagates the carry (words are four bytes):
  #
  #     carry = 0
  #     for i in [0, a_len):
  #         carry:ps[2i+1]:ps[2i] = pa[i]*pa[i] + ps[2i+1]:ps[2i] + carry
  #     if carry != 0: add it to ps[2*a_len] and keep adding the carry-out
  #                    to the following words until an add does not carry
  #
  # The propagation loop has no length limit of its own; it stops only at
  # the first word that does not overflow.
  #
  # Entry dispatch on is_sse: positive jumps to _s_mpv_sqr_add_prop_sse2,
  # zero goes to the x86 loop below, negative calls _s_mpi_is_sse2, stores
  # its result in is_sse and then picks the path from that result.
  #
  # Frame of the x86 path:
  #   ebp - 12 : caller's ebx
  #   ebp -  8 : caller's esi
  #   ebp -  4 : caller's edi
  #   ebp +  0 : caller's ebp
  #   ebp +  4 : return address
  #   ebp +  8 : pa argument
  #   ebp + 12 : a_len argument
  #   ebp + 16 : ps argument
  # Registers:
  #   eax/edx : square of the current word, then the running sum
  #   ebx     : carry (0 or 1 after each step)
  #   ecx     : words left
  #   esi     : pa ptr
  #   edi     : ps ptr
  # ebx, esi, edi and ebp are restored; eax, ecx and edx are clobbered and
  # no return value is set.
  #
  .globl _s_mpv_sqr_add_prop
  .type _s_mpv_sqr_add_prop,@function
  _s_mpv_sqr_add_prop:
        GET     is_sse,%eax
        test    %eax,%eax
        je      _s_mpv_sqr_add_prop_x86
        jg      _s_mpv_sqr_add_prop_sse2
        call    _s_mpi_is_sse2          # not probed yet: ask once ...
        PUT     %eax,is_sse             # ... and remember the answer
        test    %eax,%eax
        jg      _s_mpv_sqr_add_prop_sse2
  _s_mpv_sqr_add_prop_x86:
        push    %ebp
        mov     %esp,%ebp
        push    %edi
        push    %esi
        push    %ebx
        xor     %ebx,%ebx               # carry = 0
        mov     12(%ebp),%ecx           # ecx = a_len
        mov     16(%ebp),%edi           # edi = ps
        test    %ecx,%ecx
        je      31f                     # jump if a_len == 0
        mov     8(%ebp),%esi            # esi = pa
  30:
        mov     (%esi),%eax             # eax = *pa++
        lea     4(%esi),%esi
        mull    %eax                    # edx:eax = a_i * a_i
        add     %ebx,%eax               # add "carry"
        adc     $0,%edx
        xor     %ebx,%ebx               # flags from the adc are dead here
        add     (%edi),%eax             # add low word from result
        adc     4(%edi),%edx            # add high word from result
        adc     $0,%ebx                 # ebx = carry out of the 64-bit add
        mov     %eax,(%edi)             # store low word
        mov     %edx,4(%edi)            # store high word
        lea     8(%edi),%edi
        dec     %ecx                    # --a_len
        jnz     30b                     # jmp if a_len != 0
  31:
        test    %ebx,%ebx               # is carry zero?
        jz      34f
        add     %ebx,(%edi)             # *ps++ += carry
        lea     4(%edi),%edi            # lea keeps CF from the add intact
        jnc     34f
  32:
        adcl    $0,(%edi)               # *ps++ += 1 (CF is set on entry here)
        lea     4(%edi),%edi            # lea keeps CF intact for the jc
        jc      32b
  34:
        pop     %ebx
        pop     %esi
        pop     %edi
        leave
        ret
        nop
  #
  # _s_mpv_sqr_add_prop_sse2: pmuludq variant of _s_mpv_sqr_add_prop,
  # reached by a jump from that entry point, so the stack still holds the
  # return address and the arguments pa, a_len, ps as at function entry.
  #
  #     carry = 0
  #     for i in [0, a_len):
  #         carry:ps[2i+1]:ps[2i] = pa[i]*pa[i] + ps[2i+1]:ps[2i] + carry
  #     if carry != 0: add it to ps[2*a_len] and keep adding the carry-out
  #                    to the following words until an add does not carry
  #
  # Frame:
  #   ebp - 12 : caller's ebx
  #   ebp -  8 : caller's esi
  #   ebp -  4 : caller's edi
  #   ebp +  8 : pa,  ebp + 12 : a_len,  ebp + 16 : ps
  # Registers:
  #   mm0 : square of the current word     mm3 : current word of ps
  #   mm2 : running sum, shifted down by one word after each store
  #   ebx : final carry for the propagation tail
  #   ecx : words left   esi : pa ptr   edi : ps ptr
  # mm0, mm2, mm3 and ecx are clobbered; emms resets the MMX state before
  # return.
  #
  _s_mpv_sqr_add_prop_sse2:
        push    %ebp
        mov     %esp,%ebp
        push    %edi
        push    %esi
        push    %ebx
        pxor    %mm2,%mm2               # carry = 0
        mov     12(%ebp),%ecx           # ecx = a_len
        mov     16(%ebp),%edi           # edi = ps
        test    %ecx,%ecx
        je      36f                     # jmp if a_len == 0
        mov     8(%ebp),%esi            # esi = pa
  35:
        movd    (%esi),%mm0             # mm0 = *pa++
        movd    (%edi),%mm3             # fetch the low word of the sum
        add     $4,%esi
        pmuludq %mm0,%mm0               # mm0 = sqr(a_i)
        paddq   %mm0,%mm2               # add the carry
        paddq   %mm3,%mm2               # add the low word
        movd    4(%edi),%mm3            # fetch the high word of the sum
        movd    %mm2,(%edi)             # store the low word
        psrlq   $32,%mm2
        paddq   %mm3,%mm2               # add the high word
        movd    %mm2,4(%edi)            # store the high word
        psrlq   $32,%mm2                # save the carry
        add     $8,%edi
        dec     %ecx                    # --a_len
        jnz     35b                     # jmp if a_len != 0
  36:
        movd    %mm2,%ebx
        test    %ebx,%ebx               # is carry zero?
        jz      38f
        add     %ebx,(%edi)             # *ps++ += carry
        lea     4(%edi),%edi            # lea keeps CF from the add intact
        jnc     38f
  37:
        adcl    $0,(%edi)               # *ps++ += 1 (CF is set on entry here)
        lea     4(%edi),%edi            # lea keeps CF intact for the jc
        jc      37b
  38:
        emms
        pop     %ebx
        pop     %esi
        pop     %edi
        leave
        ret
        nop
  #
  # Divide 64-bit (Nhi,Nlo) by 32-bit divisor, which must be normalized
  # so its high bit is 1. This code is from NSPR.
  #
  # mp_err _s_mpv_div_2dx1d(mp_digit Nhi, mp_digit Nlo, mp_digit divisor,
  #                         mp_digit *qp, mp_digit *rp)
  #
  # Stores the quotient through qp and the remainder through rp, then
  # returns 0 in eax.  The hardware div raises a divide error when the
  # quotient does not fit in one word, so callers must pass Nhi < divisor.
  #
  # Stack after the push below:
  #   esp +  0 : caller's ebx
  #   esp +  4 : return address
  #   esp +  8 : Nhi argument
  #   esp + 12 : Nlo argument
  #   esp + 16 : divisor argument
  #   esp + 20 : qp argument
  #   esp + 24 : rp argument
  # Registers:
  #   edx:eax : dividend on input to div; remainder:quotient afterwards
  #   ebx     : scratch pointer for qp, then rp (saved and restored)
  #
  .globl _s_mpv_div_2dx1d
  .type _s_mpv_div_2dx1d,@function
  _s_mpv_div_2dx1d:
        push    %ebx
        mov     12(%esp),%eax           # eax = Nlo
        mov     8(%esp),%edx            # edx = Nhi
        divl    16(%esp)                # eax = quotient, edx = remainder
        mov     20(%esp),%ebx
        mov     %eax,(%ebx)             # *qp = quotient
        mov     24(%esp),%ebx
        mov     %edx,(%ebx)             # *rp = remainder
        xor     %eax,%eax               # return zero
        pop     %ebx
        ret
        nop