/security/nss/lib/freebl/mpi/mpi_x86_asm.c

http://github.com/zpao/v8monkey · C · 568 lines · 402 code · 14 blank · 152 comment · 32 complexity · 2e0c9172c15d1aa207e1fa77c0aa6003 MD5 · raw file

  1. /*
  2. * mpi_x86.c - MSVC inline assembly implementation of s_mpv_ functions.
  3. *
  4. * ***** BEGIN LICENSE BLOCK *****
  5. * Version: MPL 1.1/GPL 2.0/LGPL 2.1
  6. *
  7. * The contents of this file are subject to the Mozilla Public License Version
  8. * 1.1 (the "License"); you may not use this file except in compliance with
  9. * the License. You may obtain a copy of the License at
  10. * http://www.mozilla.org/MPL/
  11. *
  12. * Software distributed under the License is distributed on an "AS IS" basis,
  13. * WITHOUT WARRANTY OF ANY KIND, either express or implied. See the License
  14. * for the specific language governing rights and limitations under the
  15. * License.
  16. *
  17. * The Original Code is the Netscape security libraries.
  18. *
  19. * The Initial Developer of the Original Code is
  20. * Netscape Communications Corporation.
  21. * Portions created by the Initial Developer are Copyright (C) 2000
  22. * the Initial Developer. All Rights Reserved.
  23. *
  24. * Contributor(s):
  25. * Benjamin Smedberg <benjamin@smedbergs.us>
  26. *
  27. * Alternatively, the contents of this file may be used under the terms of
  28. * either the GNU General Public License Version 2 or later (the "GPL"), or
  29. * the GNU Lesser General Public License Version 2.1 or later (the "LGPL"),
  30. * in which case the provisions of the GPL or the LGPL are applicable instead
  31. * of those above. If you wish to allow use of your version of this file only
  32. * under the terms of either the GPL or the LGPL, and not to allow others to
  33. * use your version of this file under the terms of the MPL, indicate your
  34. * decision by deleting the provisions above and replace them with the notice
  35. * and other provisions required by the GPL or the LGPL. If you do not delete
  36. * the provisions above, a recipient may use your version of this file under
  37. * the terms of any one of the MPL, the GPL or the LGPL.
  38. *
  39. * ***** END LICENSE BLOCK ***** */
  40. #include "mpi-priv.h"
  /*
   * Cached implementation selector used by the s_mpv_* routines in this file:
   *   < 0  not decided yet: the routine calls s_mpi_is_sse2() and stores the
   *        returned value here before dispatching,
   *   = 0  take the plain x86 integer path,
   *   > 0  take the SSE2 (pmuludq) path.
   */
  static int is_sse = -1;

  /* Probe whose return value the s_mpv_* routines store in is_sse. */
  extern unsigned long s_mpi_is_sse2(void);
  43. /*
  44. * ebp - 36: caller's esi
  45. * ebp - 32: caller's edi
  46. * ebp - 28:
  47. * ebp - 24:
  48. * ebp - 20:
  49. * ebp - 16:
  50. * ebp - 12:
  51. * ebp - 8:
  52. * ebp - 4:
  53. * ebp + 0: caller's ebp
  54. * ebp + 4: return address
  55. * ebp + 8: a argument
  56. * ebp + 12: a_len argument
  57. * ebp + 16: b argument
  58. * ebp + 20: c argument
  59. * registers:
  60. * eax:
  61. * ebx: carry
  62. * ecx: a_len
  63. * edx:
  64. * esi: a ptr
  65. * edi: c ptr
  66. */
  67. __declspec(naked) void
  68. s_mpv_mul_d(const mp_digit *a, mp_size a_len, mp_digit b, mp_digit *c)
  69. {
  70. __asm {
  71. mov eax, is_sse
  72. cmp eax, 0
  73. je s_mpv_mul_d_x86
  74. jg s_mpv_mul_d_sse2
  75. call s_mpi_is_sse2
  76. mov is_sse, eax
  77. cmp eax, 0
  78. jg s_mpv_mul_d_sse2
  79. s_mpv_mul_d_x86:
  80. push ebp
  81. mov ebp,esp
  82. sub esp,28
  83. push edi
  84. push esi
  85. push ebx
  86. mov ebx,0 ; carry = 0
  87. mov ecx,[ebp+12] ; ecx = a_len
  88. mov edi,[ebp+20]
  89. cmp ecx,0
  90. je L_2 ; jmp if a_len == 0
  91. mov esi,[ebp+8] ; esi = a
  92. cld
  93. L_1:
  94. lodsd ; eax = [ds:esi]; esi += 4
  95. mov edx,[ebp+16] ; edx = b
  96. mul edx ; edx:eax = Phi:Plo = a_i * b
  97. add eax,ebx ; add carry (ebx) to edx:eax
  98. adc edx,0
  99. mov ebx,edx ; high half of product becomes next carry
  100. stosd ; [es:edi] = ax; edi += 4;
  101. dec ecx ; --a_len
  102. jnz L_1 ; jmp if a_len != 0
  103. L_2:
  104. mov [edi],ebx ; *c = carry
  105. pop ebx
  106. pop esi
  107. pop edi
  108. leave
  109. ret
  110. nop
  111. s_mpv_mul_d_sse2:
  112. push ebp
  113. mov ebp, esp
  114. push edi
  115. push esi
  116. psubq mm2, mm2 ; carry = 0
  117. mov ecx, [ebp+12] ; ecx = a_len
  118. movd mm1, [ebp+16] ; mm1 = b
  119. mov edi, [ebp+20]
  120. cmp ecx, 0
  121. je L_6 ; jmp if a_len == 0
  122. mov esi, [ebp+8] ; esi = a
  123. cld
  124. L_5:
  125. movd mm0, [esi] ; mm0 = *a++
  126. add esi, 4
  127. pmuludq mm0, mm1 ; mm0 = b * *a++
  128. paddq mm2, mm0 ; add the carry
  129. movd [edi], mm2 ; store the 32bit result
  130. add edi, 4
  131. psrlq mm2, 32 ; save the carry
  132. dec ecx ; --a_len
  133. jnz L_5 ; jmp if a_len != 0
  134. L_6:
  135. movd [edi], mm2 ; *c = carry
  136. emms
  137. pop esi
  138. pop edi
  139. leave
  140. ret
  141. nop
  142. }
  143. }
  144. /*
  145. * ebp - 36: caller's esi
  146. * ebp - 32: caller's edi
  147. * ebp - 28:
  148. * ebp - 24:
  149. * ebp - 20:
  150. * ebp - 16:
  151. * ebp - 12:
  152. * ebp - 8:
  153. * ebp - 4:
  154. * ebp + 0: caller's ebp
  155. * ebp + 4: return address
  156. * ebp + 8: a argument
  157. * ebp + 12: a_len argument
  158. * ebp + 16: b argument
  159. * ebp + 20: c argument
  160. * registers:
  161. * eax:
  162. * ebx: carry
  163. * ecx: a_len
  164. * edx:
  165. * esi: a ptr
  166. * edi: c ptr
  167. */
  168. __declspec(naked) void
  169. s_mpv_mul_d_add(const mp_digit *a, mp_size a_len, mp_digit b, mp_digit *c)
  170. {
  171. __asm {
  172. mov eax, is_sse
  173. cmp eax, 0
  174. je s_mpv_mul_d_add_x86
  175. jg s_mpv_mul_d_add_sse2
  176. call s_mpi_is_sse2
  177. mov is_sse, eax
  178. cmp eax, 0
  179. jg s_mpv_mul_d_add_sse2
  180. s_mpv_mul_d_add_x86:
  181. push ebp
  182. mov ebp,esp
  183. sub esp,28
  184. push edi
  185. push esi
  186. push ebx
  187. mov ebx,0 ; carry = 0
  188. mov ecx,[ebp+12] ; ecx = a_len
  189. mov edi,[ebp+20]
  190. cmp ecx,0
  191. je L_11 ; jmp if a_len == 0
  192. mov esi,[ebp+8] ; esi = a
  193. cld
  194. L_10:
  195. lodsd ; eax = [ds:esi]; esi += 4
  196. mov edx,[ebp+16] ; edx = b
  197. mul edx ; edx:eax = Phi:Plo = a_i * b
  198. add eax,ebx ; add carry (ebx) to edx:eax
  199. adc edx,0
  200. mov ebx,[edi] ; add in current word from *c
  201. add eax,ebx
  202. adc edx,0
  203. mov ebx,edx ; high half of product becomes next carry
  204. stosd ; [es:edi] = ax; edi += 4;
  205. dec ecx ; --a_len
  206. jnz L_10 ; jmp if a_len != 0
  207. L_11:
  208. mov [edi],ebx ; *c = carry
  209. pop ebx
  210. pop esi
  211. pop edi
  212. leave
  213. ret
  214. nop
  215. s_mpv_mul_d_add_sse2:
  216. push ebp
  217. mov ebp, esp
  218. push edi
  219. push esi
  220. psubq mm2, mm2 ; carry = 0
  221. mov ecx, [ebp+12] ; ecx = a_len
  222. movd mm1, [ebp+16] ; mm1 = b
  223. mov edi, [ebp+20]
  224. cmp ecx, 0
  225. je L_16 ; jmp if a_len == 0
  226. mov esi, [ebp+8] ; esi = a
  227. cld
  228. L_15:
  229. movd mm0, [esi] ; mm0 = *a++
  230. add esi, 4
  231. pmuludq mm0, mm1 ; mm0 = b * *a++
  232. paddq mm2, mm0 ; add the carry
  233. movd mm0, [edi]
  234. paddq mm2, mm0 ; add the carry
  235. movd [edi], mm2 ; store the 32bit result
  236. add edi, 4
  237. psrlq mm2, 32 ; save the carry
  238. dec ecx ; --a_len
  239. jnz L_15 ; jmp if a_len != 0
  240. L_16:
  241. movd [edi], mm2 ; *c = carry
  242. emms
  243. pop esi
  244. pop edi
  245. leave
  246. ret
  247. nop
  248. }
  249. }
  250. /*
  251. * ebp - 36: caller's esi
  252. * ebp - 32: caller's edi
  253. * ebp - 28:
  254. * ebp - 24:
  255. * ebp - 20:
  256. * ebp - 16:
  257. * ebp - 12:
  258. * ebp - 8:
  259. * ebp - 4:
  260. * ebp + 0: caller's ebp
  261. * ebp + 4: return address
  262. * ebp + 8: a argument
  263. * ebp + 12: a_len argument
  264. * ebp + 16: b argument
  265. * ebp + 20: c argument
  266. * registers:
  267. * eax:
  268. * ebx: carry
  269. * ecx: a_len
  270. * edx:
  271. * esi: a ptr
  272. * edi: c ptr
  273. */
  274. __declspec(naked) void
  275. s_mpv_mul_d_add_prop(const mp_digit *a, mp_size a_len, mp_digit b, mp_digit *c)
  276. {
  277. __asm {
  278. mov eax, is_sse
  279. cmp eax, 0
  280. je s_mpv_mul_d_add_prop_x86
  281. jg s_mpv_mul_d_add_prop_sse2
  282. call s_mpi_is_sse2
  283. mov is_sse, eax
  284. cmp eax, 0
  285. jg s_mpv_mul_d_add_prop_sse2
  286. s_mpv_mul_d_add_prop_x86:
  287. push ebp
  288. mov ebp,esp
  289. sub esp,28
  290. push edi
  291. push esi
  292. push ebx
  293. mov ebx,0 ; carry = 0
  294. mov ecx,[ebp+12] ; ecx = a_len
  295. mov edi,[ebp+20]
  296. cmp ecx,0
  297. je L_21 ; jmp if a_len == 0
  298. cld
  299. mov esi,[ebp+8] ; esi = a
  300. L_20:
  301. lodsd ; eax = [ds:esi]; esi += 4
  302. mov edx,[ebp+16] ; edx = b
  303. mul edx ; edx:eax = Phi:Plo = a_i * b
  304. add eax,ebx ; add carry (ebx) to edx:eax
  305. adc edx,0
  306. mov ebx,[edi] ; add in current word from *c
  307. add eax,ebx
  308. adc edx,0
  309. mov ebx,edx ; high half of product becomes next carry
  310. stosd ; [es:edi] = ax; edi += 4;
  311. dec ecx ; --a_len
  312. jnz L_20 ; jmp if a_len != 0
  313. L_21:
  314. cmp ebx,0 ; is carry zero?
  315. jz L_23
  316. mov eax,[edi] ; add in current word from *c
  317. add eax,ebx
  318. stosd ; [es:edi] = ax; edi += 4;
  319. jnc L_23
  320. L_22:
  321. mov eax,[edi] ; add in current word from *c
  322. adc eax,0
  323. stosd ; [es:edi] = ax; edi += 4;
  324. jc L_22
  325. L_23:
  326. pop ebx
  327. pop esi
  328. pop edi
  329. leave
  330. ret
  331. nop
  332. s_mpv_mul_d_add_prop_sse2:
  333. push ebp
  334. mov ebp, esp
  335. push edi
  336. push esi
  337. push ebx
  338. psubq mm2, mm2 ; carry = 0
  339. mov ecx, [ebp+12] ; ecx = a_len
  340. movd mm1, [ebp+16] ; mm1 = b
  341. mov edi, [ebp+20]
  342. cmp ecx, 0
  343. je L_26 ; jmp if a_len == 0
  344. mov esi, [ebp+8] ; esi = a
  345. cld
  346. L_25:
  347. movd mm0, [esi] ; mm0 = *a++
  348. movd mm3, [edi] ; fetch the sum
  349. add esi, 4
  350. pmuludq mm0, mm1 ; mm0 = b * *a++
  351. paddq mm2, mm0 ; add the carry
  352. paddq mm2, mm3 ; add *c++
  353. movd [edi], mm2 ; store the 32bit result
  354. add edi, 4
  355. psrlq mm2, 32 ; save the carry
  356. dec ecx ; --a_len
  357. jnz L_25 ; jmp if a_len != 0
  358. L_26:
  359. movd ebx, mm2
  360. cmp ebx, 0 ; is carry zero?
  361. jz L_28
  362. mov eax, [edi]
  363. add eax, ebx
  364. stosd
  365. jnc L_28
  366. L_27:
  367. mov eax, [edi] ; add in current word from *c
  368. adc eax, 0
  369. stosd ; [es:edi] = ax; edi += 4;
  370. jc L_27
  371. L_28:
  372. emms
  373. pop ebx
  374. pop esi
  375. pop edi
  376. leave
  377. ret
  378. nop
  379. }
  380. }
  381. /*
  382. * ebp - 20: caller's esi
  383. * ebp - 16: caller's edi
  384. * ebp - 12:
  385. * ebp - 8: carry
  386. * ebp - 4: a_len local
  387. * ebp + 0: caller's ebp
  388. * ebp + 4: return address
  389. * ebp + 8: pa argument
  390. * ebp + 12: a_len argument
  391. * ebp + 16: ps argument
  392. * ebp + 20:
  393. * registers:
  394. * eax:
  395. * ebx: carry
  396. * ecx: a_len
  397. * edx:
  398. * esi: a ptr
  399. * edi: c ptr
  400. */
  401. __declspec(naked) void
  402. s_mpv_sqr_add_prop(const mp_digit *a, mp_size a_len, mp_digit *sqrs)
  403. {
  404. __asm {
  405. mov eax, is_sse
  406. cmp eax, 0
  407. je s_mpv_sqr_add_prop_x86
  408. jg s_mpv_sqr_add_prop_sse2
  409. call s_mpi_is_sse2
  410. mov is_sse, eax
  411. cmp eax, 0
  412. jg s_mpv_sqr_add_prop_sse2
  413. s_mpv_sqr_add_prop_x86:
  414. push ebp
  415. mov ebp,esp
  416. sub esp,12
  417. push edi
  418. push esi
  419. push ebx
  420. mov ebx,0 ; carry = 0
  421. mov ecx,[ebp+12] ; a_len
  422. mov edi,[ebp+16] ; edi = ps
  423. cmp ecx,0
  424. je L_31 ; jump if a_len == 0
  425. cld
  426. mov esi,[ebp+8] ; esi = pa
  427. L_30:
  428. lodsd ; eax = [ds:si]; si += 4;
  429. mul eax
  430. add eax,ebx ; add "carry"
  431. adc edx,0
  432. mov ebx,[edi]
  433. add eax,ebx ; add low word from result
  434. mov ebx,[edi+4]
  435. stosd ; [es:di] = eax; di += 4;
  436. adc edx,ebx ; add high word from result
  437. mov ebx,0
  438. mov eax,edx
  439. adc ebx,0
  440. stosd ; [es:di] = eax; di += 4;
  441. dec ecx ; --a_len
  442. jnz L_30 ; jmp if a_len != 0
  443. L_31:
  444. cmp ebx,0 ; is carry zero?
  445. jz L_34
  446. mov eax,[edi] ; add in current word from *c
  447. add eax,ebx
  448. stosd ; [es:edi] = ax; edi += 4;
  449. jnc L_34
  450. L_32:
  451. mov eax,[edi] ; add in current word from *c
  452. adc eax,0
  453. stosd ; [es:edi] = ax; edi += 4;
  454. jc L_32
  455. L_34:
  456. pop ebx
  457. pop esi
  458. pop edi
  459. leave
  460. ret
  461. nop
  462. s_mpv_sqr_add_prop_sse2:
  463. push ebp
  464. mov ebp, esp
  465. push edi
  466. push esi
  467. push ebx
  468. psubq mm2, mm2 ; carry = 0
  469. mov ecx, [ebp+12] ; ecx = a_len
  470. mov edi, [ebp+16]
  471. cmp ecx, 0
  472. je L_36 ; jmp if a_len == 0
  473. mov esi, [ebp+8] ; esi = a
  474. cld
  475. L_35:
  476. movd mm0, [esi] ; mm0 = *a
  477. movd mm3, [edi] ; fetch the sum
  478. add esi, 4
  479. pmuludq mm0, mm0 ; mm0 = sqr(a)
  480. paddq mm2, mm0 ; add the carry
  481. paddq mm2, mm3 ; add the low word
  482. movd mm3, [edi+4]
  483. movd [edi], mm2 ; store the 32bit result
  484. psrlq mm2, 32
  485. paddq mm2, mm3 ; add the high word
  486. movd [edi+4], mm2 ; store the 32bit result
  487. psrlq mm2, 32 ; save the carry.
  488. add edi, 8
  489. dec ecx ; --a_len
  490. jnz L_35 ; jmp if a_len != 0
  491. L_36:
  492. movd ebx, mm2
  493. cmp ebx, 0 ; is carry zero?
  494. jz L_38
  495. mov eax, [edi]
  496. add eax, ebx
  497. stosd
  498. jnc L_38
  499. L_37:
  500. mov eax, [edi] ; add in current word from *c
  501. adc eax, 0
  502. stosd ; [es:edi] = ax; edi += 4;
  503. jc L_37
  504. L_38:
  505. emms
  506. pop ebx
  507. pop esi
  508. pop edi
  509. leave
  510. ret
  511. nop
  512. }
  513. }
  514. /*
  515. * Divide 64-bit (Nhi,Nlo) by 32-bit divisor, which must be normalized
  516. * so its high bit is 1. This code is from NSPR.
  517. *
  518. * Dump of assembler code for function s_mpv_div_2dx1d:
  519. *
  520. * esp + 0: Caller's ebx
  521. * esp + 4: return address
  522. * esp + 8: Nhi argument
  523. * esp + 12: Nlo argument
  524. * esp + 16: divisor argument
  525. * esp + 20: qp argument
  526. * esp + 24: rp argument
  527. * registers:
  528. * eax:
  529. * ebx: carry
  530. * ecx: a_len
  531. * edx:
  532. * esi: a ptr
  533. * edi: c ptr
  534. */
  535. __declspec(naked) mp_err
  536. s_mpv_div_2dx1d(mp_digit Nhi, mp_digit Nlo, mp_digit divisor,
  537. mp_digit *qp, mp_digit *rp)
  538. {
  539. __asm {
  540. push ebx
  541. mov edx,[esp+8]
  542. mov eax,[esp+12]
  543. mov ebx,[esp+16]
  544. div ebx
  545. mov ebx,[esp+20]
  546. mov [ebx],eax
  547. mov ebx,[esp+24]
  548. mov [ebx],edx
  549. xor eax,eax ; return zero
  550. pop ebx
  551. ret
  552. nop
  553. }
  554. }