PageRenderTime 48ms CodeModel.GetById 18ms app.highlight 26ms RepoModel.GetById 1ms app.codeStats 0ms

/security/nss/lib/freebl/mpi/mpi_x86_asm.c

http://github.com/zpao/v8monkey
C | 568 lines | 402 code | 14 blank | 152 comment | 32 complexity | 2e0c9172c15d1aa207e1fa77c0aa6003 MD5 | raw file
  1/*
  2 *  mpi_x86_asm.c - MSVC inline assembly implementation of s_mpv_ functions.
  3 * 
  4 * ***** BEGIN LICENSE BLOCK *****
  5 * Version: MPL 1.1/GPL 2.0/LGPL 2.1
  6 *
  7 * The contents of this file are subject to the Mozilla Public License Version
  8 * 1.1 (the "License"); you may not use this file except in compliance with
  9 * the License. You may obtain a copy of the License at
 10 * http://www.mozilla.org/MPL/
 11 *
 12 * Software distributed under the License is distributed on an "AS IS" basis,
 13 * WITHOUT WARRANTY OF ANY KIND, either express or implied. See the License
 14 * for the specific language governing rights and limitations under the
 15 * License.
 16 *
 17 * The Original Code is the Netscape security libraries.
 18 *
 19 * The Initial Developer of the Original Code is
 20 * Netscape Communications Corporation.
 21 * Portions created by the Initial Developer are Copyright (C) 2000
 22 * the Initial Developer. All Rights Reserved.
 23 *
 24 * Contributor(s):
 25 *   Benjamin Smedberg <benjamin@smedbergs.us>
 26 *
 27 * Alternatively, the contents of this file may be used under the terms of
 28 * either the GNU General Public License Version 2 or later (the "GPL"), or
 29 * the GNU Lesser General Public License Version 2.1 or later (the "LGPL"),
 30 * in which case the provisions of the GPL or the LGPL are applicable instead
 31 * of those above. If you wish to allow use of your version of this file only
 32 * under the terms of either the GPL or the LGPL, and not to allow others to
 33 * use your version of this file under the terms of the MPL, indicate your
 34 * decision by deleting the provisions above and replace them with the notice
 35 * and other provisions required by the GPL or the LGPL. If you do not delete
 36 * the provisions above, a recipient may use your version of this file under
 37 * the terms of any one of the MPL, the GPL or the LGPL.
 38 *
 39 * ***** END LICENSE BLOCK ***** */
 40
 41#include "mpi-priv.h"
 42
 43static int is_sse = -1;
 44extern unsigned long s_mpi_is_sse2();
 45
 46/*
 47 *   ebp - 36:	caller's esi
 48 *   ebp - 32:	caller's edi
 49 *   ebp - 28:	
 50 *   ebp - 24:	
 51 *   ebp - 20:	
 52 *   ebp - 16:	
 53 *   ebp - 12:	
 54 *   ebp - 8:	
 55 *   ebp - 4:	
 56 *   ebp + 0:	caller's ebp
 57 *   ebp + 4:	return address
 58 *   ebp + 8:	a	argument
 59 *   ebp + 12:	a_len	argument
 60 *   ebp + 16:	b	argument
 61 *   ebp + 20:	c	argument
 62 *   registers:
 63 *  	eax:
 64 * 	ebx:	carry
 65 * 	ecx:	a_len
 66 * 	edx:
 67 * 	esi:	a ptr
 68 * 	edi:	c ptr
 69 */
 70__declspec(naked) void
 71s_mpv_mul_d(const mp_digit *a, mp_size a_len, mp_digit b, mp_digit *c)
 72{
 73  __asm {
 74    mov    eax, is_sse
 75    cmp    eax, 0
 76    je     s_mpv_mul_d_x86
 77    jg     s_mpv_mul_d_sse2
 78    call   s_mpi_is_sse2
 79    mov    is_sse, eax
 80    cmp    eax, 0
 81    jg     s_mpv_mul_d_sse2
 82s_mpv_mul_d_x86:
 83    push   ebp
 84    mov    ebp,esp
 85    sub    esp,28
 86    push   edi
 87    push   esi
 88    push   ebx
 89    mov    ebx,0		; carry = 0
 90    mov    ecx,[ebp+12]		; ecx = a_len
 91    mov    edi,[ebp+20]
 92    cmp    ecx,0
 93    je     L_2			; jmp if a_len == 0
 94    mov    esi,[ebp+8]		; esi = a
 95    cld
 96L_1:
 97    lodsd			; eax = [ds:esi]; esi += 4
 98    mov    edx,[ebp+16]		; edx = b
 99    mul    edx			; edx:eax = Phi:Plo = a_i * b
100
101    add    eax,ebx		; add carry (ebx) to edx:eax
102    adc    edx,0
103    mov    ebx,edx		; high half of product becomes next carry
104
105    stosd			; [es:edi] = ax; edi += 4;
106    dec    ecx			; --a_len
107    jnz    L_1			; jmp if a_len != 0
108L_2:
109    mov    [edi],ebx		; *c = carry
110    pop    ebx
111    pop    esi
112    pop    edi
113    leave  
114    ret    
115    nop
116s_mpv_mul_d_sse2:
117    push   ebp
118    mov    ebp, esp
119    push   edi
120    push   esi
121    psubq  mm2, mm2		; carry = 0
122    mov    ecx, [ebp+12]	; ecx = a_len
123    movd   mm1, [ebp+16]	; mm1 = b
124    mov    edi, [ebp+20]
125    cmp    ecx, 0
126    je     L_6			; jmp if a_len == 0
127    mov    esi, [ebp+8]		; esi = a
128    cld
129L_5:
130    movd   mm0, [esi]		; mm0 = *a++
131    add    esi, 4
132    pmuludq mm0, mm1		; mm0 = b * *a++
133    paddq  mm2, mm0		; add the carry
134    movd   [edi], mm2		; store the 32bit result
135    add    edi, 4
136    psrlq  mm2, 32		; save the carry
137    dec    ecx			; --a_len
138    jnz    L_5			; jmp if a_len != 0
139L_6:
140    movd   [edi], mm2		; *c = carry
141    emms
142    pop    esi
143    pop    edi
144    leave  
145    ret    
146    nop
147  }
148}
149
150/*
151 *   ebp - 36:	caller's esi
152 *   ebp - 32:	caller's edi
153 *   ebp - 28:	
154 *   ebp - 24:	
155 *   ebp - 20:	
156 *   ebp - 16:	
157 *   ebp - 12:	
158 *   ebp - 8:	
159 *   ebp - 4:	
160 *   ebp + 0:	caller's ebp
161 *   ebp + 4:	return address
162 *   ebp + 8:	a	argument
163 *   ebp + 12:	a_len	argument
164 *   ebp + 16:	b	argument
165 *   ebp + 20:	c	argument
166 *   registers:
167 *  	eax:
168 * 	ebx:	carry
169 * 	ecx:	a_len
170 * 	edx:
171 * 	esi:	a ptr
172 * 	edi:	c ptr
173 */
174__declspec(naked) void
175s_mpv_mul_d_add(const mp_digit *a, mp_size a_len, mp_digit b, mp_digit *c)
176{
177  __asm {
178    mov    eax, is_sse
179    cmp    eax, 0
180    je     s_mpv_mul_d_add_x86
181    jg     s_mpv_mul_d_add_sse2
182    call   s_mpi_is_sse2
183    mov    is_sse, eax
184    cmp    eax, 0
185    jg     s_mpv_mul_d_add_sse2
186s_mpv_mul_d_add_x86:
187    push   ebp
188    mov    ebp,esp
189    sub    esp,28
190    push   edi
191    push   esi
192    push   ebx
193    mov    ebx,0		; carry = 0
194    mov    ecx,[ebp+12]		; ecx = a_len
195    mov    edi,[ebp+20]
196    cmp    ecx,0
197    je     L_11			; jmp if a_len == 0
198    mov    esi,[ebp+8]		; esi = a
199    cld
200L_10:
201    lodsd			; eax = [ds:esi]; esi += 4
202    mov    edx,[ebp+16]		; edx = b
203    mul    edx			; edx:eax = Phi:Plo = a_i * b
204
205    add    eax,ebx		; add carry (ebx) to edx:eax
206    adc    edx,0
207    mov    ebx,[edi]		; add in current word from *c
208    add    eax,ebx		
209    adc    edx,0
210    mov    ebx,edx		; high half of product becomes next carry
211
212    stosd			; [es:edi] = ax; edi += 4;
213    dec    ecx			; --a_len
214    jnz    L_10			; jmp if a_len != 0
215L_11:
216    mov    [edi],ebx		; *c = carry
217    pop    ebx
218    pop    esi
219    pop    edi
220    leave  
221    ret    
222    nop
223s_mpv_mul_d_add_sse2:
224    push   ebp
225    mov    ebp, esp
226    push   edi
227    push   esi
228    psubq  mm2, mm2		; carry = 0
229    mov    ecx, [ebp+12]	; ecx = a_len
230    movd   mm1, [ebp+16]	; mm1 = b
231    mov    edi, [ebp+20]
232    cmp    ecx, 0
233    je     L_16			; jmp if a_len == 0
234    mov    esi, [ebp+8]		; esi = a
235    cld
236L_15:
237    movd   mm0, [esi]		; mm0 = *a++
238    add    esi, 4
239    pmuludq mm0, mm1		; mm0 = b * *a++
240    paddq  mm2, mm0		; add the carry
241    movd   mm0, [edi]
242    paddq  mm2, mm0		; add the carry
243    movd   [edi], mm2		; store the 32bit result
244    add    edi, 4
245    psrlq  mm2, 32		; save the carry
246    dec    ecx			; --a_len
247    jnz    L_15			; jmp if a_len != 0
248L_16:
249    movd   [edi], mm2		; *c = carry
250    emms
251    pop    esi
252    pop    edi
253    leave  
254    ret    
255    nop
256  }
257}
258
259/*
260 *   ebp - 36:	caller's esi
261 *   ebp - 32:	caller's edi
262 *   ebp - 28:	
263 *   ebp - 24:	
264 *   ebp - 20:	
265 *   ebp - 16:	
266 *   ebp - 12:	
267 *   ebp - 8:	
268 *   ebp - 4:	
269 *   ebp + 0:	caller's ebp
270 *   ebp + 4:	return address
271 *   ebp + 8:	a	argument
272 *   ebp + 12:	a_len	argument
273 *   ebp + 16:	b	argument
274 *   ebp + 20:	c	argument
275 *   registers:
276 *  	eax:
277 * 	ebx:	carry
278 * 	ecx:	a_len
279 * 	edx:
280 * 	esi:	a ptr
281 * 	edi:	c ptr
282 */
283__declspec(naked) void
284s_mpv_mul_d_add_prop(const mp_digit *a, mp_size a_len, mp_digit b, mp_digit *c)
285{
286  __asm {
287    mov    eax, is_sse
288    cmp    eax, 0
289    je     s_mpv_mul_d_add_prop_x86
290    jg     s_mpv_mul_d_add_prop_sse2
291    call   s_mpi_is_sse2
292    mov    is_sse, eax
293    cmp    eax, 0
294    jg     s_mpv_mul_d_add_prop_sse2
295s_mpv_mul_d_add_prop_x86:
296    push   ebp
297    mov    ebp,esp
298    sub    esp,28
299    push   edi
300    push   esi
301    push   ebx
302    mov    ebx,0		; carry = 0
303    mov    ecx,[ebp+12]		; ecx = a_len
304    mov    edi,[ebp+20]
305    cmp    ecx,0
306    je     L_21			; jmp if a_len == 0
307    cld
308    mov    esi,[ebp+8]		; esi = a
309L_20:
310    lodsd			; eax = [ds:esi]; esi += 4
311    mov    edx,[ebp+16]		; edx = b
312    mul    edx			; edx:eax = Phi:Plo = a_i * b
313
314    add    eax,ebx		; add carry (ebx) to edx:eax
315    adc    edx,0
316    mov    ebx,[edi]		; add in current word from *c
317    add    eax,ebx		
318    adc    edx,0
319    mov    ebx,edx		; high half of product becomes next carry
320
321    stosd			; [es:edi] = ax; edi += 4;
322    dec    ecx			; --a_len
323    jnz    L_20			; jmp if a_len != 0
324L_21:
325    cmp    ebx,0		; is carry zero?
326    jz     L_23
327    mov    eax,[edi]		; add in current word from *c
328    add    eax,ebx
329    stosd			; [es:edi] = ax; edi += 4;
330    jnc    L_23
331L_22:
332    mov    eax,[edi]		; add in current word from *c
333    adc    eax,0
334    stosd			; [es:edi] = ax; edi += 4;
335    jc     L_22
336L_23:
337    pop    ebx
338    pop    esi
339    pop    edi
340    leave  
341    ret    
342    nop
343s_mpv_mul_d_add_prop_sse2:
344    push   ebp
345    mov    ebp, esp
346    push   edi
347    push   esi
348    push   ebx
349    psubq  mm2, mm2		; carry = 0
350    mov    ecx, [ebp+12]	; ecx = a_len
351    movd   mm1, [ebp+16]	; mm1 = b
352    mov    edi, [ebp+20]
353    cmp    ecx, 0
354    je     L_26			; jmp if a_len == 0
355    mov    esi, [ebp+8]		; esi = a
356    cld
357L_25:
358    movd   mm0, [esi]		; mm0 = *a++
359    movd   mm3, [edi]		; fetch the sum
360    add    esi, 4
361    pmuludq mm0, mm1		; mm0 = b * *a++
362    paddq  mm2, mm0		; add the carry
363    paddq  mm2, mm3		; add *c++
364    movd   [edi], mm2		; store the 32bit result
365    add    edi, 4
366    psrlq  mm2, 32		; save the carry
367    dec    ecx			; --a_len
368    jnz    L_25			; jmp if a_len != 0
369L_26:
370    movd   ebx, mm2
371    cmp    ebx, 0		; is carry zero?
372    jz     L_28
373    mov    eax, [edi]
374    add    eax, ebx
375    stosd
376    jnc    L_28
377L_27:
378    mov    eax, [edi]		; add in current word from *c
379    adc	   eax, 0
380    stosd			; [es:edi] = ax; edi += 4;
381    jc     L_27
382L_28:
383    emms
384    pop    ebx
385    pop    esi
386    pop    edi
387    leave  
388    ret    
389    nop
390  }
391}
392
393/*
394 *   ebp - 20:	caller's esi
395 *   ebp - 16:	caller's edi
396 *   ebp - 12:	
397 *   ebp - 8:	carry
398 *   ebp - 4:	a_len	local
399 *   ebp + 0:	caller's ebp
400 *   ebp + 4:	return address
401 *   ebp + 8:	pa	argument
402 *   ebp + 12:	a_len	argument
403 *   ebp + 16:	ps	argument
404 *   ebp + 20:	
405 *   registers:
406 *  	eax:
407 * 	ebx:	carry
408 * 	ecx:	a_len
409 * 	edx:
410 * 	esi:	a ptr
411 * 	edi:	c ptr
412 */
413__declspec(naked) void
414s_mpv_sqr_add_prop(const mp_digit *a, mp_size a_len, mp_digit *sqrs)
415{
416  __asm {
417     mov    eax, is_sse
418     cmp    eax, 0
419     je     s_mpv_sqr_add_prop_x86
420     jg     s_mpv_sqr_add_prop_sse2
421     call   s_mpi_is_sse2
422     mov    is_sse, eax
423     cmp    eax, 0
424     jg     s_mpv_sqr_add_prop_sse2
425s_mpv_sqr_add_prop_x86:
426     push   ebp
427     mov    ebp,esp
428     sub    esp,12
429     push   edi
430     push   esi
431     push   ebx
432     mov    ebx,0		; carry = 0
433     mov    ecx,[ebp+12]	; a_len
434     mov    edi,[ebp+16]	; edi = ps
435     cmp    ecx,0
436     je     L_31		; jump if a_len == 0
437     cld
438     mov    esi,[ebp+8]		; esi = pa
439L_30:
440     lodsd			; eax = [ds:si]; si += 4;
441     mul    eax
442
443     add    eax,ebx		; add "carry"
444     adc    edx,0
445     mov    ebx,[edi]
446     add    eax,ebx		; add low word from result
447     mov    ebx,[edi+4]
448     stosd			; [es:di] = eax; di += 4;
449     adc    edx,ebx		; add high word from result
450     mov    ebx,0
451     mov    eax,edx
452     adc    ebx,0
453     stosd			; [es:di] = eax; di += 4;
454     dec    ecx			; --a_len
455     jnz    L_30		; jmp if a_len != 0
456L_31:
457    cmp    ebx,0		; is carry zero?
458    jz     L_34
459    mov    eax,[edi]		; add in current word from *c
460    add    eax,ebx
461    stosd			; [es:edi] = ax; edi += 4;
462    jnc    L_34
463L_32:
464    mov    eax,[edi]		; add in current word from *c
465    adc    eax,0
466    stosd			; [es:edi] = ax; edi += 4;
467    jc     L_32
468L_34:
469    pop    ebx
470    pop    esi
471    pop    edi
472    leave  
473    ret    
474    nop
475s_mpv_sqr_add_prop_sse2:
476    push   ebp
477    mov    ebp, esp
478    push   edi
479    push   esi
480    push   ebx
481    psubq  mm2, mm2		; carry = 0
482    mov    ecx, [ebp+12]	; ecx = a_len
483    mov    edi, [ebp+16]
484    cmp    ecx, 0
485    je     L_36		; jmp if a_len == 0
486    mov    esi, [ebp+8]		; esi = a
487    cld
488L_35:
489    movd   mm0, [esi]		; mm0 = *a
490    movd   mm3, [edi]		; fetch the sum
491    add	   esi, 4
492    pmuludq mm0, mm0		; mm0 = sqr(a)
493    paddq  mm2, mm0		; add the carry
494    paddq  mm2, mm3		; add the low word
495    movd   mm3, [edi+4]
496    movd   [edi], mm2		; store the 32bit result
497    psrlq  mm2, 32	
498    paddq  mm2, mm3		; add the high word
499    movd   [edi+4], mm2		; store the 32bit result
500    psrlq  mm2, 32		; save the carry.
501    add    edi, 8
502    dec    ecx			; --a_len
503    jnz    L_35			; jmp if a_len != 0
504L_36:
505    movd   ebx, mm2
506    cmp    ebx, 0		; is carry zero?
507    jz     L_38
508    mov    eax, [edi]
509    add    eax, ebx
510    stosd
511    jnc    L_38
512L_37:
513    mov    eax, [edi]		; add in current word from *c
514    adc	   eax, 0
515    stosd			; [es:edi] = ax; edi += 4;
516    jc     L_37
517L_38:
518    emms
519    pop    ebx
520    pop    esi
521    pop    edi
522    leave  
523    ret    
524    nop
525  }
526}
527
528/* 
529 *  Divide 64-bit (Nhi,Nlo) by 32-bit divisor, which must be normalized
530 *  so its high bit is 1.   This code is from NSPR.
531 *
532 *  Dump of assembler code for function s_mpv_div_2dx1d:
533 *  
534 *   esp +  0:   Caller's ebx
535 *   esp +  4:	return address
536 *   esp +  8:	Nhi	argument
537 *   esp + 12:	Nlo	argument
538 *   esp + 16:	divisor	argument
539 *   esp + 20:	qp	argument
540 *   esp + 24:   rp	argument
541 *   registers:
542 *  	eax:
543 * 	ebx:	carry
544 * 	ecx:	a_len
545 * 	edx:
546 * 	esi:	a ptr
547 * 	edi:	c ptr
548 */  
549__declspec(naked) mp_err
550s_mpv_div_2dx1d(mp_digit Nhi, mp_digit Nlo, mp_digit divisor,
551		mp_digit *qp, mp_digit *rp)
552{
553  __asm {
554       push   ebx
555       mov    edx,[esp+8]
556       mov    eax,[esp+12]
557       mov    ebx,[esp+16]
558       div    ebx
559       mov    ebx,[esp+20]
560       mov    [ebx],eax
561       mov    ebx,[esp+24]
562       mov    [ebx],edx
563       xor    eax,eax		; return zero
564       pop    ebx
565       ret    
566       nop
567  }
568}