/security/nss/lib/freebl/mpi/mpi_x86_asm.c
http://github.com/zpao/v8monkey · C · 568 lines · 402 code · 14 blank · 152 comment · 32 complexity · 2e0c9172c15d1aa207e1fa77c0aa6003 MD5 · raw file
- /*
- * mpi_x86.c - MSVC inline assembly implementation of s_mpv_ functions.
- *
- * ***** BEGIN LICENSE BLOCK *****
- * Version: MPL 1.1/GPL 2.0/LGPL 2.1
- *
- * The contents of this file are subject to the Mozilla Public License Version
- * 1.1 (the "License"); you may not use this file except in compliance with
- * the License. You may obtain a copy of the License at
- * http://www.mozilla.org/MPL/
- *
- * Software distributed under the License is distributed on an "AS IS" basis,
- * WITHOUT WARRANTY OF ANY KIND, either express or implied. See the License
- * for the specific language governing rights and limitations under the
- * License.
- *
- * The Original Code is the Netscape security libraries.
- *
- * The Initial Developer of the Original Code is
- * Netscape Communications Corporation.
- * Portions created by the Initial Developer are Copyright (C) 2000
- * the Initial Developer. All Rights Reserved.
- *
- * Contributor(s):
- * Benjamin Smedberg <benjamin@smedbergs.us>
- *
- * Alternatively, the contents of this file may be used under the terms of
- * either the GNU General Public License Version 2 or later (the "GPL"), or
- * the GNU Lesser General Public License Version 2.1 or later (the "LGPL"),
- * in which case the provisions of the GPL or the LGPL are applicable instead
- * of those above. If you wish to allow use of your version of this file only
- * under the terms of either the GPL or the LGPL, and not to allow others to
- * use your version of this file under the terms of the MPL, indicate your
- * decision by deleting the provisions above and replace them with the notice
- * and other provisions required by the GPL or the LGPL. If you do not delete
- * the provisions above, a recipient may use your version of this file under
- * the terms of any one of the MPL, the GPL or the LGPL.
- *
- * ***** END LICENSE BLOCK ***** */
- #include "mpi-priv.h"
- static int is_sse = -1;
- extern unsigned long s_mpi_is_sse2();
- /*
- * ebp - 36: caller's esi
- * ebp - 32: caller's edi
- * ebp - 28:
- * ebp - 24:
- * ebp - 20:
- * ebp - 16:
- * ebp - 12:
- * ebp - 8:
- * ebp - 4:
- * ebp + 0: caller's ebp
- * ebp + 4: return address
- * ebp + 8: a argument
- * ebp + 12: a_len argument
- * ebp + 16: b argument
- * ebp + 20: c argument
- * registers:
- * eax:
- * ebx: carry
- * ecx: a_len
- * edx:
- * esi: a ptr
- * edi: c ptr
- */
- __declspec(naked) void
- s_mpv_mul_d(const mp_digit *a, mp_size a_len, mp_digit b, mp_digit *c)
- {
- __asm {
- mov eax, is_sse
- cmp eax, 0
- je s_mpv_mul_d_x86
- jg s_mpv_mul_d_sse2
- call s_mpi_is_sse2
- mov is_sse, eax
- cmp eax, 0
- jg s_mpv_mul_d_sse2
- s_mpv_mul_d_x86:
- push ebp
- mov ebp,esp
- sub esp,28
- push edi
- push esi
- push ebx
- mov ebx,0 ; carry = 0
- mov ecx,[ebp+12] ; ecx = a_len
- mov edi,[ebp+20]
- cmp ecx,0
- je L_2 ; jmp if a_len == 0
- mov esi,[ebp+8] ; esi = a
- cld
- L_1:
- lodsd ; eax = [ds:esi]; esi += 4
- mov edx,[ebp+16] ; edx = b
- mul edx ; edx:eax = Phi:Plo = a_i * b
- add eax,ebx ; add carry (ebx) to edx:eax
- adc edx,0
- mov ebx,edx ; high half of product becomes next carry
- stosd ; [es:edi] = ax; edi += 4;
- dec ecx ; --a_len
- jnz L_1 ; jmp if a_len != 0
- L_2:
- mov [edi],ebx ; *c = carry
- pop ebx
- pop esi
- pop edi
- leave
- ret
- nop
- s_mpv_mul_d_sse2:
- push ebp
- mov ebp, esp
- push edi
- push esi
- psubq mm2, mm2 ; carry = 0
- mov ecx, [ebp+12] ; ecx = a_len
- movd mm1, [ebp+16] ; mm1 = b
- mov edi, [ebp+20]
- cmp ecx, 0
- je L_6 ; jmp if a_len == 0
- mov esi, [ebp+8] ; esi = a
- cld
- L_5:
- movd mm0, [esi] ; mm0 = *a++
- add esi, 4
- pmuludq mm0, mm1 ; mm0 = b * *a++
- paddq mm2, mm0 ; add the carry
- movd [edi], mm2 ; store the 32bit result
- add edi, 4
- psrlq mm2, 32 ; save the carry
- dec ecx ; --a_len
- jnz L_5 ; jmp if a_len != 0
- L_6:
- movd [edi], mm2 ; *c = carry
- emms
- pop esi
- pop edi
- leave
- ret
- nop
- }
- }
- /*
- * ebp - 36: caller's esi
- * ebp - 32: caller's edi
- * ebp - 28:
- * ebp - 24:
- * ebp - 20:
- * ebp - 16:
- * ebp - 12:
- * ebp - 8:
- * ebp - 4:
- * ebp + 0: caller's ebp
- * ebp + 4: return address
- * ebp + 8: a argument
- * ebp + 12: a_len argument
- * ebp + 16: b argument
- * ebp + 20: c argument
- * registers:
- * eax:
- * ebx: carry
- * ecx: a_len
- * edx:
- * esi: a ptr
- * edi: c ptr
- */
- __declspec(naked) void
- s_mpv_mul_d_add(const mp_digit *a, mp_size a_len, mp_digit b, mp_digit *c)
- {
- __asm {
- mov eax, is_sse
- cmp eax, 0
- je s_mpv_mul_d_add_x86
- jg s_mpv_mul_d_add_sse2
- call s_mpi_is_sse2
- mov is_sse, eax
- cmp eax, 0
- jg s_mpv_mul_d_add_sse2
- s_mpv_mul_d_add_x86:
- push ebp
- mov ebp,esp
- sub esp,28
- push edi
- push esi
- push ebx
- mov ebx,0 ; carry = 0
- mov ecx,[ebp+12] ; ecx = a_len
- mov edi,[ebp+20]
- cmp ecx,0
- je L_11 ; jmp if a_len == 0
- mov esi,[ebp+8] ; esi = a
- cld
- L_10:
- lodsd ; eax = [ds:esi]; esi += 4
- mov edx,[ebp+16] ; edx = b
- mul edx ; edx:eax = Phi:Plo = a_i * b
- add eax,ebx ; add carry (ebx) to edx:eax
- adc edx,0
- mov ebx,[edi] ; add in current word from *c
- add eax,ebx
- adc edx,0
- mov ebx,edx ; high half of product becomes next carry
- stosd ; [es:edi] = ax; edi += 4;
- dec ecx ; --a_len
- jnz L_10 ; jmp if a_len != 0
- L_11:
- mov [edi],ebx ; *c = carry
- pop ebx
- pop esi
- pop edi
- leave
- ret
- nop
- s_mpv_mul_d_add_sse2:
- push ebp
- mov ebp, esp
- push edi
- push esi
- psubq mm2, mm2 ; carry = 0
- mov ecx, [ebp+12] ; ecx = a_len
- movd mm1, [ebp+16] ; mm1 = b
- mov edi, [ebp+20]
- cmp ecx, 0
- je L_16 ; jmp if a_len == 0
- mov esi, [ebp+8] ; esi = a
- cld
- L_15:
- movd mm0, [esi] ; mm0 = *a++
- add esi, 4
- pmuludq mm0, mm1 ; mm0 = b * *a++
- paddq mm2, mm0 ; add the carry
- movd mm0, [edi]
- paddq mm2, mm0 ; add the carry
- movd [edi], mm2 ; store the 32bit result
- add edi, 4
- psrlq mm2, 32 ; save the carry
- dec ecx ; --a_len
- jnz L_15 ; jmp if a_len != 0
- L_16:
- movd [edi], mm2 ; *c = carry
- emms
- pop esi
- pop edi
- leave
- ret
- nop
- }
- }
- /*
- * ebp - 36: caller's esi
- * ebp - 32: caller's edi
- * ebp - 28:
- * ebp - 24:
- * ebp - 20:
- * ebp - 16:
- * ebp - 12:
- * ebp - 8:
- * ebp - 4:
- * ebp + 0: caller's ebp
- * ebp + 4: return address
- * ebp + 8: a argument
- * ebp + 12: a_len argument
- * ebp + 16: b argument
- * ebp + 20: c argument
- * registers:
- * eax:
- * ebx: carry
- * ecx: a_len
- * edx:
- * esi: a ptr
- * edi: c ptr
- */
- __declspec(naked) void
- s_mpv_mul_d_add_prop(const mp_digit *a, mp_size a_len, mp_digit b, mp_digit *c)
- {
- __asm {
- mov eax, is_sse
- cmp eax, 0
- je s_mpv_mul_d_add_prop_x86
- jg s_mpv_mul_d_add_prop_sse2
- call s_mpi_is_sse2
- mov is_sse, eax
- cmp eax, 0
- jg s_mpv_mul_d_add_prop_sse2
- s_mpv_mul_d_add_prop_x86:
- push ebp
- mov ebp,esp
- sub esp,28
- push edi
- push esi
- push ebx
- mov ebx,0 ; carry = 0
- mov ecx,[ebp+12] ; ecx = a_len
- mov edi,[ebp+20]
- cmp ecx,0
- je L_21 ; jmp if a_len == 0
- cld
- mov esi,[ebp+8] ; esi = a
- L_20:
- lodsd ; eax = [ds:esi]; esi += 4
- mov edx,[ebp+16] ; edx = b
- mul edx ; edx:eax = Phi:Plo = a_i * b
- add eax,ebx ; add carry (ebx) to edx:eax
- adc edx,0
- mov ebx,[edi] ; add in current word from *c
- add eax,ebx
- adc edx,0
- mov ebx,edx ; high half of product becomes next carry
- stosd ; [es:edi] = ax; edi += 4;
- dec ecx ; --a_len
- jnz L_20 ; jmp if a_len != 0
- L_21:
- cmp ebx,0 ; is carry zero?
- jz L_23
- mov eax,[edi] ; add in current word from *c
- add eax,ebx
- stosd ; [es:edi] = ax; edi += 4;
- jnc L_23
- L_22:
- mov eax,[edi] ; add in current word from *c
- adc eax,0
- stosd ; [es:edi] = ax; edi += 4;
- jc L_22
- L_23:
- pop ebx
- pop esi
- pop edi
- leave
- ret
- nop
- s_mpv_mul_d_add_prop_sse2:
- push ebp
- mov ebp, esp
- push edi
- push esi
- push ebx
- psubq mm2, mm2 ; carry = 0
- mov ecx, [ebp+12] ; ecx = a_len
- movd mm1, [ebp+16] ; mm1 = b
- mov edi, [ebp+20]
- cmp ecx, 0
- je L_26 ; jmp if a_len == 0
- mov esi, [ebp+8] ; esi = a
- cld
- L_25:
- movd mm0, [esi] ; mm0 = *a++
- movd mm3, [edi] ; fetch the sum
- add esi, 4
- pmuludq mm0, mm1 ; mm0 = b * *a++
- paddq mm2, mm0 ; add the carry
- paddq mm2, mm3 ; add *c++
- movd [edi], mm2 ; store the 32bit result
- add edi, 4
- psrlq mm2, 32 ; save the carry
- dec ecx ; --a_len
- jnz L_25 ; jmp if a_len != 0
- L_26:
- movd ebx, mm2
- cmp ebx, 0 ; is carry zero?
- jz L_28
- mov eax, [edi]
- add eax, ebx
- stosd
- jnc L_28
- L_27:
- mov eax, [edi] ; add in current word from *c
- adc eax, 0
- stosd ; [es:edi] = ax; edi += 4;
- jc L_27
- L_28:
- emms
- pop ebx
- pop esi
- pop edi
- leave
- ret
- nop
- }
- }
- /*
- * ebp - 20: caller's esi
- * ebp - 16: caller's edi
- * ebp - 12:
- * ebp - 8: carry
- * ebp - 4: a_len local
- * ebp + 0: caller's ebp
- * ebp + 4: return address
- * ebp + 8: pa argument
- * ebp + 12: a_len argument
- * ebp + 16: ps argument
- * ebp + 20:
- * registers:
- * eax:
- * ebx: carry
- * ecx: a_len
- * edx:
- * esi: a ptr
- * edi: c ptr
- */
- __declspec(naked) void
- s_mpv_sqr_add_prop(const mp_digit *a, mp_size a_len, mp_digit *sqrs)
- {
- __asm {
- mov eax, is_sse
- cmp eax, 0
- je s_mpv_sqr_add_prop_x86
- jg s_mpv_sqr_add_prop_sse2
- call s_mpi_is_sse2
- mov is_sse, eax
- cmp eax, 0
- jg s_mpv_sqr_add_prop_sse2
- s_mpv_sqr_add_prop_x86:
- push ebp
- mov ebp,esp
- sub esp,12
- push edi
- push esi
- push ebx
- mov ebx,0 ; carry = 0
- mov ecx,[ebp+12] ; a_len
- mov edi,[ebp+16] ; edi = ps
- cmp ecx,0
- je L_31 ; jump if a_len == 0
- cld
- mov esi,[ebp+8] ; esi = pa
- L_30:
- lodsd ; eax = [ds:si]; si += 4;
- mul eax
- add eax,ebx ; add "carry"
- adc edx,0
- mov ebx,[edi]
- add eax,ebx ; add low word from result
- mov ebx,[edi+4]
- stosd ; [es:di] = eax; di += 4;
- adc edx,ebx ; add high word from result
- mov ebx,0
- mov eax,edx
- adc ebx,0
- stosd ; [es:di] = eax; di += 4;
- dec ecx ; --a_len
- jnz L_30 ; jmp if a_len != 0
- L_31:
- cmp ebx,0 ; is carry zero?
- jz L_34
- mov eax,[edi] ; add in current word from *c
- add eax,ebx
- stosd ; [es:edi] = ax; edi += 4;
- jnc L_34
- L_32:
- mov eax,[edi] ; add in current word from *c
- adc eax,0
- stosd ; [es:edi] = ax; edi += 4;
- jc L_32
- L_34:
- pop ebx
- pop esi
- pop edi
- leave
- ret
- nop
- s_mpv_sqr_add_prop_sse2:
- push ebp
- mov ebp, esp
- push edi
- push esi
- push ebx
- psubq mm2, mm2 ; carry = 0
- mov ecx, [ebp+12] ; ecx = a_len
- mov edi, [ebp+16]
- cmp ecx, 0
- je L_36 ; jmp if a_len == 0
- mov esi, [ebp+8] ; esi = a
- cld
- L_35:
- movd mm0, [esi] ; mm0 = *a
- movd mm3, [edi] ; fetch the sum
- add esi, 4
- pmuludq mm0, mm0 ; mm0 = sqr(a)
- paddq mm2, mm0 ; add the carry
- paddq mm2, mm3 ; add the low word
- movd mm3, [edi+4]
- movd [edi], mm2 ; store the 32bit result
- psrlq mm2, 32
- paddq mm2, mm3 ; add the high word
- movd [edi+4], mm2 ; store the 32bit result
- psrlq mm2, 32 ; save the carry.
- add edi, 8
- dec ecx ; --a_len
- jnz L_35 ; jmp if a_len != 0
- L_36:
- movd ebx, mm2
- cmp ebx, 0 ; is carry zero?
- jz L_38
- mov eax, [edi]
- add eax, ebx
- stosd
- jnc L_38
- L_37:
- mov eax, [edi] ; add in current word from *c
- adc eax, 0
- stosd ; [es:edi] = ax; edi += 4;
- jc L_37
- L_38:
- emms
- pop ebx
- pop esi
- pop edi
- leave
- ret
- nop
- }
- }
- /*
- * Divide 64-bit (Nhi,Nlo) by 32-bit divisor, which must be normalized
- * so its high bit is 1. This code is from NSPR.
- *
- * Dump of assembler code for function s_mpv_div_2dx1d:
- *
- * esp + 0: Caller's ebx
- * esp + 4: return address
- * esp + 8: Nhi argument
- * esp + 12: Nlo argument
- * esp + 16: divisor argument
- * esp + 20: qp argument
- * esp + 24: rp argument
- * registers:
- * eax:
- * ebx: carry
- * ecx: a_len
- * edx:
- * esi: a ptr
- * edi: c ptr
- */
- __declspec(naked) mp_err
- s_mpv_div_2dx1d(mp_digit Nhi, mp_digit Nlo, mp_digit divisor,
- mp_digit *qp, mp_digit *rp)
- {
- __asm {
- push ebx
- mov edx,[esp+8]
- mov eax,[esp+12]
- mov ebx,[esp+16]
- div ebx
- mov ebx,[esp+20]
- mov [ebx],eax
- mov ebx,[esp+24]
- mov [ebx],edx
- xor eax,eax ; return zero
- pop ebx
- ret
- nop
- }
- }