PageRenderTime 32ms CodeModel.GetById 16ms app.highlight 13ms RepoModel.GetById 2ms app.codeStats 0ms

/security/nss/lib/freebl/mpi/mpi_sparc.c

http://github.com/zpao/v8monkey
C | 257 lines | 175 code | 26 blank | 56 comment | 41 complexity | 1cfa46e6791b118e70a3d8a9b15f4f12 MD5 | raw file
  1/* ***** BEGIN LICENSE BLOCK *****
  2 * Version: MPL 1.1/GPL 2.0/LGPL 2.1
  3 *
  4 * The contents of this file are subject to the Mozilla Public License Version
  5 * 1.1 (the "License"); you may not use this file except in compliance with
  6 * the License. You may obtain a copy of the License at
  7 * http://www.mozilla.org/MPL/
  8 *
  9 * Software distributed under the License is distributed on an "AS IS" basis,
 10 * WITHOUT WARRANTY OF ANY KIND, either express or implied. See the License
 11 * for the specific language governing rights and limitations under the
 12 * License.
 13 *
 14 * The Original Code is the Netscape security libraries.
 15 *
 16 * The Initial Developer of the Original Code is
 17 * Netscape Communications Corporation.
 18 * Portions created by the Initial Developer are Copyright (C) 2000
 19 * the Initial Developer. All Rights Reserved.
 20 *
 21 * Contributor(s):
 22 *
 23 * Alternatively, the contents of this file may be used under the terms of
 24 * either the GNU General Public License Version 2 or later (the "GPL"), or
 25 * the GNU Lesser General Public License Version 2.1 or later (the "LGPL"),
 26 * in which case the provisions of the GPL or the LGPL are applicable instead
 27 * of those above. If you wish to allow use of your version of this file only
 28 * under the terms of either the GPL or the LGPL, and not to allow others to
 29 * use your version of this file under the terms of the MPL, indicate your
 30 * decision by deleting the provisions above and replace them with the notice
 31 * and other provisions required by the GPL or the LGPL. If you do not delete
 32 * the provisions above, a recipient may use your version of this file under
 33 * the terms of any one of the MPL, the GPL or the LGPL.
 34 *
 35 * ***** END LICENSE BLOCK ***** */
 36/* $Id: mpi_sparc.c,v 1.7 2006/01/22 08:43:57 nelsonb%netscape.com Exp $ */
 37
 38/* Multiplication performance enhancements for sparc v8+vis CPUs. */
 39
 40#include "mpi-priv.h"
 41#include <stddef.h>
 42#include <sys/systeminfo.h>
 43#include <strings.h>
 44
 45/* In the functions below, */
 46/* vector y must be 8-byte aligned, and n must be even */
 47/* returns carry out of high order word of result */
 48/* maximum n is 256 */
 49
 50/* vector x += vector y * scalar a; where y is of length n words. */
 51extern mp_digit mul_add_inp(mp_digit *x, const mp_digit *y, int n, mp_digit a);
 52
 53/* vector z = vector x + vector y * scalar a; where y is of length n words. */
 54extern mp_digit mul_add(mp_digit *z, const mp_digit *x, const mp_digit *y, 
 55			int n, mp_digit a);
 56
 57/* v8 versions of these functions run on any Sparc v8 CPU. */
 58
 /*
  * MP_MUL_DxD(a, b, Phi, Plo)
  *
  * Multiplies the two digits a and b in unsigned long long arithmetic and
  * stores the low digit of the product in Plo and the bits above
  * MP_DIGIT_BIT in Phi.  Phi and Plo must be assignable mp_digit lvalues.
  *
  * This trick works on Sparc V8 CPUs with the Workshop compilers.
  *
  * Every argument is parenthesized so that expression arguments such as
  * "x + 1" are multiplied as a whole, and the body is wrapped in
  * do { } while (0) so the macro behaves as a single statement (safe in an
  * unbraced if/else).  Invoke it with a trailing semicolon.
  */
 #define MP_MUL_DxD(a, b, Phi, Plo) \
   do { \
     unsigned long long mp_mul_dxd_prod_ = (unsigned long long)(a) * (b); \
     (Plo) = (mp_digit)mp_mul_dxd_prod_; \
     (Phi) = (mp_digit)(mp_mul_dxd_prod_ >> MP_DIGIT_BIT); \
   } while (0)
 64
 65/* c = a * b */
 66static void 
 67v8_mpv_mul_d(const mp_digit *a, mp_size a_len, mp_digit b, mp_digit *c)
 68{
 69#if !defined(MP_NO_MP_WORD)
 70  mp_digit   d = 0;
 71
 72  /* Inner product:  Digits of a */
 73  while (a_len--) {
 74    mp_word w = ((mp_word)b * *a++) + d;
 75    *c++ = ACCUM(w);
 76    d = CARRYOUT(w);
 77  }
 78  *c = d;
 79#else
 80  mp_digit carry = 0;
 81  while (a_len--) {
 82    mp_digit a_i = *a++;
 83    mp_digit a0b0, a1b1;
 84
 85    MP_MUL_DxD(a_i, b, a1b1, a0b0);
 86
 87    a0b0 += carry;
 88    if (a0b0 < carry)
 89      ++a1b1;
 90    *c++ = a0b0;
 91    carry = a1b1;
 92  }
 93  *c = carry;
 94#endif
 95}
 96
 97/* c += a * b */
 98static void 
 99v8_mpv_mul_d_add(const mp_digit *a, mp_size a_len, mp_digit b, mp_digit *c)
100{
101#if !defined(MP_NO_MP_WORD)
102  mp_digit   d = 0;
103
104  /* Inner product:  Digits of a */
105  while (a_len--) {
106    mp_word w = ((mp_word)b * *a++) + *c + d;
107    *c++ = ACCUM(w);
108    d = CARRYOUT(w);
109  }
110  *c = d;
111#else
112  mp_digit carry = 0;
113  while (a_len--) {
114    mp_digit a_i = *a++;
115    mp_digit a0b0, a1b1;
116
117    MP_MUL_DxD(a_i, b, a1b1, a0b0);
118
119    a0b0 += carry;
120    if (a0b0 < carry)
121      ++a1b1;
122    a0b0 += a_i = *c;
123    if (a0b0 < a_i)
124      ++a1b1;
125    *c++ = a0b0;
126    carry = a1b1;
127  }
128  *c = carry;
129#endif
130}
131
132/* Presently, this is only used by the Montgomery arithmetic code. */
133/* c += a * b */
134static void 
135v8_mpv_mul_d_add_prop(const mp_digit *a, mp_size a_len, mp_digit b, mp_digit *c)
136{
137#if !defined(MP_NO_MP_WORD)
138  mp_digit   d = 0;
139
140  /* Inner product:  Digits of a */
141  while (a_len--) {
142    mp_word w = ((mp_word)b * *a++) + *c + d;
143    *c++ = ACCUM(w);
144    d = CARRYOUT(w);
145  }
146
147  while (d) {
148    mp_word w = (mp_word)*c + d;
149    *c++ = ACCUM(w);
150    d = CARRYOUT(w);
151  }
152#else
153  mp_digit carry = 0;
154  while (a_len--) {
155    mp_digit a_i = *a++;
156    mp_digit a0b0, a1b1;
157
158    MP_MUL_DxD(a_i, b, a1b1, a0b0);
159
160    a0b0 += carry;
161    if (a0b0 < carry)
162      ++a1b1;
163
164    a0b0 += a_i = *c;
165    if (a0b0 < a_i)
166      ++a1b1;
167
168    *c++ = a0b0;
169    carry = a1b1;
170  }
171  while (carry) {
172    mp_digit c_i = *c;
173    carry += c_i;
174    *c++ = carry;
175    carry = carry < c_i;
176  }
177#endif
178}
179
180/* These functions run only on v8plus+vis or v9+vis CPUs. */
181
182/* c = a * b */
183void 
184s_mpv_mul_d(const mp_digit *a, mp_size a_len, mp_digit b, mp_digit *c)
185{
186    mp_digit d;
187    mp_digit x[258];
188    if (a_len <= 256) {
189	if (a == c || ((ptrdiff_t)a & 0x7) != 0 || (a_len & 1) != 0) {
190	    mp_digit * px;
191	    px = (((ptrdiff_t)x & 0x7) != 0) ? x + 1 : x;
192	    memcpy(px, a, a_len * sizeof(*a));
193	    a = px;
194	    if (a_len & 1) {
195		px[a_len] = 0;
196	    }
197	}
198	s_mp_setz(c, a_len + 1);
199	d = mul_add_inp(c, a, a_len, b);
200	c[a_len] = d;
201    } else {
202	v8_mpv_mul_d(a, a_len, b, c);
203    }
204}
205
206/* c += a * b, where a is a_len words long. */
207void     
208s_mpv_mul_d_add(const mp_digit *a, mp_size a_len, mp_digit b, mp_digit *c)
209{
210    mp_digit d;
211    mp_digit x[258];
212    if (a_len <= 256) {
213	if (((ptrdiff_t)a & 0x7) != 0 || (a_len & 1) != 0) {
214	    mp_digit * px;
215	    px = (((ptrdiff_t)x & 0x7) != 0) ? x + 1 : x;
216	    memcpy(px, a, a_len * sizeof(*a));
217	    a = px;
218	    if (a_len & 1) {
219		px[a_len] = 0;
220	    }
221	}
222	d = mul_add_inp(c, a, a_len, b);
223	c[a_len] = d;
224    } else {
225	v8_mpv_mul_d_add(a, a_len, b, c);
226    }
227}
228
229/* c += a * b, where a is y words long. */
230void     
231s_mpv_mul_d_add_prop(const mp_digit *a, mp_size a_len, mp_digit b, mp_digit *c)
232{
233    mp_digit d;
234    mp_digit x[258];
235    if (a_len <= 256) {
236	if (((ptrdiff_t)a & 0x7) != 0 || (a_len & 1) != 0) {
237	    mp_digit * px;
238	    px = (((ptrdiff_t)x & 0x7) != 0) ? x + 1 : x;
239	    memcpy(px, a, a_len * sizeof(*a));
240	    a = px;
241	    if (a_len & 1) {
242		px[a_len] = 0;
243	    }
244	}
245	d = mul_add_inp(c, a, a_len, b);
246	if (d) {
247	    c += a_len;
248	    do {
249		mp_digit sum = d + *c;
250		*c++ = sum;
251		d = sum < d;
252	    } while (d);
253	}
254    } else {
255	v8_mpv_mul_d_add_prop(a, a_len, b, c);
256    }
257}