/security/nss/lib/freebl/mpi/mpi_sparc.c

http://github.com/zpao/v8monkey · C · 257 lines · 175 code · 26 blank · 56 comment · 41 complexity · 1cfa46e6791b118e70a3d8a9b15f4f12 MD5 · raw file

  1. /* ***** BEGIN LICENSE BLOCK *****
  2. * Version: MPL 1.1/GPL 2.0/LGPL 2.1
  3. *
  4. * The contents of this file are subject to the Mozilla Public License Version
  5. * 1.1 (the "License"); you may not use this file except in compliance with
  6. * the License. You may obtain a copy of the License at
  7. * http://www.mozilla.org/MPL/
  8. *
  9. * Software distributed under the License is distributed on an "AS IS" basis,
  10. * WITHOUT WARRANTY OF ANY KIND, either express or implied. See the License
  11. * for the specific language governing rights and limitations under the
  12. * License.
  13. *
  14. * The Original Code is the Netscape security libraries.
  15. *
  16. * The Initial Developer of the Original Code is
  17. * Netscape Communications Corporation.
  18. * Portions created by the Initial Developer are Copyright (C) 2000
  19. * the Initial Developer. All Rights Reserved.
  20. *
  21. * Contributor(s):
  22. *
  23. * Alternatively, the contents of this file may be used under the terms of
  24. * either the GNU General Public License Version 2 or later (the "GPL"), or
  25. * the GNU Lesser General Public License Version 2.1 or later (the "LGPL"),
  26. * in which case the provisions of the GPL or the LGPL are applicable instead
  27. * of those above. If you wish to allow use of your version of this file only
  28. * under the terms of either the GPL or the LGPL, and not to allow others to
  29. * use your version of this file under the terms of the MPL, indicate your
  30. * decision by deleting the provisions above and replace them with the notice
  31. * and other provisions required by the GPL or the LGPL. If you do not delete
  32. * the provisions above, a recipient may use your version of this file under
  33. * the terms of any one of the MPL, the GPL or the LGPL.
  34. *
  35. * ***** END LICENSE BLOCK ***** */
  36. /* $Id: mpi_sparc.c,v 1.7 2006/01/22 08:43:57 nelsonb%netscape.com Exp $ */
  37. /* Multiplication performance enhancements for sparc v8+vis CPUs. */
  38. #include "mpi-priv.h"
  39. #include <stddef.h>
  40. #include <sys/systeminfo.h>
  41. #include <strings.h>
  42. /* In the functions below, */
  43. /* vector y must be 8-byte aligned, and n must be even */
  44. /* returns carry out of high order word of result */
  45. /* maximum n is 256 */
  46. /* vector x += vector y * scalar a; where y is of length n words. */
  47. extern mp_digit mul_add_inp(mp_digit *x, const mp_digit *y, int n, mp_digit a);
  48. /* vector z = vector x + vector y * scalar a; where y is of length n words. */
  49. extern mp_digit mul_add(mp_digit *z, const mp_digit *x, const mp_digit *y,
  50. int n, mp_digit a);
  51. /* v8 versions of these functions run on any Sparc v8 CPU. */
  52. /* This trick works on Sparc V8 CPUs with the Workshop compilers. */
  53. #define MP_MUL_DxD(a, b, Phi, Plo) \
  54. { unsigned long long product = (unsigned long long)a * b; \
  55. Plo = (mp_digit)product; \
  56. Phi = (mp_digit)(product >> MP_DIGIT_BIT); }
  57. /* c = a * b */
  58. static void
  59. v8_mpv_mul_d(const mp_digit *a, mp_size a_len, mp_digit b, mp_digit *c)
  60. {
  61. #if !defined(MP_NO_MP_WORD)
  62. mp_digit d = 0;
  63. /* Inner product: Digits of a */
  64. while (a_len--) {
  65. mp_word w = ((mp_word)b * *a++) + d;
  66. *c++ = ACCUM(w);
  67. d = CARRYOUT(w);
  68. }
  69. *c = d;
  70. #else
  71. mp_digit carry = 0;
  72. while (a_len--) {
  73. mp_digit a_i = *a++;
  74. mp_digit a0b0, a1b1;
  75. MP_MUL_DxD(a_i, b, a1b1, a0b0);
  76. a0b0 += carry;
  77. if (a0b0 < carry)
  78. ++a1b1;
  79. *c++ = a0b0;
  80. carry = a1b1;
  81. }
  82. *c = carry;
  83. #endif
  84. }
  85. /* c += a * b */
  86. static void
  87. v8_mpv_mul_d_add(const mp_digit *a, mp_size a_len, mp_digit b, mp_digit *c)
  88. {
  89. #if !defined(MP_NO_MP_WORD)
  90. mp_digit d = 0;
  91. /* Inner product: Digits of a */
  92. while (a_len--) {
  93. mp_word w = ((mp_word)b * *a++) + *c + d;
  94. *c++ = ACCUM(w);
  95. d = CARRYOUT(w);
  96. }
  97. *c = d;
  98. #else
  99. mp_digit carry = 0;
  100. while (a_len--) {
  101. mp_digit a_i = *a++;
  102. mp_digit a0b0, a1b1;
  103. MP_MUL_DxD(a_i, b, a1b1, a0b0);
  104. a0b0 += carry;
  105. if (a0b0 < carry)
  106. ++a1b1;
  107. a0b0 += a_i = *c;
  108. if (a0b0 < a_i)
  109. ++a1b1;
  110. *c++ = a0b0;
  111. carry = a1b1;
  112. }
  113. *c = carry;
  114. #endif
  115. }
  116. /* Presently, this is only used by the Montgomery arithmetic code. */
  117. /* c += a * b */
  118. static void
  119. v8_mpv_mul_d_add_prop(const mp_digit *a, mp_size a_len, mp_digit b, mp_digit *c)
  120. {
  121. #if !defined(MP_NO_MP_WORD)
  122. mp_digit d = 0;
  123. /* Inner product: Digits of a */
  124. while (a_len--) {
  125. mp_word w = ((mp_word)b * *a++) + *c + d;
  126. *c++ = ACCUM(w);
  127. d = CARRYOUT(w);
  128. }
  129. while (d) {
  130. mp_word w = (mp_word)*c + d;
  131. *c++ = ACCUM(w);
  132. d = CARRYOUT(w);
  133. }
  134. #else
  135. mp_digit carry = 0;
  136. while (a_len--) {
  137. mp_digit a_i = *a++;
  138. mp_digit a0b0, a1b1;
  139. MP_MUL_DxD(a_i, b, a1b1, a0b0);
  140. a0b0 += carry;
  141. if (a0b0 < carry)
  142. ++a1b1;
  143. a0b0 += a_i = *c;
  144. if (a0b0 < a_i)
  145. ++a1b1;
  146. *c++ = a0b0;
  147. carry = a1b1;
  148. }
  149. while (carry) {
  150. mp_digit c_i = *c;
  151. carry += c_i;
  152. *c++ = carry;
  153. carry = carry < c_i;
  154. }
  155. #endif
  156. }
  157. /* These functions run only on v8plus+vis or v9+vis CPUs. */
  158. /* c = a * b */
  159. void
  160. s_mpv_mul_d(const mp_digit *a, mp_size a_len, mp_digit b, mp_digit *c)
  161. {
  162. mp_digit d;
  163. mp_digit x[258];
  164. if (a_len <= 256) {
  165. if (a == c || ((ptrdiff_t)a & 0x7) != 0 || (a_len & 1) != 0) {
  166. mp_digit * px;
  167. px = (((ptrdiff_t)x & 0x7) != 0) ? x + 1 : x;
  168. memcpy(px, a, a_len * sizeof(*a));
  169. a = px;
  170. if (a_len & 1) {
  171. px[a_len] = 0;
  172. }
  173. }
  174. s_mp_setz(c, a_len + 1);
  175. d = mul_add_inp(c, a, a_len, b);
  176. c[a_len] = d;
  177. } else {
  178. v8_mpv_mul_d(a, a_len, b, c);
  179. }
  180. }
  181. /* c += a * b, where a is a_len words long. */
  182. void
  183. s_mpv_mul_d_add(const mp_digit *a, mp_size a_len, mp_digit b, mp_digit *c)
  184. {
  185. mp_digit d;
  186. mp_digit x[258];
  187. if (a_len <= 256) {
  188. if (((ptrdiff_t)a & 0x7) != 0 || (a_len & 1) != 0) {
  189. mp_digit * px;
  190. px = (((ptrdiff_t)x & 0x7) != 0) ? x + 1 : x;
  191. memcpy(px, a, a_len * sizeof(*a));
  192. a = px;
  193. if (a_len & 1) {
  194. px[a_len] = 0;
  195. }
  196. }
  197. d = mul_add_inp(c, a, a_len, b);
  198. c[a_len] = d;
  199. } else {
  200. v8_mpv_mul_d_add(a, a_len, b, c);
  201. }
  202. }
  203. /* c += a * b, where a is y words long. */
  204. void
  205. s_mpv_mul_d_add_prop(const mp_digit *a, mp_size a_len, mp_digit b, mp_digit *c)
  206. {
  207. mp_digit d;
  208. mp_digit x[258];
  209. if (a_len <= 256) {
  210. if (((ptrdiff_t)a & 0x7) != 0 || (a_len & 1) != 0) {
  211. mp_digit * px;
  212. px = (((ptrdiff_t)x & 0x7) != 0) ? x + 1 : x;
  213. memcpy(px, a, a_len * sizeof(*a));
  214. a = px;
  215. if (a_len & 1) {
  216. px[a_len] = 0;
  217. }
  218. }
  219. d = mul_add_inp(c, a, a_len, b);
  220. if (d) {
  221. c += a_len;
  222. do {
  223. mp_digit sum = d + *c;
  224. *c++ = sum;
  225. d = sum < d;
  226. } while (d);
  227. }
  228. } else {
  229. v8_mpv_mul_d_add_prop(a, a_len, b, c);
  230. }
  231. }