/xblas-1.0.248/src/symv2/BLAS_dsymv2_s_s_x.c
# · C · 588 lines · 386 code · 103 blank · 99 comment · 67 complexity · fd12b155137e4f609f9b31c0b7d5daa0 MD5 · raw file
- #include <blas_extended.h>
- #include <blas_extended_private.h>
- #include <blas_fpu.h>
- void BLAS_dsymv2_s_s_x(enum blas_order_type order, enum blas_uplo_type uplo,
- int n, double alpha, const float *a, int lda,
- const float *x_head, const float *x_tail, int incx,
- double beta, double *y, int incy,
- enum blas_prec_type prec)
- /*
- * Purpose
- * =======
- *
- * This routines computes the matrix product:
- *
- * y <- alpha * A * (x_head + x_tail) + beta * y
- *
- * where A is a symmetric matrix.
- *
- * Arguments
- * =========
- *
- * order (input) enum blas_order_type
- * Storage format of input symmetric matrix A.
- *
- * uplo (input) enum blas_uplo_type
- * Determines which half of matrix A (upper or lower triangle)
- * is accessed.
- *
- * n (input) int
- * Dimension of A and size of vectors x, y.
- *
- * alpha (input) double
- *
- * a (input) float*
- * Matrix A.
- *
- * lda (input) int
- * Leading dimension of matrix A.
- *
- * x_head (input) float*
- * Vector x_head
- *
- * x_tail (input) float*
- * Vector x_tail
- *
- * incx (input) int
- * Stride for vector x.
- *
- * beta (input) double
- *
- * y (input) float*
- * Vector y.
- *
- * incy (input) int
- * Stride for vector y.
- *
- * prec (input) enum blas_prec_type
- * Specifies the internal precision to be used.
- * = blas_prec_single: single precision.
- * = blas_prec_double: double precision.
- * = blas_prec_extra : anything at least 1.5 times as accurate
- * than double, and wider than 80-bits.
- * We use double-double in our implementation.
- *
- */
- {
- /* Routine name */
- const char routine_name[] = "BLAS_dsymv2_s_s_x";
- switch (prec) {
- case blas_prec_single:{
- int i, j;
- int xi, yi, xi0, yi0;
- int aij, ai;
- int incai;
- int incaij, incaij2;
- const float *a_i = a;
- const float *x_head_i = x_head;
- const float *x_tail_i = x_tail;
- double *y_i = y;
- double alpha_i = alpha;
- double beta_i = beta;
- float a_elem;
- float x_elem;
- double y_elem;
- double prod1;
- double prod2;
- double sum1;
- double sum2;
- double tmp1;
- double tmp2;
- double tmp3;
- /* Test for no-op */
- if (n <= 0) {
- return;
- }
- if (alpha_i == 0.0 && beta_i == 1.0) {
- return;
- }
- /* Check for error conditions. */
- if (n < 0) {
- BLAS_error(routine_name, -3, n, NULL);
- }
- if (lda < n) {
- BLAS_error(routine_name, -6, n, NULL);
- }
- if (incx == 0) {
- BLAS_error(routine_name, -9, incx, NULL);
- }
- if (incy == 0) {
- BLAS_error(routine_name, -12, incy, NULL);
- }
- if ((order == blas_colmajor && uplo == blas_upper) ||
- (order == blas_rowmajor && uplo == blas_lower)) {
- incai = lda;
- incaij = 1;
- incaij2 = lda;
- } else {
- incai = 1;
- incaij = lda;
- incaij2 = 1;
- }
- xi0 = (incx > 0) ? 0 : ((-n + 1) * incx);
- yi0 = (incy > 0) ? 0 : ((-n + 1) * incy);
- /* The most general form, y <--- alpha * A * (x_head + x_tail) + beta * y */
- for (i = 0, yi = yi0, ai = 0; i < n; i++, yi += incy, ai += incai) {
- sum1 = 0.0;
- sum2 = 0.0;
- for (j = 0, aij = ai, xi = xi0; j < i; j++, aij += incaij, xi += incx) {
- a_elem = a_i[aij];
- x_elem = x_head_i[xi];
- prod1 = (double) a_elem *x_elem;
- sum1 = sum1 + prod1;
- x_elem = x_tail_i[xi];
- prod2 = (double) a_elem *x_elem;
- sum2 = sum2 + prod2;
- }
- for (; j < n; j++, aij += incaij2, xi += incx) {
- a_elem = a_i[aij];
- x_elem = x_head_i[xi];
- prod1 = (double) a_elem *x_elem;
- sum1 = sum1 + prod1;
- x_elem = x_tail_i[xi];
- prod2 = (double) a_elem *x_elem;
- sum2 = sum2 + prod2;
- }
- sum1 = sum1 + sum2;
- tmp1 = sum1 * alpha_i;
- y_elem = y_i[yi];
- tmp2 = y_elem * beta_i;
- tmp3 = tmp1 + tmp2;
- y_i[yi] = tmp3;
- }
- break;
- }
- case blas_prec_double:
- case blas_prec_indigenous:{
- int i, j;
- int xi, yi, xi0, yi0;
- int aij, ai;
- int incai;
- int incaij, incaij2;
- const float *a_i = a;
- const float *x_head_i = x_head;
- const float *x_tail_i = x_tail;
- double *y_i = y;
- double alpha_i = alpha;
- double beta_i = beta;
- float a_elem;
- float x_elem;
- double y_elem;
- double prod1;
- double prod2;
- double sum1;
- double sum2;
- double tmp1;
- double tmp2;
- double tmp3;
- /* Test for no-op */
- if (n <= 0) {
- return;
- }
- if (alpha_i == 0.0 && beta_i == 1.0) {
- return;
- }
- /* Check for error conditions. */
- if (n < 0) {
- BLAS_error(routine_name, -3, n, NULL);
- }
- if (lda < n) {
- BLAS_error(routine_name, -6, n, NULL);
- }
- if (incx == 0) {
- BLAS_error(routine_name, -9, incx, NULL);
- }
- if (incy == 0) {
- BLAS_error(routine_name, -12, incy, NULL);
- }
- if ((order == blas_colmajor && uplo == blas_upper) ||
- (order == blas_rowmajor && uplo == blas_lower)) {
- incai = lda;
- incaij = 1;
- incaij2 = lda;
- } else {
- incai = 1;
- incaij = lda;
- incaij2 = 1;
- }
- xi0 = (incx > 0) ? 0 : ((-n + 1) * incx);
- yi0 = (incy > 0) ? 0 : ((-n + 1) * incy);
- /* The most general form, y <--- alpha * A * (x_head + x_tail) + beta * y */
- for (i = 0, yi = yi0, ai = 0; i < n; i++, yi += incy, ai += incai) {
- sum1 = 0.0;
- sum2 = 0.0;
- for (j = 0, aij = ai, xi = xi0; j < i; j++, aij += incaij, xi += incx) {
- a_elem = a_i[aij];
- x_elem = x_head_i[xi];
- prod1 = (double) a_elem *x_elem;
- sum1 = sum1 + prod1;
- x_elem = x_tail_i[xi];
- prod2 = (double) a_elem *x_elem;
- sum2 = sum2 + prod2;
- }
- for (; j < n; j++, aij += incaij2, xi += incx) {
- a_elem = a_i[aij];
- x_elem = x_head_i[xi];
- prod1 = (double) a_elem *x_elem;
- sum1 = sum1 + prod1;
- x_elem = x_tail_i[xi];
- prod2 = (double) a_elem *x_elem;
- sum2 = sum2 + prod2;
- }
- sum1 = sum1 + sum2;
- tmp1 = sum1 * alpha_i;
- y_elem = y_i[yi];
- tmp2 = y_elem * beta_i;
- tmp3 = tmp1 + tmp2;
- y_i[yi] = tmp3;
- }
- break;
- }
- case blas_prec_extra:{
- int i, j;
- int xi, yi, xi0, yi0;
- int aij, ai;
- int incai;
- int incaij, incaij2;
- const float *a_i = a;
- const float *x_head_i = x_head;
- const float *x_tail_i = x_tail;
- double *y_i = y;
- double alpha_i = alpha;
- double beta_i = beta;
- float a_elem;
- float x_elem;
- double y_elem;
- double head_prod1, tail_prod1;
- double head_prod2, tail_prod2;
- double head_sum1, tail_sum1;
- double head_sum2, tail_sum2;
- double head_tmp1, tail_tmp1;
- double head_tmp2, tail_tmp2;
- double head_tmp3, tail_tmp3;
- FPU_FIX_DECL;
- /* Test for no-op */
- if (n <= 0) {
- return;
- }
- if (alpha_i == 0.0 && beta_i == 1.0) {
- return;
- }
- /* Check for error conditions. */
- if (n < 0) {
- BLAS_error(routine_name, -3, n, NULL);
- }
- if (lda < n) {
- BLAS_error(routine_name, -6, n, NULL);
- }
- if (incx == 0) {
- BLAS_error(routine_name, -9, incx, NULL);
- }
- if (incy == 0) {
- BLAS_error(routine_name, -12, incy, NULL);
- }
- if ((order == blas_colmajor && uplo == blas_upper) ||
- (order == blas_rowmajor && uplo == blas_lower)) {
- incai = lda;
- incaij = 1;
- incaij2 = lda;
- } else {
- incai = 1;
- incaij = lda;
- incaij2 = 1;
- }
- xi0 = (incx > 0) ? 0 : ((-n + 1) * incx);
- yi0 = (incy > 0) ? 0 : ((-n + 1) * incy);
- FPU_FIX_START;
- /* The most general form, y <--- alpha * A * (x_head + x_tail) + beta * y */
- for (i = 0, yi = yi0, ai = 0; i < n; i++, yi += incy, ai += incai) {
- head_sum1 = tail_sum1 = 0.0;
- head_sum2 = tail_sum2 = 0.0;
- for (j = 0, aij = ai, xi = xi0; j < i; j++, aij += incaij, xi += incx) {
- a_elem = a_i[aij];
- x_elem = x_head_i[xi];
- head_prod1 = (double) a_elem *x_elem;
- tail_prod1 = 0.0;
- {
- /* Compute double-double = double-double + double-double. */
- double bv;
- double s1, s2, t1, t2;
- /* Add two hi words. */
- s1 = head_sum1 + head_prod1;
- bv = s1 - head_sum1;
- s2 = ((head_prod1 - bv) + (head_sum1 - (s1 - bv)));
- /* Add two lo words. */
- t1 = tail_sum1 + tail_prod1;
- bv = t1 - tail_sum1;
- t2 = ((tail_prod1 - bv) + (tail_sum1 - (t1 - bv)));
- s2 += t1;
- /* Renormalize (s1, s2) to (t1, s2) */
- t1 = s1 + s2;
- s2 = s2 - (t1 - s1);
- t2 += s2;
- /* Renormalize (t1, t2) */
- head_sum1 = t1 + t2;
- tail_sum1 = t2 - (head_sum1 - t1);
- }
- x_elem = x_tail_i[xi];
- head_prod2 = (double) a_elem *x_elem;
- tail_prod2 = 0.0;
- {
- /* Compute double-double = double-double + double-double. */
- double bv;
- double s1, s2, t1, t2;
- /* Add two hi words. */
- s1 = head_sum2 + head_prod2;
- bv = s1 - head_sum2;
- s2 = ((head_prod2 - bv) + (head_sum2 - (s1 - bv)));
- /* Add two lo words. */
- t1 = tail_sum2 + tail_prod2;
- bv = t1 - tail_sum2;
- t2 = ((tail_prod2 - bv) + (tail_sum2 - (t1 - bv)));
- s2 += t1;
- /* Renormalize (s1, s2) to (t1, s2) */
- t1 = s1 + s2;
- s2 = s2 - (t1 - s1);
- t2 += s2;
- /* Renormalize (t1, t2) */
- head_sum2 = t1 + t2;
- tail_sum2 = t2 - (head_sum2 - t1);
- }
- }
- for (; j < n; j++, aij += incaij2, xi += incx) {
- a_elem = a_i[aij];
- x_elem = x_head_i[xi];
- head_prod1 = (double) a_elem *x_elem;
- tail_prod1 = 0.0;
- {
- /* Compute double-double = double-double + double-double. */
- double bv;
- double s1, s2, t1, t2;
- /* Add two hi words. */
- s1 = head_sum1 + head_prod1;
- bv = s1 - head_sum1;
- s2 = ((head_prod1 - bv) + (head_sum1 - (s1 - bv)));
- /* Add two lo words. */
- t1 = tail_sum1 + tail_prod1;
- bv = t1 - tail_sum1;
- t2 = ((tail_prod1 - bv) + (tail_sum1 - (t1 - bv)));
- s2 += t1;
- /* Renormalize (s1, s2) to (t1, s2) */
- t1 = s1 + s2;
- s2 = s2 - (t1 - s1);
- t2 += s2;
- /* Renormalize (t1, t2) */
- head_sum1 = t1 + t2;
- tail_sum1 = t2 - (head_sum1 - t1);
- }
- x_elem = x_tail_i[xi];
- head_prod2 = (double) a_elem *x_elem;
- tail_prod2 = 0.0;
- {
- /* Compute double-double = double-double + double-double. */
- double bv;
- double s1, s2, t1, t2;
- /* Add two hi words. */
- s1 = head_sum2 + head_prod2;
- bv = s1 - head_sum2;
- s2 = ((head_prod2 - bv) + (head_sum2 - (s1 - bv)));
- /* Add two lo words. */
- t1 = tail_sum2 + tail_prod2;
- bv = t1 - tail_sum2;
- t2 = ((tail_prod2 - bv) + (tail_sum2 - (t1 - bv)));
- s2 += t1;
- /* Renormalize (s1, s2) to (t1, s2) */
- t1 = s1 + s2;
- s2 = s2 - (t1 - s1);
- t2 += s2;
- /* Renormalize (t1, t2) */
- head_sum2 = t1 + t2;
- tail_sum2 = t2 - (head_sum2 - t1);
- }
- }
- {
- /* Compute double-double = double-double + double-double. */
- double bv;
- double s1, s2, t1, t2;
- /* Add two hi words. */
- s1 = head_sum1 + head_sum2;
- bv = s1 - head_sum1;
- s2 = ((head_sum2 - bv) + (head_sum1 - (s1 - bv)));
- /* Add two lo words. */
- t1 = tail_sum1 + tail_sum2;
- bv = t1 - tail_sum1;
- t2 = ((tail_sum2 - bv) + (tail_sum1 - (t1 - bv)));
- s2 += t1;
- /* Renormalize (s1, s2) to (t1, s2) */
- t1 = s1 + s2;
- s2 = s2 - (t1 - s1);
- t2 += s2;
- /* Renormalize (t1, t2) */
- head_sum1 = t1 + t2;
- tail_sum1 = t2 - (head_sum1 - t1);
- }
- {
- /* Compute double-double = double-double * double. */
- double a11, a21, b1, b2, c11, c21, c2, con, t1, t2;
- con = head_sum1 * split;
- a11 = con - head_sum1;
- a11 = con - a11;
- a21 = head_sum1 - a11;
- con = alpha_i * split;
- b1 = con - alpha_i;
- b1 = con - b1;
- b2 = alpha_i - b1;
- c11 = head_sum1 * alpha_i;
- c21 = (((a11 * b1 - c11) + a11 * b2) + a21 * b1) + a21 * b2;
- c2 = tail_sum1 * alpha_i;
- t1 = c11 + c2;
- t2 = (c2 - (t1 - c11)) + c21;
- head_tmp1 = t1 + t2;
- tail_tmp1 = t2 - (head_tmp1 - t1);
- }
- y_elem = y_i[yi];
- {
- /* Compute double_double = double * double. */
- double a1, a2, b1, b2, con;
- con = y_elem * split;
- a1 = con - y_elem;
- a1 = con - a1;
- a2 = y_elem - a1;
- con = beta_i * split;
- b1 = con - beta_i;
- b1 = con - b1;
- b2 = beta_i - b1;
- head_tmp2 = y_elem * beta_i;
- tail_tmp2 = (((a1 * b1 - head_tmp2) + a1 * b2) + a2 * b1) + a2 * b2;
- }
- {
- /* Compute double-double = double-double + double-double. */
- double bv;
- double s1, s2, t1, t2;
- /* Add two hi words. */
- s1 = head_tmp1 + head_tmp2;
- bv = s1 - head_tmp1;
- s2 = ((head_tmp2 - bv) + (head_tmp1 - (s1 - bv)));
- /* Add two lo words. */
- t1 = tail_tmp1 + tail_tmp2;
- bv = t1 - tail_tmp1;
- t2 = ((tail_tmp2 - bv) + (tail_tmp1 - (t1 - bv)));
- s2 += t1;
- /* Renormalize (s1, s2) to (t1, s2) */
- t1 = s1 + s2;
- s2 = s2 - (t1 - s1);
- t2 += s2;
- /* Renormalize (t1, t2) */
- head_tmp3 = t1 + t2;
- tail_tmp3 = t2 - (head_tmp3 - t1);
- }
- y_i[yi] = head_tmp3;
- }
- FPU_FIX_STOP;
- break;
- }
- }
- } /* end BLAS_dsymv2_s_s_x */