PageRenderTime 48ms CodeModel.GetById 24ms RepoModel.GetById 0ms app.codeStats 0ms

/src/atlas-3.8.3/src/blas/pklevel3/gpmm/ATL_cpputblk.c

http://github.com/numpy/vendor
C | 109 lines | 72 code | 2 blank | 35 comment | 19 complexity | 5bd8b19f7181bda0673f36f0497e1fe3 MD5 | raw file
Possible License(s): BSD-3-Clause
  1. /*
  2. * Automatically Tuned Linear Algebra Software v3.8.3
  3. * (C) Copyright 2003 R. Clint Whaley
  4. *
  5. * Redistribution and use in source and binary forms, with or without
  6. * modification, are permitted provided that the following conditions
  7. * are met:
  8. * 1. Redistributions of source code must retain the above copyright
  9. * notice, this list of conditions and the following disclaimer.
  10. * 2. Redistributions in binary form must reproduce the above copyright
  11. * notice, this list of conditions, and the following disclaimer in the
  12. * documentation and/or other materials provided with the distribution.
  13. * 3. The name of the ATLAS group or the names of its contributers may
  14. * not be used to endorse or promote products derived from this
  15. * software without specific written permission.
  16. *
  17. * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
  18. * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
  19. * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
  20. * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE ATLAS GROUP OR ITS CONTRIBUTORS
  21. * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
  22. * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
  23. * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
  24. * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
  25. * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
  26. * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
  27. * POSSIBILITY OF SUCH DAMAGE.
  28. *
  29. */
  30. #include "atlas_pkblas.h"
  31. void Mjoin(PATL,pputblk)(const int M, const int N, const TYPE *V, TYPE *C,
  32. int ldc, int ldcinc, const SCALAR beta)
  33. /*
  34. * Given a MxN block-major block V, copy to a packed C
  35. * ldcinc = 0 : General rectangular
  36. * ldcinc = 1 : Upper
  37. * ldcinc = -1 : Lower
  38. */
  39. {
  40. int i, j;
  41. const int mn = M*N;
  42. const register TYPE rbeta = *beta, ibeta = beta[1];
  43. register TYPE rc, ic, t0;
  44. if (ldcinc == -1) ldc--;
  45. ldc -= M;
  46. ldc += ldc;
  47. if (ibeta == ATL_rzero)
  48. {
  49. if (rbeta == ATL_rzero)
  50. {
  51. for (j=N; j; j--)
  52. {
  53. for (i=M; i; i--, C += 2)
  54. {
  55. *C = V[mn];
  56. C[1] = *V++;
  57. }
  58. C += ldc;
  59. ldc += ldcinc;
  60. }
  61. }
  62. else if (rbeta == ATL_rone)
  63. {
  64. for (j=N; j; j--)
  65. {
  66. for (i=M; i; i--, C += 2)
  67. {
  68. *C += V[mn];
  69. C[1] += *V++;
  70. }
  71. C += ldc;
  72. ldc += ldcinc;
  73. }
  74. }
  75. else /* beta real X */
  76. {
  77. for (j=N; j; j--)
  78. {
  79. for (i=M; i; i--, C += 2)
  80. {
  81. *C = *C * rbeta + V[mn];
  82. C[1] = C[1] * rbeta + *V++;
  83. }
  84. C += ldc;
  85. ldc += ldcinc;
  86. }
  87. }
  88. }
  89. else /* beta = complex X */
  90. {
  91. for (j=N; j; j--)
  92. {
  93. for (i=M; i; i--, C += 2)
  94. {
  95. t0 = rc = *C; ic = C[1];
  96. rc = rc * rbeta - ic * ibeta;
  97. ic = t0 * ibeta + ic * rbeta;
  98. rc += V[mn];
  99. ic += *V++;
  100. *C = rc;
  101. C[1] = ic;
  102. }
  103. C += ldc;
  104. ldc += ldcinc;
  105. }
  106. }
  107. }