PageRenderTime 69ms CodeModel.GetById 34ms RepoModel.GetById 0ms app.codeStats 1ms

/trunk/JuceLibraryCode/modules/juce_audio_basics/buffers/juce_FloatVectorOperations.cpp

#
C++ | 1163 lines | 960 code | 172 blank | 31 comment | 89 complexity | c4fc3409f6a4e1e6516141e5cb2d627e MD5 | raw file
Possible License(s): LGPL-3.0

Large files are truncated, but you can click here to view the full file

  1. /*
  2. ==============================================================================
  3. This file is part of the JUCE library.
  4. Copyright (c) 2015 - ROLI Ltd.
  5. Permission is granted to use this software under the terms of either:
  6. a) the GPL v2 (or any later version)
  7. b) the Affero GPL v3
  8. Details of these licenses can be found at: www.gnu.org/licenses
  9. JUCE is distributed in the hope that it will be useful, but WITHOUT ANY
  10. WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR
  11. A PARTICULAR PURPOSE. See the GNU General Public License for more details.
  12. ------------------------------------------------------------------------------
  13. To release a closed-source product which uses JUCE, commercial licenses are
  14. available: visit www.juce.com for more information.
  15. ==============================================================================
  16. */
  17. namespace FloatVectorHelpers
  18. {
  // Pointer-advance helpers, passed as the "increment" argument of the vector
  // loop macros. Each pointer moves forward by 16 bytes' worth of elements; the
  // element size is always taken from *dest, even when advancing the sources.
  #define JUCE_INCREMENT_SRC_DEST \
      dest += (16 / sizeof (*dest)); \
      src += (16 / sizeof (*dest));
  #define JUCE_INCREMENT_SRC1_SRC2_DEST \
      dest += (16 / sizeof (*dest)); \
      src1 += (16 / sizeof (*dest)); \
      src2 += (16 / sizeof (*dest));
  #define JUCE_INCREMENT_DEST \
      dest += (16 / sizeof (*dest));
  22. #if JUCE_USE_SSE_INTRINSICS
  23. static bool sse2Present = false;
  24. static bool isSSE2Available() noexcept
  25. {
  26. if (sse2Present)
  27. return true;
  28. sse2Present = SystemStats::hasSSE2();
  29. return sse2Present;
  30. }
  31. inline static bool isAligned (const void* p) noexcept
  32. {
  33. return (((pointer_sized_int) p) & 15) == 0;
  34. }
  35. struct BasicOps32
  36. {
  37. typedef float Type;
  38. typedef __m128 ParallelType;
  39. typedef __m128 IntegerType;
  40. enum { numParallel = 4 };
  41. // Integer and parallel types are the same for SSE. On neon they have different types
  42. static forcedinline IntegerType toint (ParallelType v) noexcept { return v; }
  43. static forcedinline ParallelType toflt (IntegerType v) noexcept { return v; }
  44. static forcedinline ParallelType load1 (Type v) noexcept { return _mm_load1_ps (&v); }
  45. static forcedinline ParallelType loadA (const Type* v) noexcept { return _mm_load_ps (v); }
  46. static forcedinline ParallelType loadU (const Type* v) noexcept { return _mm_loadu_ps (v); }
  47. static forcedinline void storeA (Type* dest, ParallelType a) noexcept { _mm_store_ps (dest, a); }
  48. static forcedinline void storeU (Type* dest, ParallelType a) noexcept { _mm_storeu_ps (dest, a); }
  49. static forcedinline ParallelType add (ParallelType a, ParallelType b) noexcept { return _mm_add_ps (a, b); }
  50. static forcedinline ParallelType sub (ParallelType a, ParallelType b) noexcept { return _mm_sub_ps (a, b); }
  51. static forcedinline ParallelType mul (ParallelType a, ParallelType b) noexcept { return _mm_mul_ps (a, b); }
  52. static forcedinline ParallelType max (ParallelType a, ParallelType b) noexcept { return _mm_max_ps (a, b); }
  53. static forcedinline ParallelType min (ParallelType a, ParallelType b) noexcept { return _mm_min_ps (a, b); }
  54. static forcedinline ParallelType bit_and (ParallelType a, ParallelType b) noexcept { return _mm_and_ps (a, b); }
  55. static forcedinline ParallelType bit_not (ParallelType a, ParallelType b) noexcept { return _mm_andnot_ps (a, b); }
  56. static forcedinline ParallelType bit_or (ParallelType a, ParallelType b) noexcept { return _mm_or_ps (a, b); }
  57. static forcedinline ParallelType bit_xor (ParallelType a, ParallelType b) noexcept { return _mm_xor_ps (a, b); }
  58. static forcedinline Type max (ParallelType a) noexcept { Type v[numParallel]; storeU (v, a); return jmax (v[0], v[1], v[2], v[3]); }
  59. static forcedinline Type min (ParallelType a) noexcept { Type v[numParallel]; storeU (v, a); return jmin (v[0], v[1], v[2], v[3]); }
  60. };
  61. struct BasicOps64
  62. {
  63. typedef double Type;
  64. typedef __m128d ParallelType;
  65. typedef __m128d IntegerType;
  66. enum { numParallel = 2 };
  67. // Integer and parallel types are the same for SSE. On neon they have different types
  68. static forcedinline IntegerType toint (ParallelType v) noexcept { return v; }
  69. static forcedinline ParallelType toflt (IntegerType v) noexcept { return v; }
  70. static forcedinline ParallelType load1 (Type v) noexcept { return _mm_load1_pd (&v); }
  71. static forcedinline ParallelType loadA (const Type* v) noexcept { return _mm_load_pd (v); }
  72. static forcedinline ParallelType loadU (const Type* v) noexcept { return _mm_loadu_pd (v); }
  73. static forcedinline void storeA (Type* dest, ParallelType a) noexcept { _mm_store_pd (dest, a); }
  74. static forcedinline void storeU (Type* dest, ParallelType a) noexcept { _mm_storeu_pd (dest, a); }
  75. static forcedinline ParallelType add (ParallelType a, ParallelType b) noexcept { return _mm_add_pd (a, b); }
  76. static forcedinline ParallelType sub (ParallelType a, ParallelType b) noexcept { return _mm_sub_pd (a, b); }
  77. static forcedinline ParallelType mul (ParallelType a, ParallelType b) noexcept { return _mm_mul_pd (a, b); }
  78. static forcedinline ParallelType max (ParallelType a, ParallelType b) noexcept { return _mm_max_pd (a, b); }
  79. static forcedinline ParallelType min (ParallelType a, ParallelType b) noexcept { return _mm_min_pd (a, b); }
  80. static forcedinline ParallelType bit_and (ParallelType a, ParallelType b) noexcept { return _mm_and_pd (a, b); }
  81. static forcedinline ParallelType bit_not (ParallelType a, ParallelType b) noexcept { return _mm_andnot_pd (a, b); }
  82. static forcedinline ParallelType bit_or (ParallelType a, ParallelType b) noexcept { return _mm_or_pd (a, b); }
  83. static forcedinline ParallelType bit_xor (ParallelType a, ParallelType b) noexcept { return _mm_xor_pd (a, b); }
  84. static forcedinline Type max (ParallelType a) noexcept { Type v[numParallel]; storeU (v, a); return jmax (v[0], v[1]); }
  85. static forcedinline Type min (ParallelType a) noexcept { Type v[numParallel]; storeU (v, a); return jmin (v[0], v[1]); }
  86. };
  // JUCE_BEGIN_VEC_OP picks the ops struct matching sizeof (*dest) and, if SSE2
  // is available at run time, opens a scope in which numLongOps is the number
  // of whole SIMD steps contained in num. The scope is left open on purpose.
  #define JUCE_BEGIN_VEC_OP \
      typedef FloatVectorHelpers::ModeType<sizeof(*dest)>::Mode Mode; \
      if (FloatVectorHelpers::isSSE2Available()) \
      { \
          const int numLongOps = num / Mode::numParallel;

  // JUCE_FINISH_VEC_OP closes that scope: num is reduced to the element count
  // left over after the SIMD steps (returning from the enclosing function when
  // nothing is left), then normalOp is run once per remaining element. When
  // the SIMD scope was not entered, normalOp runs for all num elements.
  #define JUCE_FINISH_VEC_OP(normalOp) \
          num &= (Mode::numParallel - 1); \
          if (num == 0) return; \
      } \
      for (int i = 0; i < num; ++i) normalOp;
  // In-place operation on dest only. Chooses aligned or unaligned load/store
  // according to the alignment of dest, then finishes with the scalar normalOp.
  #define JUCE_PERFORM_VEC_OP_DEST(normalOp, vecOp, locals, setupOp) \
      JUCE_BEGIN_VEC_OP \
      setupOp \
      if (FloatVectorHelpers::isAligned (dest)) \
          JUCE_VEC_LOOP (vecOp, dummy, Mode::loadA, Mode::storeA, locals, JUCE_INCREMENT_DEST) \
      else \
          JUCE_VEC_LOOP (vecOp, dummy, Mode::loadU, Mode::storeU, locals, JUCE_INCREMENT_DEST) \
      JUCE_FINISH_VEC_OP (normalOp)
  // Operation reading src and writing dest. The alignment of each pointer is
  // tested separately, so there is one loop per aligned/unaligned combination.
  #define JUCE_PERFORM_VEC_OP_SRC_DEST(normalOp, vecOp, locals, increment, setupOp) \
      JUCE_BEGIN_VEC_OP \
      setupOp \
      if (FloatVectorHelpers::isAligned (dest)) \
      { \
          if (FloatVectorHelpers::isAligned (src)) \
              JUCE_VEC_LOOP (vecOp, Mode::loadA, Mode::loadA, Mode::storeA, locals, increment) \
          else \
              JUCE_VEC_LOOP (vecOp, Mode::loadU, Mode::loadA, Mode::storeA, locals, increment) \
      } \
      else \
      { \
          if (FloatVectorHelpers::isAligned (src)) \
              JUCE_VEC_LOOP (vecOp, Mode::loadA, Mode::loadU, Mode::storeU, locals, increment) \
          else \
              JUCE_VEC_LOOP (vecOp, Mode::loadU, Mode::loadU, Mode::storeU, locals, increment) \
      } \
      JUCE_FINISH_VEC_OP (normalOp)
  // Operation reading src1 and src2 and writing dest (dest is not loaded).
  // One loop is emitted for each of the eight alignment combinations of
  // dest, src1 and src2.
  #define JUCE_PERFORM_VEC_OP_SRC1_SRC2_DEST(normalOp, vecOp, locals, increment, setupOp) \
      JUCE_BEGIN_VEC_OP \
      setupOp \
      if (FloatVectorHelpers::isAligned (dest)) \
      { \
          if (FloatVectorHelpers::isAligned (src1)) \
          { \
              if (FloatVectorHelpers::isAligned (src2)) \
                  JUCE_VEC_LOOP_TWO_SOURCES (vecOp, Mode::loadA, Mode::loadA, Mode::storeA, locals, increment) \
              else \
                  JUCE_VEC_LOOP_TWO_SOURCES (vecOp, Mode::loadA, Mode::loadU, Mode::storeA, locals, increment) \
          } \
          else \
          { \
              if (FloatVectorHelpers::isAligned (src2)) \
                  JUCE_VEC_LOOP_TWO_SOURCES (vecOp, Mode::loadU, Mode::loadA, Mode::storeA, locals, increment) \
              else \
                  JUCE_VEC_LOOP_TWO_SOURCES (vecOp, Mode::loadU, Mode::loadU, Mode::storeA, locals, increment) \
          } \
      } \
      else \
      { \
          if (FloatVectorHelpers::isAligned (src1)) \
          { \
              if (FloatVectorHelpers::isAligned (src2)) \
                  JUCE_VEC_LOOP_TWO_SOURCES (vecOp, Mode::loadA, Mode::loadA, Mode::storeU, locals, increment) \
              else \
                  JUCE_VEC_LOOP_TWO_SOURCES (vecOp, Mode::loadA, Mode::loadU, Mode::storeU, locals, increment) \
          } \
          else \
          { \
              if (FloatVectorHelpers::isAligned (src2)) \
                  JUCE_VEC_LOOP_TWO_SOURCES (vecOp, Mode::loadU, Mode::loadA, Mode::storeU, locals, increment) \
              else \
                  JUCE_VEC_LOOP_TWO_SOURCES (vecOp, Mode::loadU, Mode::loadU, Mode::storeU, locals, increment) \
          } \
      } \
      JUCE_FINISH_VEC_OP (normalOp)
  // Operation reading src1, src2 and the current contents of dest, then
  // writing dest. The dest load and store share dest's alignment choice; the
  // two sources are tested independently (eight combinations in total).
  #define JUCE_PERFORM_VEC_OP_SRC1_SRC2_DEST_DEST(normalOp, vecOp, locals, increment, setupOp) \
      JUCE_BEGIN_VEC_OP \
      setupOp \
      if (FloatVectorHelpers::isAligned (dest)) \
      { \
          if (FloatVectorHelpers::isAligned (src1)) \
          { \
              if (FloatVectorHelpers::isAligned (src2)) \
                  JUCE_VEC_LOOP_TWO_SOURCES_WITH_DEST_LOAD (vecOp, Mode::loadA, Mode::loadA, Mode::loadA, Mode::storeA, locals, increment) \
              else \
                  JUCE_VEC_LOOP_TWO_SOURCES_WITH_DEST_LOAD (vecOp, Mode::loadA, Mode::loadU, Mode::loadA, Mode::storeA, locals, increment) \
          } \
          else \
          { \
              if (FloatVectorHelpers::isAligned (src2)) \
                  JUCE_VEC_LOOP_TWO_SOURCES_WITH_DEST_LOAD (vecOp, Mode::loadU, Mode::loadA, Mode::loadA, Mode::storeA, locals, increment) \
              else \
                  JUCE_VEC_LOOP_TWO_SOURCES_WITH_DEST_LOAD (vecOp, Mode::loadU, Mode::loadU, Mode::loadA, Mode::storeA, locals, increment) \
          } \
      } \
      else \
      { \
          if (FloatVectorHelpers::isAligned (src1)) \
          { \
              if (FloatVectorHelpers::isAligned (src2)) \
                  JUCE_VEC_LOOP_TWO_SOURCES_WITH_DEST_LOAD (vecOp, Mode::loadA, Mode::loadA, Mode::loadU, Mode::storeU, locals, increment) \
              else \
                  JUCE_VEC_LOOP_TWO_SOURCES_WITH_DEST_LOAD (vecOp, Mode::loadA, Mode::loadU, Mode::loadU, Mode::storeU, locals, increment) \
          } \
          else \
          { \
              if (FloatVectorHelpers::isAligned (src2)) \
                  JUCE_VEC_LOOP_TWO_SOURCES_WITH_DEST_LOAD (vecOp, Mode::loadU, Mode::loadA, Mode::loadU, Mode::storeU, locals, increment) \
              else \
                  JUCE_VEC_LOOP_TWO_SOURCES_WITH_DEST_LOAD (vecOp, Mode::loadU, Mode::loadU, Mode::loadU, Mode::storeU, locals, increment) \
          } \
      } \
      JUCE_FINISH_VEC_OP (normalOp)
  177. //==============================================================================
  178. #elif JUCE_USE_ARM_NEON
  179. struct BasicOps32
  180. {
  181. typedef float Type;
  182. typedef float32x4_t ParallelType;
  183. typedef uint32x4_t IntegerType;
  184. enum { numParallel = 4 };
  185. static forcedinline IntegerType toint (ParallelType v) noexcept { union { ParallelType f; IntegerType i; } u; u.f = v; return u.i; }
  186. static forcedinline ParallelType toflt (IntegerType v) noexcept { union { ParallelType f; IntegerType i; } u; u.i = v; return u.f; }
  187. static forcedinline ParallelType load1 (Type v) noexcept { return vld1q_dup_f32 (&v); }
  188. static forcedinline ParallelType loadA (const Type* v) noexcept { return vld1q_f32 (v); }
  189. static forcedinline ParallelType loadU (const Type* v) noexcept { return vld1q_f32 (v); }
  190. static forcedinline void storeA (Type* dest, ParallelType a) noexcept { vst1q_f32 (dest, a); }
  191. static forcedinline void storeU (Type* dest, ParallelType a) noexcept { vst1q_f32 (dest, a); }
  192. static forcedinline ParallelType add (ParallelType a, ParallelType b) noexcept { return vaddq_f32 (a, b); }
  193. static forcedinline ParallelType sub (ParallelType a, ParallelType b) noexcept { return vsubq_f32 (a, b); }
  194. static forcedinline ParallelType mul (ParallelType a, ParallelType b) noexcept { return vmulq_f32 (a, b); }
  195. static forcedinline ParallelType max (ParallelType a, ParallelType b) noexcept { return vmaxq_f32 (a, b); }
  196. static forcedinline ParallelType min (ParallelType a, ParallelType b) noexcept { return vminq_f32 (a, b); }
  197. static forcedinline ParallelType bit_and (ParallelType a, ParallelType b) noexcept { return toflt (vandq_u32 (toint (a), toint (b))); }
  198. static forcedinline ParallelType bit_not (ParallelType a, ParallelType b) noexcept { return toflt (vbicq_u32 (toint (a), toint (b))); }
  199. static forcedinline ParallelType bit_or (ParallelType a, ParallelType b) noexcept { return toflt (vorrq_u32 (toint (a), toint (b))); }
  200. static forcedinline ParallelType bit_xor (ParallelType a, ParallelType b) noexcept { return toflt (veorq_u32 (toint (a), toint (b))); }
  201. static forcedinline Type max (ParallelType a) noexcept { Type v[numParallel]; storeU (v, a); return jmax (v[0], v[1], v[2], v[3]); }
  202. static forcedinline Type min (ParallelType a) noexcept { Type v[numParallel]; storeU (v, a); return jmin (v[0], v[1], v[2], v[3]); }
  203. };
  204. struct BasicOps64
  205. {
  206. typedef double Type;
  207. typedef double ParallelType;
  208. typedef uint64 IntegerType;
  209. enum { numParallel = 1 };
  210. static forcedinline IntegerType toint (ParallelType v) noexcept { union { ParallelType f; IntegerType i; } u; u.f = v; return u.i; }
  211. static forcedinline ParallelType toflt (IntegerType v) noexcept { union { ParallelType f; IntegerType i; } u; u.i = v; return u.f; }
  212. static forcedinline ParallelType load1 (Type v) noexcept { return v; }
  213. static forcedinline ParallelType loadA (const Type* v) noexcept { return *v; }
  214. static forcedinline ParallelType loadU (const Type* v) noexcept { return *v; }
  215. static forcedinline void storeA (Type* dest, ParallelType a) noexcept { *dest = a; }
  216. static forcedinline void storeU (Type* dest, ParallelType a) noexcept { *dest = a; }
  217. static forcedinline ParallelType add (ParallelType a, ParallelType b) noexcept { return a + b; }
  218. static forcedinline ParallelType sub (ParallelType a, ParallelType b) noexcept { return a - b; }
  219. static forcedinline ParallelType mul (ParallelType a, ParallelType b) noexcept { return a * b; }
  220. static forcedinline ParallelType max (ParallelType a, ParallelType b) noexcept { return jmax (a, b); }
  221. static forcedinline ParallelType min (ParallelType a, ParallelType b) noexcept { return jmin (a, b); }
  222. static forcedinline ParallelType bit_and (ParallelType a, ParallelType b) noexcept { return toflt (toint (a) & toint (b)); }
  223. static forcedinline ParallelType bit_not (ParallelType a, ParallelType b) noexcept { return toflt ((~toint (a)) & toint (b)); }
  224. static forcedinline ParallelType bit_or (ParallelType a, ParallelType b) noexcept { return toflt (toint (a) | toint (b)); }
  225. static forcedinline ParallelType bit_xor (ParallelType a, ParallelType b) noexcept { return toflt (toint (a) ^ toint (b)); }
  226. static forcedinline Type max (ParallelType a) noexcept { return a; }
  227. static forcedinline Type min (ParallelType a) noexcept { return a; }
  228. };
  // NEON flavour of JUCE_BEGIN_VEC_OP: picks the ops struct matching
  // sizeof (*dest) and opens the SIMD scope only when that struct really
  // processes more than one element per step. The scope is left open on purpose.
  #define JUCE_BEGIN_VEC_OP \
      typedef FloatVectorHelpers::ModeType<sizeof(*dest)>::Mode Mode; \
      if (Mode::numParallel > 1) \
      { \
          const int numLongOps = num / Mode::numParallel;

  // Closes that scope: num is reduced to the element count left over after the
  // SIMD steps (returning from the enclosing function when nothing is left),
  // then normalOp is run once per remaining element.
  #define JUCE_FINISH_VEC_OP(normalOp) \
          num &= (Mode::numParallel - 1); \
          if (num == 0) return; \
      } \
      for (int i = 0; i < num; ++i) normalOp;
  // NEON flavours of the JUCE_PERFORM_VEC_OP_* family. No alignment dispatch is
  // done here: every variant uses the loadU / storeU operations.

  // In-place operation on dest only.
  #define JUCE_PERFORM_VEC_OP_DEST(normalOp, vecOp, locals, setupOp) \
      JUCE_BEGIN_VEC_OP \
      setupOp \
      JUCE_VEC_LOOP (vecOp, dummy, Mode::loadU, Mode::storeU, locals, JUCE_INCREMENT_DEST) \
      JUCE_FINISH_VEC_OP (normalOp)

  // Operation reading src and writing dest.
  #define JUCE_PERFORM_VEC_OP_SRC_DEST(normalOp, vecOp, locals, increment, setupOp) \
      JUCE_BEGIN_VEC_OP \
      setupOp \
      JUCE_VEC_LOOP (vecOp, Mode::loadU, Mode::loadU, Mode::storeU, locals, increment) \
      JUCE_FINISH_VEC_OP (normalOp)

  // Operation reading src1 and src2 and writing dest.
  #define JUCE_PERFORM_VEC_OP_SRC1_SRC2_DEST(normalOp, vecOp, locals, increment, setupOp) \
      JUCE_BEGIN_VEC_OP \
      setupOp \
      JUCE_VEC_LOOP_TWO_SOURCES (vecOp, Mode::loadU, Mode::loadU, Mode::storeU, locals, increment) \
      JUCE_FINISH_VEC_OP (normalOp)

  // Operation reading src1, src2 and dest, then writing dest.
  #define JUCE_PERFORM_VEC_OP_SRC1_SRC2_DEST_DEST(normalOp, vecOp, locals, increment, setupOp) \
      JUCE_BEGIN_VEC_OP \
      setupOp \
      JUCE_VEC_LOOP_TWO_SOURCES_WITH_DEST_LOAD (vecOp, Mode::loadU, Mode::loadU, Mode::loadU, Mode::storeU, locals, increment) \
      JUCE_FINISH_VEC_OP (normalOp)
  259. //==============================================================================
  260. #else
  // Scalar fallbacks used when neither SSE nor NEON is enabled: the vector
  // arguments are ignored and normalOp simply runs once for each of the num
  // elements.
  #define JUCE_PERFORM_VEC_OP_DEST(normalOp, vecOp, locals, setupOp) \
      for (int i = 0; i < num; ++i) normalOp;

  #define JUCE_PERFORM_VEC_OP_SRC_DEST(normalOp, vecOp, locals, increment, setupOp) \
      for (int i = 0; i < num; ++i) normalOp;

  #define JUCE_PERFORM_VEC_OP_SRC1_SRC2_DEST(normalOp, vecOp, locals, increment, setupOp) \
      for (int i = 0; i < num; ++i) normalOp;

  #define JUCE_PERFORM_VEC_OP_SRC1_SRC2_DEST_DEST(normalOp, vecOp, locals, increment, setupOp) \
      for (int i = 0; i < num; ++i) normalOp;
  269. #endif
  270. //==============================================================================
  // The SIMD loops themselves. Each runs numLongOps times; one iteration
  // declares the input vectors through the "locals" macro, stores the result
  // of vecOp to dest with dstStore, and then advances the pointers with
  // "increment".

  // One source (or none) plus dest.
  #define JUCE_VEC_LOOP(vecOp, srcLoad, dstLoad, dstStore, locals, increment) \
      for (int i = 0; i < numLongOps; ++i) \
      { \
          locals (srcLoad, dstLoad); \
          dstStore (dest, vecOp); \
          increment; \
      }

  // Two sources; dest is only written.
  #define JUCE_VEC_LOOP_TWO_SOURCES(vecOp, src1Load, src2Load, dstStore, locals, increment) \
      for (int i = 0; i < numLongOps; ++i) \
      { \
          locals (src1Load, src2Load); \
          dstStore (dest, vecOp); \
          increment; \
      }

  // Two sources; dest is loaded as well as written.
  #define JUCE_VEC_LOOP_TWO_SOURCES_WITH_DEST_LOAD(vecOp, src1Load, src2Load, dstLoad, dstStore, locals, increment) \
      for (int i = 0; i < numLongOps; ++i) \
      { \
          locals (src1Load, src2Load, dstLoad); \
          dstStore (dest, vecOp); \
          increment; \
      }
  // "locals" macros for the loops: each declares the vectors that the vecOp
  // expression may refer to -- d for dest, s for src, s1 / s2 for src1 / src2 --
  // using the load operations handed in by the caller.
  #define JUCE_LOAD_NONE(srcLoad, dstLoad)

  #define JUCE_LOAD_DEST(srcLoad, dstLoad) \
      const Mode::ParallelType d = dstLoad (dest);

  #define JUCE_LOAD_SRC(srcLoad, dstLoad) \
      const Mode::ParallelType s = srcLoad (src);

  #define JUCE_LOAD_SRC1_SRC2(src1Load, src2Load) \
      const Mode::ParallelType s1 = src1Load (src1), s2 = src2Load (src2);

  #define JUCE_LOAD_SRC1_SRC2_DEST(src1Load, src2Load, dstLoad) \
      const Mode::ParallelType d = dstLoad (dest), s1 = src1Load (src1), s2 = src2Load (src2);

  #define JUCE_LOAD_SRC_DEST(srcLoad, dstLoad) \
      const Mode::ParallelType d = dstLoad (dest), s = srcLoad (src);
  298. #if JUCE_USE_SSE_INTRINSICS || JUCE_USE_ARM_NEON
  299. template<int typeSize> struct ModeType { typedef BasicOps32 Mode; };
  300. template<> struct ModeType<8> { typedef BasicOps64 Mode; };
  301. template <typename Mode>
  302. struct MinMax
  303. {
  304. typedef typename Mode::Type Type;
  305. typedef typename Mode::ParallelType ParallelType;
  306. static Type findMinOrMax (const Type* src, int num, const bool isMinimum) noexcept
  307. {
  308. int numLongOps = num / Mode::numParallel;
  309. #if JUCE_USE_SSE_INTRINSICS
  310. if (numLongOps > 1 && isSSE2Available())
  311. #else
  312. if (numLongOps > 1)
  313. #endif
  314. {
  315. ParallelType val;
  316. #if ! JUCE_USE_ARM_NEON
  317. if (isAligned (src))
  318. {
  319. val = Mode::loadA (src);
  320. if (isMinimum)
  321. {
  322. while (--numLongOps > 0)
  323. {
  324. src += Mode::numParallel;
  325. val = Mode::min (val, Mode::loadA (src));
  326. }
  327. }
  328. else
  329. {
  330. while (--numLongOps > 0)
  331. {
  332. src += Mode::numParallel;
  333. val = Mode::max (val, Mode::loadA (src));
  334. }
  335. }
  336. }
  337. else
  338. #endif
  339. {
  340. val = Mode::loadU (src);
  341. if (isMinimum)
  342. {
  343. while (--numLongOps > 0)
  344. {
  345. src += Mode::numParallel;
  346. val = Mode::min (val, Mode::loadU (src));
  347. }
  348. }
  349. else
  350. {
  351. while (--numLongOps > 0)
  352. {
  353. src += Mode::numParallel;
  354. val = Mode::max (val, Mode::loadU (src));
  355. }
  356. }
  357. }
  358. Type result = isMinimum ? Mode::min (val)
  359. : Mode::max (val);
  360. num &= (Mode::numParallel - 1);
  361. src += Mode::numParallel;
  362. for (int i = 0; i < num; ++i)
  363. result = isMinimum ? jmin (result, src[i])
  364. : jmax (result, src[i]);
  365. return result;
  366. }
  367. return isMinimum ? juce::findMinimum (src, num)
  368. : juce::findMaximum (src, num);
  369. }
  370. static Range<Type> findMinAndMax (const Type* src, int num) noexcept
  371. {
  372. int numLongOps = num / Mode::numParallel;
  373. #if JUCE_USE_SSE_INTRINSICS
  374. if (numLongOps > 1 && isSSE2Available())
  375. #else
  376. if (numLongOps > 1)
  377. #endif
  378. {
  379. ParallelType mn, mx;
  380. #if ! JUCE_USE_ARM_NEON
  381. if (isAligned (src))
  382. {
  383. mn = Mode::loadA (src);
  384. mx = mn;
  385. while (--numLongOps > 0)
  386. {
  387. src += Mode::numParallel;
  388. const ParallelType v = Mode::loadA (src);
  389. mn = Mode::min (mn, v);
  390. mx = Mode::max (mx, v);
  391. }
  392. }
  393. else
  394. #endif
  395. {
  396. mn = Mode::loadU (src);
  397. mx = mn;
  398. while (--numLongOps > 0)
  399. {
  400. src += Mode::numParallel;
  401. const ParallelType v = Mode::loadU (src);
  402. mn = Mode::min (mn, v);
  403. mx = Mode::max (mx, v);
  404. }
  405. }
  406. Range<Type> result (Mode::min (mn),
  407. Mode::max (mx));
  408. num &= (Mode::numParallel - 1);
  409. src += Mode::numParallel;
  410. for (int i = 0; i < num; ++i)
  411. result = result.getUnionWith (src[i]);
  412. return result;
  413. }
  414. return Range<Type>::findMinAndMax (src, num);
  415. }
  416. };
  417. #endif
  418. }
  419. //==============================================================================
  420. void JUCE_CALLTYPE FloatVectorOperations::clear (float* dest, int num) noexcept
  421. {
  422. #if JUCE_USE_VDSP_FRAMEWORK
  423. vDSP_vclr (dest, 1, (size_t) num);
  424. #else
  425. zeromem (dest, (size_t) num * sizeof (float));
  426. #endif
  427. }
  428. void JUCE_CALLTYPE FloatVectorOperations::clear (double* dest, int num) noexcept
  429. {
  430. #if JUCE_USE_VDSP_FRAMEWORK
  431. vDSP_vclrD (dest, 1, (size_t) num);
  432. #else
  433. zeromem (dest, (size_t) num * sizeof (double));
  434. #endif
  435. }
  436. void JUCE_CALLTYPE FloatVectorOperations::fill (float* dest, float valueToFill, int num) noexcept
  437. {
  438. #if JUCE_USE_VDSP_FRAMEWORK
  439. vDSP_vfill (&valueToFill, dest, 1, (size_t) num);
  440. #else
  441. JUCE_PERFORM_VEC_OP_DEST (dest[i] = valueToFill, val, JUCE_LOAD_NONE,
  442. const Mode::ParallelType val = Mode::load1 (valueToFill);)
  443. #endif
  444. }
  445. void JUCE_CALLTYPE FloatVectorOperations::fill (double* dest, double valueToFill, int num) noexcept
  446. {
  447. #if JUCE_USE_VDSP_FRAMEWORK
  448. vDSP_vfillD (&valueToFill, dest, 1, (size_t) num);
  449. #else
  450. JUCE_PERFORM_VEC_OP_DEST (dest[i] = valueToFill, val, JUCE_LOAD_NONE,
  451. const Mode::ParallelType val = Mode::load1 (valueToFill);)
  452. #endif
  453. }
  454. void JUCE_CALLTYPE FloatVectorOperations::copy (float* dest, const float* src, int num) noexcept
  455. {
  456. memcpy (dest, src, (size_t) num * sizeof (float));
  457. }
  458. void JUCE_CALLTYPE FloatVectorOperations::copy (double* dest, const double* src, int num) noexcept
  459. {
  460. memcpy (dest, src, (size_t) num * sizeof (double));
  461. }
  462. void JUCE_CALLTYPE FloatVectorOperations::copyWithMultiply (float* dest, const float* src, float multiplier, int num) noexcept
  463. {
  464. #if JUCE_USE_VDSP_FRAMEWORK
  465. vDSP_vsmul (src, 1, &multiplier, dest, 1, (vDSP_Length) num);
  466. #else
  467. JUCE_PERFORM_VEC_OP_SRC_DEST (dest[i] = src[i] * multiplier, Mode::mul (mult, s),
  468. JUCE_LOAD_SRC, JUCE_INCREMENT_SRC_DEST,
  469. const Mode::ParallelType mult = Mode::load1 (multiplier);)
  470. #endif
  471. }
  472. void JUCE_CALLTYPE FloatVectorOperations::copyWithMultiply (double* dest, const double* src, double multiplier, int num) noexcept
  473. {
  474. #if JUCE_USE_VDSP_FRAMEWORK
  475. vDSP_vsmulD (src, 1, &multiplier, dest, 1, (vDSP_Length) num);
  476. #else
  477. JUCE_PERFORM_VEC_OP_SRC_DEST (dest[i] = src[i] * multiplier, Mode::mul (mult, s),
  478. JUCE_LOAD_SRC, JUCE_INCREMENT_SRC_DEST,
  479. const Mode::ParallelType mult = Mode::load1 (multiplier);)
  480. #endif
  481. }
  482. void JUCE_CALLTYPE FloatVectorOperations::add (float* dest, float amount, int num) noexcept
  483. {
  484. #if JUCE_USE_VDSP_FRAMEWORK
  485. vDSP_vsadd (dest, 1, &amount, dest, 1, (vDSP_Length) num);
  486. #else
  487. JUCE_PERFORM_VEC_OP_DEST (dest[i] += amount, Mode::add (d, amountToAdd), JUCE_LOAD_DEST,
  488. const Mode::ParallelType amountToAdd = Mode::load1 (amount);)
  489. #endif
  490. }
  491. void JUCE_CALLTYPE FloatVectorOperations::add (double* dest, double amount, int num) noexcept
  492. {
  493. JUCE_PERFORM_VEC_OP_DEST (dest[i] += amount, Mode::add (d, amountToAdd), JUCE_LOAD_DEST,
  494. const Mode::ParallelType amountToAdd = Mode::load1 (amount);)
  495. }
  496. void JUCE_CALLTYPE FloatVectorOperations::add (float* dest, float* src, float amount, int num) noexcept
  497. {
  498. #if JUCE_USE_VDSP_FRAMEWORK
  499. vDSP_vsadd (src, 1, &amount, dest, 1, (vDSP_Length) num);
  500. #else
  501. JUCE_PERFORM_VEC_OP_SRC_DEST (dest[i] = src[i] + amount, Mode::add (am, s),
  502. JUCE_LOAD_SRC, JUCE_INCREMENT_SRC_DEST,
  503. const Mode::ParallelType am = Mode::load1 (amount);)
  504. #endif
  505. }
  506. void JUCE_CALLTYPE FloatVectorOperations::add (double* dest, double* src, double amount, int num) noexcept
  507. {
  508. #if JUCE_USE_VDSP_FRAMEWORK
  509. vDSP_vsaddD (src, 1, &amount, dest, 1, (vDSP_Length) num);
  510. #else
  511. JUCE_PERFORM_VEC_OP_SRC_DEST (dest[i] = src[i] + amount, Mode::add (am, s),
  512. JUCE_LOAD_SRC, JUCE_INCREMENT_SRC_DEST,
  513. const Mode::ParallelType am = Mode::load1 (amount);)
  514. #endif
  515. }
  516. void JUCE_CALLTYPE FloatVectorOperations::add (float* dest, const float* src, int num) noexcept
  517. {
  518. #if JUCE_USE_VDSP_FRAMEWORK
  519. vDSP_vadd (src, 1, dest, 1, dest, 1, (vDSP_Length) num);
  520. #else
  521. JUCE_PERFORM_VEC_OP_SRC_DEST (dest[i] += src[i], Mode::add (d, s), JUCE_LOAD_SRC_DEST, JUCE_INCREMENT_SRC_DEST, )
  522. #endif
  523. }
  524. void JUCE_CALLTYPE FloatVectorOperations::add (double* dest, const double* src, int num) noexcept
  525. {
  526. #if JUCE_USE_VDSP_FRAMEWORK
  527. vDSP_vaddD (src, 1, dest, 1, dest, 1, (vDSP_Length) num);
  528. #else
  529. JUCE_PERFORM_VEC_OP_SRC_DEST (dest[i] += src[i], Mode::add (d, s), JUCE_LOAD_SRC_DEST, JUCE_INCREMENT_SRC_DEST, )
  530. #endif
  531. }
  532. void JUCE_CALLTYPE FloatVectorOperations::add (float* dest, const float* src1, const float* src2, int num) noexcept
  533. {
  534. #if JUCE_USE_VDSP_FRAMEWORK
  535. vDSP_vadd (src1, 1, src2, 1, dest, 1, (vDSP_Length) num);
  536. #else
  537. JUCE_PERFORM_VEC_OP_SRC1_SRC2_DEST (dest[i] = src1[i] + src2[i], Mode::add (s1, s2), JUCE_LOAD_SRC1_SRC2, JUCE_INCREMENT_SRC1_SRC2_DEST, )
  538. #endif
  539. }
  540. void JUCE_CALLTYPE FloatVectorOperations::add (double* dest, const double* src1, const double* src2, int num) noexcept
  541. {
  542. #if JUCE_USE_VDSP_FRAMEWORK
  543. vDSP_vaddD (src1, 1, src2, 1, dest, 1, (vDSP_Length) num);
  544. #else
  545. JUCE_PERFORM_VEC_OP_SRC1_SRC2_DEST (dest[i] = src1[i] + src2[i], Mode::add (s1, s2), JUCE_LOAD_SRC1_SRC2, JUCE_INCREMENT_SRC1_SRC2_DEST, )
  546. #endif
  547. }
  548. void JUCE_CALLTYPE FloatVectorOperations::subtract (float* dest, const float* src, int num) noexcept
  549. {
  550. #if JUCE_USE_VDSP_FRAMEWORK
  551. vDSP_vsub (src, 1, dest, 1, dest, 1, (vDSP_Length) num);
  552. #else
  553. JUCE_PERFORM_VEC_OP_SRC_DEST (dest[i] -= src[i], Mode::sub (d, s), JUCE_LOAD_SRC_DEST, JUCE_INCREMENT_SRC_DEST, )
  554. #endif
  555. }
  556. void JUCE_CALLTYPE FloatVectorOperations::subtract (double* dest, const double* src, int num) noexcept
  557. {
  558. #if JUCE_USE_VDSP_FRAMEWORK
  559. vDSP_vsubD (src, 1, dest, 1, dest, 1, (vDSP_Length) num);
  560. #else
  561. JUCE_PERFORM_VEC_OP_SRC_DEST (dest[i] -= src[i], Mode::sub (d, s), JUCE_LOAD_SRC_DEST, JUCE_INCREMENT_SRC_DEST, )
  562. #endif
  563. }
  564. void JUCE_CALLTYPE FloatVectorOperations::subtract (float* dest, const float* src1, const float* src2, int num) noexcept
  565. {
  566. #if JUCE_USE_VDSP_FRAMEWORK
  567. vDSP_vsub (src2, 1, src1, 1, dest, 1, (vDSP_Length) num);
  568. #else
  569. JUCE_PERFORM_VEC_OP_SRC1_SRC2_DEST (dest[i] = src1[i] - src2[i], Mode::sub (s1, s2), JUCE_LOAD_SRC1_SRC2, JUCE_INCREMENT_SRC1_SRC2_DEST, )
  570. #endif
  571. }
  572. void JUCE_CALLTYPE FloatVectorOperations::subtract (double* dest, const double* src1, const double* src2, int num) noexcept
  573. {
  574. #if JUCE_USE_VDSP_FRAMEWORK
  575. vDSP_vsubD (src2, 1, src1, 1, dest, 1, (vDSP_Length) num);
  576. #else
  577. JUCE_PERFORM_VEC_OP_SRC1_SRC2_DEST (dest[i] = src1[i] - src2[i], Mode::sub (s1, s2), JUCE_LOAD_SRC1_SRC2, JUCE_INCREMENT_SRC1_SRC2_DEST, )
  578. #endif
  579. }
  580. void JUCE_CALLTYPE FloatVectorOperations::addWithMultiply (float* dest, const float* src, float multiplier, int num) noexcept
  581. {
  582. #if JUCE_USE_VDSP_FRAMEWORK
  583. vDSP_vsma (src, 1, &multiplier, dest, 1, dest, 1, (vDSP_Length) num);
  584. #else
  585. JUCE_PERFORM_VEC_OP_SRC_DEST (dest[i] += src[i] * multiplier, Mode::add (d, Mode::mul (mult, s)),
  586. JUCE_LOAD_SRC_DEST, JUCE_INCREMENT_SRC_DEST,
  587. const Mode::ParallelType mult = Mode::load1 (multiplier);)
  588. #endif
  589. }
  590. void JUCE_CALLTYPE FloatVectorOperations::addWithMultiply (double* dest, const double* src, double multiplier, int num) noexcept
  591. {
  592. JUCE_PERFORM_VEC_OP_SRC_DEST (dest[i] += src[i] * multiplier, Mode::add (d, Mode::mul (mult, s)),
  593. JUCE_LOAD_SRC_DEST, JUCE_INCREMENT_SRC_DEST,
  594. const Mode::ParallelType mult = Mode::load1 (multiplier);)
  595. }
  596. void JUCE_CALLTYPE FloatVectorOperations::addWithMultiply (float* dest, const float* src1, const float* src2, int num) noexcept
  597. {
  598. #if JUCE_USE_VDSP_FRAMEWORK
  599. vDSP_vma ((float*) src1, 1, (float*) src2, 1, dest, 1, dest, 1, (vDSP_Length) num);
  600. #else
  601. JUCE_PERFORM_VEC_OP_SRC1_SRC2_DEST_DEST (dest[i] += src1[i] * src2[i], Mode::add (d, Mode::mul (s1, s2)),
  602. JUCE_LOAD_SRC1_SRC2_DEST,
  603. JUCE_INCREMENT_SRC1_SRC2_DEST, )
  604. #endif
  605. }
  606. void JUCE_CALLTYPE FloatVectorOperations::addWithMultiply (double* dest, const double* src1, const double* src2, int num) noexcept
  607. {
  608. #if JUCE_USE_VDSP_FRAMEWORK
  609. vDSP_vmaD ((double*) src1, 1, (double*) src2, 1, dest, 1, dest, 1, (vDSP_Length) num);
  610. #else
  611. JUCE_PERFORM_VEC_OP_SRC1_SRC2_DEST_DEST (dest[i] += src1[i] * src2[i], Mode::add (d, Mode::mul (s1, s2)),
  612. JUCE_LOAD_SRC1_SRC2_DEST,
  613. JUCE_INCREMENT_SRC1_SRC2_DEST, )
  614. #endif
  615. }
  616. void JUCE_CALLTYPE FloatVectorOperations::multiply (float* dest, const float* src, int num) noexcept
  617. {
  618. #if JUCE_USE_VDSP_FRAMEWORK
  619. vDSP_vmul (src, 1, dest, 1, dest, 1, (vDSP_Length) num);
  620. #else
  621. JUCE_PERFORM_VEC_OP_SRC_DEST (dest[i] *= src[i], Mode::mul (d, s), JUCE_LOAD_SRC_DEST, JUCE_INCREMENT_SRC_DEST, )
  622. #endif
  623. }
  624. void JUCE_CALLTYPE FloatVectorOperations::multiply (double* dest, const double* src, int num) noexcept
  625. {
  626. #if JUCE_USE_VDSP_FRAMEWORK
  627. vDSP_vmulD (src, 1, dest, 1, dest, 1, (vDSP_Length) num);
  628. #else
  629. JUCE_PERFORM_VEC_OP_SRC_DEST (dest[i] *= src[i], Mode::mul (d, s), JUCE_LOAD_SRC_DEST, JUCE_INCREMENT_SRC_DEST, )
  630. #endif
  631. }
  632. void JUCE_CALLTYPE FloatVectorOperations::multiply (float* dest, const float* src1, const float* src2, int num) noexcept
  633. {
  634. #if JUCE_USE_VDSP_FRAMEWORK
  635. vDSP_vmul (src1, 1, src2, 1, dest, 1, (vDSP_Length) num);
  636. #else
  637. JUCE_PERFORM_VEC_OP_SRC1_SRC2_DEST (dest[i] = src1[i] * src2[i], Mode::mul (s1, s2), JUCE_LOAD_SRC1_SRC2, JUCE_INCREMENT_SRC1_SRC2_DEST, )
  638. #endif
  639. }
  640. void JUCE_CALLTYPE FloatVectorOperations::multiply (double* dest, const double* src1, const double* src2, int num) noexcept
  641. {
  642. #if JUCE_USE_VDSP_FRAMEWORK
  643. vDSP_vmulD (src1, 1, src2, 1, dest, 1, (vDSP_Length) num);
  644. #else
  645. JUCE_PERFORM_VEC_OP_SRC1_SRC2_DEST (dest[i] = src1[i] * src2[i], Mode::mul (s1, s2), JUCE_LOAD_SRC1_SRC2, JUCE_INCREMENT_SRC1_SRC2_DEST, )
  646. #endif
  647. }
  648. void JUCE_CALLTYPE FloatVectorOperations::multiply (float* dest, float multiplier, int num) noexcept
  649. {
  650. #if JUCE_USE_VDSP_FRAMEWORK
  651. vDSP_vsmul (dest, 1, &multiplier, dest, 1, (vDSP_Length) num);
  652. #else
  653. JUCE_PERFORM_VEC_OP_DEST (dest[i] *= multiplier, Mode::mul (d, mult), JUCE_LOAD_DEST,
  654. const Mode::ParallelType mult = Mode::load1 (multiplier);)
  655. #endif
  656. }
  657. void JUCE_CALLTYPE FloatVectorOperations::multiply (double* dest, double multiplier, int num) noexcept
  658. {
  659. #if JUCE_USE_VDSP_FRAMEWORK
  660. vDSP_vsmulD (dest, 1, &multiplier, dest, 1, (vDSP_Length) num);
  661. #else
  662. JUCE_PERFORM_VEC_OP_DEST (dest[i] *= multiplier, Mode::mul (d, mult), JUCE_LOAD_DEST,
  663. const Mode::ParallelType mult = Mode::load1 (multiplier);)
  664. #endif
  665. }
  666. void JUCE_CALLTYPE FloatVectorOperations::multiply (float* dest, const float* src, float multiplier, int num) noexcept
  667. {
  668. JUCE_PERFORM_VEC_OP_SRC_DEST (dest[i] = src[i] * multiplier, Mode::mul (mult, s),
  669. JUCE_LOAD_SRC, JUCE_INCREMENT_SRC_DEST,
  670. const Mode::ParallelType mult = Mode::load1 (multiplier);)
  671. }
  672. void JUCE_CALLTYPE FloatVectorOperations::multiply (double* dest, const double* src, double multiplier, int num) noexcept
  673. {
  674. JUCE_PERFORM_VEC_OP_SRC_DEST (dest[i] = src[i] * multiplier, Mode::mul (mult, s),
  675. JUCE_LOAD_SRC, JUCE_INCREMENT_SRC_DEST,
  676. const Mode::ParallelType mult = Mode::load1 (multiplier);)
  677. }
  678. void FloatVectorOperations::negate (float* dest, const float* src, int num) noexcept
  679. {
  680. #if JUCE_USE_VDSP_FRAMEWORK
  681. vDSP_vneg ((float*) src, 1, dest, 1, (vDSP_Length) num);
  682. #else
  683. copyWithMultiply (dest, src, -1.0f, num);
  684. #endif
  685. }
  686. void FloatVectorOperations::negate (double* dest, const double* src, int num) noexcept
  687. {
  688. #if JUCE_USE_VDSP_FRAMEWORK
  689. vDSP_vnegD ((double*) src, 1, dest, 1, (vDSP_Length) num);
  690. #else
  691. copyWithMultiply (dest, src, -1.0f, num);
  692. #endif
  693. }
  694. void FloatVectorOperations::abs (float* dest, const float* src, int num) noexcept
  695. {
  696. #if JUCE_USE_VDSP_FRAMEWORK
  697. vDSP_vabs ((float*) src, 1, dest, 1, (vDSP_Length) num);
  698. #else
  699. union {float f; uint32 i;} signMask;
  700. signMask.i = 0x7fffffffUL;
  701. JUCE_PERFORM_VEC_OP_SRC_DEST (dest[i] = fabsf (src[i]), Mode::bit_and (s, mask),
  702. JUCE_LOAD_SRC, JUCE_INCREMENT_SRC_DEST,
  703. const Mode::ParallelType mask = Mode::load1 (signMask.f);)
  704. #endif
  705. }
  706. void FloatVectorOperations::abs (double* dest, const double* src, int num) noexcept
  707. {
  708. #if JUCE_USE_VDSP_FRAMEWORK
  709. vDSP_vabsD ((double*) src, 1, dest, 1, (vDSP_Length) num);
  710. #else
  711. union {double d; uint64 i;} signMask;
  712. signMask.i = 0x7fffffffffffffffULL;
  713. JUCE_PERFORM_VEC_OP_SRC_DEST (dest[i] = fabs (src[i]), Mode::bit_and (s, mask),
  714. JUCE_LOAD_SRC, JUCE_INCREMENT_SRC_DEST,
  715. const Mode::ParallelType mask = Mode::load1 (signMask.d);)
  716. #endif
  717. }
  718. void JUCE_CALLTYPE FloatVectorOperations::convertFixedToFloat (float* dest, const int* src, float multiplier, int num) noexcept
  719. {
  720. #if JUCE_USE_ARM_NEON
  721. JUCE_PERFORM_VEC_OP_SRC_DEST (dest[i] = src[i] * multiplier,
  722. vmulq_n_f32 (vcvtq_f32_s32 (vld1q_s32 (src)), multiplier),
  723. JUCE_LOAD_NONE, JUCE_INCREMENT_SRC_DEST, )
  724. #else
  725. JUCE_PERFORM_VEC_OP_SRC_DEST (dest[i] = src[i] * multiplier,
  726. Mode::mul (mult, _mm_cvtepi32_ps (_mm_loadu_si128 ((const __m128i*) src))),
  727. JUCE_LOAD_NONE, JUCE_INCREMENT_SRC_DEST,
  728. const Mode::ParallelType mult = Mode::load1 (multiplier);)
  729. #endif
  730. }
  731. void JUCE_CALLTYPE FloatVectorOperations::min (float* dest, const float* src, float comp, int num) noexcept
  732. {
  733. JUCE_PERFORM_VEC_OP_SRC_DEST (dest[i] = jmin (src[i], comp), Mode::min (s, cmp),
  734. JUCE_LOAD_SRC, JUCE_INCREMENT_SRC_DEST,
  735. const Mode::ParallelType cmp = Mode::load1 (comp);)
  736. }
  737. void JUCE_CALLTYPE FloatVectorOperations::min (double* dest, const double* src, double comp, int num) noexcept
  738. {
  739. JUCE_PERFORM_VEC_OP_SRC_DEST (dest[i] = jmin (src[i], comp), Mode::min (s, cmp),
  740. JUCE_LOAD_SRC, JUCE_INCREMENT_SRC_DEST,
  741. const Mode::ParallelType cmp = Mode::load1 (comp);)
  742. }
  743. void JUCE_CALLTYPE FloatVectorOperations::min (float* dest, const float* src1, const float* src2, int num) noexcept
  744. {
  745. #if JUCE_USE_VDSP_FRAMEWORK
  746. vDSP_vmin ((float*) src1, 1, (float*) src2, 1, dest, 1, (vDSP_Length) num);
  747. #else
  748. JUCE_PERFORM_VEC_OP_SRC1_SRC2_DEST (dest[i] = jmin (src1[i], src2[i]), Mode::min (s1, s2), JUCE_LOAD_SRC1_SRC2, JUCE_INCREMENT_SRC1_SRC2_DEST, )
  749. #endif
  750. }
  751. void JUCE_CALLTYPE FloatVectorOperations::min (double* dest, const double* src1, const double* src2, int num) noexcept
  752. {
  753. #if JUCE_USE_VDSP_FRAMEWORK
  754. vDSP_vminD ((double*) src1, 1, (double*) src2, 1, dest, 1, (vDSP_Length) num);
  755. #else
  756. JUCE_PERFORM_VEC_OP_SRC1_SRC2_DEST (dest[i] = jmin (src1[i], src2[i]), Mode::min (s1, s2), JUCE_LOAD_SRC1_SRC2, JUCE_INCREMENT_SRC1_SRC2_DEST, )
  757. #endif
  758. }
  759. void JUCE_CALLTYPE FloatVectorOperations::max (float* dest, const float* src, float comp, int num) noexcept
  760. {
  761. JUCE_PERFORM_VEC_OP_SRC_DEST (dest[i] = jmax (src[i], comp), Mode::max (s, cmp),
  762. JUCE_LOAD_SRC, JUCE_INCREMENT_SRC_DEST,
  763. const Mode::ParallelType cmp = Mode::load1 (comp);)
  764. }
  765. void JUCE_CALLTYPE FloatVectorOperations::max (double* dest, const double* src, double comp, int num) noexcept
  766. {
  767. JUCE_PERFORM_VEC_OP_SRC_DEST (dest[i] = jmax (src[i], comp), Mode::max (s, cmp),
  768. JUCE_LOAD_SRC, JUCE_INCREMENT_SRC_DEST,
  769. const Mode::ParallelType cmp = Mode::load1 (comp);)
  770. }
  771. void JUCE_CALLTYPE FloatVectorOperations::max (float* dest, const float* src1, const float* src2, int num) noexcept
  772. {
  773. #if JUCE_USE_VDSP_FRAMEWORK
  774. vDSP_vmax ((float*) src1, 1, (float*) src2, 1, dest, 1, (vDSP_Length) num);
  775. #else
  776. JUCE_PERFORM_VEC_OP_SRC1_SRC2_DEST (dest[i] = jmax (src1[i], src2[i]), Mode::max (s1, s2), JUCE_LOAD_SRC1_SRC2, JUCE_INCREMENT_SRC1_SRC2_DEST, )
  777. #endif
  778. }
  779. void JUCE_CALLTYPE FloatVectorOperations::max (double* dest, const double* src1, const double* src2, int num) noexcept
  780. {
  781. #if JUCE_USE_VDSP_FRAMEWORK
  782. vDSP_vmaxD ((double*) src1, 1, (double*) src2, 1, dest, 1, (vDSP_Length) num);
  783. #else
  784. JUCE_PERFORM_VEC_OP_SRC1_SRC2_DEST (dest[i] = jmax (src1[i], src2[i]), Mode::max (s1, s2), JUCE_LOAD_SRC1_SRC2, JUCE_INCREMENT_SRC1_SRC2_DEST, )
  785. #endif
  786. }
  787. void JUCE_CALLTYPE FloatVectorOperations::clip (float* dest, const float* src, float low, float high, int num) noexcept
  788. {
  789. jassert(high >= low);
  790. #if JUCE_USE_VDSP_FRAMEWORK
  791. vDSP_vclip ((float*) src, 1, &low, &high, dest, 1, (vDSP_Length) num);
  792. #else
  793. JUCE_PERFORM_VEC_OP_SRC_DEST (dest[i] = jmax (jmin (src[i], high), low), Mode::max (Mode::min (s, hi), lo),
  794. JUCE_LOAD_SRC, JUCE_INCREMENT_SRC_DEST,
  795. const Mode::ParallelType lo = Mode::load1 (low); const Mode::ParallelType hi = Mode::load1 (high);)
  796. #endif
  797. }
  798. void JUCE_CALLTYPE FloatVectorOperations::clip (double* dest, const double* src, double low, double high, int num) noexcept
  799. {
  800. jassert(high >= low);
  801. #if JUCE_USE_VDSP_FRAMEWORK
  802. vDSP_vclipD ((double*) src, 1, &low, &high, dest, 1, (vDSP_Length) num);
  803. #else
  804. JUCE_PERFORM_VEC_OP_SRC_DEST (dest[i] = jmax (jmin (src[i], high), low), Mode::max (Mode::min (s, hi), lo),
  805. JUCE_LOAD_SRC, JUCE_INCREMENT_SRC_DEST,
  806. const Mode::ParallelType lo = Mode::load1 (low); const Mode::ParallelType hi = Mode::load1 (high);)
  807. #endif
  808. }
  809. Range<float> JUCE_CALLTYPE FloatVectorOperations::findMinAndMax (const float* src, int num) noexcept
  810. {
  811. #if JUCE_USE_SSE_INTRINSICS || JUCE_USE_ARM_NEON
  812. return FloatVectorHelpers::MinMax<FloatVectorHelpers::BasicOps32>::findMinAndMax (src, num);
  813. #else
  814. return Range<float>::findMinAndMax (src, num);
  815. #endif
  816. }
  817. Range<double> JUCE_CALLTYPE FloatVectorOperations::findMinAndMax (const double* src, int num) noexcept
  818. {
  819. #if JUCE_USE_SSE_INTRINSICS || JUCE_USE_ARM_NEON
  820. return FloatVectorHelpers::MinMax<FloatVectorHelpers::BasicOps64>::findMinAndMax (src, num);
  821. #else
  822. return Range<double>::findMinAndMax (src, num);
  823. #endif
  824. }
  825. float JUCE_CALLTYPE FloatVectorOperations::findMinimum (const float* src, int num) noexcept
  826. {
  827. #if JUCE_USE_SSE_INTRINSICS || JUCE_USE_ARM_NEON
  828. return FloatVectorHelpers::MinMax<FloatVectorHelpers::BasicOps32>::findMinOrMax (src, num, true);
  829. #else
  830. return juce::findMinimum (src, num);
  831. #endif
  832. }
  833. double JUCE_CALLTYPE FloatVectorOperations::findMinimum (const double* src, int num) noexcept
  834. {
  835. #if JUCE_USE_SSE_INTRINSICS || JUCE_USE_ARM_NEON
  836. return FloatVectorHelpers::MinMax<FloatVectorHelpers::BasicOps64>::findMinOrMax (src, num, true);
  837. #else
  838. return juce::findMinimum (src, num);
  839. #endif
  840. }
  841. float JUCE_CALLTYPE FloatVectorOperations::findMaximum (const float* src, int num) noexcept
  842. {
  843. #if JUCE_USE_SSE_INTRINSICS || JUCE_USE_ARM_NEON
  844. return FloatVectorHelpers::MinMax<FloatVectorHelpers::BasicOps32>::findMinOrMax (src, num, false);
  845. #else
  846. return juce::findMaximum (src, num);
  847. #endif
  848. }
  849. double JUCE_CALLTYPE FloatVectorOperations::findMaximum (const double* src, int num) noexcept
  850. {
  851. #if JUCE_USE_SSE_INTRINSICS || JUCE_USE_ARM_NEON
  852. return FloatVectorHelpers::MinMax<FloatVectorHelpers::BasicOps64>::findMinOrMax (src, num, false);
  853. #else
  854. return juce::findMaximum (src, num);
  855. #endif
  856. }
  857. void JUCE_CALLTYPE FloatVectorOperations::enableFlushToZeroMode (bool shouldEnable) noexcept
  858. {
  859. #if JUCE_USE_SSE_INTRINSICS
  860. if (FloatVectorHelpers::isSSE2Available())
  861. _MM_SET_FLUSH_ZERO_MODE (shouldEnable ? _MM_FLUSH_ZERO_ON : _MM_FLUSH_ZERO_OFF);
  862. #endif
  863. (void) shouldEnable;
  864. }
  865. //==============================================================================
  866. //==============================================================================
  867. #if JUCE_UNIT_TESTS
  868. class FloatVectorOperationsTests : public UnitTest
  869. {
  870. public:
  871. FloatVectorOperationsTests() : UnitTest ("FloatVectorOperations") {}
  872. template <typename ValueType>
  873. struct TestRunner
  874. {
  875. static void runTest (UnitTest& u, Random random)
  876. {
  877. const int range = random.nextBool() ? 500 : 10;
  878. const int num = random.nextInt (range) + 1;
  879. HeapBlock<ValueType> buffer1 ((size_t) num + 16), buffer2 ((size_t) num + 16);
  880. HeapBlock<int> buffer3 ((size_t) num + 16);
  881. #if JUCE_ARM
  882. ValueType* const data1 = buffer1;
  883. ValueType* const data2 = buffer2;
  884. int* const int1 = buffer3;
  885. #else
  886. ValueType* const data1 = addBytesToPointer (buffer1.getData(), random.nextInt (16));
  887. ValueType* const data2 = addBytesToPointer (buffer2.getData(), random.nextInt (16));
  888. int* const int1 = addBytesToPointer (buffer3.getData(), random.nextInt (16));
  889. #endif
  890. fillRandomly (random, data1, num);
  891. fillRandomly (random, data2, num);
  892. Range<ValueType> minMax1 (FloatVectorOperations::findMinAndMax (data1, num));
  893. Range<ValueType> minMax2 (Range<ValueType>::findMinAndMax (data1, num));
  894. u.expect (minMax1 == minMax2);
  895. u.expect (valuesMatch (FloatVectorOperations::findMinimum (data1, num), juce::findMinimum (data1, num)));
  896. u.expect (valuesMatch (FloatVectorOperations::findMaximum (data1, num), juce::findMaximum (data1, num)));
  897. u.expect (valuesMatch (FloatVectorOperations::findMinimum (data2, num), juce::findMinimum (data2, num)));
  898. u.expect (valuesMatch (FloatVectorOperations::findMaximum (data2, num), juce::findMaximum (data2, num)));
  899. FloatVectorOperations::clear (data1, num);
  900. u.expect (areAllValuesEqual (data1, num, 0));
  901. FloatVectorOperations::fill (data1, (ValueType) 2, num);
  902. u.expect (areAllValuesEqual (data1, num, (ValueType) 2));
  903. FloatVectorOperations::add (data1, (ValueType) 2, num);
  904. u.expect (areAllValuesEqual (data1, num, (ValueType) 4));
  905. FloatVectorOperations::copy (data2, data1, num);
  906. u.expect (areAllValuesEqual (data2, num, (ValueType) 4));
  907. FloatVectorOperations::add (data2, data1, num);
  908. u.expect (areAllValuesEqual (data2, num, (ValueType) 8));
  909. FloatVectorOperations::copyWithMultiply (data2, data1, (ValueType) 4, num);
  910. u.expect (areAllValuesEqual (data2, num, (ValueType) 16));
  911. FloatVectorOperations::addWithMultiply (data2, data1, (ValueType) 4, num);
  912. u.expect (areAllValuesEqual (data2, num, (ValueTyp

Large files are truncated, but you can click here to view the full file