/src/gui/painting/qdrawhelper_sse2.cpp

https://bitbucket.org/ultra_iter/qt-vtl · C++ · 544 lines · 424 code · 65 blank · 55 comment · 61 complexity · 0578fb8a58017c7fd46d6494a6d75362 MD5 · raw file

  1. /****************************************************************************
  2. **
  3. ** Copyright (C) 2012 Nokia Corporation and/or its subsidiary(-ies).
  4. ** All rights reserved.
  5. ** Contact: Nokia Corporation (qt-info@nokia.com)
  6. **
  7. ** This file is part of the QtGui module of the Qt Toolkit.
  8. **
  9. ** $QT_BEGIN_LICENSE:LGPL$
  10. ** GNU Lesser General Public License Usage
  11. ** This file may be used under the terms of the GNU Lesser General Public
  12. ** License version 2.1 as published by the Free Software Foundation and
  13. ** appearing in the file LICENSE.LGPL included in the packaging of this
  14. ** file. Please review the following information to ensure the GNU Lesser
  15. ** General Public License version 2.1 requirements will be met:
  16. ** http://www.gnu.org/licenses/old-licenses/lgpl-2.1.html.
  17. **
  18. ** In addition, as a special exception, Nokia gives you certain additional
  19. ** rights. These rights are described in the Nokia Qt LGPL Exception
  20. ** version 1.1, included in the file LGPL_EXCEPTION.txt in this package.
  21. **
  22. ** GNU General Public License Usage
  23. ** Alternatively, this file may be used under the terms of the GNU General
  24. ** Public License version 3.0 as published by the Free Software Foundation
  25. ** and appearing in the file LICENSE.GPL included in the packaging of this
  26. ** file. Please review the following information to ensure the GNU General
  27. ** Public License version 3.0 requirements will be met:
  28. ** http://www.gnu.org/copyleft/gpl.html.
  29. **
  30. ** Other Usage
  31. ** Alternatively, this file may be used in accordance with the terms and
  32. ** conditions contained in a signed written agreement between you and Nokia.
  33. **
  34. **
  35. **
  36. **
  37. **
  38. ** $QT_END_LICENSE$
  39. **
  40. ****************************************************************************/
  41. #include <private/qdrawhelper_x86_p.h>
  42. #ifdef QT_HAVE_SSE2
  43. #include <private/qdrawingprimitive_sse2_p.h>
  44. #include <private/qpaintengine_raster_p.h>
  45. QT_BEGIN_NAMESPACE
  46. void qt_blend_argb32_on_argb32_sse2(uchar *destPixels, int dbpl,
  47. const uchar *srcPixels, int sbpl,
  48. int w, int h,
  49. int const_alpha)
  50. {
  51. const quint32 *src = (const quint32 *) srcPixels;
  52. quint32 *dst = (quint32 *) destPixels;
  53. if (const_alpha == 256) {
  54. const __m128i alphaMask = _mm_set1_epi32(0xff000000);
  55. const __m128i nullVector = _mm_set1_epi32(0);
  56. const __m128i half = _mm_set1_epi16(0x80);
  57. const __m128i one = _mm_set1_epi16(0xff);
  58. const __m128i colorMask = _mm_set1_epi32(0x00ff00ff);
  59. for (int y = 0; y < h; ++y) {
  60. BLEND_SOURCE_OVER_ARGB32_SSE2(dst, src, w, nullVector, half, one, colorMask, alphaMask);
  61. dst = (quint32 *)(((uchar *) dst) + dbpl);
  62. src = (const quint32 *)(((const uchar *) src) + sbpl);
  63. }
  64. } else if (const_alpha != 0) {
  65. // dest = (s + d * sia) * ca + d * cia
  66. // = s * ca + d * (sia * ca + cia)
  67. // = s * ca + d * (1 - sa*ca)
  68. const_alpha = (const_alpha * 255) >> 8;
  69. const __m128i nullVector = _mm_set1_epi32(0);
  70. const __m128i half = _mm_set1_epi16(0x80);
  71. const __m128i one = _mm_set1_epi16(0xff);
  72. const __m128i colorMask = _mm_set1_epi32(0x00ff00ff);
  73. const __m128i constAlphaVector = _mm_set1_epi16(const_alpha);
  74. for (int y = 0; y < h; ++y) {
  75. BLEND_SOURCE_OVER_ARGB32_WITH_CONST_ALPHA_SSE2(dst, src, w, nullVector, half, one, colorMask, constAlphaVector)
  76. dst = (quint32 *)(((uchar *) dst) + dbpl);
  77. src = (const quint32 *)(((const uchar *) src) + sbpl);
  78. }
  79. }
  80. }
  81. // qblendfunctions.cpp
  82. void qt_blend_rgb32_on_rgb32(uchar *destPixels, int dbpl,
  83. const uchar *srcPixels, int sbpl,
  84. int w, int h,
  85. int const_alpha);
  86. void qt_blend_rgb32_on_rgb32_sse2(uchar *destPixels, int dbpl,
  87. const uchar *srcPixels, int sbpl,
  88. int w, int h,
  89. int const_alpha)
  90. {
  91. const quint32 *src = (const quint32 *) srcPixels;
  92. quint32 *dst = (quint32 *) destPixels;
  93. if (const_alpha != 256) {
  94. if (const_alpha != 0) {
  95. const __m128i nullVector = _mm_set1_epi32(0);
  96. const __m128i half = _mm_set1_epi16(0x80);
  97. const __m128i colorMask = _mm_set1_epi32(0x00ff00ff);
  98. const_alpha = (const_alpha * 255) >> 8;
  99. int one_minus_const_alpha = 255 - const_alpha;
  100. const __m128i constAlphaVector = _mm_set1_epi16(const_alpha);
  101. const __m128i oneMinusConstAlpha = _mm_set1_epi16(one_minus_const_alpha);
  102. for (int y = 0; y < h; ++y) {
  103. int x = 0;
  104. // First, align dest to 16 bytes:
  105. ALIGNMENT_PROLOGUE_16BYTES(dst, x, w) {
  106. dst[x] = INTERPOLATE_PIXEL_255(src[x], const_alpha, dst[x], one_minus_const_alpha);
  107. }
  108. for (; x < w-3; x += 4) {
  109. __m128i srcVector = _mm_loadu_si128((__m128i *)&src[x]);
  110. if (_mm_movemask_epi8(_mm_cmpeq_epi32(srcVector, nullVector)) != 0xffff) {
  111. const __m128i dstVector = _mm_load_si128((__m128i *)&dst[x]);
  112. __m128i result;
  113. INTERPOLATE_PIXEL_255_SSE2(result, srcVector, dstVector, constAlphaVector, oneMinusConstAlpha, colorMask, half);
  114. _mm_store_si128((__m128i *)&dst[x], result);
  115. }
  116. }
  117. for (; x<w; ++x) {
  118. dst[x] = INTERPOLATE_PIXEL_255(src[x], const_alpha, dst[x], one_minus_const_alpha);
  119. }
  120. dst = (quint32 *)(((uchar *) dst) + dbpl);
  121. src = (const quint32 *)(((const uchar *) src) + sbpl);
  122. }
  123. }
  124. } else {
  125. qt_blend_rgb32_on_rgb32(destPixels, dbpl, srcPixels, sbpl, w, h, const_alpha);
  126. }
  127. }
  128. void QT_FASTCALL comp_func_SourceOver_sse2(uint *destPixels, const uint *srcPixels, int length, uint const_alpha)
  129. {
  130. Q_ASSERT(const_alpha < 256);
  131. const quint32 *src = (const quint32 *) srcPixels;
  132. quint32 *dst = (quint32 *) destPixels;
  133. const __m128i nullVector = _mm_set1_epi32(0);
  134. const __m128i half = _mm_set1_epi16(0x80);
  135. const __m128i one = _mm_set1_epi16(0xff);
  136. const __m128i colorMask = _mm_set1_epi32(0x00ff00ff);
  137. if (const_alpha == 255) {
  138. const __m128i alphaMask = _mm_set1_epi32(0xff000000);
  139. BLEND_SOURCE_OVER_ARGB32_SSE2(dst, src, length, nullVector, half, one, colorMask, alphaMask);
  140. } else {
  141. const __m128i constAlphaVector = _mm_set1_epi16(const_alpha);
  142. BLEND_SOURCE_OVER_ARGB32_WITH_CONST_ALPHA_SSE2(dst, src, length, nullVector, half, one, colorMask, constAlphaVector);
  143. }
  144. }
  145. void QT_FASTCALL comp_func_Plus_sse2(uint *dst, const uint *src, int length, uint const_alpha)
  146. {
  147. int x = 0;
  148. if (const_alpha == 255) {
  149. // 1) Prologue: align destination on 16 bytes
  150. ALIGNMENT_PROLOGUE_16BYTES(dst, x, length)
  151. dst[x] = comp_func_Plus_one_pixel(dst[x], src[x]);
  152. // 2) composition with SSE2
  153. for (; x < length - 3; x += 4) {
  154. const __m128i srcVector = _mm_loadu_si128((__m128i *)&src[x]);
  155. const __m128i dstVector = _mm_load_si128((__m128i *)&dst[x]);
  156. const __m128i result = _mm_adds_epu8(srcVector, dstVector);
  157. _mm_store_si128((__m128i *)&dst[x], result);
  158. }
  159. // 3) Epilogue:
  160. for (; x < length; ++x)
  161. dst[x] = comp_func_Plus_one_pixel(dst[x], src[x]);
  162. } else {
  163. const int one_minus_const_alpha = 255 - const_alpha;
  164. const __m128i constAlphaVector = _mm_set1_epi16(const_alpha);
  165. const __m128i oneMinusConstAlpha = _mm_set1_epi16(one_minus_const_alpha);
  166. // 1) Prologue: align destination on 16 bytes
  167. ALIGNMENT_PROLOGUE_16BYTES(dst, x, length)
  168. dst[x] = comp_func_Plus_one_pixel_const_alpha(dst[x], src[x], const_alpha, one_minus_const_alpha);
  169. const __m128i half = _mm_set1_epi16(0x80);
  170. const __m128i colorMask = _mm_set1_epi32(0x00ff00ff);
  171. // 2) composition with SSE2
  172. for (; x < length - 3; x += 4) {
  173. const __m128i srcVector = _mm_loadu_si128((__m128i *)&src[x]);
  174. const __m128i dstVector = _mm_load_si128((__m128i *)&dst[x]);
  175. __m128i result = _mm_adds_epu8(srcVector, dstVector);
  176. INTERPOLATE_PIXEL_255_SSE2(result, result, dstVector, constAlphaVector, oneMinusConstAlpha, colorMask, half)
  177. _mm_store_si128((__m128i *)&dst[x], result);
  178. }
  179. // 3) Epilogue:
  180. for (; x < length; ++x)
  181. dst[x] = comp_func_Plus_one_pixel_const_alpha(dst[x], src[x], const_alpha, one_minus_const_alpha);
  182. }
  183. }
  184. void QT_FASTCALL comp_func_Source_sse2(uint *dst, const uint *src, int length, uint const_alpha)
  185. {
  186. if (const_alpha == 255) {
  187. ::memcpy(dst, src, length * sizeof(uint));
  188. } else {
  189. const int ialpha = 255 - const_alpha;
  190. int x = 0;
  191. // 1) prologue, align on 16 bytes
  192. ALIGNMENT_PROLOGUE_16BYTES(dst, x, length)
  193. dst[x] = INTERPOLATE_PIXEL_255(src[x], const_alpha, dst[x], ialpha);
  194. // 2) interpolate pixels with SSE2
  195. const __m128i half = _mm_set1_epi16(0x80);
  196. const __m128i colorMask = _mm_set1_epi32(0x00ff00ff);
  197. const __m128i constAlphaVector = _mm_set1_epi16(const_alpha);
  198. const __m128i oneMinusConstAlpha = _mm_set1_epi16(ialpha);
  199. for (; x < length - 3; x += 4) {
  200. const __m128i srcVector = _mm_loadu_si128((__m128i *)&src[x]);
  201. __m128i dstVector = _mm_load_si128((__m128i *)&dst[x]);
  202. INTERPOLATE_PIXEL_255_SSE2(dstVector, srcVector, dstVector, constAlphaVector, oneMinusConstAlpha, colorMask, half)
  203. _mm_store_si128((__m128i *)&dst[x], dstVector);
  204. }
  205. // 3) Epilogue
  206. for (; x < length; ++x)
  207. dst[x] = INTERPOLATE_PIXEL_255(src[x], const_alpha, dst[x], ialpha);
  208. }
  209. }
  210. void qt_memfill32_sse2(quint32 *dest, quint32 value, int count)
  211. {
  212. if (count < 7) {
  213. switch (count) {
  214. case 6: *dest++ = value;
  215. case 5: *dest++ = value;
  216. case 4: *dest++ = value;
  217. case 3: *dest++ = value;
  218. case 2: *dest++ = value;
  219. case 1: *dest = value;
  220. }
  221. return;
  222. };
  223. const int align = (quintptr)(dest) & 0xf;
  224. switch (align) {
  225. case 4: *dest++ = value; --count;
  226. case 8: *dest++ = value; --count;
  227. case 12: *dest++ = value; --count;
  228. }
  229. int count128 = count / 4;
  230. __m128i *dst128 = reinterpret_cast<__m128i*>(dest);
  231. const __m128i value128 = _mm_set_epi32(value, value, value, value);
  232. int n = (count128 + 3) / 4;
  233. switch (count128 & 0x3) {
  234. case 0: do { _mm_stream_si128(dst128++, value128);
  235. case 3: _mm_stream_si128(dst128++, value128);
  236. case 2: _mm_stream_si128(dst128++, value128);
  237. case 1: _mm_stream_si128(dst128++, value128);
  238. } while (--n > 0);
  239. }
  240. const int rest = count & 0x3;
  241. if (rest) {
  242. switch (rest) {
  243. case 3: dest[count - 3] = value;
  244. case 2: dest[count - 2] = value;
  245. case 1: dest[count - 1] = value;
  246. }
  247. }
  248. }
  249. void QT_FASTCALL comp_func_solid_SourceOver_sse2(uint *destPixels, int length, uint color, uint const_alpha)
  250. {
  251. if ((const_alpha & qAlpha(color)) == 255) {
  252. qt_memfill32_sse2(destPixels, color, length);
  253. } else {
  254. if (const_alpha != 255)
  255. color = BYTE_MUL(color, const_alpha);
  256. const quint32 minusAlphaOfColor = qAlpha(~color);
  257. int x = 0;
  258. quint32 *dst = (quint32 *) destPixels;
  259. const __m128i colorVector = _mm_set1_epi32(color);
  260. const __m128i colorMask = _mm_set1_epi32(0x00ff00ff);
  261. const __m128i half = _mm_set1_epi16(0x80);
  262. const __m128i minusAlphaOfColorVector = _mm_set1_epi16(minusAlphaOfColor);
  263. ALIGNMENT_PROLOGUE_16BYTES(dst, x, length)
  264. destPixels[x] = color + BYTE_MUL(destPixels[x], minusAlphaOfColor);
  265. for (; x < length-3; x += 4) {
  266. __m128i dstVector = _mm_load_si128((__m128i *)&dst[x]);
  267. BYTE_MUL_SSE2(dstVector, dstVector, minusAlphaOfColorVector, colorMask, half);
  268. dstVector = _mm_add_epi8(colorVector, dstVector);
  269. _mm_store_si128((__m128i *)&dst[x], dstVector);
  270. }
  271. for (;x < length; ++x)
  272. destPixels[x] = color + BYTE_MUL(destPixels[x], minusAlphaOfColor);
  273. }
  274. }
  275. CompositionFunctionSolid qt_functionForModeSolid_onlySSE2[numCompositionFunctions] = {
  276. comp_func_solid_SourceOver_sse2,
  277. comp_func_solid_DestinationOver,
  278. comp_func_solid_Clear,
  279. comp_func_solid_Source,
  280. comp_func_solid_Destination,
  281. comp_func_solid_SourceIn,
  282. comp_func_solid_DestinationIn,
  283. comp_func_solid_SourceOut,
  284. comp_func_solid_DestinationOut,
  285. comp_func_solid_SourceAtop,
  286. comp_func_solid_DestinationAtop,
  287. comp_func_solid_XOR,
  288. comp_func_solid_Plus,
  289. comp_func_solid_Multiply,
  290. comp_func_solid_Screen,
  291. comp_func_solid_Overlay,
  292. comp_func_solid_Darken,
  293. comp_func_solid_Lighten,
  294. comp_func_solid_ColorDodge,
  295. comp_func_solid_ColorBurn,
  296. comp_func_solid_HardLight,
  297. comp_func_solid_SoftLight,
  298. comp_func_solid_Difference,
  299. comp_func_solid_Exclusion,
  300. rasterop_solid_SourceOrDestination,
  301. rasterop_solid_SourceAndDestination,
  302. rasterop_solid_SourceXorDestination,
  303. rasterop_solid_NotSourceAndNotDestination,
  304. rasterop_solid_NotSourceOrNotDestination,
  305. rasterop_solid_NotSourceXorDestination,
  306. rasterop_solid_NotSource,
  307. rasterop_solid_NotSourceAndDestination,
  308. rasterop_solid_SourceAndNotDestination
  309. };
  310. CompositionFunction qt_functionForMode_onlySSE2[numCompositionFunctions] = {
  311. comp_func_SourceOver_sse2,
  312. comp_func_DestinationOver,
  313. comp_func_Clear,
  314. comp_func_Source_sse2,
  315. comp_func_Destination,
  316. comp_func_SourceIn,
  317. comp_func_DestinationIn,
  318. comp_func_SourceOut,
  319. comp_func_DestinationOut,
  320. comp_func_SourceAtop,
  321. comp_func_DestinationAtop,
  322. comp_func_XOR,
  323. comp_func_Plus_sse2,
  324. comp_func_Multiply,
  325. comp_func_Screen,
  326. comp_func_Overlay,
  327. comp_func_Darken,
  328. comp_func_Lighten,
  329. comp_func_ColorDodge,
  330. comp_func_ColorBurn,
  331. comp_func_HardLight,
  332. comp_func_SoftLight,
  333. comp_func_Difference,
  334. comp_func_Exclusion,
  335. rasterop_SourceOrDestination,
  336. rasterop_SourceAndDestination,
  337. rasterop_SourceXorDestination,
  338. rasterop_NotSourceAndNotDestination,
  339. rasterop_NotSourceOrNotDestination,
  340. rasterop_NotSourceXorDestination,
  341. rasterop_NotSource,
  342. rasterop_NotSourceAndDestination,
  343. rasterop_SourceAndNotDestination
  344. };
  345. void qt_memfill16_sse2(quint16 *dest, quint16 value, int count)
  346. {
  347. if (count < 3) {
  348. switch (count) {
  349. case 2: *dest++ = value;
  350. case 1: *dest = value;
  351. }
  352. return;
  353. }
  354. const int align = (quintptr)(dest) & 0x3;
  355. switch (align) {
  356. case 2: *dest++ = value; --count;
  357. }
  358. const quint32 value32 = (value << 16) | value;
  359. qt_memfill32_sse2(reinterpret_cast<quint32*>(dest), value32, count / 2);
  360. if (count & 0x1)
  361. dest[count - 1] = value;
  362. }
  363. void qt_bitmapblit32_sse2(QRasterBuffer *rasterBuffer, int x, int y,
  364. quint32 color,
  365. const uchar *src, int width, int height, int stride)
  366. {
  367. quint32 *dest = reinterpret_cast<quint32*>(rasterBuffer->scanLine(y)) + x;
  368. const int destStride = rasterBuffer->bytesPerLine() / sizeof(quint32);
  369. const __m128i c128 = _mm_set1_epi32(color);
  370. const __m128i maskmask1 = _mm_set_epi32(0x10101010, 0x20202020,
  371. 0x40404040, 0x80808080);
  372. const __m128i maskadd1 = _mm_set_epi32(0x70707070, 0x60606060,
  373. 0x40404040, 0x00000000);
  374. if (width > 4) {
  375. const __m128i maskmask2 = _mm_set_epi32(0x01010101, 0x02020202,
  376. 0x04040404, 0x08080808);
  377. const __m128i maskadd2 = _mm_set_epi32(0x7f7f7f7f, 0x7e7e7e7e,
  378. 0x7c7c7c7c, 0x78787878);
  379. while (height--) {
  380. for (int x = 0; x < width; x += 8) {
  381. const quint8 s = src[x >> 3];
  382. if (!s)
  383. continue;
  384. __m128i mask1 = _mm_set1_epi8(s);
  385. __m128i mask2 = mask1;
  386. mask1 = _mm_and_si128(mask1, maskmask1);
  387. mask1 = _mm_add_epi8(mask1, maskadd1);
  388. _mm_maskmoveu_si128(c128, mask1, (char*)(dest + x));
  389. mask2 = _mm_and_si128(mask2, maskmask2);
  390. mask2 = _mm_add_epi8(mask2, maskadd2);
  391. _mm_maskmoveu_si128(c128, mask2, (char*)(dest + x + 4));
  392. }
  393. dest += destStride;
  394. src += stride;
  395. }
  396. } else {
  397. while (height--) {
  398. const quint8 s = *src;
  399. if (s) {
  400. __m128i mask1 = _mm_set1_epi8(s);
  401. mask1 = _mm_and_si128(mask1, maskmask1);
  402. mask1 = _mm_add_epi8(mask1, maskadd1);
  403. _mm_maskmoveu_si128(c128, mask1, (char*)(dest));
  404. }
  405. dest += destStride;
  406. src += stride;
  407. }
  408. }
  409. }
  410. void qt_bitmapblit16_sse2(QRasterBuffer *rasterBuffer, int x, int y,
  411. quint32 color,
  412. const uchar *src, int width, int height, int stride)
  413. {
  414. const quint16 c = qt_colorConvert<quint16, quint32>(color, 0);
  415. quint16 *dest = reinterpret_cast<quint16*>(rasterBuffer->scanLine(y)) + x;
  416. const int destStride = rasterBuffer->bytesPerLine() / sizeof(quint16);
  417. const __m128i c128 = _mm_set1_epi16(c);
  418. #if defined(Q_CC_MSVC)
  419. # pragma warning(disable: 4309) // truncation of constant value
  420. #endif
  421. const __m128i maskmask = _mm_set_epi16(0x0101, 0x0202, 0x0404, 0x0808,
  422. 0x1010, 0x2020, 0x4040, 0x8080);
  423. const __m128i maskadd = _mm_set_epi16(0x7f7f, 0x7e7e, 0x7c7c, 0x7878,
  424. 0x7070, 0x6060, 0x4040, 0x0000);
  425. while (height--) {
  426. for (int x = 0; x < width; x += 8) {
  427. const quint8 s = src[x >> 3];
  428. if (!s)
  429. continue;
  430. __m128i mask = _mm_set1_epi8(s);
  431. mask = _mm_and_si128(mask, maskmask);
  432. mask = _mm_add_epi8(mask, maskadd);
  433. _mm_maskmoveu_si128(c128, mask, (char*)(dest + x));
  434. }
  435. dest += destStride;
  436. src += stride;
  437. }
  438. }
  439. class QSimdSse2
  440. {
  441. public:
  442. typedef __m128i Int32x4;
  443. typedef __m128 Float32x4;
  444. union Vect_buffer_i { Int32x4 v; int i[4]; };
  445. union Vect_buffer_f { Float32x4 v; float f[4]; };
  446. static inline Float32x4 v_dup(float x) { return _mm_set1_ps(x); }
  447. static inline Float32x4 v_dup(double x) { return _mm_set1_ps(x); }
  448. static inline Int32x4 v_dup(int x) { return _mm_set1_epi32(x); }
  449. static inline Int32x4 v_dup(uint x) { return _mm_set1_epi32(x); }
  450. static inline Float32x4 v_add(Float32x4 a, Float32x4 b) { return _mm_add_ps(a, b); }
  451. static inline Int32x4 v_add(Int32x4 a, Int32x4 b) { return _mm_add_epi32(a, b); }
  452. static inline Float32x4 v_max(Float32x4 a, Float32x4 b) { return _mm_max_ps(a, b); }
  453. static inline Float32x4 v_min(Float32x4 a, Float32x4 b) { return _mm_min_ps(a, b); }
  454. static inline Int32x4 v_min_16(Int32x4 a, Int32x4 b) { return _mm_min_epi16(a, b); }
  455. static inline Int32x4 v_and(Int32x4 a, Int32x4 b) { return _mm_and_si128(a, b); }
  456. static inline Float32x4 v_sub(Float32x4 a, Float32x4 b) { return _mm_sub_ps(a, b); }
  457. static inline Int32x4 v_sub(Int32x4 a, Int32x4 b) { return _mm_sub_epi32(a, b); }
  458. static inline Float32x4 v_mul(Float32x4 a, Float32x4 b) { return _mm_mul_ps(a, b); }
  459. static inline Float32x4 v_sqrt(Float32x4 x) { return _mm_sqrt_ps(x); }
  460. static inline Int32x4 v_toInt(Float32x4 x) { return _mm_cvttps_epi32(x); }
  461. // pre-VS 2008 doesn't have cast intrinsics, whereas 2008 and later requires it
  462. #if defined(Q_CC_MSVC) && _MSC_VER < 1500
  463. static inline Int32x4 v_greaterOrEqual(Float32x4 a, Float32x4 b)
  464. {
  465. union Convert { Int32x4 vi; Float32x4 vf; } convert;
  466. convert.vf = _mm_cmpgt_ps(a, b);
  467. return convert.vi;
  468. }
  469. #else
  470. static inline Int32x4 v_greaterOrEqual(Float32x4 a, Float32x4 b) { return _mm_castps_si128(_mm_cmpgt_ps(a, b)); }
  471. #endif
  472. };
  473. const uint * QT_FASTCALL qt_fetch_radial_gradient_sse2(uint *buffer, const Operator *op, const QSpanData *data,
  474. int y, int x, int length)
  475. {
  476. return qt_fetch_radial_gradient_template<QRadialFetchSimd<QSimdSse2> >(buffer, op, data, y, x, length);
  477. }
  478. QT_END_NAMESPACE
  479. #endif // QT_HAVE_SSE2