/draw/arch_port.c

https://github.com/Lafriks/mupdf · C · 486 lines · 446 code · 20 blank · 20 comment · 77 complexity · a33d1fce1b656cdc986274ebd815ee34 MD5 · raw file

  1. #include "fitz.h"
  2. typedef unsigned char byte;
  3. /* These C implementations use SWAR (SIMD-within-a-register) techniques. */
  4. #if 0 /* TODO: move into porterduff.c functions */
  /*
   * Selects the high byte of each 16-bit lane (bits 8-15 and 24-31) of a
   * 32-bit word; ~MASK selects the low bytes (bits 0-7 and 16-23).
   * Deliberately defined without a trailing semicolon so that it can be used
   * inside larger expressions and in front of an "else".
   */
  #define MASK 0xFF00FF00u
  6. static void
  7. path_w4i1o4_32bit(byte *rgba,
  8. byte * restrict src, byte cov, int len, byte * restrict dst)
  9. {
  10. /* COLOR * coverage + DST * (256-coverage) = (COLOR - DST)*coverage + DST*256 */
  11. unsigned int *dst32 = (unsigned int *)(void *)dst;
  12. int alpha = rgba[3];
  13. unsigned int rb = rgba[0] | (rgba[2] << 16);
  14. unsigned int ga = rgba[1] | 0xFF0000;
  15. if (alpha == 0)
  16. return;
  17. if (alpha != 255)
  18. {
  19. alpha += alpha>>7; /* alpha is now in the 0...256 range */
  20. while (len--)
  21. {
  22. unsigned int ca, drb, dga, crb, cga;
  23. cov += *src; *src++ = 0;
  24. ca = cov + (cov>>7); /* ca is in 0...256 range */
  25. ca = (ca*alpha)>>8; /* ca is is in 0...256 range */
  26. drb = *dst32++;
  27. if (ca != 0)
  28. {
  29. dga = drb & MASK;
  30. drb = (drb<<8) & MASK;
  31. cga = ga - (dga>>8);
  32. crb = rb - (drb>>8);
  33. dga += cga * ca;
  34. drb += crb * ca;
  35. dga &= MASK;
  36. drb &= MASK;
  37. drb = dga | (drb>>8);
  38. dst32[-1] = drb;
  39. }
  40. }
  41. }
  42. else
  43. {
  44. while (len--)
  45. {
  46. unsigned int ca, drb, dga, crb, cga;
  47. cov += *src; *src++ = 0;
  48. ca = cov + (cov>>7); /* ca is in 0...256 range */
  49. drb = *dst32++;
  50. if (ca == 0)
  51. continue;
  52. if (ca == 255)
  53. {
  54. drb = (ga<<8) | rb;
  55. }
  56. else
  57. {
  58. dga = drb & MASK;
  59. drb = (drb<<8) & MASK;
  60. cga = ga - (dga>>8);
  61. crb = rb - (drb>>8);
  62. dga += cga * ca;
  63. drb += crb * ca;
  64. dga &= MASK;
  65. drb &= MASK;
  66. drb = dga |(drb>>8);
  67. }
  68. dst32[-1] = drb;
  69. }
  70. }
  71. }
  72. static void
  73. text_w4i1o4_32bit(byte *rgba,
  74. byte * restrict src, int srcw,
  75. byte * restrict dst, int dstw, int w0, int h)
  76. {
  77. unsigned int *dst32 = (unsigned int *)(void *)dst;
  78. unsigned int alpha = rgba[3];
  79. unsigned int rb = rgba[0] | (rgba[2] << 16);
  80. unsigned int ga = rgba[1] | 0xFF0000;
  81. if (alpha == 0)
  82. return;
  83. srcw -= w0;
  84. dstw = (dstw>>2)-w0;
  85. if (alpha != 255)
  86. {
  87. alpha += alpha>>7; /* alpha is now in the 0...256 range */
  88. while (h--)
  89. {
  90. int w = w0;
  91. while (w--)
  92. {
  93. unsigned int ca, drb, dga, crb, cga;
  94. ca = *src++;
  95. drb = *dst32++;
  96. ca += ca>>7;
  97. ca = (ca*alpha)>>8;
  98. if (ca == 0)
  99. continue;
  100. dga = drb & MASK;
  101. drb = (drb<<8) & MASK;
  102. cga = ga - (dga>>8);
  103. crb = rb - (drb>>8);
  104. dga += cga * ca;
  105. drb += crb * ca;
  106. dga &= MASK;
  107. drb &= MASK;
  108. drb = dga | (drb>>8);
  109. dst32[-1] = drb;
  110. }
  111. src += srcw;
  112. dst32 += dstw;
  113. }
  114. }
  115. else
  116. {
  117. while (h--)
  118. {
  119. int w = w0;
  120. while (w--)
  121. {
  122. unsigned int ca, drb, dga, crb, cga;
  123. ca = *src++;
  124. drb = *dst32++;
  125. ca += ca>>7;
  126. if (ca == 0)
  127. continue;
  128. dga = drb & MASK;
  129. drb = (drb<<8) & MASK;
  130. cga = ga - (dga>>8);
  131. crb = rb - (drb>>8);
  132. dga += cga * ca;
  133. drb += crb * ca;
  134. dga &= MASK;
  135. drb &= MASK;
  136. drb = dga | (drb>>8);
  137. dst32[-1] = drb;
  138. }
  139. src += srcw;
  140. dst32 += dstw;
  141. }
  142. }
  143. }
  144. static void
  145. img_4o4_32bit(byte * restrict src, byte cov, int len, byte * restrict dst,
  146. fz_pixmap *image, int u, int v, int fa, int fb)
  147. {
  148. unsigned int *dst32 = (unsigned int *)(void *)dst;
  149. unsigned int *samples = (unsigned int *)(void *)image->samples;
  150. int w = image->w;
  151. int h = image->h-1;
  152. while (len--)
  153. {
  154. unsigned int a, a1, d, d1;
  155. int sa;
  156. cov += *src; *src = 0; src++;
  157. /* (a,a1) = sampleargb(samples, w, h, u, v, argb); */
  158. {
  159. int ui, ui1, vi, vi1, ud, vd;
  160. unsigned int b, b1, c, c1;
  161. ui1 = 1;
  162. ui = u >> 16;
  163. if (ui < 0)
  164. {
  165. ui = 0;
  166. ui1 = 0;
  167. }
  168. else if (ui >= w-1)
  169. {
  170. ui = w-1;
  171. ui1 = 0;
  172. }
  173. vi1 = w;
  174. vi = v >> 16;
  175. if (vi < 0)
  176. {
  177. vi = 0;
  178. vi1 = 0;
  179. }
  180. else if (vi >= h)
  181. {
  182. vi = h;
  183. vi1 = 0;
  184. }
  185. ui += vi*w;
  186. a = samples[ui];
  187. b = samples[ui + ui1];
  188. c = samples[ui + vi1];
  189. d = samples[ui + ui1 + vi1];
  190. ud = (u>>8) & 0xFF;
  191. vd = (v>>8) & 0xFF;
  192. ud = FZ_EXPAND(ud);
  193. vd = FZ_EXPAND(vd);
  194. /* (a,a1) = blend(a,b,ud) */
  195. a1 = a & MASK;
  196. a = (a<<8) & MASK;
  197. b1 = (b>>8) & ~MASK;
  198. b = b & ~MASK;
  199. a = ((b -(a >>8)) * ud + a ) & MASK;
  200. a1 = ((b1-(a1>>8)) * ud + a1) & MASK;
  201. /* (c,c1) = blend(c,d,ud) */
  202. c1 = c & MASK;
  203. c = (c<<8) & MASK;
  204. d1 = (d>>8) & ~MASK;
  205. d = d & ~MASK;
  206. c = ((d -(c >>8)) * ud + c ) & MASK;
  207. c1 = ((d1-(c1>>8)) * ud + c1) & MASK;
  208. /* (a,a1) = blend((a,a1),(c,c1),vd) */
  209. a = (((c >>8)-(a >>8)) * vd + a ) & MASK;
  210. a1 = (((c1>>8)-(a1>>8)) * vd + a1) & MASK;
  211. }
  212. sa = (a1>>24);
  213. sa = FZ_COMBINE(FZ_EXPAND(sa), FZ_EXPAND(cov));
  214. a1 |= 0xFF000000;
  215. d = *dst32++;
  216. d1 = d & MASK;
  217. d = (d<<8) & MASK;
  218. a = (((a >>8)-(d >>8)) * sa + d ) & MASK;
  219. a1 = (((a1>>8)-(d1>>8)) * sa + d1) & MASK;
  220. dst32[-1] = (a>>8) | a1;
  221. u += fa;
  222. v += fb;
  223. }
  224. }
  225. static void
  226. img_w4i1o4_32bit(byte *rgba, byte * restrict src, byte cov, int len,
  227. byte * restrict dst, fz_pixmap *image, int u, int v, int fa, int fb)
  228. {
  229. byte *samples = image->samples;
  230. int w = image->w;
  231. int h = image->h-1;
  232. int alpha = FZ_EXPAND(rgba[3]);
  233. unsigned int rb = rgba[0] | (rgba[2] << 16);
  234. unsigned int ga = rgba[1] | 0xFF0000;
  235. unsigned int *dst32 = (unsigned int *)(void *)dst;
  236. if (alpha == 0)
  237. return;
  238. if (alpha != 256)
  239. {
  240. while (len--)
  241. {
  242. unsigned int ca, drb, dga, crb, cga;
  243. unsigned int a, b;
  244. cov += *src; *src = 0; src++;
  245. drb = *dst32++;
  246. ca = FZ_COMBINE(FZ_EXPAND(cov), alpha);
  247. if (ca != 0)
  248. {
  249. int ui, ui1, vi, vi1, ud, vd;
  250. /* a = samplemask(samples, w, h, u, v); */
  251. ui1 = 1;
  252. ui = u >> 16;
  253. if (ui < 0)
  254. {
  255. ui = 0;
  256. ui1 = 0;
  257. }
  258. else if (ui >= w-1)
  259. {
  260. ui = w-1;
  261. ui1 = 0;
  262. }
  263. vi1 = w;
  264. vi = v >> 16;
  265. if (vi < 0)
  266. {
  267. vi = 0;
  268. vi1 = 0;
  269. }
  270. else if (vi >= h)
  271. {
  272. vi = h;
  273. vi1 = 0;
  274. }
  275. ui += vi*w;
  276. a = samples[ui];
  277. b = samples[ui + ui1];
  278. a |= samples[ui + vi1]<<16;
  279. b |= samples[ui + ui1 + vi1]<<16;
  280. ud = (u>>8) & 0xFF;
  281. vd = (v>>8) & 0xFF;
  282. ud = FZ_EXPAND(ud);
  283. vd = FZ_EXPAND(vd);
  284. /* a = blend(a,b,ud) */
  285. a = ((b-a) * ud + (a<<8)) & MASK;
  286. /* a = blend(a,a>>16,vd) */
  287. a = (((a>>24)-(a>>8)) * vd + a);
  288. a = (a>>8) & 0xFF;
  289. ca = FZ_COMBINE(ca, FZ_EXPAND(a));
  290. }
  291. if (ca != 0)
  292. {
  293. dga = drb & MASK;
  294. drb = (drb<<8) & MASK;
  295. cga = ga - (dga>>8);
  296. crb = rb - (drb>>8);
  297. dga += cga * ca;
  298. drb += crb * ca;
  299. dga &= MASK;
  300. drb &= MASK;
  301. drb = dga | (drb>>8);
  302. dst32[-1] = drb;
  303. }
  304. u += fa;
  305. v += fb;
  306. }
  307. }
  308. else
  309. {
  310. while (len--)
  311. {
  312. unsigned int ca, drb, dga, crb, cga;
  313. unsigned int a, b;
  314. cov += *src; *src = 0; src++;
  315. drb = *dst32++;
  316. if (cov != 0)
  317. {
  318. int ui, ui1, vi, vi1, ud, vd;
  319. /* a = samplemask(samples, w, h, u, v); */
  320. ui1 = 1;
  321. ui = u >> 16;
  322. if (ui < 0)
  323. {
  324. ui = 0;
  325. ui1 = 0;
  326. }
  327. else if (ui >= w-1)
  328. {
  329. ui = w-1;
  330. ui1 = 0;
  331. }
  332. vi1 = w;
  333. vi = v >> 16;
  334. if (vi < 0)
  335. {
  336. vi = 0;
  337. vi1 = 0;
  338. }
  339. else if (vi >= h)
  340. {
  341. vi = h;
  342. vi1 = 0;
  343. }
  344. ui += vi*w;
  345. a = samples[ui];
  346. b = samples[ui + ui1];
  347. a |= samples[ui + vi1]<<16;
  348. b |= samples[ui + ui1 + vi1]<<16;
  349. ud = (u>>8) & 0xFF;
  350. vd = (v>>8) & 0xFF;
  351. ud = FZ_EXPAND(ud);
  352. vd = FZ_EXPAND(vd);
  353. /* a = blend(a,b,ud) */
  354. a = ((b-a) * ud + (a<<8)) & MASK;
  355. /* a = blend(a,a>>16,vd) */
  356. a = (((a>>24)-(a>>8)) * vd + a);
  357. a = (a>>8) & 0xFF;
  358. ca = FZ_COMBINE(FZ_EXPAND(cov),FZ_EXPAND(a));
  359. if (ca != 0)
  360. {
  361. if (ca == 256)
  362. {
  363. drb = (ga<<8) | rb;
  364. }
  365. else
  366. {
  367. dga = drb & MASK;
  368. drb = (drb<<8) & MASK;
  369. cga = ga - (dga>>8);
  370. crb = rb - (drb>>8);
  371. dga += cga * ca;
  372. drb += crb * ca;
  373. dga &= MASK;
  374. drb &= MASK;
  375. drb = dga | (drb>>8);
  376. }
  377. dst32[-1] = drb;
  378. }
  379. }
  380. u += fa;
  381. v += fb;
  382. }
  383. }
  384. }
  385. static void
  386. img_1o1_32bit(byte * restrict src, byte cov, int len, byte * restrict dst,
  387. fz_pixmap *image, int u, int v, int fa, int fb)
  388. {
  389. byte *samples = image->samples;
  390. int w = image->w;
  391. int h = image->h-1;
  392. while (len--)
  393. {
  394. unsigned int a, b;
  395. cov += *src; *src = 0; src++;
  396. if (cov != 0)
  397. {
  398. int ui, ui1, vi, vi1, ud, vd;
  399. /* sa = samplemask(samples, w, h, u, v); */
  400. ui1 = 1;
  401. ui = u >> 16;
  402. if (ui < 0)
  403. {
  404. ui = 0;
  405. ui1 = 0;
  406. }
  407. else if (ui >= w-1)
  408. {
  409. ui = w-1;
  410. ui1 = 0;
  411. }
  412. vi1 = w;
  413. vi = v >> 16;
  414. if (vi < 0)
  415. {
  416. vi = 0;
  417. vi1 = 0;
  418. }
  419. else if (vi >= h)
  420. {
  421. vi = h;
  422. vi1 = 0;
  423. }
  424. ui += vi*w;
  425. a = samples[ui];
  426. b = samples[ui + ui1];
  427. a |= samples[ui + vi1]<<16;
  428. b |= samples[ui + ui1 + vi1]<<16;
  429. ud = (u>>8) & 0xFF;
  430. vd = (v>>8) & 0xFF;
  431. ud = FZ_EXPAND(ud);
  432. vd = FZ_EXPAND(vd);
  433. /* a = blend(a,b,ud) */
  434. a = ((b-a) * ud + (a<<8)) & MASK;
  435. /* a = blend(a,a>>16,vd) */
  436. a = (((a>>24)-(a>>8)) * vd + a);
  437. a = (a>>8) & 0xFF;
  438. a = FZ_COMBINE(FZ_EXPAND(a), FZ_EXPAND(cov));
  439. if (a != 0)
  440. {
  441. if (a == 256)
  442. dst[0] = 255;
  443. else
  444. dst[0] = FZ_BLEND(255, dst[0], a);
  445. }
  446. }
  447. dst++;
  448. u += fa;
  449. v += fb;
  450. }
  451. }
  452. #endif
  /*
   * Install accelerated drawing routines.
   *
   * The 32-bit SWAR routines apply only when int and unsigned int are 4 bytes
   * wide and the machine is not big-endian. They are currently compiled out,
   * so no function pointers are replaced in that case. When HAVE_CPUDEP is
   * defined, fz_accelerate_arch() is called afterwards.
   */
  void fz_accelerate(void)
  {
      int use_32bit_swar = (sizeof(int) == 4 && sizeof(unsigned int) == 4 && !fz_is_big_endian());

      if (use_32bit_swar)
      {
          /*
           * Hooks to enable once the 32-bit routines are built again:
           *   fz_path_w4i1o4 = path_w4i1o4_32bit;
           *   fz_text_w4i1o4 = text_w4i1o4_32bit;
           *   fz_img_4o4 = img_4o4_32bit;
           *   fz_img_w4i1o4 = img_w4i1o4_32bit;
           *   fz_img_1o1 = img_1o1_32bit;
           */
      }

  #ifdef HAVE_CPUDEP
      fz_accelerate_arch();
  #endif
  }