PageRenderTime 53ms CodeModel.GetById 18ms RepoModel.GetById 0ms app.codeStats 1ms

/java-1.7.0-openjdk/openjdk/jdk/src/solaris/native/sun/awt/medialib/mlib_v_ImageLookUpSIU8S32Func.c

#
C | 1361 lines | 1149 code | 168 blank | 44 comment | 77 complexity | a1882d4a948bf7a4fbb141f561e9b7af MD5 | raw file
Possible License(s): GPL-2.0, BSD-3-Clause-No-Nuclear-License-2014, LGPL-3.0, LGPL-2.0
  1. /*
  2. * Copyright (c) 1998, 2003, Oracle and/or its affiliates. All rights reserved.
  3. * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
  4. *
  5. * This code is free software; you can redistribute it and/or modify it
  6. * under the terms of the GNU General Public License version 2 only, as
  7. * published by the Free Software Foundation. Oracle designates this
  8. * particular file as subject to the "Classpath" exception as provided
  9. * by Oracle in the LICENSE file that accompanied this code.
  10. *
  11. * This code is distributed in the hope that it will be useful, but WITHOUT
  12. * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
  13. * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
  14. * version 2 for more details (a copy is included in the LICENSE file that
  15. * accompanied this code).
  16. *
  17. * You should have received a copy of the GNU General Public License version
  18. * 2 along with this work; if not, write to the Free Software Foundation,
  19. * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
  20. *
  21. * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
  22. * or visit www.oracle.com if you need additional information or have any
  23. * questions.
  24. */
  25. #include "vis_proto.h"
  26. #include "mlib_image.h"
  27. #include "mlib_v_ImageLookUpFunc.h"
  28. /***************************************************************/
  /*
   * Forward declarations of the file-local row kernels defined below.
   * For each channel count (2, 3, 4) there are three kernels:
   *   _SrcOff0_D1  - takes a merged mlib_d64 lookup table; writes dst with
   *                  whole mlib_d64 stores
   *   _DstNonAl_D1 - takes a merged mlib_d64 lookup table; used by the
   *                  dispatchers when dst is not 8-byte aligned
   *   _SMALL       - takes the caller's per-channel mlib_s32 tables directly
   * The definitions further down omit `static`; they still have internal
   * linkage because these earlier declarations are static.
   */
  29. static void mlib_v_ImageLookUpSI_U8_S32_2_SrcOff0_D1(const mlib_u8 *src,
  30. mlib_s32 *dst,
  31. mlib_s32 xsize,
  32. const mlib_d64 *table);
  33. static void mlib_v_ImageLookUpSI_U8_S32_2_DstNonAl_D1(const mlib_u8 *src,
  34. mlib_s32 *dst,
  35. mlib_s32 xsize,
  36. const mlib_d64 *table);
  37. static void mlib_v_ImageLookUpSI_U8_S32_2_SMALL(const mlib_u8 *src,
  38. mlib_s32 *dst,
  39. mlib_s32 xsize,
  40. const mlib_s32 **table);
  41. static void mlib_v_ImageLookUpSI_U8_S32_3_SrcOff0_D1(const mlib_u8 *src,
  42. mlib_s32 *dst,
  43. mlib_s32 xsize,
  44. const mlib_d64 *table);
  45. static void mlib_v_ImageLookUpSI_U8_S32_3_DstNonAl_D1(const mlib_u8 *src,
  46. mlib_s32 *dst,
  47. mlib_s32 xsize,
  48. const mlib_d64 *table);
  49. static void mlib_v_ImageLookUpSI_U8_S32_3_SMALL(const mlib_u8 *src,
  50. mlib_s32 *dst,
  51. mlib_s32 xsize,
  52. const mlib_s32 **table);
  53. static void mlib_v_ImageLookUpSI_U8_S32_4_SrcOff0_D1(const mlib_u8 *src,
  54. mlib_s32 *dst,
  55. mlib_s32 xsize,
  56. const mlib_d64 *table);
  57. static void mlib_v_ImageLookUpSI_U8_S32_4_DstNonAl_D1(const mlib_u8 *src,
  58. mlib_s32 *dst,
  59. mlib_s32 xsize,
  60. const mlib_d64 *table);
  61. static void mlib_v_ImageLookUpSI_U8_S32_4_SMALL(const mlib_u8 *src,
  62. mlib_s32 *dst,
  63. mlib_s32 xsize,
  64. const mlib_s32 **table);
  65. /***************************************************************/
  /*
   * Row kernel for the 2-channel U8 -> S32 lookup that writes dst with whole
   * 8-byte stores.
   *
   *   src   - source bytes; read one 32-bit word at a time through `sa`, so it
   *           has to be 4-byte aligned (the dispatcher in this file peels
   *           leading bytes until it is)
   *   dst   - destination; written through an mlib_d64 pointer (the dispatcher
   *           only calls this kernel when (dp & 7) == 0)
   *   xsize - number of source bytes (pixels) to convert
   *   table - 8-byte entries indexed by source value; each selected entry is
   *           copied verbatim to dst
   *
   * The most significant byte of every source word is emitted first, i.e.
   * big-endian byte order is assumed.
   */
  66. void mlib_v_ImageLookUpSI_U8_S32_2_SrcOff0_D1(const mlib_u8 *src,
  67. mlib_s32 *dst,
  68. mlib_s32 xsize,
  69. const mlib_d64 *table)
  70. {
  71. mlib_u32 *sa; /* aligned pointer to source data */
  72. mlib_u8 *sp; /* pointer to source data */
  73. mlib_u32 s0; /* source data */
  74. mlib_d64 *dp; /* aligned pointer to destination */
  75. mlib_d64 acc0, acc1; /* destination data */
  76. mlib_d64 acc2, acc3; /* destination data */
  77. mlib_s32 i; /* loop variable */
  78. mlib_u32 s00, s01, s02, s03;
  79. sa = (mlib_u32*)src;
  80. dp = (mlib_d64 *) dst;
  81. i = 0;
  82. if (xsize >= 4) {
  83. s0 = *sa++;
  /* s0x = (byte x of s0) * 8: byte offset of that pixel's entry in table */
  84. s00 = (s0 >> 21) & 0x7F8;
  85. s01 = (s0 >> 13) & 0x7F8;
  /*
   * Each pass emits the 4 pixels held in s0 and pre-loads the next word.
   * The bound xsize - 8 ensures the pre-loaded word holds 4 more pixels.
   */
  86. #pragma pipeloop(0)
  87. for(i = 0; i <= xsize - 8; i+=4, dp += 4) {
  88. s02 = (s0 >> 5) & 0x7F8;
  89. s03 = (s0 << 3) & 0x7F8;
  90. acc0 = *(mlib_d64*)((mlib_u8*)table + s00);
  91. acc1 = *(mlib_d64*)((mlib_u8*)table + s01);
  92. acc2 = *(mlib_d64*)((mlib_u8*)table + s02);
  93. acc3 = *(mlib_d64*)((mlib_u8*)table + s03);
  94. s0 = *sa++;
  95. s00 = (s0 >> 21) & 0x7F8;
  96. s01 = (s0 >> 13) & 0x7F8;
  97. dp[0] = acc0;
  98. dp[1] = acc1;
  99. dp[2] = acc2;
  100. dp[3] = acc3;
  101. }
  /* Epilogue: emit the 4 pixels of the last word that was loaded */
  102. s02 = (s0 >> 5) & 0x7F8;
  103. s03 = (s0 << 3) & 0x7F8;
  104. acc0 = *(mlib_d64*)((mlib_u8*)table + s00);
  105. acc1 = *(mlib_d64*)((mlib_u8*)table + s01);
  106. acc2 = *(mlib_d64*)((mlib_u8*)table + s02);
  107. acc3 = *(mlib_d64*)((mlib_u8*)table + s03);
  108. dp[0] = acc0;
  109. dp[1] = acc1;
  110. dp[2] = acc2;
  111. dp[3] = acc3;
  112. dp += 4;
  113. i += 4;
  114. }
  /* Tail: at most 3 pixels are left; look them up one byte at a time */
  115. sp = (mlib_u8*)sa;
  116. if ( i <= xsize - 2) {
  117. *dp++ = table[sp[0]];
  118. *dp++ = table[sp[1]];
  119. i+=2; sp += 2;
  120. }
  121. if ( i < xsize) *dp++ = table[sp[0]];
  122. }
  123. /***************************************************************/
  /*
   * Row kernel for the 2-channel U8 -> S32 lookup used when dst is not
   * 8-byte aligned (the dispatcher in this file calls it when (dp & 7) != 0).
   *
   *   src   - source bytes; read as 32-bit words, so 4-byte alignment is
   *           required (the dispatcher peels leading bytes to get it)
   *   dst   - destination row
   *   xsize - number of pixels; must be at least 1 because the first source
   *           word is read and the first pixel is written unconditionally
   *   table - 8-byte entries indexed by source value
   *
   * The output is shifted by one 4-byte slot relative to the table entries:
   * the high half of the first entry is stored on its own, every following
   * 8-byte store is built with vis_faligndata() from two consecutive entries,
   * and the low half of the last entry is stored on its own at the end.
   *
   * NOTE(review): vis_alignaddr(dp, 4) presumably sets the align offset to 4
   * so that vis_faligndata(a, b) yields the low 4 bytes of `a` followed by
   * the high 4 bytes of `b` - confirm against the VIS documentation.
   */
  124. void mlib_v_ImageLookUpSI_U8_S32_2_DstNonAl_D1(const mlib_u8 *src,
  125. mlib_s32 *dst,
  126. mlib_s32 xsize,
  127. const mlib_d64 *table)
  128. {
  129. mlib_u32 *sa; /* aligned pointer to source data */
  130. mlib_u8 *sp; /* pointer to source data */
  131. mlib_u32 s0; /* source data */
  132. mlib_s32 *dl; /* pointer to start of destination */
  133. mlib_d64 *dp; /* aligned pointer to destination */
  134. mlib_d64 acc0, acc1; /* destination data */
  135. mlib_d64 acc2, acc3, acc4; /* destination data */
  136. mlib_s32 i; /* loop variable */
  137. mlib_u32 s00, s01, s02, s03;
  138. sa = (mlib_u32*)src;
  139. dl = dst;
  /* dl rounded down to an 8-byte boundary, then advanced by one mlib_d64 */
  140. dp = (mlib_d64 *) ((mlib_addr) dl & (~7)) + 1;
  141. vis_alignaddr(dp, 4);
  /* First pixel: only the high half of its entry can be stored right away */
  142. s0 = *sa++;
  143. s00 = (s0 >> 21) & 0x7F8;
  144. acc0 = *(mlib_d64*)((mlib_u8*)table + s00);
  145. *(mlib_f32*)dl = vis_read_hi(acc0);
  146. xsize--;
  /* sp -> second byte of the first word, in case the byte tail finishes it */
  147. sp = (mlib_u8*)sa - 3;
  /* Remaining 3 pixels of the first source word */
  148. if (xsize >= 3) {
  149. s01 = (s0 >> 13) & 0x7F8;
  150. s02 = (s0 >> 5) & 0x7F8;
  151. s03 = (s0 << 3) & 0x7F8;
  152. acc1 = *(mlib_d64*)((mlib_u8*)table + s01);
  153. acc2 = *(mlib_d64*)((mlib_u8*)table + s02);
  154. acc3 = *(mlib_d64*)((mlib_u8*)table + s03);
  155. dp[0] = vis_faligndata(acc0, acc1);
  156. dp[1] = vis_faligndata(acc1, acc2);
  157. dp[2] = vis_faligndata(acc2, acc3);
  158. acc0 = acc3; dp += 3; xsize -= 3;
  159. sp = (mlib_u8*)sa;
  160. }
  161. i = 0;
  162. if (xsize >= 4) {
  163. s0 = *sa++;
  164. s00 = (s0 >> 21) & 0x7F8;
  165. s01 = (s0 >> 13) & 0x7F8;
  /*
   * Main loop: 4 pixels per pass with the next word pre-loaded; acc0 carries
   * the last entry of the previous group into the next group's first store.
   */
  166. #pragma pipeloop(0)
  167. for(i = 0; i <= xsize - 8; i+=4, dp += 4) {
  168. s02 = (s0 >> 5) & 0x7F8;
  169. s03 = (s0 << 3) & 0x7F8;
  170. acc1 = *(mlib_d64*)((mlib_u8*)table + s00);
  171. acc2 = *(mlib_d64*)((mlib_u8*)table + s01);
  172. acc3 = *(mlib_d64*)((mlib_u8*)table + s02);
  173. acc4 = *(mlib_d64*)((mlib_u8*)table + s03);
  174. s0 = *sa++;
  175. s00 = (s0 >> 21) & 0x7F8;
  176. s01 = (s0 >> 13) & 0x7F8;
  177. dp[0] = vis_faligndata(acc0, acc1);
  178. dp[1] = vis_faligndata(acc1, acc2);
  179. dp[2] = vis_faligndata(acc2, acc3);
  180. dp[3] = vis_faligndata(acc3, acc4);
  181. acc0 = acc4;
  182. }
  /* Epilogue: the 4 pixels of the last pre-loaded word */
  183. s02 = (s0 >> 5) & 0x7F8;
  184. s03 = (s0 << 3) & 0x7F8;
  185. acc1 = *(mlib_d64*)((mlib_u8*)table + s00);
  186. acc2 = *(mlib_d64*)((mlib_u8*)table + s01);
  187. acc3 = *(mlib_d64*)((mlib_u8*)table + s02);
  188. acc4 = *(mlib_d64*)((mlib_u8*)table + s03);
  189. dp[0] = vis_faligndata(acc0, acc1);
  190. dp[1] = vis_faligndata(acc1, acc2);
  191. dp[2] = vis_faligndata(acc2, acc3);
  192. dp[3] = vis_faligndata(acc3, acc4);
  193. acc0 = acc4;
  194. dp += 4;
  195. i += 4;
  196. sp = (mlib_u8*)sa;
  197. }
  /* Tail: up to 3 pixels, looked up bytewise */
  198. if ( i <= xsize - 2) {
  199. acc1 = table[sp[0]];
  200. acc2 = table[sp[1]];
  201. *dp++ = vis_faligndata(acc0, acc1);
  202. *dp++ = vis_faligndata(acc1, acc2);
  203. i+=2; sp += 2;
  204. acc0 = acc2;
  205. }
  206. if ( i < xsize) {
  207. acc1 = table[sp[0]];
  208. *dp++ = vis_faligndata(acc0, acc1);
  209. acc0 = acc1;
  210. }
  /* Low half of the final entry is still pending; store it as 4 bytes */
  211. *(mlib_f32*) dp = vis_read_lo(acc0);
  212. }
  213. /***************************************************************/
  /*
   * Row kernel for the 2-channel U8 -> S32 lookup that reads the caller's
   * per-channel tables directly (no merged table). The dispatcher in this
   * file uses it when xsize * ysize < 600.
   *
   *   src   - source bytes; read as 32-bit words, so 4-byte alignment is
   *           required (the dispatcher peels leading bytes to get it)
   *   dst   - destination; receives two 4-byte values per source byte
   *   xsize - number of source bytes (pixels)
   *   table - table[0] and table[1] are the per-channel tables; entries are
   *           copied as 4-byte mlib_f32 values
   *
   * The most significant byte of every source word is emitted first.
   */
  214. void mlib_v_ImageLookUpSI_U8_S32_2_SMALL(const mlib_u8 *src,
  215. mlib_s32 *dst,
  216. mlib_s32 xsize,
  217. const mlib_s32 **table)
  218. {
  219. mlib_u32 *sa; /* aligned pointer to source data */
  220. mlib_u8 *sp; /* pointer to source data */
  221. mlib_u32 s0; /* source data */
  222. mlib_f32 *dp; /* aligned pointer to destination */
  223. mlib_f32 acc0, acc1; /* destination data */
  224. mlib_f32 acc2, acc3; /* destination data */
  225. mlib_f32 acc4, acc5; /* destination data */
  226. mlib_f32 acc6, acc7; /* destination data */
  227. mlib_f32 *table0 = (mlib_f32*)table[0];
  228. mlib_f32 *table1 = (mlib_f32*)table[1];
  229. mlib_s32 i; /* loop variable */
  230. mlib_u32 s00, s01, s02, s03;
  231. sa = (mlib_u32*)src;
  232. dp = (mlib_f32*)dst;
  233. i = 0;
  234. if (xsize >= 4) {
  235. s0 = *sa++;
  /* s0x = (byte x of s0) * 4: byte offset of the entry in a 4-byte table */
  236. s00 = (s0 >> 22) & 0x3FC;
  237. s01 = (s0 >> 14) & 0x3FC;
  /* 4 pixels (8 output values) per pass, with the next word pre-loaded */
  238. #pragma pipeloop(0)
  239. for(i = 0; i <= xsize - 8; i+=4, dp += 8) {
  240. s02 = (s0 >> 6) & 0x3FC;
  241. s03 = (s0 << 2) & 0x3FC;
  242. acc0 = *(mlib_f32*)((mlib_u8*)table0 + s00);
  243. acc1 = *(mlib_f32*)((mlib_u8*)table1 + s00);
  244. acc2 = *(mlib_f32*)((mlib_u8*)table0 + s01);
  245. acc3 = *(mlib_f32*)((mlib_u8*)table1 + s01);
  246. acc4 = *(mlib_f32*)((mlib_u8*)table0 + s02);
  247. acc5 = *(mlib_f32*)((mlib_u8*)table1 + s02);
  248. acc6 = *(mlib_f32*)((mlib_u8*)table0 + s03);
  249. acc7 = *(mlib_f32*)((mlib_u8*)table1 + s03);
  250. s0 = *sa++;
  251. s00 = (s0 >> 22) & 0x3FC;
  252. s01 = (s0 >> 14) & 0x3FC;
  253. dp[0] = acc0;
  254. dp[1] = acc1;
  255. dp[2] = acc2;
  256. dp[3] = acc3;
  257. dp[4] = acc4;
  258. dp[5] = acc5;
  259. dp[6] = acc6;
  260. dp[7] = acc7;
  261. }
  /* Epilogue: the 4 pixels of the last pre-loaded word */
  262. s02 = (s0 >> 6) & 0x3FC;
  263. s03 = (s0 << 2) & 0x3FC;
  264. acc0 = *(mlib_f32*)((mlib_u8*)table0 + s00);
  265. acc1 = *(mlib_f32*)((mlib_u8*)table1 + s00);
  266. acc2 = *(mlib_f32*)((mlib_u8*)table0 + s01);
  267. acc3 = *(mlib_f32*)((mlib_u8*)table1 + s01);
  268. acc4 = *(mlib_f32*)((mlib_u8*)table0 + s02);
  269. acc5 = *(mlib_f32*)((mlib_u8*)table1 + s02);
  270. acc6 = *(mlib_f32*)((mlib_u8*)table0 + s03);
  271. acc7 = *(mlib_f32*)((mlib_u8*)table1 + s03);
  272. dp[0] = acc0;
  273. dp[1] = acc1;
  274. dp[2] = acc2;
  275. dp[3] = acc3;
  276. dp[4] = acc4;
  277. dp[5] = acc5;
  278. dp[6] = acc6;
  279. dp[7] = acc7;
  280. dp += 8;
  281. i += 4;
  282. }
  /* Tail: up to 3 pixels, one per `if` */
  283. sp = (mlib_u8*)sa;
  284. if ( i < xsize ) {
  285. *dp++ = table0[sp[0]];
  286. *dp++ = table1[sp[0]];
  287. i++; sp++;
  288. }
  289. if ( i < xsize ) {
  290. *dp++ = table0[sp[0]];
  291. *dp++ = table1[sp[0]];
  292. i++; sp++;
  293. }
  294. if ( i < xsize ) {
  295. *dp++ = table0[sp[0]];
  296. *dp++ = table1[sp[0]];
  297. }
  298. }
  299. /***************************************************************/
  300. void mlib_v_ImageLookUpSI_U8_S32_2(const mlib_u8 *src,
  301. mlib_s32 slb,
  302. mlib_s32 *dst,
  303. mlib_s32 dlb,
  304. mlib_s32 xsize,
  305. mlib_s32 ysize,
  306. const mlib_s32 **table)
  307. {
  308. if ((xsize * ysize) < 600) {
  309. mlib_u8 *sl;
  310. mlib_s32 *dl;
  311. mlib_s32 j, i;
  312. const mlib_s32 *tab0 = table[0];
  313. const mlib_s32 *tab1 = table[1];
  314. sl = (void *)src;
  315. dl = dst;
  316. /* row loop */
  317. for (j = 0; j < ysize; j ++) {
  318. mlib_u8 *sp = sl;
  319. mlib_s32 *dp = dl;
  320. mlib_s32 off, size = xsize;
  321. off = (mlib_s32)((4 - ((mlib_addr)sp & 3)) & 3);
  322. off = (off < size) ? off : size;
  323. for (i = 0; i < off; i++) {
  324. *dp++ = tab0[sp[0]];
  325. *dp++ = tab1[sp[0]];
  326. size--; sp++;
  327. }
  328. if (size > 0) {
  329. mlib_v_ImageLookUpSI_U8_S32_2_SMALL(sp, (mlib_s32*)dp, size, table);
  330. }
  331. sl = (mlib_u8 *) ((mlib_u8 *) sl + slb);
  332. dl = (mlib_s32 *) ((mlib_u8 *) dl + dlb);
  333. }
  334. } else {
  335. mlib_u8 *sl;
  336. mlib_s32 *dl;
  337. mlib_d64 dtab[256];
  338. mlib_u32 *tab;
  339. mlib_u32 *tab0 = (mlib_u32*)table[0];
  340. mlib_u32 *tab1 = (mlib_u32*)table[1];
  341. mlib_s32 i, j;
  342. mlib_u32 s0, s1;
  343. tab = (mlib_u32*)dtab;
  344. s0 = tab0[0];
  345. s1 = tab1[0];
  346. for (i = 0; i < 255; i++) {
  347. tab[2*i] = s0;
  348. tab[2*i+1] = s1;
  349. s0 = tab0[i+1];
  350. s1 = tab1[i+1];
  351. }
  352. tab[510] = s0;
  353. tab[511] = s1;
  354. sl = (void *)src;
  355. dl = dst;
  356. /* row loop */
  357. for (j = 0; j < ysize; j ++) {
  358. mlib_u8 *sp = sl;
  359. mlib_u32 *dp = (mlib_u32*)dl;
  360. mlib_s32 off, size = xsize;
  361. off = (mlib_s32)((4 - ((mlib_addr)sp & 3)) & 3);
  362. off = (off < size) ? off : size;
  363. #pragma pipeloop(0)
  364. for (i = 0; i < off; i++) {
  365. dp[0] = tab0[sp[0]];
  366. dp[1] = tab1[sp[0]];
  367. dp += 2; sp++;
  368. }
  369. size -= off;
  370. if (size > 0) {
  371. if (((mlib_addr)dp & 7) == 0) {
  372. mlib_v_ImageLookUpSI_U8_S32_2_SrcOff0_D1(sp, (mlib_s32*)dp, size, dtab);
  373. } else {
  374. mlib_v_ImageLookUpSI_U8_S32_2_DstNonAl_D1(sp, (mlib_s32*)dp, size, dtab);
  375. }
  376. }
  377. sl = (mlib_u8 *) ((mlib_u8 *) sl + slb);
  378. dl = (mlib_s32 *) ((mlib_u8 *) dl + dlb);
  379. }
  380. }
  381. }
  382. /***************************************************************/
  /*
   * Row kernel for the 3-channel U8 -> S32 lookup used when dst is 8-byte
   * aligned (the dispatcher in this file calls it when (dp & 7) == 0).
   *
   *   src   - source bytes; read as 32-bit words, so 4-byte alignment is
   *           required (the dispatcher peels leading bytes to get it)
   *   dst   - destination; receives three s32 values per source byte
   *   xsize - number of source bytes (pixels)
   *   table - 16-byte entries (two mlib_d64) indexed by source value; the
   *           dispatcher fills the first three 4-byte slots of each entry
   *           with the channel values and leaves the fourth as padding
   *
   * Four pixels (12 s32 values) are packed into six 8-byte stores; the
   * padding slot of each entry is squeezed out with vis_faligndata().
   *
   * NOTE(review): vis_alignaddr((void *) 0, 4) presumably sets the align
   * offset to 4 so that vis_faligndata(a, b) yields the low 4 bytes of `a`
   * followed by the high 4 bytes of `b` - confirm against the VIS docs.
   */
  383. void mlib_v_ImageLookUpSI_U8_S32_3_SrcOff0_D1(const mlib_u8 *src,
  384. mlib_s32 *dst,
  385. mlib_s32 xsize,
  386. const mlib_d64 *table)
  387. {
  388. mlib_u8 *sp; /* pointer to source data */
  389. mlib_u32 *sa; /* aligned pointer to source data */
  390. mlib_u32 s0; /* source data */
  391. mlib_s32 *dl; /* pointer to start of destination */
  392. mlib_d64 *dp; /* aligned pointer to destination */
  393. mlib_d64 t0, t1, t2, t3; /* destination data */
  394. mlib_d64 t4, t5, t6, t7; /* destination data */
  395. mlib_s32 i; /* loop variable */
  396. mlib_s32 *ptr;
  397. mlib_u32 s00, s01, s02, s03;
  398. dl = dst;
  399. sp = (void *)src;
  400. dp = (mlib_d64 *) dl;
  401. sa = (mlib_u32*)sp;
  402. vis_alignaddr((void *) 0, 4);
  403. i = 0;
  404. if (xsize >= 4) {
  405. s0 = *sa++;
  /* s0x = (byte x of s0) * 16: byte offset of the pixel's 16-byte entry */
  406. s00 = (s0 >> 20) & 0xFF0;
  407. s01 = (s0 >> 12) & 0xFF0;
  /* 4 pixels per pass, with the next source word pre-loaded */
  408. #pragma pipeloop(0)
  409. for(i = 0; i <= xsize - 8; i+=4, dp+=6) {
  410. s02 = (s0 >> 4) & 0xFF0;
  411. s03 = (s0 << 4) & 0xFF0;
  /* t0/t1, t2/t3, t4/t5, t6/t7: the two halves of pixels 0..3's entries */
  412. t0 = *(mlib_d64*)((mlib_u8*)table + s00);
  413. t1 = *(mlib_d64*)((mlib_u8*)table + s00 + 8);
  414. t2 = *(mlib_d64*)((mlib_u8*)table + s01);
  415. t3 = *(mlib_d64*)((mlib_u8*)table + s01 + 8);
  416. t4 = *(mlib_d64*)((mlib_u8*)table + s02);
  417. t5 = *(mlib_d64*)((mlib_u8*)table + s02 + 8);
  418. t6 = *(mlib_d64*)((mlib_u8*)table + s03);
  419. t7 = *(mlib_d64*)((mlib_u8*)table + s03 + 8);
  /*
   * Assuming the 4-byte align offset described above, the shuffles below
   * drop each entry's padding slot so that the stores hold 12 consecutive
   * channel values.
   */
  420. t1 = vis_faligndata(t1, t1);
  421. t1 = vis_faligndata(t1, t2);
  422. t2 = vis_faligndata(t2, t3);
  423. t5 = vis_faligndata(t5, t5);
  424. t5 = vis_faligndata(t5, t6);
  425. t6 = vis_faligndata(t6, t7);
  426. s0 = *sa++;
  427. s00 = (s0 >> 20) & 0xFF0;
  428. s01 = (s0 >> 12) & 0xFF0;
  429. dp[0] = t0;
  430. dp[1] = t1;
  431. dp[2] = t2;
  432. dp[3] = t4;
  433. dp[4] = t5;
  434. dp[5] = t6;
  435. }
  /* Epilogue: the 4 pixels of the last pre-loaded word */
  436. s02 = (s0 >> 4) & 0xFF0;
  437. s03 = (s0 << 4) & 0xFF0;
  438. t0 = *(mlib_d64*)((mlib_u8*)table + s00);
  439. t1 = *(mlib_d64*)((mlib_u8*)table + s00 + 8);
  440. t2 = *(mlib_d64*)((mlib_u8*)table + s01);
  441. t3 = *(mlib_d64*)((mlib_u8*)table + s01 + 8);
  442. t4 = *(mlib_d64*)((mlib_u8*)table + s02);
  443. t5 = *(mlib_d64*)((mlib_u8*)table + s02 + 8);
  444. t6 = *(mlib_d64*)((mlib_u8*)table + s03);
  445. t7 = *(mlib_d64*)((mlib_u8*)table + s03 + 8);
  446. t1 = vis_faligndata(t1, t1);
  447. t1 = vis_faligndata(t1, t2);
  448. t2 = vis_faligndata(t2, t3);
  449. t5 = vis_faligndata(t5, t5);
  450. t5 = vis_faligndata(t5, t6);
  451. t6 = vis_faligndata(t6, t7);
  452. dp[0] = t0;
  453. dp[1] = t1;
  454. dp[2] = t2;
  455. dp[3] = t4;
  456. dp[4] = t5;
  457. dp[5] = t6;
  458. i += 4; dp += 6;
  459. }
  /* Tail: remaining pixels copied as three s32 values each */
  460. dl = (mlib_s32*)dp;
  461. sp = (mlib_u8*)sa;
  462. #pragma pipeloop(0)
  463. for (; i < xsize; i++) {
  /* sp[0] << 1: each entry spans two mlib_d64 elements */
  464. ptr = (mlib_s32*)(table + (sp[0] << 1));
  465. dl[0] = ptr[0];
  466. dl[1] = ptr[1];
  467. dl[2] = ptr[2];
  468. dl += 3; sp ++;
  469. }
  470. }
  471. /***************************************************************/
  /*
   * Row kernel for the 3-channel U8 -> S32 lookup used when dst is not
   * 8-byte aligned (the dispatcher in this file calls it when (dp & 7) != 0).
   *
   *   src   - source bytes; read as 32-bit words, so 4-byte alignment is
   *           required (the dispatcher peels leading bytes to get it)
   *   dst   - destination; receives three s32 values per source byte
   *   xsize - number of source bytes (pixels)
   *   table - 16-byte entries (two mlib_d64) indexed by source value; three
   *           channel values followed by one padding slot
   *
   * dp is dst rounded down to an 8-byte boundary. For every group of four
   * pixels the first and the last of the 12 output values are stored as
   * single 4-byte values (slots 1 and 12 relative to dp) and the ten values
   * in between go out as five 8-byte stores (dp[1] .. dp[5]).
   *
   * NOTE(review): vis_alignaddr((void *) 0, 4) presumably sets the align
   * offset to 4 so that vis_faligndata(a, b) yields the low 4 bytes of `a`
   * followed by the high 4 bytes of `b` - confirm against the VIS docs.
   */
  472. void mlib_v_ImageLookUpSI_U8_S32_3_DstNonAl_D1(const mlib_u8 *src,
  473. mlib_s32 *dst,
  474. mlib_s32 xsize,
  475. const mlib_d64 *table)
  476. {
  477. mlib_u8 *sp; /* pointer to source data */
  478. mlib_u32 *sa; /* aligned pointer to source data */
  479. mlib_u32 s0; /* source data */
  480. mlib_s32 *dl; /* pointer to start of destination */
  481. mlib_d64 *dp; /* aligned pointer to destination */
  482. mlib_d64 t0, t1, t2, t3; /* destination data */
  483. mlib_d64 t4, t5, t6, t7; /* destination data */
  484. mlib_s32 i; /* loop variable */
  485. mlib_s32 *ptr;
  486. mlib_u32 s00, s01, s02, s03;
  487. dl = dst;
  488. sp = (void *)src;
  489. dp = (mlib_d64 *) ((mlib_addr) dl & (~7));
  490. sa = (mlib_u32*)sp;
  491. vis_alignaddr((void *) 0, 4);
  492. i = 0;
  493. if (xsize >= 4) {
  494. s0 = *sa++;
  /* s0x = (byte x of s0) * 16: byte offset of the pixel's 16-byte entry */
  495. s00 = (s0 >> 20) & 0xFF0;
  496. s01 = (s0 >> 12) & 0xFF0;
  /* 4 pixels per pass, with the next source word pre-loaded */
  497. #pragma pipeloop(0)
  498. for(i = 0; i <= xsize - 8; i+=4, dp+=6) {
  499. s02 = (s0 >> 4) & 0xFF0;
  500. s03 = (s0 << 4) & 0xFF0;
  501. t0 = *(mlib_d64*)((mlib_u8*)table + s00);
  502. t1 = *(mlib_d64*)((mlib_u8*)table + s00 + 8);
  503. t2 = *(mlib_d64*)((mlib_u8*)table + s01);
  504. t3 = *(mlib_d64*)((mlib_u8*)table + s01 + 8);
  505. t4 = *(mlib_d64*)((mlib_u8*)table + s02);
  506. t5 = *(mlib_d64*)((mlib_u8*)table + s02 + 8);
  507. t6 = *(mlib_d64*)((mlib_u8*)table + s03);
  508. t7 = *(mlib_d64*)((mlib_u8*)table + s03 + 8);
  /* shuffles that drop the padding slots (see NOTE in the header) */
  509. t1 = vis_faligndata(t0, t1);
  510. t3 = vis_faligndata(t3, t3);
  511. t3 = vis_faligndata(t3, t4);
  512. t4 = vis_faligndata(t4, t5);
  513. s0 = *sa++;
  514. s00 = (s0 >> 20) & 0xFF0;
  515. s01 = (s0 >> 12) & 0xFF0;
  /* first value of the group: 4-byte store into the upper half of dp[0] */
  516. *(mlib_f32*)((mlib_f32*)dp + 1) = vis_read_hi(t0);
  517. dp[1] = t1;
  518. dp[2] = t2;
  519. dp[3] = t3;
  520. dp[4] = t4;
  521. dp[5] = t6;
  /* last value of the group: 4-byte store just past dp[5] */
  522. *(mlib_f32*)((mlib_f32*)dp + 12) = vis_read_hi(t7);
  523. }
  /* Epilogue: the 4 pixels of the last pre-loaded word */
  524. s02 = (s0 >> 4) & 0xFF0;
  525. s03 = (s0 << 4) & 0xFF0;
  526. t0 = *(mlib_d64*)((mlib_u8*)table + s00);
  527. t1 = *(mlib_d64*)((mlib_u8*)table + s00 + 8);
  528. t2 = *(mlib_d64*)((mlib_u8*)table + s01);
  529. t3 = *(mlib_d64*)((mlib_u8*)table + s01 + 8);
  530. t4 = *(mlib_d64*)((mlib_u8*)table + s02);
  531. t5 = *(mlib_d64*)((mlib_u8*)table + s02 + 8);
  532. t6 = *(mlib_d64*)((mlib_u8*)table + s03);
  533. t7 = *(mlib_d64*)((mlib_u8*)table + s03 + 8);
  534. t1 = vis_faligndata(t0, t1);
  535. t3 = vis_faligndata(t3, t3);
  536. t3 = vis_faligndata(t3, t4);
  537. t4 = vis_faligndata(t4, t5);
  538. *(mlib_f32*)((mlib_f32*)dp + 1) = vis_read_hi(t0);
  539. dp[1] = t1;
  540. dp[2] = t2;
  541. dp[3] = t3;
  542. dp[4] = t4;
  543. dp[5] = t6;
  544. *(mlib_f32*)((mlib_f32*)dp + 12) = vis_read_hi(t7);
  545. i += 4; dp += 6;
  546. }
  /* Tail: resume at the 4-byte slot after dp and copy 3 values per pixel */
  547. dl = (mlib_s32*)dp + 1;
  548. sp = (mlib_u8*)sa;
  549. #pragma pipeloop(0)
  550. for (; i < xsize; i++) {
  /* sp[0] << 1: each entry spans two mlib_d64 elements */
  551. ptr = (mlib_s32*)(table + (sp[0] << 1));
  552. dl[0] = ptr[0];
  553. dl[1] = ptr[1];
  554. dl[2] = ptr[2];
  555. dl += 3; sp ++;
  556. }
  557. }
  558. /***************************************************************/
  /*
   * Row kernel for the 3-channel U8 -> S32 lookup that reads the caller's
   * per-channel tables directly (no merged table). The dispatcher in this
   * file uses it when xsize * ysize < 600.
   *
   *   src   - source bytes; read as 32-bit words, so 4-byte alignment is
   *           required (the dispatcher peels leading bytes to get it)
   *   dst   - destination; receives three 4-byte values per source byte
   *   xsize - number of source bytes (pixels)
   *   table - table[0..2] are the per-channel tables; entries are copied as
   *           4-byte mlib_f32 values
   *
   * The most significant byte of every source word is emitted first.
   */
  559. void mlib_v_ImageLookUpSI_U8_S32_3_SMALL(const mlib_u8 *src,
  560. mlib_s32 *dst,
  561. mlib_s32 xsize,
  562. const mlib_s32 **table)
  563. {
  564. mlib_u32 *sa; /* aligned pointer to source data */
  565. mlib_u8 *sp; /* pointer to source data */
  566. mlib_u32 s0; /* source data */
  567. mlib_f32 *dp; /* aligned pointer to destination */
  568. mlib_f32 acc0, acc1; /* destination data */
  569. mlib_f32 acc2, acc3; /* destination data */
  570. mlib_f32 acc4, acc5; /* destination data */
  571. mlib_f32 acc6, acc7; /* destination data */
  572. mlib_f32 acc8, acc9; /* destination data */
  573. mlib_f32 acc10, acc11; /* destination data */
  574. mlib_f32 *table0 = (mlib_f32*)table[0];
  575. mlib_f32 *table1 = (mlib_f32*)table[1];
  576. mlib_f32 *table2 = (mlib_f32*)table[2];
  577. mlib_s32 i; /* loop variable */
  578. mlib_u32 s00, s01, s02, s03;
  579. sa = (mlib_u32*)src;
  580. dp = (mlib_f32*)dst;
  581. i = 0;
  582. if (xsize >= 4) {
  583. s0 = *sa++;
  /* s0x = (byte x of s0) * 4: byte offset of the entry in a 4-byte table */
  584. s00 = (s0 >> 22) & 0x3FC;
  585. s01 = (s0 >> 14) & 0x3FC;
  /* 4 pixels (12 output values) per pass, with the next word pre-loaded */
  586. #pragma pipeloop(0)
  587. for(i = 0; i <= xsize - 8; i+=4, dp += 12) {
  588. s02 = (s0 >> 6) & 0x3FC;
  589. s03 = (s0 << 2) & 0x3FC;
  590. acc0 = *(mlib_f32*)((mlib_u8*)table0 + s00);
  591. acc1 = *(mlib_f32*)((mlib_u8*)table1 + s00);
  592. acc2 = *(mlib_f32*)((mlib_u8*)table2 + s00);
  593. acc3 = *(mlib_f32*)((mlib_u8*)table0 + s01);
  594. acc4 = *(mlib_f32*)((mlib_u8*)table1 + s01);
  595. acc5 = *(mlib_f32*)((mlib_u8*)table2 + s01);
  596. acc6 = *(mlib_f32*)((mlib_u8*)table0 + s02);
  597. acc7 = *(mlib_f32*)((mlib_u8*)table1 + s02);
  598. acc8 = *(mlib_f32*)((mlib_u8*)table2 + s02);
  599. acc9 = *(mlib_f32*)((mlib_u8*)table0 + s03);
  600. acc10 = *(mlib_f32*)((mlib_u8*)table1 + s03);
  601. acc11 = *(mlib_f32*)((mlib_u8*)table2 + s03);
  602. s0 = *sa++;
  603. s00 = (s0 >> 22) & 0x3FC;
  604. s01 = (s0 >> 14) & 0x3FC;
  605. dp[0] = acc0;
  606. dp[1] = acc1;
  607. dp[2] = acc2;
  608. dp[3] = acc3;
  609. dp[4] = acc4;
  610. dp[5] = acc5;
  611. dp[6] = acc6;
  612. dp[7] = acc7;
  613. dp[8] = acc8;
  614. dp[9] = acc9;
  615. dp[10] = acc10;
  616. dp[11] = acc11;
  617. }
  /* Epilogue: the 4 pixels of the last pre-loaded word */
  618. s02 = (s0 >> 6) & 0x3FC;
  619. s03 = (s0 << 2) & 0x3FC;
  620. acc0 = *(mlib_f32*)((mlib_u8*)table0 + s00);
  621. acc1 = *(mlib_f32*)((mlib_u8*)table1 + s00);
  622. acc2 = *(mlib_f32*)((mlib_u8*)table2 + s00);
  623. acc3 = *(mlib_f32*)((mlib_u8*)table0 + s01);
  624. acc4 = *(mlib_f32*)((mlib_u8*)table1 + s01);
  625. acc5 = *(mlib_f32*)((mlib_u8*)table2 + s01);
  626. acc6 = *(mlib_f32*)((mlib_u8*)table0 + s02);
  627. acc7 = *(mlib_f32*)((mlib_u8*)table1 + s02);
  628. acc8 = *(mlib_f32*)((mlib_u8*)table2 + s02);
  629. acc9 = *(mlib_f32*)((mlib_u8*)table0 + s03);
  630. acc10 = *(mlib_f32*)((mlib_u8*)table1 + s03);
  631. acc11 = *(mlib_f32*)((mlib_u8*)table2 + s03);
  632. dp[0] = acc0;
  633. dp[1] = acc1;
  634. dp[2] = acc2;
  635. dp[3] = acc3;
  636. dp[4] = acc4;
  637. dp[5] = acc5;
  638. dp[6] = acc6;
  639. dp[7] = acc7;
  640. dp[8] = acc8;
  641. dp[9] = acc9;
  642. dp[10] = acc10;
  643. dp[11] = acc11;
  644. dp += 12;
  645. i += 4;
  646. }
  /* Tail: up to 3 pixels, one per `if` */
  647. sp = (mlib_u8*)sa;
  648. if ( i < xsize ) {
  649. *dp++ = table0[sp[0]];
  650. *dp++ = table1[sp[0]];
  651. *dp++ = table2[sp[0]];
  652. i++; sp++;
  653. }
  654. if ( i < xsize ) {
  655. *dp++ = table0[sp[0]];
  656. *dp++ = table1[sp[0]];
  657. *dp++ = table2[sp[0]];
  658. i++; sp++;
  659. }
  660. if ( i < xsize ) {
  661. *dp++ = table0[sp[0]];
  662. *dp++ = table1[sp[0]];
  663. *dp++ = table2[sp[0]];
  664. }
  665. }
  666. /***************************************************************/
  667. void mlib_v_ImageLookUpSI_U8_S32_3(const mlib_u8 *src,
  668. mlib_s32 slb,
  669. mlib_s32 *dst,
  670. mlib_s32 dlb,
  671. mlib_s32 xsize,
  672. mlib_s32 ysize,
  673. const mlib_s32 **table)
  674. {
  675. if ((xsize * ysize) < 600) {
  676. mlib_u8 *sl;
  677. mlib_s32 *dl;
  678. mlib_s32 j, i;
  679. const mlib_s32 *tab0 = table[0];
  680. const mlib_s32 *tab1 = table[1];
  681. const mlib_s32 *tab2 = table[2];
  682. sl = (void *)src;
  683. dl = dst;
  684. /* row loop */
  685. for (j = 0; j < ysize; j ++) {
  686. mlib_u8 *sp = sl;
  687. mlib_s32 *dp = dl;
  688. mlib_s32 off, size = xsize;
  689. off = (mlib_s32)((4 - ((mlib_addr)sp & 3)) & 3);
  690. off = (off < size) ? off : size;
  691. for (i = 0; i < off; i++) {
  692. *dp++ = tab0[sp[0]];
  693. *dp++ = tab1[sp[0]];
  694. *dp++ = tab2[sp[0]];
  695. size--; sp++;
  696. }
  697. if (size > 0) {
  698. mlib_v_ImageLookUpSI_U8_S32_3_SMALL(sp, (mlib_s32*)dp, size, table);
  699. }
  700. sl = (mlib_u8 *) ((mlib_u8 *) sl + slb);
  701. dl = (mlib_s32 *) ((mlib_u8 *) dl + dlb);
  702. }
  703. } else {
  704. mlib_u8 *sl;
  705. mlib_s32 *dl;
  706. mlib_d64 dtab[512];
  707. mlib_u32 *tab;
  708. mlib_u32 *tab0 = (mlib_u32*)table[0];
  709. mlib_u32 *tab1 = (mlib_u32*)table[1];
  710. mlib_u32 *tab2 = (mlib_u32*)table[2];
  711. mlib_s32 i, j;
  712. mlib_u32 s0, s1, s2;
  713. tab = (mlib_u32*)dtab;
  714. s0 = tab0[0];
  715. s1 = tab1[0];
  716. s2 = tab2[0];
  717. for (i = 0; i < 255; i++) {
  718. tab[4*i] = s0;
  719. tab[4*i+1] = s1;
  720. tab[4*i+2] = s2;
  721. s0 = tab0[i+1];
  722. s1 = tab1[i+1];
  723. s2 = tab2[i+1];
  724. }
  725. tab[1020] = s0;
  726. tab[1021] = s1;
  727. tab[1022] = s2;
  728. sl = (void *)src;
  729. dl = dst;
  730. /* row loop */
  731. for (j = 0; j < ysize; j ++) {
  732. mlib_u8 *sp = sl;
  733. mlib_u32 *dp = (mlib_u32*)dl;
  734. mlib_s32 off, size = xsize;
  735. off = (mlib_s32)((4 - ((mlib_addr)sp & 3)) & 3);
  736. off = (off < size) ? off : size;
  737. #pragma pipeloop(0)
  738. for (i = 0; i < off; i++) {
  739. dp[0] = tab0[sp[0]];
  740. dp[1] = tab1[sp[0]];
  741. dp[2] = tab2[sp[0]];
  742. dp += 3; sp++;
  743. }
  744. size -= off;
  745. if (size > 0) {
  746. if (((mlib_addr)dp & 7) == 0) {
  747. mlib_v_ImageLookUpSI_U8_S32_3_SrcOff0_D1(sp, (mlib_s32*)dp, size, dtab);
  748. } else {
  749. mlib_v_ImageLookUpSI_U8_S32_3_DstNonAl_D1(sp, (mlib_s32*)dp, size, dtab);
  750. }
  751. }
  752. sl = (mlib_u8 *) ((mlib_u8 *) sl + slb);
  753. dl = (mlib_s32 *) ((mlib_u8 *) dl + dlb);
  754. }
  755. }
  756. }
  757. /***************************************************************/
  /*
   * Row kernel for the 4-channel U8 -> S32 lookup that writes dst with whole
   * 8-byte stores.
   *
   *   src   - source bytes; read one 32-bit word at a time through `sa`
   *           (presumably 4-byte aligned by the caller - the 4-channel
   *           dispatcher is not visible in this part of the file)
   *   dst   - destination; written through an mlib_d64 pointer
   *   xsize - number of source bytes (pixels)
   *   table - 16-byte entries (two mlib_d64) indexed by source value; both
   *           halves of the selected entry are copied verbatim to dst
   *
   * The most significant byte of every source word is emitted first.
   */
  758. void mlib_v_ImageLookUpSI_U8_S32_4_SrcOff0_D1(const mlib_u8 *src,
  759. mlib_s32 *dst,
  760. mlib_s32 xsize,
  761. const mlib_d64 *table)
  762. {
  763. mlib_u32 *sa; /* aligned pointer to source data */
  764. mlib_u8 *sp; /* pointer to source data */
  765. mlib_u32 s0; /* source data */
  766. mlib_d64 *dp; /* aligned pointer to destination */
  767. mlib_d64 t0, t1, t2, t3; /* destination data */
  768. mlib_d64 t4, t5, t6, t7; /* destination data */
  769. mlib_s32 i; /* loop variable */
  770. mlib_u32 s00, s01, s02, s03;
  771. sa = (mlib_u32*)src;
  772. dp = (mlib_d64 *) dst;
  773. i = 0;
  774. if (xsize >= 4) {
  775. s0 = *sa++;
  /* s0x = (byte x of s0) * 16: byte offset of the pixel's 16-byte entry */
  776. s00 = (s0 >> 20) & 0xFF0;
  777. s01 = (s0 >> 12) & 0xFF0;
  /* 4 pixels (eight 8-byte stores) per pass, next word pre-loaded */
  778. #pragma pipeloop(0)
  779. for(i = 0; i <= xsize - 8; i+=4, dp+=8) {
  780. s02 = (s0 >> 4) & 0xFF0;
  781. s03 = (s0 << 4) & 0xFF0;
  782. t0 = *(mlib_d64*)((mlib_u8*)table + s00);
  783. t1 = *(mlib_d64*)((mlib_u8*)table + s00 + 8);
  784. t2 = *(mlib_d64*)((mlib_u8*)table + s01);
  785. t3 = *(mlib_d64*)((mlib_u8*)table + s01 + 8);
  786. t4 = *(mlib_d64*)((mlib_u8*)table + s02);
  787. t5 = *(mlib_d64*)((mlib_u8*)table + s02 + 8);
  788. t6 = *(mlib_d64*)((mlib_u8*)table + s03);
  789. t7 = *(mlib_d64*)((mlib_u8*)table + s03 + 8);
  790. s0 = *sa++;
  791. s00 = (s0 >> 20) & 0xFF0;
  792. s01 = (s0 >> 12) & 0xFF0;
  793. dp[0] = t0;
  794. dp[1] = t1;
  795. dp[2] = t2;
  796. dp[3] = t3;
  797. dp[4] = t4;
  798. dp[5] = t5;
  799. dp[6] = t6;
  800. dp[7] = t7;
  801. }
  /* Epilogue: the 4 pixels of the last pre-loaded word */
  802. s02 = (s0 >> 4) & 0xFF0;
  803. s03 = (s0 << 4) & 0xFF0;
  804. t0 = *(mlib_d64*)((mlib_u8*)table + s00);
  805. t1 = *(mlib_d64*)((mlib_u8*)table + s00 + 8);
  806. t2 = *(mlib_d64*)((mlib_u8*)table + s01);
  807. t3 = *(mlib_d64*)((mlib_u8*)table + s01 + 8);
  808. t4 = *(mlib_d64*)((mlib_u8*)table + s02);
  809. t5 = *(mlib_d64*)((mlib_u8*)table + s02 + 8);
  810. t6 = *(mlib_d64*)((mlib_u8*)table + s03);
  811. t7 = *(mlib_d64*)((mlib_u8*)table + s03 + 8);
  812. dp[0] = t0;
  813. dp[1] = t1;
  814. dp[2] = t2;
  815. dp[3] = t3;
  816. dp[4] = t4;
  817. dp[5] = t5;
  818. dp[6] = t6;
  819. dp[7] = t7;
  820. dp += 8;
  821. i += 4;
  822. }
  /* Tail: up to 3 pixels; entry for byte b is table[2*b], table[2*b + 1] */
  823. sp = (mlib_u8*)sa;
  824. if ( i < xsize ) {
  825. *dp++ = table[2*sp[0]];
  826. *dp++ = table[2*sp[0] + 1];
  827. i++; sp++;
  828. }
  829. if ( i < xsize ) {
  830. *dp++ = table[2*sp[0]];
  831. *dp++ = table[2*sp[0] + 1];
  832. i++; sp++;
  833. }
  834. if ( i < xsize ) {
  835. *dp++ = table[2*sp[0]];
  836. *dp++ = table[2*sp[0] + 1];
  837. }
  838. }
  839. /***************************************************************/
  /*
   * Row kernel for the 4-channel U8 -> S32 lookup for a destination that is
   * not 8-byte aligned (presumably selected by the 4-channel dispatcher,
   * which is not visible in this part of the file).
   *
   *   src   - source bytes; read as 32-bit words through `sa`
   *   dst   - destination row
   *   xsize - number of pixels; must be at least 1 because the first source
   *           word is read and the first pixel is written unconditionally
   *   table - 16-byte entries (two mlib_d64) indexed by source value
   *
   * The output is shifted by one 4-byte slot relative to the table entries:
   * the high half of the very first mlib_d64 is stored on its own, every
   * following 8-byte store is built with vis_faligndata() from two
   * consecutive mlib_d64 values, and the low half of the last one is stored
   * on its own at the end.
   *
   * NOTE(review): vis_alignaddr(dp, 4) presumably sets the align offset to 4
   * so that vis_faligndata(a, b) yields the low 4 bytes of `a` followed by
   * the high 4 bytes of `b` - confirm against the VIS documentation.
   */
  840. void mlib_v_ImageLookUpSI_U8_S32_4_DstNonAl_D1(const mlib_u8 *src,
  841. mlib_s32 *dst,
  842. mlib_s32 xsize,
  843. const mlib_d64 *table)
  844. {
  845. mlib_u32 *sa; /* aligned pointer to source data */
  846. mlib_u8 *sp; /* pointer to source data */
  847. mlib_u32 s0; /* source data */
  848. mlib_s32 *dl; /* pointer to start of destination */
  849. mlib_d64 *dp; /* aligned pointer to destination */
  850. mlib_d64 t0, t1, t2, t3; /* destination data */
  851. mlib_d64 t4, t5, t6, t7, t8; /* destination data */
  852. mlib_s32 i; /* loop variable */
  853. mlib_u32 s00, s01, s02, s03;
  854. sa = (mlib_u32*)src;
  855. dl = dst;
  /* dl rounded down to an 8-byte boundary, then advanced by one mlib_d64 */
  856. dp = (mlib_d64 *) ((mlib_addr) dl & (~7)) + 1;
  857. vis_alignaddr(dp, 4);
  /* First pixel: 4-byte store, then one combined 8-byte store */
  858. s0 = *sa++;
  859. s00 = (s0 >> 20) & 0xFF0;
  860. t0 = *(mlib_d64*)((mlib_u8*)table + s00);
  861. t1 = *(mlib_d64*)((mlib_u8*)table + s00 + 8);
  862. *(mlib_f32*)dl = vis_read_hi(t0);
  863. dp[0] = vis_faligndata(t0, t1);
  /* t0 always carries the most recent mlib_d64 whose low half is pending */
  864. t0 = t1;
  865. xsize--; dp++;
  /* sp -> second byte of the first word, in case the byte tail finishes it */
  866. sp = (mlib_u8*)sa - 3;
  /* Remaining 3 pixels of the first source word */
  867. if (xsize >= 3) {
  868. s01 = (s0 >> 12) & 0xFF0;
  869. s02 = (s0 >> 4) & 0xFF0;
  870. s03 = (s0 << 4) & 0xFF0;
  871. t1 = *(mlib_d64*)((mlib_u8*)table + s01);
  872. t2 = *(mlib_d64*)((mlib_u8*)table + s01 + 8);
  873. t3 = *(mlib_d64*)((mlib_u8*)table + s02);
  874. t4 = *(mlib_d64*)((mlib_u8*)table + s02 + 8);
  875. t5 = *(mlib_d64*)((mlib_u8*)table + s03);
  876. t6 = *(mlib_d64*)((mlib_u8*)table + s03 + 8);
  877. dp[0] = vis_faligndata(t0, t1);
  878. dp[1] = vis_faligndata(t1, t2);
  879. dp[2] = vis_faligndata(t2, t3);
  880. dp[3] = vis_faligndata(t3, t4);
  881. dp[4] = vis_faligndata(t4, t5);
  882. dp[5] = vis_faligndata(t5, t6);
  883. t0 = t6; dp += 6; xsize -= 3;
  884. sp = (mlib_u8*)sa;
  885. }
  886. i = 0;
  887. if (xsize >= 4) {
  888. s0 = *sa++;
  889. s00 = (s0 >> 20) & 0xFF0;
  890. s01 = (s0 >> 12) & 0xFF0;
  /* Main loop: 4 pixels per pass with the next source word pre-loaded */
  891. #pragma pipeloop(0)
  892. for(i = 0; i <= xsize - 8; i+=4, dp += 8) {
  893. s02 = (s0 >> 4) & 0xFF0;
  894. s03 = (s0 << 4) & 0xFF0;
  895. t1 = *(mlib_d64*)((mlib_u8*)table + s00);
  896. t2 = *(mlib_d64*)((mlib_u8*)table + s00 + 8);
  897. t3 = *(mlib_d64*)((mlib_u8*)table + s01);
  898. t4 = *(mlib_d64*)((mlib_u8*)table + s01 + 8);
  899. t5 = *(mlib_d64*)((mlib_u8*)table + s02);
  900. t6 = *(mlib_d64*)((mlib_u8*)table + s02 + 8);
  901. t7 = *(mlib_d64*)((mlib_u8*)table + s03);
  902. t8 = *(mlib_d64*)((mlib_u8*)table + s03 + 8);
  903. s0 = *sa++;
  904. s00 = (s0 >> 20) & 0xFF0;
  905. s01 = (s0 >> 12) & 0xFF0;
  906. dp[0] = vis_faligndata(t0, t1);
  907. dp[1] = vis_faligndata(t1, t2);
  908. dp[2] = vis_faligndata(t2, t3);
  909. dp[3] = vis_faligndata(t3, t4);
  910. dp[4] = vis_faligndata(t4, t5);
  911. dp[5] = vis_faligndata(t5, t6);
  912. dp[6] = vis_faligndata(t6, t7);
  913. dp[7] = vis_faligndata(t7, t8);
  914. t0 = t8;
  915. }
  /* Epilogue: the 4 pixels of the last pre-loaded word */
  916. s02 = (s0 >> 4) & 0xFF0;
  917. s03 = (s0 << 4) & 0xFF0;
  918. t1 = *(mlib_d64*)((mlib_u8*)table + s00);
  919. t2 = *(mlib_d64*)((mlib_u8*)table + s00 + 8);
  920. t3 = *(mlib_d64*)((mlib_u8*)table + s01);
  921. t4 = *(mlib_d64*)((mlib_u8*)table + s01 + 8);
  922. t5 = *(mlib_d64*)((mlib_u8*)table + s02);
  923. t6 = *(mlib_d64*)((mlib_u8*)table + s02 + 8);
  924. t7 = *(mlib_d64*)((mlib_u8*)table + s03);
  925. t8 = *(mlib_d64*)((mlib_u8*)table + s03 + 8);
  926. dp[0] = vis_faligndata(t0, t1);
  927. dp[1] = vis_faligndata(t1, t2);
  928. dp[2] = vis_faligndata(t2, t3);
  929. dp[3] = vis_faligndata(t3, t4);
  930. dp[4] = vis_faligndata(t4, t5);
  931. dp[5] = vis_faligndata(t5, t6);
  932. dp[6] = vis_faligndata(t6, t7);
  933. dp[7] = vis_faligndata(t7, t8);
  934. t0 = t8;
  935. dp += 8;
  936. i += 4;
  937. sp = (mlib_u8*)sa;
  938. }
  /* Tail: up to 3 pixels, one per `if`, looked up bytewise */
  939. if ( i < xsize ) {
  940. t1 = table[2*sp[0]];
  941. t2 = table[2*sp[0] + 1];
  942. *dp++ = vis_faligndata(t0, t1);
  943. *dp++ = vis_faligndata(t1, t2);
  944. i++; sp++;
  945. t0 = t2;
  946. }
  947. if ( i < xsize ) {
  948. t1 = table[2*sp[0]];
  949. t2 = table[2*sp[0] + 1];
  950. *dp++ = vis_faligndata(t0, t1);
  951. *dp++ = vis_faligndata(t1, t2);
  952. i++; sp++;
  953. t0 = t2;
  954. }
  955. if ( i < xsize ) {
  956. t1 = table[2*sp[0]];
  957. t2 = table[2*sp[0] + 1];
  958. *dp++ = vis_faligndata(t0, t1);
  959. *dp++ = vis_faligndata(t1, t2);
  960. t0 = t2;
  961. }
  /* Low half of the final mlib_d64 is still pending; store it as 4 bytes */
  962. *(mlib_f32*) dp = vis_read_lo(t0);
  963. }
  964. /***************************************************************/
  965. void mlib_v_ImageLookUpSI_U8_S32_4_SMALL(const mlib_u8 *src,
  966. mlib_s32 *dst,
  967. mlib_s32 xsize,
  968. const mlib_s32 **table)
  969. {
  970. mlib_u32 *sa; /* aligned pointer to source data */
  971. mlib_u8 *sp; /* pointer to source data */
  972. mlib_u32 s0; /* source data */
  973. mlib_f32 *dp; /* aligned pointer to destination */
  974. mlib_f32 acc0, acc1; /* destination data */
  975. mlib_f32 acc2, acc3; /* destination data */
  976. mlib_f32 acc4, acc5; /* destination data */
  977. mlib_f32 acc6, acc7; /* destination data */
  978. mlib_f32 acc8, acc9; /* destination data */
  979. mlib_f32 acc10, acc11; /* destination data */
  980. mlib_f32 acc12, acc13; /* destination data */
  981. mlib_f32 acc14, acc15; /* destination data */
  982. mlib_f32 *table0 = (mlib_f32*)table[0];
  983. mlib_f32 *table1 = (mlib_f32*)table[1];
  984. mlib_f32 *table2 = (mlib_f32*)table[2];
  985. mlib_f32 *table3 = (mlib_f32*)table[3];
  986. mlib_s32 i; /* loop variable */
  987. mlib_u32 s00, s01, s02, s03;
  988. sa = (mlib_u32*)src;
  989. dp = (mlib_f32*)dst;
  990. i = 0;
  991. if (xsize >= 4) {
  992. s0 = *sa++;
  993. s00 = (s0 >> 22) & 0x3FC;
  994. s01 = (s0 >> 14) & 0x3FC;
  995. #pragma pipeloop(0)
  996. for(i = 0; i <= xsize - 8; i+=4, dp += 16) {
  997. s02 = (s0 >> 6) & 0x3FC;
  998. s03 = (s0 << 2) & 0x3FC;
  999. acc0 = *(mlib_f32*)((mlib_u8*)table0 + s00);
  1000. acc1 = *(mlib_f32*)((mlib_u8*)table1 + s00);
  1001. acc2 = *(mlib_f32*)((mlib_u8*)table2 + s00);
  1002. acc3 = *(mlib_f32*)((mlib_u8*)table3 + s00);
  1003. acc4 = *(mlib_f32*)((mlib_u8*)table0 + s01);
  1004. acc5 = *(mlib_f32*)((mlib_u8*)table1 + s01);
  1005. acc6 = *(mlib_f32*)((mlib_u8*)table2 + s01);
  1006. acc7 = *(mlib_f32*)((mlib_u8*)table3 + s01);
  1007. acc8 = *(mlib_f32*)((mlib_u8*)table0 + s02);
  1008. acc9 = *(mlib_f32*)((mlib_u8*)table1 + s02);
  1009. acc10 = *(mlib_f32*)((mlib_u8*)table2 + s02);
  1010. acc11 = *(mlib_f32*)((mlib_u8*)table3 + s02);
  1011. acc12 = *(mlib_f32*)((mlib_u8*)table0 + s03);
  1012. acc13 = *(mlib_f32*)((mlib_u8*)table1 + s03);
  1013. acc14 = *(mlib_f32*)((mlib_u8*)table2 + s03);
  1014. acc15 = *(mlib_f32*)((mlib_u8*)table3 + s03);
  1015. s0 = *sa++;
  1016. s00 = (s0 >> 22) & 0x3FC;
  1017. s01 = (s0 >> 14) & 0x3FC;
  1018. dp[0] = acc0;
  1019. dp[1] = acc1;
  1020. dp[2] = acc2;
  1021. dp[3] = acc3;
  1022. dp[4] = acc4;
  1023. dp[5] = acc5;
  1024. dp[6] = acc6;
  1025. dp[7] = acc7;
  1026. dp[8] = acc8;
  1027. dp[9] = acc9;
  1028. dp[10] = acc10;
  1029. dp[11] = acc11;
  1030. dp[12] = acc12;
  1031. dp[13] = acc13;
  1032. dp[14] = acc14;
  1033. dp[15] = acc15;
  1034. }
  1035. s02 = (s0 >> 6) & 0x3FC;
  1036. s03 = (s0 << 2) & 0x3FC;
  1037. acc0 = *(mlib_f32*)((mlib_u8*)table0 + s00);
  1038. acc1 = *(mlib_f32*)((mlib_u8*)table1 + s00);
  1039. acc2 = *(mlib_f32*)((mlib_u8*)table2 + s00);
  1040. acc3 = *(mlib_f32*)((mlib_u8*)table3 + s00);
  1041. acc4 = *(mlib_f32*)((mlib_u8*)table0 + s01);
  1042. acc5 = *(mlib_f32*)((mlib_u8*)table1 + s01);
  1043. acc6 = *(mlib_f32*)((mlib_u8*)table2 + s01);
  1044. acc7 = *(mlib_f32*)((mlib_u8*)table3 + s01);
  1045. acc8 = *(mlib_f32*)((mlib_u8*)table0 + s02);
  1046. acc9 = *(mlib_f32*)((mlib_u8*)table1 + s02);
  1047. acc10 = *(mlib_f32*)((mlib_u8*)table2 + s02);
  1048. acc11 = *(mlib_f32*)((mlib_u8*)table3 + s02);
  1049. acc12 = *(mlib_f32*)((mlib_u8*)table0 + s03);
  1050. acc13 = *(mlib_f32*)((mlib_u8*)table1 + s03);
  1051. acc14 = *(mlib_f32*)((mlib_u8*)table2 + s03);
  1052. acc15 = *(mlib_f32*)((mlib_u8*)table3 + s03);
  1053. dp[0] = acc0;
  1054. dp[1] = acc1;
  1055. dp[2] = acc2;
  1056. dp[3] = acc3;
  1057. dp[4] = acc4;
  1058. dp[5] = acc5;
  1059. dp[6] = acc6;
  1060. dp[7] = acc7;
  1061. dp[8] = acc8;
  1062. dp[9] = acc9;
  1063. dp[10] = acc10;
  1064. dp[11] = acc11;
  1065. dp[12] = acc12;
  1066. dp[13] = acc13;
  1067. dp[14] = acc14;
  1068. dp[15] = acc15;
  1069. dp += 16;
  1070. i += 4;
  1071. }
  1072. sp = (mlib_u8*)sa;
  1073. if ( i < xsize ) {
  1074. *dp++ = table0[sp[0]];
  1075. *dp++ = table1[sp[0]];
  1076. *dp++ = table2[sp[0]];
  1077. *dp++ = table3[sp[0]];
  1078. i++; sp++;
  1079. }
  1080. if ( i < xsize ) {
  1081. *dp++ = table0[sp[0]];
  1082. *dp++ = table1[sp[0]];
  1083. *dp++ = table2[sp[0]];
  1084. *dp++ = table3[sp[0]];
  1085. i++; sp++;
  1086. }
  1087. if ( i < xsize ) {
  1088. *dp++ = table0[sp[0]];
  1089. *dp++ = table1[sp[0]];
  1090. *dp++ = table2[sp[0]];
  1091. *dp++ = table3[sp[0]];
  1092. }
  1093. }
  1094. /***************************************************************/
  1095. void mlib_v_ImageLookUpSI_U8_S32_4(const mlib_u8 *src,
  1096. mlib_s32 slb,
  1097. mlib_s32 *dst,
  1098. mlib_s32 dlb,
  1099. mlib_s32 xsize,
  1100. mlib_s32 ysize,
  1101. const mlib_s32 **table)
  1102. {
  1103. if ((xsize * ysize) < 600) {
  1104. mlib_u8 *sl;
  1105. mlib_s32 *dl;
  1106. mlib_s32 j, i;
  1107. const mlib_s32 *tab0 = table[0];
  1108. const mlib_s32 *tab1 = table[1];
  1109. const mlib_s32 *tab2 = table[2];
  1110. const mlib_s32 *tab3 = table[3];
  1111. sl = (void *)src;
  1112. dl = dst;
  1113. /* row loop */
  1114. for (j = 0; j < ysize; j ++) {
  1115. mlib_u8 *sp = sl;
  1116. mlib_s32 *dp = dl;
  1117. mlib_s32 off, size = xsize;
  1118. off = (mlib_s32)((4 - ((mlib_addr)sp & 3)) & 3);
  1119. off = (off < size) ? off : size;
  1120. for (i = 0; i < off; i++) {
  1121. *dp++ = tab0[sp[0]];
  1122. *dp++ = tab1[sp[0]];
  1123. *dp++ = tab2[sp[0]];
  1124. *dp++ = tab3[sp[0]];
  1125. size--; sp++;
  1126. }
  1127. if (size > 0) {
  1128. mlib_v_ImageLookUpSI_U8_S32_4_SMALL(sp, (mlib_s32*)dp, size, table);
  1129. }
  1130. sl = (mlib_u8 *) ((mlib_u8 *) sl + slb);
  1131. dl = (mlib_s32 *) ((mlib_u8 *) dl + dlb);
  1132. }
  1133. } else {
  1134. mlib_u8 *sl;
  1135. mlib_s32 *dl;
  1136. mlib_d64 dtab[512];
  1137. mlib_u32 *tab;
  1138. mlib_u32 *tab0 = (mlib_u32*)table[0];
  1139. mlib_u32 *tab1 = (mlib_u32*)table[1];
  1140. mlib_u32 *tab2 = (mlib_u32*)table[2];
  1141. mlib_u32 *tab3 = (mlib_u32*)table[3];
  1142. mlib_s32 i, j;
  1143. mlib_u32 s0, s1, s2, s3;
  1144. tab = (mlib_u32*)dtab;
  1145. s0 = tab0[0];
  1146. s1 = tab1[0];
  1147. s2 = tab2[0];
  1148. s3 = tab3[0];
  1149. for (i = 0; i < 255; i++) {
  1150. tab[4*i] = s0;
  1151. tab[4*i+1] = s1;
  1152. tab[4*i+2] = s2;
  1153. tab[4*i+3] = s3;
  1154. s0 = tab0[i+1];
  1155. s1 = tab1[i+1];
  1156. s2 = tab2[i+1];
  1157. s3 = tab3[i+1];
  1158. }
  1159. tab[1020] = s0;
  1160. tab[1021] = s1;
  1161. tab[1022] = s2;
  1162. tab[1023] = s3;
  1163. sl = (void *)src;
  1164. dl = dst;
  1165. /* row loop */
  1166. for (j = 0; j < ysize; j ++) {
  1167. mlib_u8 *sp = sl;
  1168. mlib_u32 *dp = (mlib_u32*)dl;
  1169. mlib_s32 off, size = xsize;
  1170. off = (mlib_s32)((4 - ((mlib_addr)sp & 3)) & 3);
  1171. off = (off < size) ? off : size;
  1172. #pragma pipeloop(0)
  1173. for (i = 0; i < off; i++) {
  1174. dp[0] = tab0[sp[0]];
  1175. dp[1] = tab1[sp[0]];
  1176. dp[2] = tab2[sp[0]];
  1177. dp[3] = tab3[sp[0]];
  1178. dp += 4; sp++;
  1179. }
  1180. size -= off;
  1181. if (size > 0) {
  1182. if (((mlib_addr)dp & 7) == 0) {
  1183. mlib_v_ImageLookUpSI_U8_S32_4_SrcOff0_D1(sp, (mlib_s32*)dp, size, dtab);
  1184. } else {
  1185. mlib_v_ImageLookUpSI_U8_S32_4_DstNonAl_D1(sp, (mlib_s32*)dp, size, dtab);
  1186. }
  1187. }
  1188. sl = (mlib_u8 *) ((mlib_u8 *) sl + slb);
  1189. dl = (mlib_s32 *) ((mlib_u8 *) dl + dlb);
  1190. }
  1191. }
  1192. }
  1193. /***************************************************************/