/java-1.7.0-openjdk/openjdk/jdk/src/solaris/native/sun/awt/medialib/mlib_v_ImageLookUpSIU8S32Func.c
C | 1361 lines | 1149 code | 168 blank | 44 comment | 77 complexity | a1882d4a948bf7a4fbb141f561e9b7af MD5 | raw file
Possible License(s): GPL-2.0, BSD-3-Clause-No-Nuclear-License-2014, LGPL-3.0, LGPL-2.0
- /*
- * Copyright (c) 1998, 2003, Oracle and/or its affiliates. All rights reserved.
- * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
- *
- * This code is free software; you can redistribute it and/or modify it
- * under the terms of the GNU General Public License version 2 only, as
- * published by the Free Software Foundation. Oracle designates this
- * particular file as subject to the "Classpath" exception as provided
- * by Oracle in the LICENSE file that accompanied this code.
- *
- * This code is distributed in the hope that it will be useful, but WITHOUT
- * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
- * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
- * version 2 for more details (a copy is included in the LICENSE file that
- * accompanied this code).
- *
- * You should have received a copy of the GNU General Public License version
- * 2 along with this work; if not, write to the Free Software Foundation,
- * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
- *
- * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
- * or visit www.oracle.com if you need additional information or have any
- * questions.
- */
- #include "vis_proto.h"
- #include "mlib_image.h"
- #include "mlib_v_ImageLookUpFunc.h"
- /***************************************************************/
- static void mlib_v_ImageLookUpSI_U8_S32_2_SrcOff0_D1(const mlib_u8 *src,
- mlib_s32 *dst,
- mlib_s32 xsize,
- const mlib_d64 *table);
- static void mlib_v_ImageLookUpSI_U8_S32_2_DstNonAl_D1(const mlib_u8 *src,
- mlib_s32 *dst,
- mlib_s32 xsize,
- const mlib_d64 *table);
- static void mlib_v_ImageLookUpSI_U8_S32_2_SMALL(const mlib_u8 *src,
- mlib_s32 *dst,
- mlib_s32 xsize,
- const mlib_s32 **table);
- static void mlib_v_ImageLookUpSI_U8_S32_3_SrcOff0_D1(const mlib_u8 *src,
- mlib_s32 *dst,
- mlib_s32 xsize,
- const mlib_d64 *table);
- static void mlib_v_ImageLookUpSI_U8_S32_3_DstNonAl_D1(const mlib_u8 *src,
- mlib_s32 *dst,
- mlib_s32 xsize,
- const mlib_d64 *table);
- static void mlib_v_ImageLookUpSI_U8_S32_3_SMALL(const mlib_u8 *src,
- mlib_s32 *dst,
- mlib_s32 xsize,
- const mlib_s32 **table);
- static void mlib_v_ImageLookUpSI_U8_S32_4_SrcOff0_D1(const mlib_u8 *src,
- mlib_s32 *dst,
- mlib_s32 xsize,
- const mlib_d64 *table);
- static void mlib_v_ImageLookUpSI_U8_S32_4_DstNonAl_D1(const mlib_u8 *src,
- mlib_s32 *dst,
- mlib_s32 xsize,
- const mlib_d64 *table);
- static void mlib_v_ImageLookUpSI_U8_S32_4_SMALL(const mlib_u8 *src,
- mlib_s32 *dst,
- mlib_s32 xsize,
- const mlib_s32 **table);
- /***************************************************************/
- void mlib_v_ImageLookUpSI_U8_S32_2_SrcOff0_D1(const mlib_u8 *src,
- mlib_s32 *dst,
- mlib_s32 xsize,
- const mlib_d64 *table)
- {
- mlib_u32 *sa; /* aligned pointer to source data */
- mlib_u8 *sp; /* pointer to source data */
- mlib_u32 s0; /* source data */
- mlib_d64 *dp; /* aligned pointer to destination */
- mlib_d64 acc0, acc1; /* destination data */
- mlib_d64 acc2, acc3; /* destination data */
- mlib_s32 i; /* loop variable */
- mlib_u32 s00, s01, s02, s03;
- sa = (mlib_u32*)src;
- dp = (mlib_d64 *) dst;
- i = 0;
- if (xsize >= 4) {
- s0 = *sa++;
- s00 = (s0 >> 21) & 0x7F8;
- s01 = (s0 >> 13) & 0x7F8;
- #pragma pipeloop(0)
- for(i = 0; i <= xsize - 8; i+=4, dp += 4) {
- s02 = (s0 >> 5) & 0x7F8;
- s03 = (s0 << 3) & 0x7F8;
- acc0 = *(mlib_d64*)((mlib_u8*)table + s00);
- acc1 = *(mlib_d64*)((mlib_u8*)table + s01);
- acc2 = *(mlib_d64*)((mlib_u8*)table + s02);
- acc3 = *(mlib_d64*)((mlib_u8*)table + s03);
- s0 = *sa++;
- s00 = (s0 >> 21) & 0x7F8;
- s01 = (s0 >> 13) & 0x7F8;
- dp[0] = acc0;
- dp[1] = acc1;
- dp[2] = acc2;
- dp[3] = acc3;
- }
- s02 = (s0 >> 5) & 0x7F8;
- s03 = (s0 << 3) & 0x7F8;
- acc0 = *(mlib_d64*)((mlib_u8*)table + s00);
- acc1 = *(mlib_d64*)((mlib_u8*)table + s01);
- acc2 = *(mlib_d64*)((mlib_u8*)table + s02);
- acc3 = *(mlib_d64*)((mlib_u8*)table + s03);
- dp[0] = acc0;
- dp[1] = acc1;
- dp[2] = acc2;
- dp[3] = acc3;
- dp += 4;
- i += 4;
- }
- sp = (mlib_u8*)sa;
- if ( i <= xsize - 2) {
- *dp++ = table[sp[0]];
- *dp++ = table[sp[1]];
- i+=2; sp += 2;
- }
- if ( i < xsize) *dp++ = table[sp[0]];
- }
- /***************************************************************/
- void mlib_v_ImageLookUpSI_U8_S32_2_DstNonAl_D1(const mlib_u8 *src,
- mlib_s32 *dst,
- mlib_s32 xsize,
- const mlib_d64 *table)
- {
- mlib_u32 *sa; /* aligned pointer to source data */
- mlib_u8 *sp; /* pointer to source data */
- mlib_u32 s0; /* source data */
- mlib_s32 *dl; /* pointer to start of destination */
- mlib_d64 *dp; /* aligned pointer to destination */
- mlib_d64 acc0, acc1; /* destination data */
- mlib_d64 acc2, acc3, acc4; /* destination data */
- mlib_s32 i; /* loop variable */
- mlib_u32 s00, s01, s02, s03;
- sa = (mlib_u32*)src;
- dl = dst;
- dp = (mlib_d64 *) ((mlib_addr) dl & (~7)) + 1;
- vis_alignaddr(dp, 4);
- s0 = *sa++;
- s00 = (s0 >> 21) & 0x7F8;
- acc0 = *(mlib_d64*)((mlib_u8*)table + s00);
- *(mlib_f32*)dl = vis_read_hi(acc0);
- xsize--;
- sp = (mlib_u8*)sa - 3;
- if (xsize >= 3) {
- s01 = (s0 >> 13) & 0x7F8;
- s02 = (s0 >> 5) & 0x7F8;
- s03 = (s0 << 3) & 0x7F8;
- acc1 = *(mlib_d64*)((mlib_u8*)table + s01);
- acc2 = *(mlib_d64*)((mlib_u8*)table + s02);
- acc3 = *(mlib_d64*)((mlib_u8*)table + s03);
- dp[0] = vis_faligndata(acc0, acc1);
- dp[1] = vis_faligndata(acc1, acc2);
- dp[2] = vis_faligndata(acc2, acc3);
- acc0 = acc3; dp += 3; xsize -= 3;
- sp = (mlib_u8*)sa;
- }
- i = 0;
- if (xsize >= 4) {
- s0 = *sa++;
- s00 = (s0 >> 21) & 0x7F8;
- s01 = (s0 >> 13) & 0x7F8;
- #pragma pipeloop(0)
- for(i = 0; i <= xsize - 8; i+=4, dp += 4) {
- s02 = (s0 >> 5) & 0x7F8;
- s03 = (s0 << 3) & 0x7F8;
- acc1 = *(mlib_d64*)((mlib_u8*)table + s00);
- acc2 = *(mlib_d64*)((mlib_u8*)table + s01);
- acc3 = *(mlib_d64*)((mlib_u8*)table + s02);
- acc4 = *(mlib_d64*)((mlib_u8*)table + s03);
- s0 = *sa++;
- s00 = (s0 >> 21) & 0x7F8;
- s01 = (s0 >> 13) & 0x7F8;
- dp[0] = vis_faligndata(acc0, acc1);
- dp[1] = vis_faligndata(acc1, acc2);
- dp[2] = vis_faligndata(acc2, acc3);
- dp[3] = vis_faligndata(acc3, acc4);
- acc0 = acc4;
- }
- s02 = (s0 >> 5) & 0x7F8;
- s03 = (s0 << 3) & 0x7F8;
- acc1 = *(mlib_d64*)((mlib_u8*)table + s00);
- acc2 = *(mlib_d64*)((mlib_u8*)table + s01);
- acc3 = *(mlib_d64*)((mlib_u8*)table + s02);
- acc4 = *(mlib_d64*)((mlib_u8*)table + s03);
- dp[0] = vis_faligndata(acc0, acc1);
- dp[1] = vis_faligndata(acc1, acc2);
- dp[2] = vis_faligndata(acc2, acc3);
- dp[3] = vis_faligndata(acc3, acc4);
- acc0 = acc4;
- dp += 4;
- i += 4;
- sp = (mlib_u8*)sa;
- }
- if ( i <= xsize - 2) {
- acc1 = table[sp[0]];
- acc2 = table[sp[1]];
- *dp++ = vis_faligndata(acc0, acc1);
- *dp++ = vis_faligndata(acc1, acc2);
- i+=2; sp += 2;
- acc0 = acc2;
- }
- if ( i < xsize) {
- acc1 = table[sp[0]];
- *dp++ = vis_faligndata(acc0, acc1);
- acc0 = acc1;
- }
- *(mlib_f32*) dp = vis_read_lo(acc0);
- }
- /***************************************************************/
- void mlib_v_ImageLookUpSI_U8_S32_2_SMALL(const mlib_u8 *src,
- mlib_s32 *dst,
- mlib_s32 xsize,
- const mlib_s32 **table)
- {
- mlib_u32 *sa; /* aligned pointer to source data */
- mlib_u8 *sp; /* pointer to source data */
- mlib_u32 s0; /* source data */
- mlib_f32 *dp; /* aligned pointer to destination */
- mlib_f32 acc0, acc1; /* destination data */
- mlib_f32 acc2, acc3; /* destination data */
- mlib_f32 acc4, acc5; /* destination data */
- mlib_f32 acc6, acc7; /* destination data */
- mlib_f32 *table0 = (mlib_f32*)table[0];
- mlib_f32 *table1 = (mlib_f32*)table[1];
- mlib_s32 i; /* loop variable */
- mlib_u32 s00, s01, s02, s03;
- sa = (mlib_u32*)src;
- dp = (mlib_f32*)dst;
- i = 0;
- if (xsize >= 4) {
- s0 = *sa++;
- s00 = (s0 >> 22) & 0x3FC;
- s01 = (s0 >> 14) & 0x3FC;
- #pragma pipeloop(0)
- for(i = 0; i <= xsize - 8; i+=4, dp += 8) {
- s02 = (s0 >> 6) & 0x3FC;
- s03 = (s0 << 2) & 0x3FC;
- acc0 = *(mlib_f32*)((mlib_u8*)table0 + s00);
- acc1 = *(mlib_f32*)((mlib_u8*)table1 + s00);
- acc2 = *(mlib_f32*)((mlib_u8*)table0 + s01);
- acc3 = *(mlib_f32*)((mlib_u8*)table1 + s01);
- acc4 = *(mlib_f32*)((mlib_u8*)table0 + s02);
- acc5 = *(mlib_f32*)((mlib_u8*)table1 + s02);
- acc6 = *(mlib_f32*)((mlib_u8*)table0 + s03);
- acc7 = *(mlib_f32*)((mlib_u8*)table1 + s03);
- s0 = *sa++;
- s00 = (s0 >> 22) & 0x3FC;
- s01 = (s0 >> 14) & 0x3FC;
- dp[0] = acc0;
- dp[1] = acc1;
- dp[2] = acc2;
- dp[3] = acc3;
- dp[4] = acc4;
- dp[5] = acc5;
- dp[6] = acc6;
- dp[7] = acc7;
- }
- s02 = (s0 >> 6) & 0x3FC;
- s03 = (s0 << 2) & 0x3FC;
- acc0 = *(mlib_f32*)((mlib_u8*)table0 + s00);
- acc1 = *(mlib_f32*)((mlib_u8*)table1 + s00);
- acc2 = *(mlib_f32*)((mlib_u8*)table0 + s01);
- acc3 = *(mlib_f32*)((mlib_u8*)table1 + s01);
- acc4 = *(mlib_f32*)((mlib_u8*)table0 + s02);
- acc5 = *(mlib_f32*)((mlib_u8*)table1 + s02);
- acc6 = *(mlib_f32*)((mlib_u8*)table0 + s03);
- acc7 = *(mlib_f32*)((mlib_u8*)table1 + s03);
- dp[0] = acc0;
- dp[1] = acc1;
- dp[2] = acc2;
- dp[3] = acc3;
- dp[4] = acc4;
- dp[5] = acc5;
- dp[6] = acc6;
- dp[7] = acc7;
- dp += 8;
- i += 4;
- }
- sp = (mlib_u8*)sa;
- if ( i < xsize ) {
- *dp++ = table0[sp[0]];
- *dp++ = table1[sp[0]];
- i++; sp++;
- }
- if ( i < xsize ) {
- *dp++ = table0[sp[0]];
- *dp++ = table1[sp[0]];
- i++; sp++;
- }
- if ( i < xsize ) {
- *dp++ = table0[sp[0]];
- *dp++ = table1[sp[0]];
- }
- }
- /***************************************************************/
- void mlib_v_ImageLookUpSI_U8_S32_2(const mlib_u8 *src,
- mlib_s32 slb,
- mlib_s32 *dst,
- mlib_s32 dlb,
- mlib_s32 xsize,
- mlib_s32 ysize,
- const mlib_s32 **table)
- {
- if ((xsize * ysize) < 600) {
- mlib_u8 *sl;
- mlib_s32 *dl;
- mlib_s32 j, i;
- const mlib_s32 *tab0 = table[0];
- const mlib_s32 *tab1 = table[1];
- sl = (void *)src;
- dl = dst;
- /* row loop */
- for (j = 0; j < ysize; j ++) {
- mlib_u8 *sp = sl;
- mlib_s32 *dp = dl;
- mlib_s32 off, size = xsize;
- off = (mlib_s32)((4 - ((mlib_addr)sp & 3)) & 3);
- off = (off < size) ? off : size;
- for (i = 0; i < off; i++) {
- *dp++ = tab0[sp[0]];
- *dp++ = tab1[sp[0]];
- size--; sp++;
- }
- if (size > 0) {
- mlib_v_ImageLookUpSI_U8_S32_2_SMALL(sp, (mlib_s32*)dp, size, table);
- }
- sl = (mlib_u8 *) ((mlib_u8 *) sl + slb);
- dl = (mlib_s32 *) ((mlib_u8 *) dl + dlb);
- }
- } else {
- mlib_u8 *sl;
- mlib_s32 *dl;
- mlib_d64 dtab[256];
- mlib_u32 *tab;
- mlib_u32 *tab0 = (mlib_u32*)table[0];
- mlib_u32 *tab1 = (mlib_u32*)table[1];
- mlib_s32 i, j;
- mlib_u32 s0, s1;
- tab = (mlib_u32*)dtab;
- s0 = tab0[0];
- s1 = tab1[0];
- for (i = 0; i < 255; i++) {
- tab[2*i] = s0;
- tab[2*i+1] = s1;
- s0 = tab0[i+1];
- s1 = tab1[i+1];
- }
- tab[510] = s0;
- tab[511] = s1;
- sl = (void *)src;
- dl = dst;
- /* row loop */
- for (j = 0; j < ysize; j ++) {
- mlib_u8 *sp = sl;
- mlib_u32 *dp = (mlib_u32*)dl;
- mlib_s32 off, size = xsize;
- off = (mlib_s32)((4 - ((mlib_addr)sp & 3)) & 3);
- off = (off < size) ? off : size;
- #pragma pipeloop(0)
- for (i = 0; i < off; i++) {
- dp[0] = tab0[sp[0]];
- dp[1] = tab1[sp[0]];
- dp += 2; sp++;
- }
- size -= off;
- if (size > 0) {
- if (((mlib_addr)dp & 7) == 0) {
- mlib_v_ImageLookUpSI_U8_S32_2_SrcOff0_D1(sp, (mlib_s32*)dp, size, dtab);
- } else {
- mlib_v_ImageLookUpSI_U8_S32_2_DstNonAl_D1(sp, (mlib_s32*)dp, size, dtab);
- }
- }
- sl = (mlib_u8 *) ((mlib_u8 *) sl + slb);
- dl = (mlib_s32 *) ((mlib_u8 *) dl + dlb);
- }
- }
- }
- /***************************************************************/
- void mlib_v_ImageLookUpSI_U8_S32_3_SrcOff0_D1(const mlib_u8 *src,
- mlib_s32 *dst,
- mlib_s32 xsize,
- const mlib_d64 *table)
- {
- mlib_u8 *sp; /* pointer to source data */
- mlib_u32 *sa; /* aligned pointer to source data */
- mlib_u32 s0; /* source data */
- mlib_s32 *dl; /* pointer to start of destination */
- mlib_d64 *dp; /* aligned pointer to destination */
- mlib_d64 t0, t1, t2, t3; /* destination data */
- mlib_d64 t4, t5, t6, t7; /* destination data */
- mlib_s32 i; /* loop variable */
- mlib_s32 *ptr;
- mlib_u32 s00, s01, s02, s03;
- dl = dst;
- sp = (void *)src;
- dp = (mlib_d64 *) dl;
- sa = (mlib_u32*)sp;
- vis_alignaddr((void *) 0, 4);
- i = 0;
- if (xsize >= 4) {
- s0 = *sa++;
- s00 = (s0 >> 20) & 0xFF0;
- s01 = (s0 >> 12) & 0xFF0;
- #pragma pipeloop(0)
- for(i = 0; i <= xsize - 8; i+=4, dp+=6) {
- s02 = (s0 >> 4) & 0xFF0;
- s03 = (s0 << 4) & 0xFF0;
- t0 = *(mlib_d64*)((mlib_u8*)table + s00);
- t1 = *(mlib_d64*)((mlib_u8*)table + s00 + 8);
- t2 = *(mlib_d64*)((mlib_u8*)table + s01);
- t3 = *(mlib_d64*)((mlib_u8*)table + s01 + 8);
- t4 = *(mlib_d64*)((mlib_u8*)table + s02);
- t5 = *(mlib_d64*)((mlib_u8*)table + s02 + 8);
- t6 = *(mlib_d64*)((mlib_u8*)table + s03);
- t7 = *(mlib_d64*)((mlib_u8*)table + s03 + 8);
- t1 = vis_faligndata(t1, t1);
- t1 = vis_faligndata(t1, t2);
- t2 = vis_faligndata(t2, t3);
- t5 = vis_faligndata(t5, t5);
- t5 = vis_faligndata(t5, t6);
- t6 = vis_faligndata(t6, t7);
- s0 = *sa++;
- s00 = (s0 >> 20) & 0xFF0;
- s01 = (s0 >> 12) & 0xFF0;
- dp[0] = t0;
- dp[1] = t1;
- dp[2] = t2;
- dp[3] = t4;
- dp[4] = t5;
- dp[5] = t6;
- }
- s02 = (s0 >> 4) & 0xFF0;
- s03 = (s0 << 4) & 0xFF0;
- t0 = *(mlib_d64*)((mlib_u8*)table + s00);
- t1 = *(mlib_d64*)((mlib_u8*)table + s00 + 8);
- t2 = *(mlib_d64*)((mlib_u8*)table + s01);
- t3 = *(mlib_d64*)((mlib_u8*)table + s01 + 8);
- t4 = *(mlib_d64*)((mlib_u8*)table + s02);
- t5 = *(mlib_d64*)((mlib_u8*)table + s02 + 8);
- t6 = *(mlib_d64*)((mlib_u8*)table + s03);
- t7 = *(mlib_d64*)((mlib_u8*)table + s03 + 8);
- t1 = vis_faligndata(t1, t1);
- t1 = vis_faligndata(t1, t2);
- t2 = vis_faligndata(t2, t3);
- t5 = vis_faligndata(t5, t5);
- t5 = vis_faligndata(t5, t6);
- t6 = vis_faligndata(t6, t7);
- dp[0] = t0;
- dp[1] = t1;
- dp[2] = t2;
- dp[3] = t4;
- dp[4] = t5;
- dp[5] = t6;
- i += 4; dp += 6;
- }
- dl = (mlib_s32*)dp;
- sp = (mlib_u8*)sa;
- #pragma pipeloop(0)
- for (; i < xsize; i++) {
- ptr = (mlib_s32*)(table + (sp[0] << 1));
- dl[0] = ptr[0];
- dl[1] = ptr[1];
- dl[2] = ptr[2];
- dl += 3; sp ++;
- }
- }
- /***************************************************************/
- void mlib_v_ImageLookUpSI_U8_S32_3_DstNonAl_D1(const mlib_u8 *src,
- mlib_s32 *dst,
- mlib_s32 xsize,
- const mlib_d64 *table)
- {
- mlib_u8 *sp; /* pointer to source data */
- mlib_u32 *sa; /* aligned pointer to source data */
- mlib_u32 s0; /* source data */
- mlib_s32 *dl; /* pointer to start of destination */
- mlib_d64 *dp; /* aligned pointer to destination */
- mlib_d64 t0, t1, t2, t3; /* destination data */
- mlib_d64 t4, t5, t6, t7; /* destination data */
- mlib_s32 i; /* loop variable */
- mlib_s32 *ptr;
- mlib_u32 s00, s01, s02, s03;
- dl = dst;
- sp = (void *)src;
- dp = (mlib_d64 *) ((mlib_addr) dl & (~7));
- sa = (mlib_u32*)sp;
- vis_alignaddr((void *) 0, 4);
- i = 0;
- if (xsize >= 4) {
- s0 = *sa++;
- s00 = (s0 >> 20) & 0xFF0;
- s01 = (s0 >> 12) & 0xFF0;
- #pragma pipeloop(0)
- for(i = 0; i <= xsize - 8; i+=4, dp+=6) {
- s02 = (s0 >> 4) & 0xFF0;
- s03 = (s0 << 4) & 0xFF0;
- t0 = *(mlib_d64*)((mlib_u8*)table + s00);
- t1 = *(mlib_d64*)((mlib_u8*)table + s00 + 8);
- t2 = *(mlib_d64*)((mlib_u8*)table + s01);
- t3 = *(mlib_d64*)((mlib_u8*)table + s01 + 8);
- t4 = *(mlib_d64*)((mlib_u8*)table + s02);
- t5 = *(mlib_d64*)((mlib_u8*)table + s02 + 8);
- t6 = *(mlib_d64*)((mlib_u8*)table + s03);
- t7 = *(mlib_d64*)((mlib_u8*)table + s03 + 8);
- t1 = vis_faligndata(t0, t1);
- t3 = vis_faligndata(t3, t3);
- t3 = vis_faligndata(t3, t4);
- t4 = vis_faligndata(t4, t5);
- s0 = *sa++;
- s00 = (s0 >> 20) & 0xFF0;
- s01 = (s0 >> 12) & 0xFF0;
- *(mlib_f32*)((mlib_f32*)dp + 1) = vis_read_hi(t0);
- dp[1] = t1;
- dp[2] = t2;
- dp[3] = t3;
- dp[4] = t4;
- dp[5] = t6;
- *(mlib_f32*)((mlib_f32*)dp + 12) = vis_read_hi(t7);
- }
- s02 = (s0 >> 4) & 0xFF0;
- s03 = (s0 << 4) & 0xFF0;
- t0 = *(mlib_d64*)((mlib_u8*)table + s00);
- t1 = *(mlib_d64*)((mlib_u8*)table + s00 + 8);
- t2 = *(mlib_d64*)((mlib_u8*)table + s01);
- t3 = *(mlib_d64*)((mlib_u8*)table + s01 + 8);
- t4 = *(mlib_d64*)((mlib_u8*)table + s02);
- t5 = *(mlib_d64*)((mlib_u8*)table + s02 + 8);
- t6 = *(mlib_d64*)((mlib_u8*)table + s03);
- t7 = *(mlib_d64*)((mlib_u8*)table + s03 + 8);
- t1 = vis_faligndata(t0, t1);
- t3 = vis_faligndata(t3, t3);
- t3 = vis_faligndata(t3, t4);
- t4 = vis_faligndata(t4, t5);
- *(mlib_f32*)((mlib_f32*)dp + 1) = vis_read_hi(t0);
- dp[1] = t1;
- dp[2] = t2;
- dp[3] = t3;
- dp[4] = t4;
- dp[5] = t6;
- *(mlib_f32*)((mlib_f32*)dp + 12) = vis_read_hi(t7);
- i += 4; dp += 6;
- }
- dl = (mlib_s32*)dp + 1;
- sp = (mlib_u8*)sa;
- #pragma pipeloop(0)
- for (; i < xsize; i++) {
- ptr = (mlib_s32*)(table + (sp[0] << 1));
- dl[0] = ptr[0];
- dl[1] = ptr[1];
- dl[2] = ptr[2];
- dl += 3; sp ++;
- }
- }
- /***************************************************************/
- void mlib_v_ImageLookUpSI_U8_S32_3_SMALL(const mlib_u8 *src,
- mlib_s32 *dst,
- mlib_s32 xsize,
- const mlib_s32 **table)
- {
- mlib_u32 *sa; /* aligned pointer to source data */
- mlib_u8 *sp; /* pointer to source data */
- mlib_u32 s0; /* source data */
- mlib_f32 *dp; /* aligned pointer to destination */
- mlib_f32 acc0, acc1; /* destination data */
- mlib_f32 acc2, acc3; /* destination data */
- mlib_f32 acc4, acc5; /* destination data */
- mlib_f32 acc6, acc7; /* destination data */
- mlib_f32 acc8, acc9; /* destination data */
- mlib_f32 acc10, acc11; /* destination data */
- mlib_f32 *table0 = (mlib_f32*)table[0];
- mlib_f32 *table1 = (mlib_f32*)table[1];
- mlib_f32 *table2 = (mlib_f32*)table[2];
- mlib_s32 i; /* loop variable */
- mlib_u32 s00, s01, s02, s03;
- sa = (mlib_u32*)src;
- dp = (mlib_f32*)dst;
- i = 0;
- if (xsize >= 4) {
- s0 = *sa++;
- s00 = (s0 >> 22) & 0x3FC;
- s01 = (s0 >> 14) & 0x3FC;
- #pragma pipeloop(0)
- for(i = 0; i <= xsize - 8; i+=4, dp += 12) {
- s02 = (s0 >> 6) & 0x3FC;
- s03 = (s0 << 2) & 0x3FC;
- acc0 = *(mlib_f32*)((mlib_u8*)table0 + s00);
- acc1 = *(mlib_f32*)((mlib_u8*)table1 + s00);
- acc2 = *(mlib_f32*)((mlib_u8*)table2 + s00);
- acc3 = *(mlib_f32*)((mlib_u8*)table0 + s01);
- acc4 = *(mlib_f32*)((mlib_u8*)table1 + s01);
- acc5 = *(mlib_f32*)((mlib_u8*)table2 + s01);
- acc6 = *(mlib_f32*)((mlib_u8*)table0 + s02);
- acc7 = *(mlib_f32*)((mlib_u8*)table1 + s02);
- acc8 = *(mlib_f32*)((mlib_u8*)table2 + s02);
- acc9 = *(mlib_f32*)((mlib_u8*)table0 + s03);
- acc10 = *(mlib_f32*)((mlib_u8*)table1 + s03);
- acc11 = *(mlib_f32*)((mlib_u8*)table2 + s03);
- s0 = *sa++;
- s00 = (s0 >> 22) & 0x3FC;
- s01 = (s0 >> 14) & 0x3FC;
- dp[0] = acc0;
- dp[1] = acc1;
- dp[2] = acc2;
- dp[3] = acc3;
- dp[4] = acc4;
- dp[5] = acc5;
- dp[6] = acc6;
- dp[7] = acc7;
- dp[8] = acc8;
- dp[9] = acc9;
- dp[10] = acc10;
- dp[11] = acc11;
- }
- s02 = (s0 >> 6) & 0x3FC;
- s03 = (s0 << 2) & 0x3FC;
- acc0 = *(mlib_f32*)((mlib_u8*)table0 + s00);
- acc1 = *(mlib_f32*)((mlib_u8*)table1 + s00);
- acc2 = *(mlib_f32*)((mlib_u8*)table2 + s00);
- acc3 = *(mlib_f32*)((mlib_u8*)table0 + s01);
- acc4 = *(mlib_f32*)((mlib_u8*)table1 + s01);
- acc5 = *(mlib_f32*)((mlib_u8*)table2 + s01);
- acc6 = *(mlib_f32*)((mlib_u8*)table0 + s02);
- acc7 = *(mlib_f32*)((mlib_u8*)table1 + s02);
- acc8 = *(mlib_f32*)((mlib_u8*)table2 + s02);
- acc9 = *(mlib_f32*)((mlib_u8*)table0 + s03);
- acc10 = *(mlib_f32*)((mlib_u8*)table1 + s03);
- acc11 = *(mlib_f32*)((mlib_u8*)table2 + s03);
- dp[0] = acc0;
- dp[1] = acc1;
- dp[2] = acc2;
- dp[3] = acc3;
- dp[4] = acc4;
- dp[5] = acc5;
- dp[6] = acc6;
- dp[7] = acc7;
- dp[8] = acc8;
- dp[9] = acc9;
- dp[10] = acc10;
- dp[11] = acc11;
- dp += 12;
- i += 4;
- }
- sp = (mlib_u8*)sa;
- if ( i < xsize ) {
- *dp++ = table0[sp[0]];
- *dp++ = table1[sp[0]];
- *dp++ = table2[sp[0]];
- i++; sp++;
- }
- if ( i < xsize ) {
- *dp++ = table0[sp[0]];
- *dp++ = table1[sp[0]];
- *dp++ = table2[sp[0]];
- i++; sp++;
- }
- if ( i < xsize ) {
- *dp++ = table0[sp[0]];
- *dp++ = table1[sp[0]];
- *dp++ = table2[sp[0]];
- }
- }
- /***************************************************************/
- void mlib_v_ImageLookUpSI_U8_S32_3(const mlib_u8 *src,
- mlib_s32 slb,
- mlib_s32 *dst,
- mlib_s32 dlb,
- mlib_s32 xsize,
- mlib_s32 ysize,
- const mlib_s32 **table)
- {
- if ((xsize * ysize) < 600) {
- mlib_u8 *sl;
- mlib_s32 *dl;
- mlib_s32 j, i;
- const mlib_s32 *tab0 = table[0];
- const mlib_s32 *tab1 = table[1];
- const mlib_s32 *tab2 = table[2];
- sl = (void *)src;
- dl = dst;
- /* row loop */
- for (j = 0; j < ysize; j ++) {
- mlib_u8 *sp = sl;
- mlib_s32 *dp = dl;
- mlib_s32 off, size = xsize;
- off = (mlib_s32)((4 - ((mlib_addr)sp & 3)) & 3);
- off = (off < size) ? off : size;
- for (i = 0; i < off; i++) {
- *dp++ = tab0[sp[0]];
- *dp++ = tab1[sp[0]];
- *dp++ = tab2[sp[0]];
- size--; sp++;
- }
- if (size > 0) {
- mlib_v_ImageLookUpSI_U8_S32_3_SMALL(sp, (mlib_s32*)dp, size, table);
- }
- sl = (mlib_u8 *) ((mlib_u8 *) sl + slb);
- dl = (mlib_s32 *) ((mlib_u8 *) dl + dlb);
- }
- } else {
- mlib_u8 *sl;
- mlib_s32 *dl;
- mlib_d64 dtab[512];
- mlib_u32 *tab;
- mlib_u32 *tab0 = (mlib_u32*)table[0];
- mlib_u32 *tab1 = (mlib_u32*)table[1];
- mlib_u32 *tab2 = (mlib_u32*)table[2];
- mlib_s32 i, j;
- mlib_u32 s0, s1, s2;
- tab = (mlib_u32*)dtab;
- s0 = tab0[0];
- s1 = tab1[0];
- s2 = tab2[0];
- for (i = 0; i < 255; i++) {
- tab[4*i] = s0;
- tab[4*i+1] = s1;
- tab[4*i+2] = s2;
- s0 = tab0[i+1];
- s1 = tab1[i+1];
- s2 = tab2[i+1];
- }
- tab[1020] = s0;
- tab[1021] = s1;
- tab[1022] = s2;
- sl = (void *)src;
- dl = dst;
- /* row loop */
- for (j = 0; j < ysize; j ++) {
- mlib_u8 *sp = sl;
- mlib_u32 *dp = (mlib_u32*)dl;
- mlib_s32 off, size = xsize;
- off = (mlib_s32)((4 - ((mlib_addr)sp & 3)) & 3);
- off = (off < size) ? off : size;
- #pragma pipeloop(0)
- for (i = 0; i < off; i++) {
- dp[0] = tab0[sp[0]];
- dp[1] = tab1[sp[0]];
- dp[2] = tab2[sp[0]];
- dp += 3; sp++;
- }
- size -= off;
- if (size > 0) {
- if (((mlib_addr)dp & 7) == 0) {
- mlib_v_ImageLookUpSI_U8_S32_3_SrcOff0_D1(sp, (mlib_s32*)dp, size, dtab);
- } else {
- mlib_v_ImageLookUpSI_U8_S32_3_DstNonAl_D1(sp, (mlib_s32*)dp, size, dtab);
- }
- }
- sl = (mlib_u8 *) ((mlib_u8 *) sl + slb);
- dl = (mlib_s32 *) ((mlib_u8 *) dl + dlb);
- }
- }
- }
- /***************************************************************/
- void mlib_v_ImageLookUpSI_U8_S32_4_SrcOff0_D1(const mlib_u8 *src,
- mlib_s32 *dst,
- mlib_s32 xsize,
- const mlib_d64 *table)
- {
- mlib_u32 *sa; /* aligned pointer to source data */
- mlib_u8 *sp; /* pointer to source data */
- mlib_u32 s0; /* source data */
- mlib_d64 *dp; /* aligned pointer to destination */
- mlib_d64 t0, t1, t2, t3; /* destination data */
- mlib_d64 t4, t5, t6, t7; /* destination data */
- mlib_s32 i; /* loop variable */
- mlib_u32 s00, s01, s02, s03;
- sa = (mlib_u32*)src;
- dp = (mlib_d64 *) dst;
- i = 0;
- if (xsize >= 4) {
- s0 = *sa++;
- s00 = (s0 >> 20) & 0xFF0;
- s01 = (s0 >> 12) & 0xFF0;
- #pragma pipeloop(0)
- for(i = 0; i <= xsize - 8; i+=4, dp+=8) {
- s02 = (s0 >> 4) & 0xFF0;
- s03 = (s0 << 4) & 0xFF0;
- t0 = *(mlib_d64*)((mlib_u8*)table + s00);
- t1 = *(mlib_d64*)((mlib_u8*)table + s00 + 8);
- t2 = *(mlib_d64*)((mlib_u8*)table + s01);
- t3 = *(mlib_d64*)((mlib_u8*)table + s01 + 8);
- t4 = *(mlib_d64*)((mlib_u8*)table + s02);
- t5 = *(mlib_d64*)((mlib_u8*)table + s02 + 8);
- t6 = *(mlib_d64*)((mlib_u8*)table + s03);
- t7 = *(mlib_d64*)((mlib_u8*)table + s03 + 8);
- s0 = *sa++;
- s00 = (s0 >> 20) & 0xFF0;
- s01 = (s0 >> 12) & 0xFF0;
- dp[0] = t0;
- dp[1] = t1;
- dp[2] = t2;
- dp[3] = t3;
- dp[4] = t4;
- dp[5] = t5;
- dp[6] = t6;
- dp[7] = t7;
- }
- s02 = (s0 >> 4) & 0xFF0;
- s03 = (s0 << 4) & 0xFF0;
- t0 = *(mlib_d64*)((mlib_u8*)table + s00);
- t1 = *(mlib_d64*)((mlib_u8*)table + s00 + 8);
- t2 = *(mlib_d64*)((mlib_u8*)table + s01);
- t3 = *(mlib_d64*)((mlib_u8*)table + s01 + 8);
- t4 = *(mlib_d64*)((mlib_u8*)table + s02);
- t5 = *(mlib_d64*)((mlib_u8*)table + s02 + 8);
- t6 = *(mlib_d64*)((mlib_u8*)table + s03);
- t7 = *(mlib_d64*)((mlib_u8*)table + s03 + 8);
- dp[0] = t0;
- dp[1] = t1;
- dp[2] = t2;
- dp[3] = t3;
- dp[4] = t4;
- dp[5] = t5;
- dp[6] = t6;
- dp[7] = t7;
- dp += 8;
- i += 4;
- }
- sp = (mlib_u8*)sa;
- if ( i < xsize ) {
- *dp++ = table[2*sp[0]];
- *dp++ = table[2*sp[0] + 1];
- i++; sp++;
- }
- if ( i < xsize ) {
- *dp++ = table[2*sp[0]];
- *dp++ = table[2*sp[0] + 1];
- i++; sp++;
- }
- if ( i < xsize ) {
- *dp++ = table[2*sp[0]];
- *dp++ = table[2*sp[0] + 1];
- }
- }
- /***************************************************************/
- void mlib_v_ImageLookUpSI_U8_S32_4_DstNonAl_D1(const mlib_u8 *src,
- mlib_s32 *dst,
- mlib_s32 xsize,
- const mlib_d64 *table)
- {
- mlib_u32 *sa; /* aligned pointer to source data */
- mlib_u8 *sp; /* pointer to source data */
- mlib_u32 s0; /* source data */
- mlib_s32 *dl; /* pointer to start of destination */
- mlib_d64 *dp; /* aligned pointer to destination */
- mlib_d64 t0, t1, t2, t3; /* destination data */
- mlib_d64 t4, t5, t6, t7, t8; /* destination data */
- mlib_s32 i; /* loop variable */
- mlib_u32 s00, s01, s02, s03;
- sa = (mlib_u32*)src;
- dl = dst;
- dp = (mlib_d64 *) ((mlib_addr) dl & (~7)) + 1;
- vis_alignaddr(dp, 4);
- s0 = *sa++;
- s00 = (s0 >> 20) & 0xFF0;
- t0 = *(mlib_d64*)((mlib_u8*)table + s00);
- t1 = *(mlib_d64*)((mlib_u8*)table + s00 + 8);
- *(mlib_f32*)dl = vis_read_hi(t0);
- dp[0] = vis_faligndata(t0, t1);
- t0 = t1;
- xsize--; dp++;
- sp = (mlib_u8*)sa - 3;
- if (xsize >= 3) {
- s01 = (s0 >> 12) & 0xFF0;
- s02 = (s0 >> 4) & 0xFF0;
- s03 = (s0 << 4) & 0xFF0;
- t1 = *(mlib_d64*)((mlib_u8*)table + s01);
- t2 = *(mlib_d64*)((mlib_u8*)table + s01 + 8);
- t3 = *(mlib_d64*)((mlib_u8*)table + s02);
- t4 = *(mlib_d64*)((mlib_u8*)table + s02 + 8);
- t5 = *(mlib_d64*)((mlib_u8*)table + s03);
- t6 = *(mlib_d64*)((mlib_u8*)table + s03 + 8);
- dp[0] = vis_faligndata(t0, t1);
- dp[1] = vis_faligndata(t1, t2);
- dp[2] = vis_faligndata(t2, t3);
- dp[3] = vis_faligndata(t3, t4);
- dp[4] = vis_faligndata(t4, t5);
- dp[5] = vis_faligndata(t5, t6);
- t0 = t6; dp += 6; xsize -= 3;
- sp = (mlib_u8*)sa;
- }
- i = 0;
- if (xsize >= 4) {
- s0 = *sa++;
- s00 = (s0 >> 20) & 0xFF0;
- s01 = (s0 >> 12) & 0xFF0;
- #pragma pipeloop(0)
- for(i = 0; i <= xsize - 8; i+=4, dp += 8) {
- s02 = (s0 >> 4) & 0xFF0;
- s03 = (s0 << 4) & 0xFF0;
- t1 = *(mlib_d64*)((mlib_u8*)table + s00);
- t2 = *(mlib_d64*)((mlib_u8*)table + s00 + 8);
- t3 = *(mlib_d64*)((mlib_u8*)table + s01);
- t4 = *(mlib_d64*)((mlib_u8*)table + s01 + 8);
- t5 = *(mlib_d64*)((mlib_u8*)table + s02);
- t6 = *(mlib_d64*)((mlib_u8*)table + s02 + 8);
- t7 = *(mlib_d64*)((mlib_u8*)table + s03);
- t8 = *(mlib_d64*)((mlib_u8*)table + s03 + 8);
- s0 = *sa++;
- s00 = (s0 >> 20) & 0xFF0;
- s01 = (s0 >> 12) & 0xFF0;
- dp[0] = vis_faligndata(t0, t1);
- dp[1] = vis_faligndata(t1, t2);
- dp[2] = vis_faligndata(t2, t3);
- dp[3] = vis_faligndata(t3, t4);
- dp[4] = vis_faligndata(t4, t5);
- dp[5] = vis_faligndata(t5, t6);
- dp[6] = vis_faligndata(t6, t7);
- dp[7] = vis_faligndata(t7, t8);
- t0 = t8;
- }
- s02 = (s0 >> 4) & 0xFF0;
- s03 = (s0 << 4) & 0xFF0;
- t1 = *(mlib_d64*)((mlib_u8*)table + s00);
- t2 = *(mlib_d64*)((mlib_u8*)table + s00 + 8);
- t3 = *(mlib_d64*)((mlib_u8*)table + s01);
- t4 = *(mlib_d64*)((mlib_u8*)table + s01 + 8);
- t5 = *(mlib_d64*)((mlib_u8*)table + s02);
- t6 = *(mlib_d64*)((mlib_u8*)table + s02 + 8);
- t7 = *(mlib_d64*)((mlib_u8*)table + s03);
- t8 = *(mlib_d64*)((mlib_u8*)table + s03 + 8);
- dp[0] = vis_faligndata(t0, t1);
- dp[1] = vis_faligndata(t1, t2);
- dp[2] = vis_faligndata(t2, t3);
- dp[3] = vis_faligndata(t3, t4);
- dp[4] = vis_faligndata(t4, t5);
- dp[5] = vis_faligndata(t5, t6);
- dp[6] = vis_faligndata(t6, t7);
- dp[7] = vis_faligndata(t7, t8);
- t0 = t8;
- dp += 8;
- i += 4;
- sp = (mlib_u8*)sa;
- }
- if ( i < xsize ) {
- t1 = table[2*sp[0]];
- t2 = table[2*sp[0] + 1];
- *dp++ = vis_faligndata(t0, t1);
- *dp++ = vis_faligndata(t1, t2);
- i++; sp++;
- t0 = t2;
- }
- if ( i < xsize ) {
- t1 = table[2*sp[0]];
- t2 = table[2*sp[0] + 1];
- *dp++ = vis_faligndata(t0, t1);
- *dp++ = vis_faligndata(t1, t2);
- i++; sp++;
- t0 = t2;
- }
- if ( i < xsize ) {
- t1 = table[2*sp[0]];
- t2 = table[2*sp[0] + 1];
- *dp++ = vis_faligndata(t0, t1);
- *dp++ = vis_faligndata(t1, t2);
- t0 = t2;
- }
- *(mlib_f32*) dp = vis_read_lo(t0);
- }
- /***************************************************************/
- void mlib_v_ImageLookUpSI_U8_S32_4_SMALL(const mlib_u8 *src,
- mlib_s32 *dst,
- mlib_s32 xsize,
- const mlib_s32 **table)
- {
- mlib_u32 *sa; /* aligned pointer to source data */
- mlib_u8 *sp; /* pointer to source data */
- mlib_u32 s0; /* source data */
- mlib_f32 *dp; /* aligned pointer to destination */
- mlib_f32 acc0, acc1; /* destination data */
- mlib_f32 acc2, acc3; /* destination data */
- mlib_f32 acc4, acc5; /* destination data */
- mlib_f32 acc6, acc7; /* destination data */
- mlib_f32 acc8, acc9; /* destination data */
- mlib_f32 acc10, acc11; /* destination data */
- mlib_f32 acc12, acc13; /* destination data */
- mlib_f32 acc14, acc15; /* destination data */
- mlib_f32 *table0 = (mlib_f32*)table[0];
- mlib_f32 *table1 = (mlib_f32*)table[1];
- mlib_f32 *table2 = (mlib_f32*)table[2];
- mlib_f32 *table3 = (mlib_f32*)table[3];
- mlib_s32 i; /* loop variable */
- mlib_u32 s00, s01, s02, s03;
- sa = (mlib_u32*)src;
- dp = (mlib_f32*)dst;
- i = 0;
- if (xsize >= 4) {
- s0 = *sa++;
- s00 = (s0 >> 22) & 0x3FC;
- s01 = (s0 >> 14) & 0x3FC;
- #pragma pipeloop(0)
- for(i = 0; i <= xsize - 8; i+=4, dp += 16) {
- s02 = (s0 >> 6) & 0x3FC;
- s03 = (s0 << 2) & 0x3FC;
- acc0 = *(mlib_f32*)((mlib_u8*)table0 + s00);
- acc1 = *(mlib_f32*)((mlib_u8*)table1 + s00);
- acc2 = *(mlib_f32*)((mlib_u8*)table2 + s00);
- acc3 = *(mlib_f32*)((mlib_u8*)table3 + s00);
- acc4 = *(mlib_f32*)((mlib_u8*)table0 + s01);
- acc5 = *(mlib_f32*)((mlib_u8*)table1 + s01);
- acc6 = *(mlib_f32*)((mlib_u8*)table2 + s01);
- acc7 = *(mlib_f32*)((mlib_u8*)table3 + s01);
- acc8 = *(mlib_f32*)((mlib_u8*)table0 + s02);
- acc9 = *(mlib_f32*)((mlib_u8*)table1 + s02);
- acc10 = *(mlib_f32*)((mlib_u8*)table2 + s02);
- acc11 = *(mlib_f32*)((mlib_u8*)table3 + s02);
- acc12 = *(mlib_f32*)((mlib_u8*)table0 + s03);
- acc13 = *(mlib_f32*)((mlib_u8*)table1 + s03);
- acc14 = *(mlib_f32*)((mlib_u8*)table2 + s03);
- acc15 = *(mlib_f32*)((mlib_u8*)table3 + s03);
- s0 = *sa++;
- s00 = (s0 >> 22) & 0x3FC;
- s01 = (s0 >> 14) & 0x3FC;
- dp[0] = acc0;
- dp[1] = acc1;
- dp[2] = acc2;
- dp[3] = acc3;
- dp[4] = acc4;
- dp[5] = acc5;
- dp[6] = acc6;
- dp[7] = acc7;
- dp[8] = acc8;
- dp[9] = acc9;
- dp[10] = acc10;
- dp[11] = acc11;
- dp[12] = acc12;
- dp[13] = acc13;
- dp[14] = acc14;
- dp[15] = acc15;
- }
- s02 = (s0 >> 6) & 0x3FC;
- s03 = (s0 << 2) & 0x3FC;
- acc0 = *(mlib_f32*)((mlib_u8*)table0 + s00);
- acc1 = *(mlib_f32*)((mlib_u8*)table1 + s00);
- acc2 = *(mlib_f32*)((mlib_u8*)table2 + s00);
- acc3 = *(mlib_f32*)((mlib_u8*)table3 + s00);
- acc4 = *(mlib_f32*)((mlib_u8*)table0 + s01);
- acc5 = *(mlib_f32*)((mlib_u8*)table1 + s01);
- acc6 = *(mlib_f32*)((mlib_u8*)table2 + s01);
- acc7 = *(mlib_f32*)((mlib_u8*)table3 + s01);
- acc8 = *(mlib_f32*)((mlib_u8*)table0 + s02);
- acc9 = *(mlib_f32*)((mlib_u8*)table1 + s02);
- acc10 = *(mlib_f32*)((mlib_u8*)table2 + s02);
- acc11 = *(mlib_f32*)((mlib_u8*)table3 + s02);
- acc12 = *(mlib_f32*)((mlib_u8*)table0 + s03);
- acc13 = *(mlib_f32*)((mlib_u8*)table1 + s03);
- acc14 = *(mlib_f32*)((mlib_u8*)table2 + s03);
- acc15 = *(mlib_f32*)((mlib_u8*)table3 + s03);
- dp[0] = acc0;
- dp[1] = acc1;
- dp[2] = acc2;
- dp[3] = acc3;
- dp[4] = acc4;
- dp[5] = acc5;
- dp[6] = acc6;
- dp[7] = acc7;
- dp[8] = acc8;
- dp[9] = acc9;
- dp[10] = acc10;
- dp[11] = acc11;
- dp[12] = acc12;
- dp[13] = acc13;
- dp[14] = acc14;
- dp[15] = acc15;
- dp += 16;
- i += 4;
- }
- sp = (mlib_u8*)sa;
- if ( i < xsize ) {
- *dp++ = table0[sp[0]];
- *dp++ = table1[sp[0]];
- *dp++ = table2[sp[0]];
- *dp++ = table3[sp[0]];
- i++; sp++;
- }
- if ( i < xsize ) {
- *dp++ = table0[sp[0]];
- *dp++ = table1[sp[0]];
- *dp++ = table2[sp[0]];
- *dp++ = table3[sp[0]];
- i++; sp++;
- }
- if ( i < xsize ) {
- *dp++ = table0[sp[0]];
- *dp++ = table1[sp[0]];
- *dp++ = table2[sp[0]];
- *dp++ = table3[sp[0]];
- }
- }
- /***************************************************************/
- void mlib_v_ImageLookUpSI_U8_S32_4(const mlib_u8 *src,
- mlib_s32 slb,
- mlib_s32 *dst,
- mlib_s32 dlb,
- mlib_s32 xsize,
- mlib_s32 ysize,
- const mlib_s32 **table)
- {
- if ((xsize * ysize) < 600) {
- mlib_u8 *sl;
- mlib_s32 *dl;
- mlib_s32 j, i;
- const mlib_s32 *tab0 = table[0];
- const mlib_s32 *tab1 = table[1];
- const mlib_s32 *tab2 = table[2];
- const mlib_s32 *tab3 = table[3];
- sl = (void *)src;
- dl = dst;
- /* row loop */
- for (j = 0; j < ysize; j ++) {
- mlib_u8 *sp = sl;
- mlib_s32 *dp = dl;
- mlib_s32 off, size = xsize;
- off = (mlib_s32)((4 - ((mlib_addr)sp & 3)) & 3);
- off = (off < size) ? off : size;
- for (i = 0; i < off; i++) {
- *dp++ = tab0[sp[0]];
- *dp++ = tab1[sp[0]];
- *dp++ = tab2[sp[0]];
- *dp++ = tab3[sp[0]];
- size--; sp++;
- }
- if (size > 0) {
- mlib_v_ImageLookUpSI_U8_S32_4_SMALL(sp, (mlib_s32*)dp, size, table);
- }
- sl = (mlib_u8 *) ((mlib_u8 *) sl + slb);
- dl = (mlib_s32 *) ((mlib_u8 *) dl + dlb);
- }
- } else {
- mlib_u8 *sl;
- mlib_s32 *dl;
- mlib_d64 dtab[512];
- mlib_u32 *tab;
- mlib_u32 *tab0 = (mlib_u32*)table[0];
- mlib_u32 *tab1 = (mlib_u32*)table[1];
- mlib_u32 *tab2 = (mlib_u32*)table[2];
- mlib_u32 *tab3 = (mlib_u32*)table[3];
- mlib_s32 i, j;
- mlib_u32 s0, s1, s2, s3;
- tab = (mlib_u32*)dtab;
- s0 = tab0[0];
- s1 = tab1[0];
- s2 = tab2[0];
- s3 = tab3[0];
- for (i = 0; i < 255; i++) {
- tab[4*i] = s0;
- tab[4*i+1] = s1;
- tab[4*i+2] = s2;
- tab[4*i+3] = s3;
- s0 = tab0[i+1];
- s1 = tab1[i+1];
- s2 = tab2[i+1];
- s3 = tab3[i+1];
- }
- tab[1020] = s0;
- tab[1021] = s1;
- tab[1022] = s2;
- tab[1023] = s3;
- sl = (void *)src;
- dl = dst;
- /* row loop */
- for (j = 0; j < ysize; j ++) {
- mlib_u8 *sp = sl;
- mlib_u32 *dp = (mlib_u32*)dl;
- mlib_s32 off, size = xsize;
- off = (mlib_s32)((4 - ((mlib_addr)sp & 3)) & 3);
- off = (off < size) ? off : size;
- #pragma pipeloop(0)
- for (i = 0; i < off; i++) {
- dp[0] = tab0[sp[0]];
- dp[1] = tab1[sp[0]];
- dp[2] = tab2[sp[0]];
- dp[3] = tab3[sp[0]];
- dp += 4; sp++;
- }
- size -= off;
- if (size > 0) {
- if (((mlib_addr)dp & 7) == 0) {
- mlib_v_ImageLookUpSI_U8_S32_4_SrcOff0_D1(sp, (mlib_s32*)dp, size, dtab);
- } else {
- mlib_v_ImageLookUpSI_U8_S32_4_DstNonAl_D1(sp, (mlib_s32*)dp, size, dtab);
- }
- }
- sl = (mlib_u8 *) ((mlib_u8 *) sl + slb);
- dl = (mlib_s32 *) ((mlib_u8 *) dl + dlb);
- }
- }
- }
- /***************************************************************/