PageRenderTime 245ms CodeModel.GetById 61ms app.highlight 157ms RepoModel.GetById 14ms app.codeStats 0ms

/java-1.7.0-openjdk/openjdk/jdk/src/solaris/native/sun/awt/medialib/mlib_v_ImageLookUpSIU8S32Func.c

#
C | 1361 lines | 1149 code | 168 blank | 44 comment | 77 complexity | a1882d4a948bf7a4fbb141f561e9b7af MD5 | raw file
/*
 * Copyright (c) 1998, 2003, Oracle and/or its affiliates. All rights reserved.
 * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
 *
 * This code is free software; you can redistribute it and/or modify it
 * under the terms of the GNU General Public License version 2 only, as
 * published by the Free Software Foundation.  Oracle designates this
 * particular file as subject to the "Classpath" exception as provided
 * by Oracle in the LICENSE file that accompanied this code.
 *
 * This code is distributed in the hope that it will be useful, but WITHOUT
 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
 * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
 * version 2 for more details (a copy is included in the LICENSE file that
 * accompanied this code).
 *
 * You should have received a copy of the GNU General Public License version
 * 2 along with this work; if not, write to the Free Software Foundation,
 * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
 *
 * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
 * or visit www.oracle.com if you need additional information or have any
 * questions.
 */
  25
  26
  27
  28#include "vis_proto.h"
  29#include "mlib_image.h"
  30#include "mlib_v_ImageLookUpFunc.h"
  31
  32/***************************************************************/
  33static void mlib_v_ImageLookUpSI_U8_S32_2_SrcOff0_D1(const mlib_u8  *src,
  34                                                     mlib_s32       *dst,
  35                                                     mlib_s32       xsize,
  36                                                     const mlib_d64 *table);
  37
  38static void mlib_v_ImageLookUpSI_U8_S32_2_DstNonAl_D1(const mlib_u8  *src,
  39                                                      mlib_s32       *dst,
  40                                                      mlib_s32       xsize,
  41                                                      const mlib_d64 *table);
  42
  43static void mlib_v_ImageLookUpSI_U8_S32_2_SMALL(const mlib_u8  *src,
  44                                                mlib_s32       *dst,
  45                                                mlib_s32       xsize,
  46                                                const mlib_s32 **table);
  47
  48static void mlib_v_ImageLookUpSI_U8_S32_3_SrcOff0_D1(const mlib_u8  *src,
  49                                                     mlib_s32       *dst,
  50                                                     mlib_s32       xsize,
  51                                                     const mlib_d64 *table);
  52
  53static void mlib_v_ImageLookUpSI_U8_S32_3_DstNonAl_D1(const mlib_u8  *src,
  54                                                      mlib_s32       *dst,
  55                                                      mlib_s32       xsize,
  56                                                      const mlib_d64 *table);
  57
  58static void mlib_v_ImageLookUpSI_U8_S32_3_SMALL(const mlib_u8  *src,
  59                                                mlib_s32       *dst,
  60                                                mlib_s32       xsize,
  61                                                const mlib_s32 **table);
  62
  63static void mlib_v_ImageLookUpSI_U8_S32_4_SrcOff0_D1(const mlib_u8  *src,
  64                                                     mlib_s32       *dst,
  65                                                     mlib_s32       xsize,
  66                                                     const mlib_d64 *table);
  67
  68static void mlib_v_ImageLookUpSI_U8_S32_4_DstNonAl_D1(const mlib_u8  *src,
  69                                                      mlib_s32       *dst,
  70                                                      mlib_s32       xsize,
  71                                                      const mlib_d64 *table);
  72
  73static void mlib_v_ImageLookUpSI_U8_S32_4_SMALL(const mlib_u8  *src,
  74                                                mlib_s32       *dst,
  75                                                mlib_s32       xsize,
  76                                                const mlib_s32 **table);
  77
  78/***************************************************************/
  79void mlib_v_ImageLookUpSI_U8_S32_2_SrcOff0_D1(const mlib_u8  *src,
  80                                              mlib_s32       *dst,
  81                                              mlib_s32       xsize,
  82                                              const mlib_d64 *table)
  83{
  84  mlib_u32 *sa;          /* aligned pointer to source data */
  85  mlib_u8  *sp;          /* pointer to source data */
  86  mlib_u32 s0;           /* source data */
  87  mlib_d64 *dp;          /* aligned pointer to destination */
  88  mlib_d64 acc0, acc1;   /* destination data */
  89  mlib_d64 acc2, acc3;   /* destination data */
  90  mlib_s32 i;            /* loop variable */
  91  mlib_u32 s00, s01, s02, s03;
  92
  93  sa   = (mlib_u32*)src;
  94  dp   = (mlib_d64 *) dst;
  95
  96  i = 0;
  97
  98  if (xsize >= 4) {
  99
 100    s0 = *sa++;
 101    s00 = (s0 >> 21) & 0x7F8;
 102    s01 = (s0 >> 13) & 0x7F8;
 103
 104#pragma pipeloop(0)
 105    for(i = 0; i <= xsize - 8; i+=4, dp += 4) {
 106      s02 = (s0 >> 5) & 0x7F8;
 107      s03 = (s0 << 3) & 0x7F8;
 108      acc0 = *(mlib_d64*)((mlib_u8*)table + s00);
 109      acc1 = *(mlib_d64*)((mlib_u8*)table + s01);
 110      acc2 = *(mlib_d64*)((mlib_u8*)table + s02);
 111      acc3 = *(mlib_d64*)((mlib_u8*)table + s03);
 112      s0 = *sa++;
 113      s00 = (s0 >> 21) & 0x7F8;
 114      s01 = (s0 >> 13) & 0x7F8;
 115      dp[0] = acc0;
 116      dp[1] = acc1;
 117      dp[2] = acc2;
 118      dp[3] = acc3;
 119    }
 120
 121    s02 = (s0 >> 5) & 0x7F8;
 122    s03 = (s0 << 3) & 0x7F8;
 123    acc0 = *(mlib_d64*)((mlib_u8*)table + s00);
 124    acc1 = *(mlib_d64*)((mlib_u8*)table + s01);
 125    acc2 = *(mlib_d64*)((mlib_u8*)table + s02);
 126    acc3 = *(mlib_d64*)((mlib_u8*)table + s03);
 127    dp[0] = acc0;
 128    dp[1] = acc1;
 129    dp[2] = acc2;
 130    dp[3] = acc3;
 131    dp += 4;
 132    i += 4;
 133  }
 134
 135  sp = (mlib_u8*)sa;
 136
 137  if ( i <= xsize - 2) {
 138    *dp++ = table[sp[0]];
 139    *dp++ = table[sp[1]];
 140    i+=2; sp += 2;
 141  }
 142
 143  if ( i < xsize) *dp++ = table[sp[0]];
 144}
 145
 146/***************************************************************/
 147void mlib_v_ImageLookUpSI_U8_S32_2_DstNonAl_D1(const mlib_u8  *src,
 148                                               mlib_s32       *dst,
 149                                               mlib_s32       xsize,
 150                                               const mlib_d64 *table)
 151{
 152  mlib_u32 *sa;              /* aligned pointer to source data */
 153  mlib_u8  *sp;              /* pointer to source data */
 154  mlib_u32 s0;               /* source data */
 155  mlib_s32 *dl;              /* pointer to start of destination */
 156  mlib_d64 *dp;              /* aligned pointer to destination */
 157  mlib_d64 acc0, acc1;       /* destination data */
 158  mlib_d64 acc2, acc3, acc4; /* destination data */
 159  mlib_s32 i;                /* loop variable */
 160  mlib_u32 s00, s01, s02, s03;
 161
 162  sa = (mlib_u32*)src;
 163  dl = dst;
 164  dp   = (mlib_d64 *) ((mlib_addr) dl & (~7)) + 1;
 165  vis_alignaddr(dp, 4);
 166
 167  s0 = *sa++;
 168  s00 = (s0 >> 21) & 0x7F8;
 169  acc0 = *(mlib_d64*)((mlib_u8*)table + s00);
 170  *(mlib_f32*)dl = vis_read_hi(acc0);
 171  xsize--;
 172  sp = (mlib_u8*)sa - 3;
 173
 174  if (xsize >= 3) {
 175    s01 = (s0 >> 13) & 0x7F8;
 176    s02 = (s0 >> 5) & 0x7F8;
 177    s03 = (s0 << 3) & 0x7F8;
 178    acc1 = *(mlib_d64*)((mlib_u8*)table + s01);
 179    acc2 = *(mlib_d64*)((mlib_u8*)table + s02);
 180    acc3 = *(mlib_d64*)((mlib_u8*)table + s03);
 181    dp[0] = vis_faligndata(acc0, acc1);
 182    dp[1] = vis_faligndata(acc1, acc2);
 183    dp[2] = vis_faligndata(acc2, acc3);
 184    acc0 = acc3; dp += 3; xsize -= 3;
 185    sp = (mlib_u8*)sa;
 186  }
 187
 188  i = 0;
 189
 190  if (xsize >= 4) {
 191
 192    s0 = *sa++;
 193    s00 = (s0 >> 21) & 0x7F8;
 194    s01 = (s0 >> 13) & 0x7F8;
 195
 196#pragma pipeloop(0)
 197    for(i = 0; i <= xsize - 8; i+=4, dp += 4) {
 198      s02 = (s0 >> 5) & 0x7F8;
 199      s03 = (s0 << 3) & 0x7F8;
 200      acc1 = *(mlib_d64*)((mlib_u8*)table + s00);
 201      acc2 = *(mlib_d64*)((mlib_u8*)table + s01);
 202      acc3 = *(mlib_d64*)((mlib_u8*)table + s02);
 203      acc4 = *(mlib_d64*)((mlib_u8*)table + s03);
 204      s0 = *sa++;
 205      s00 = (s0 >> 21) & 0x7F8;
 206      s01 = (s0 >> 13) & 0x7F8;
 207      dp[0] = vis_faligndata(acc0, acc1);
 208      dp[1] = vis_faligndata(acc1, acc2);
 209      dp[2] = vis_faligndata(acc2, acc3);
 210      dp[3] = vis_faligndata(acc3, acc4);
 211      acc0 = acc4;
 212    }
 213
 214    s02 = (s0 >> 5) & 0x7F8;
 215    s03 = (s0 << 3) & 0x7F8;
 216    acc1 = *(mlib_d64*)((mlib_u8*)table + s00);
 217    acc2 = *(mlib_d64*)((mlib_u8*)table + s01);
 218    acc3 = *(mlib_d64*)((mlib_u8*)table + s02);
 219    acc4 = *(mlib_d64*)((mlib_u8*)table + s03);
 220    dp[0] = vis_faligndata(acc0, acc1);
 221    dp[1] = vis_faligndata(acc1, acc2);
 222    dp[2] = vis_faligndata(acc2, acc3);
 223    dp[3] = vis_faligndata(acc3, acc4);
 224    acc0 = acc4;
 225    dp += 4;
 226    i += 4;
 227    sp = (mlib_u8*)sa;
 228  }
 229
 230  if ( i <= xsize - 2) {
 231    acc1 = table[sp[0]];
 232    acc2 = table[sp[1]];
 233    *dp++ = vis_faligndata(acc0, acc1);
 234    *dp++ = vis_faligndata(acc1, acc2);
 235    i+=2; sp += 2;
 236    acc0 = acc2;
 237  }
 238
 239  if ( i < xsize) {
 240    acc1 = table[sp[0]];
 241    *dp++ = vis_faligndata(acc0, acc1);
 242    acc0 = acc1;
 243  }
 244
 245  *(mlib_f32*) dp = vis_read_lo(acc0);
 246}
 247
 248/***************************************************************/
 249void mlib_v_ImageLookUpSI_U8_S32_2_SMALL(const mlib_u8  *src,
 250                                         mlib_s32       *dst,
 251                                         mlib_s32       xsize,
 252                                         const mlib_s32 **table)
 253{
 254  mlib_u32 *sa;          /* aligned pointer to source data */
 255  mlib_u8  *sp;          /* pointer to source data */
 256  mlib_u32 s0;           /* source data */
 257  mlib_f32 *dp;          /* aligned pointer to destination */
 258  mlib_f32 acc0, acc1;   /* destination data */
 259  mlib_f32 acc2, acc3;   /* destination data */
 260  mlib_f32 acc4, acc5;   /* destination data */
 261  mlib_f32 acc6, acc7;   /* destination data */
 262  mlib_f32 *table0 = (mlib_f32*)table[0];
 263  mlib_f32 *table1 = (mlib_f32*)table[1];
 264  mlib_s32 i;            /* loop variable */
 265  mlib_u32 s00, s01, s02, s03;
 266
 267  sa   = (mlib_u32*)src;
 268  dp   = (mlib_f32*)dst;
 269
 270  i = 0;
 271
 272  if (xsize >= 4) {
 273
 274    s0 = *sa++;
 275    s00 = (s0 >> 22) & 0x3FC;
 276    s01 = (s0 >> 14) & 0x3FC;
 277
 278#pragma pipeloop(0)
 279    for(i = 0; i <= xsize - 8; i+=4, dp += 8) {
 280      s02 = (s0 >> 6) & 0x3FC;
 281      s03 = (s0 << 2) & 0x3FC;
 282      acc0 = *(mlib_f32*)((mlib_u8*)table0 + s00);
 283      acc1 = *(mlib_f32*)((mlib_u8*)table1 + s00);
 284      acc2 = *(mlib_f32*)((mlib_u8*)table0 + s01);
 285      acc3 = *(mlib_f32*)((mlib_u8*)table1 + s01);
 286      acc4 = *(mlib_f32*)((mlib_u8*)table0 + s02);
 287      acc5 = *(mlib_f32*)((mlib_u8*)table1 + s02);
 288      acc6 = *(mlib_f32*)((mlib_u8*)table0 + s03);
 289      acc7 = *(mlib_f32*)((mlib_u8*)table1 + s03);
 290      s0 = *sa++;
 291      s00 = (s0 >> 22) & 0x3FC;
 292      s01 = (s0 >> 14) & 0x3FC;
 293      dp[0] = acc0;
 294      dp[1] = acc1;
 295      dp[2] = acc2;
 296      dp[3] = acc3;
 297      dp[4] = acc4;
 298      dp[5] = acc5;
 299      dp[6] = acc6;
 300      dp[7] = acc7;
 301    }
 302
 303    s02 = (s0 >> 6) & 0x3FC;
 304    s03 = (s0 << 2) & 0x3FC;
 305    acc0 = *(mlib_f32*)((mlib_u8*)table0 + s00);
 306    acc1 = *(mlib_f32*)((mlib_u8*)table1 + s00);
 307    acc2 = *(mlib_f32*)((mlib_u8*)table0 + s01);
 308    acc3 = *(mlib_f32*)((mlib_u8*)table1 + s01);
 309    acc4 = *(mlib_f32*)((mlib_u8*)table0 + s02);
 310    acc5 = *(mlib_f32*)((mlib_u8*)table1 + s02);
 311    acc6 = *(mlib_f32*)((mlib_u8*)table0 + s03);
 312    acc7 = *(mlib_f32*)((mlib_u8*)table1 + s03);
 313    dp[0] = acc0;
 314    dp[1] = acc1;
 315    dp[2] = acc2;
 316    dp[3] = acc3;
 317    dp[4] = acc4;
 318    dp[5] = acc5;
 319    dp[6] = acc6;
 320    dp[7] = acc7;
 321    dp += 8;
 322    i += 4;
 323  }
 324
 325  sp = (mlib_u8*)sa;
 326
 327  if ( i < xsize ) {
 328    *dp++ = table0[sp[0]];
 329    *dp++ = table1[sp[0]];
 330    i++; sp++;
 331  }
 332
 333  if ( i < xsize ) {
 334    *dp++ = table0[sp[0]];
 335    *dp++ = table1[sp[0]];
 336    i++; sp++;
 337  }
 338
 339  if ( i < xsize ) {
 340    *dp++ = table0[sp[0]];
 341    *dp++ = table1[sp[0]];
 342  }
 343}
 344
 345/***************************************************************/
 346void mlib_v_ImageLookUpSI_U8_S32_2(const mlib_u8  *src,
 347                                   mlib_s32       slb,
 348                                   mlib_s32       *dst,
 349                                   mlib_s32       dlb,
 350                                   mlib_s32       xsize,
 351                                   mlib_s32       ysize,
 352                                   const mlib_s32 **table)
 353{
 354  if ((xsize * ysize) < 600) {
 355    mlib_u8  *sl;
 356    mlib_s32 *dl;
 357    mlib_s32 j, i;
 358    const mlib_s32 *tab0 = table[0];
 359    const mlib_s32 *tab1 = table[1];
 360
 361    sl = (void *)src;
 362    dl = dst;
 363
 364    /* row loop */
 365    for (j = 0; j < ysize; j ++) {
 366      mlib_u8  *sp = sl;
 367      mlib_s32 *dp = dl;
 368      mlib_s32 off, size = xsize;
 369
 370      off = (mlib_s32)((4 - ((mlib_addr)sp & 3)) & 3);
 371
 372      off = (off < size) ? off : size;
 373
 374      for (i = 0; i < off; i++) {
 375        *dp++ = tab0[sp[0]];
 376        *dp++ = tab1[sp[0]];
 377        size--; sp++;
 378      }
 379
 380      if (size > 0) {
 381        mlib_v_ImageLookUpSI_U8_S32_2_SMALL(sp, (mlib_s32*)dp, size, table);
 382      }
 383
 384      sl = (mlib_u8 *) ((mlib_u8 *) sl + slb);
 385      dl = (mlib_s32 *) ((mlib_u8 *) dl + dlb);
 386    }
 387
 388  } else {
 389    mlib_u8  *sl;
 390    mlib_s32 *dl;
 391    mlib_d64 dtab[256];
 392    mlib_u32 *tab;
 393    mlib_u32 *tab0 = (mlib_u32*)table[0];
 394    mlib_u32 *tab1 = (mlib_u32*)table[1];
 395    mlib_s32 i, j;
 396    mlib_u32 s0, s1;
 397
 398    tab = (mlib_u32*)dtab;
 399    s0 = tab0[0];
 400    s1 = tab1[0];
 401    for (i = 0; i < 255; i++) {
 402      tab[2*i] = s0;
 403      tab[2*i+1] = s1;
 404      s0 = tab0[i+1];
 405      s1 = tab1[i+1];
 406    }
 407
 408    tab[510] = s0;
 409    tab[511] = s1;
 410
 411    sl = (void *)src;
 412    dl = dst;
 413
 414    /* row loop */
 415    for (j = 0; j < ysize; j ++) {
 416      mlib_u8  *sp = sl;
 417      mlib_u32 *dp = (mlib_u32*)dl;
 418      mlib_s32 off, size = xsize;
 419
 420      off = (mlib_s32)((4 - ((mlib_addr)sp & 3)) & 3);
 421
 422      off = (off < size) ? off : size;
 423
 424#pragma pipeloop(0)
 425      for (i = 0; i < off; i++) {
 426        dp[0] = tab0[sp[0]];
 427        dp[1] = tab1[sp[0]];
 428        dp += 2; sp++;
 429      }
 430
 431      size -= off;
 432
 433      if (size > 0) {
 434        if (((mlib_addr)dp & 7) == 0) {
 435          mlib_v_ImageLookUpSI_U8_S32_2_SrcOff0_D1(sp, (mlib_s32*)dp, size, dtab);
 436        } else {
 437          mlib_v_ImageLookUpSI_U8_S32_2_DstNonAl_D1(sp, (mlib_s32*)dp, size, dtab);
 438        }
 439      }
 440
 441      sl = (mlib_u8 *) ((mlib_u8 *) sl + slb);
 442      dl = (mlib_s32 *) ((mlib_u8 *) dl + dlb);
 443    }
 444  }
 445}
 446
 447/***************************************************************/
 448void mlib_v_ImageLookUpSI_U8_S32_3_SrcOff0_D1(const mlib_u8  *src,
 449                                              mlib_s32       *dst,
 450                                              mlib_s32       xsize,
 451                                              const mlib_d64 *table)
 452{
 453  mlib_u8  *sp;              /* pointer to source data */
 454  mlib_u32 *sa;              /* aligned pointer to source data */
 455  mlib_u32 s0;               /* source data */
 456  mlib_s32 *dl;              /* pointer to start of destination */
 457  mlib_d64 *dp;              /* aligned pointer to destination */
 458  mlib_d64 t0, t1, t2, t3;   /* destination data */
 459  mlib_d64 t4, t5, t6, t7;   /* destination data */
 460  mlib_s32 i;                /* loop variable */
 461  mlib_s32 *ptr;
 462  mlib_u32 s00, s01, s02, s03;
 463
 464  dl  = dst;
 465  sp  = (void *)src;
 466  dp  = (mlib_d64 *) dl;
 467  sa  = (mlib_u32*)sp;
 468
 469  vis_alignaddr((void *) 0, 4);
 470
 471  i = 0;
 472
 473  if (xsize >= 4) {
 474
 475    s0 = *sa++;
 476    s00 = (s0 >> 20) & 0xFF0;
 477    s01 = (s0 >> 12) & 0xFF0;
 478
 479#pragma pipeloop(0)
 480    for(i = 0; i <= xsize - 8; i+=4, dp+=6) {
 481      s02 = (s0 >> 4) & 0xFF0;
 482      s03 = (s0 << 4) & 0xFF0;
 483      t0 = *(mlib_d64*)((mlib_u8*)table + s00);
 484      t1 = *(mlib_d64*)((mlib_u8*)table + s00 + 8);
 485      t2 = *(mlib_d64*)((mlib_u8*)table + s01);
 486      t3 = *(mlib_d64*)((mlib_u8*)table + s01 + 8);
 487      t4 = *(mlib_d64*)((mlib_u8*)table + s02);
 488      t5 = *(mlib_d64*)((mlib_u8*)table + s02 + 8);
 489      t6 = *(mlib_d64*)((mlib_u8*)table + s03);
 490      t7 = *(mlib_d64*)((mlib_u8*)table + s03 + 8);
 491      t1 = vis_faligndata(t1, t1);
 492      t1 = vis_faligndata(t1, t2);
 493      t2 = vis_faligndata(t2, t3);
 494      t5 = vis_faligndata(t5, t5);
 495      t5 = vis_faligndata(t5, t6);
 496      t6 = vis_faligndata(t6, t7);
 497      s0 = *sa++;
 498      s00 = (s0 >> 20) & 0xFF0;
 499      s01 = (s0 >> 12) & 0xFF0;
 500      dp[0] = t0;
 501      dp[1] = t1;
 502      dp[2] = t2;
 503      dp[3] = t4;
 504      dp[4] = t5;
 505      dp[5] = t6;
 506    }
 507
 508    s02 = (s0 >> 4) & 0xFF0;
 509    s03 = (s0 << 4) & 0xFF0;
 510    t0 = *(mlib_d64*)((mlib_u8*)table + s00);
 511    t1 = *(mlib_d64*)((mlib_u8*)table + s00 + 8);
 512    t2 = *(mlib_d64*)((mlib_u8*)table + s01);
 513    t3 = *(mlib_d64*)((mlib_u8*)table + s01 + 8);
 514    t4 = *(mlib_d64*)((mlib_u8*)table + s02);
 515    t5 = *(mlib_d64*)((mlib_u8*)table + s02 + 8);
 516    t6 = *(mlib_d64*)((mlib_u8*)table + s03);
 517    t7 = *(mlib_d64*)((mlib_u8*)table + s03 + 8);
 518    t1 = vis_faligndata(t1, t1);
 519    t1 = vis_faligndata(t1, t2);
 520    t2 = vis_faligndata(t2, t3);
 521    t5 = vis_faligndata(t5, t5);
 522    t5 = vis_faligndata(t5, t6);
 523    t6 = vis_faligndata(t6, t7);
 524    dp[0] = t0;
 525    dp[1] = t1;
 526    dp[2] = t2;
 527    dp[3] = t4;
 528    dp[4] = t5;
 529    dp[5] = t6;
 530    i += 4; dp += 6;
 531  }
 532
 533  dl = (mlib_s32*)dp;
 534  sp = (mlib_u8*)sa;
 535
 536#pragma pipeloop(0)
 537  for (; i < xsize; i++) {
 538    ptr = (mlib_s32*)(table + (sp[0] << 1));
 539    dl[0] = ptr[0];
 540    dl[1] = ptr[1];
 541    dl[2] = ptr[2];
 542    dl += 3; sp ++;
 543  }
 544}
 545
 546/***************************************************************/
 547void mlib_v_ImageLookUpSI_U8_S32_3_DstNonAl_D1(const mlib_u8  *src,
 548                                               mlib_s32       *dst,
 549                                               mlib_s32       xsize,
 550                                               const mlib_d64 *table)
 551{
 552  mlib_u8  *sp;              /* pointer to source data */
 553  mlib_u32 *sa;              /* aligned pointer to source data */
 554  mlib_u32 s0;               /* source data */
 555  mlib_s32 *dl;              /* pointer to start of destination */
 556  mlib_d64 *dp;              /* aligned pointer to destination */
 557  mlib_d64 t0, t1, t2, t3;   /* destination data */
 558  mlib_d64 t4, t5, t6, t7;   /* destination data */
 559  mlib_s32 i;                /* loop variable */
 560  mlib_s32 *ptr;
 561  mlib_u32 s00, s01, s02, s03;
 562
 563  dl  = dst;
 564  sp  = (void *)src;
 565  dp   = (mlib_d64 *) ((mlib_addr) dl & (~7));
 566  sa  = (mlib_u32*)sp;
 567
 568  vis_alignaddr((void *) 0, 4);
 569
 570  i = 0;
 571
 572  if (xsize >= 4) {
 573
 574    s0 = *sa++;
 575    s00 = (s0 >> 20) & 0xFF0;
 576    s01 = (s0 >> 12) & 0xFF0;
 577
 578#pragma pipeloop(0)
 579    for(i = 0; i <= xsize - 8; i+=4, dp+=6) {
 580      s02 = (s0 >> 4) & 0xFF0;
 581      s03 = (s0 << 4) & 0xFF0;
 582      t0 = *(mlib_d64*)((mlib_u8*)table + s00);
 583      t1 = *(mlib_d64*)((mlib_u8*)table + s00 + 8);
 584      t2 = *(mlib_d64*)((mlib_u8*)table + s01);
 585      t3 = *(mlib_d64*)((mlib_u8*)table + s01 + 8);
 586      t4 = *(mlib_d64*)((mlib_u8*)table + s02);
 587      t5 = *(mlib_d64*)((mlib_u8*)table + s02 + 8);
 588      t6 = *(mlib_d64*)((mlib_u8*)table + s03);
 589      t7 = *(mlib_d64*)((mlib_u8*)table + s03 + 8);
 590      t1 = vis_faligndata(t0, t1);
 591      t3 = vis_faligndata(t3, t3);
 592      t3 = vis_faligndata(t3, t4);
 593      t4 = vis_faligndata(t4, t5);
 594      s0 = *sa++;
 595      s00 = (s0 >> 20) & 0xFF0;
 596      s01 = (s0 >> 12) & 0xFF0;
 597      *(mlib_f32*)((mlib_f32*)dp + 1) = vis_read_hi(t0);
 598      dp[1] = t1;
 599      dp[2] = t2;
 600      dp[3] = t3;
 601      dp[4] = t4;
 602      dp[5] = t6;
 603      *(mlib_f32*)((mlib_f32*)dp + 12) = vis_read_hi(t7);
 604    }
 605
 606    s02 = (s0 >> 4) & 0xFF0;
 607    s03 = (s0 << 4) & 0xFF0;
 608    t0 = *(mlib_d64*)((mlib_u8*)table + s00);
 609    t1 = *(mlib_d64*)((mlib_u8*)table + s00 + 8);
 610    t2 = *(mlib_d64*)((mlib_u8*)table + s01);
 611    t3 = *(mlib_d64*)((mlib_u8*)table + s01 + 8);
 612    t4 = *(mlib_d64*)((mlib_u8*)table + s02);
 613    t5 = *(mlib_d64*)((mlib_u8*)table + s02 + 8);
 614    t6 = *(mlib_d64*)((mlib_u8*)table + s03);
 615    t7 = *(mlib_d64*)((mlib_u8*)table + s03 + 8);
 616    t1 = vis_faligndata(t0, t1);
 617    t3 = vis_faligndata(t3, t3);
 618    t3 = vis_faligndata(t3, t4);
 619    t4 = vis_faligndata(t4, t5);
 620    *(mlib_f32*)((mlib_f32*)dp + 1) = vis_read_hi(t0);
 621    dp[1] = t1;
 622    dp[2] = t2;
 623    dp[3] = t3;
 624    dp[4] = t4;
 625    dp[5] = t6;
 626    *(mlib_f32*)((mlib_f32*)dp + 12) = vis_read_hi(t7);
 627    i += 4; dp += 6;
 628  }
 629
 630  dl = (mlib_s32*)dp + 1;
 631  sp = (mlib_u8*)sa;
 632
 633#pragma pipeloop(0)
 634  for (; i < xsize; i++) {
 635    ptr = (mlib_s32*)(table + (sp[0] << 1));
 636    dl[0] = ptr[0];
 637    dl[1] = ptr[1];
 638    dl[2] = ptr[2];
 639    dl += 3; sp ++;
 640  }
 641}
 642
 643/***************************************************************/
 644void mlib_v_ImageLookUpSI_U8_S32_3_SMALL(const mlib_u8  *src,
 645                                         mlib_s32       *dst,
 646                                         mlib_s32       xsize,
 647                                         const mlib_s32 **table)
 648{
 649  mlib_u32 *sa;          /* aligned pointer to source data */
 650  mlib_u8  *sp;          /* pointer to source data */
 651  mlib_u32 s0;           /* source data */
 652  mlib_f32 *dp;          /* aligned pointer to destination */
 653  mlib_f32 acc0, acc1;   /* destination data */
 654  mlib_f32 acc2, acc3;   /* destination data */
 655  mlib_f32 acc4, acc5;   /* destination data */
 656  mlib_f32 acc6, acc7;   /* destination data */
 657  mlib_f32 acc8, acc9;   /* destination data */
 658  mlib_f32 acc10, acc11; /* destination data */
 659  mlib_f32 *table0 = (mlib_f32*)table[0];
 660  mlib_f32 *table1 = (mlib_f32*)table[1];
 661  mlib_f32 *table2 = (mlib_f32*)table[2];
 662  mlib_s32 i;            /* loop variable */
 663  mlib_u32 s00, s01, s02, s03;
 664
 665  sa   = (mlib_u32*)src;
 666  dp   = (mlib_f32*)dst;
 667
 668  i = 0;
 669
 670  if (xsize >= 4) {
 671
 672    s0 = *sa++;
 673    s00 = (s0 >> 22) & 0x3FC;
 674    s01 = (s0 >> 14) & 0x3FC;
 675
 676#pragma pipeloop(0)
 677    for(i = 0; i <= xsize - 8; i+=4, dp += 12) {
 678      s02 = (s0 >> 6) & 0x3FC;
 679      s03 = (s0 << 2) & 0x3FC;
 680      acc0 = *(mlib_f32*)((mlib_u8*)table0 + s00);
 681      acc1 = *(mlib_f32*)((mlib_u8*)table1 + s00);
 682      acc2 = *(mlib_f32*)((mlib_u8*)table2 + s00);
 683      acc3 = *(mlib_f32*)((mlib_u8*)table0 + s01);
 684      acc4 = *(mlib_f32*)((mlib_u8*)table1 + s01);
 685      acc5 = *(mlib_f32*)((mlib_u8*)table2 + s01);
 686      acc6 = *(mlib_f32*)((mlib_u8*)table0 + s02);
 687      acc7 = *(mlib_f32*)((mlib_u8*)table1 + s02);
 688      acc8 = *(mlib_f32*)((mlib_u8*)table2 + s02);
 689      acc9 = *(mlib_f32*)((mlib_u8*)table0 + s03);
 690      acc10 = *(mlib_f32*)((mlib_u8*)table1 + s03);
 691      acc11 = *(mlib_f32*)((mlib_u8*)table2 + s03);
 692      s0 = *sa++;
 693      s00 = (s0 >> 22) & 0x3FC;
 694      s01 = (s0 >> 14) & 0x3FC;
 695      dp[0] = acc0;
 696      dp[1] = acc1;
 697      dp[2] = acc2;
 698      dp[3] = acc3;
 699      dp[4] = acc4;
 700      dp[5] = acc5;
 701      dp[6] = acc6;
 702      dp[7] = acc7;
 703      dp[8] = acc8;
 704      dp[9] = acc9;
 705      dp[10] = acc10;
 706      dp[11] = acc11;
 707    }
 708
 709    s02 = (s0 >> 6) & 0x3FC;
 710    s03 = (s0 << 2) & 0x3FC;
 711    acc0 = *(mlib_f32*)((mlib_u8*)table0 + s00);
 712    acc1 = *(mlib_f32*)((mlib_u8*)table1 + s00);
 713    acc2 = *(mlib_f32*)((mlib_u8*)table2 + s00);
 714    acc3 = *(mlib_f32*)((mlib_u8*)table0 + s01);
 715    acc4 = *(mlib_f32*)((mlib_u8*)table1 + s01);
 716    acc5 = *(mlib_f32*)((mlib_u8*)table2 + s01);
 717    acc6 = *(mlib_f32*)((mlib_u8*)table0 + s02);
 718    acc7 = *(mlib_f32*)((mlib_u8*)table1 + s02);
 719    acc8 = *(mlib_f32*)((mlib_u8*)table2 + s02);
 720    acc9 = *(mlib_f32*)((mlib_u8*)table0 + s03);
 721    acc10 = *(mlib_f32*)((mlib_u8*)table1 + s03);
 722    acc11 = *(mlib_f32*)((mlib_u8*)table2 + s03);
 723    dp[0] = acc0;
 724    dp[1] = acc1;
 725    dp[2] = acc2;
 726    dp[3] = acc3;
 727    dp[4] = acc4;
 728    dp[5] = acc5;
 729    dp[6] = acc6;
 730    dp[7] = acc7;
 731    dp[8] = acc8;
 732    dp[9] = acc9;
 733    dp[10] = acc10;
 734    dp[11] = acc11;
 735    dp += 12;
 736    i += 4;
 737  }
 738
 739  sp = (mlib_u8*)sa;
 740
 741  if ( i < xsize ) {
 742    *dp++ = table0[sp[0]];
 743    *dp++ = table1[sp[0]];
 744    *dp++ = table2[sp[0]];
 745    i++; sp++;
 746  }
 747
 748  if ( i < xsize ) {
 749    *dp++ = table0[sp[0]];
 750    *dp++ = table1[sp[0]];
 751    *dp++ = table2[sp[0]];
 752    i++; sp++;
 753  }
 754
 755  if ( i < xsize ) {
 756    *dp++ = table0[sp[0]];
 757    *dp++ = table1[sp[0]];
 758    *dp++ = table2[sp[0]];
 759  }
 760}
 761
 762/***************************************************************/
 763void mlib_v_ImageLookUpSI_U8_S32_3(const mlib_u8  *src,
 764                                   mlib_s32       slb,
 765                                   mlib_s32       *dst,
 766                                   mlib_s32       dlb,
 767                                   mlib_s32       xsize,
 768                                   mlib_s32       ysize,
 769                                   const mlib_s32 **table)
 770{
 771  if ((xsize * ysize) < 600) {
 772    mlib_u8  *sl;
 773    mlib_s32 *dl;
 774    mlib_s32 j, i;
 775    const mlib_s32 *tab0 = table[0];
 776    const mlib_s32 *tab1 = table[1];
 777    const mlib_s32 *tab2 = table[2];
 778
 779    sl = (void *)src;
 780    dl = dst;
 781
 782    /* row loop */
 783    for (j = 0; j < ysize; j ++) {
 784      mlib_u8  *sp = sl;
 785      mlib_s32 *dp = dl;
 786      mlib_s32 off, size = xsize;
 787
 788      off = (mlib_s32)((4 - ((mlib_addr)sp & 3)) & 3);
 789
 790      off = (off < size) ? off : size;
 791
 792      for (i = 0; i < off; i++) {
 793        *dp++ = tab0[sp[0]];
 794        *dp++ = tab1[sp[0]];
 795        *dp++ = tab2[sp[0]];
 796        size--; sp++;
 797      }
 798
 799      if (size > 0) {
 800        mlib_v_ImageLookUpSI_U8_S32_3_SMALL(sp, (mlib_s32*)dp, size, table);
 801      }
 802
 803      sl = (mlib_u8 *) ((mlib_u8 *) sl + slb);
 804      dl = (mlib_s32 *) ((mlib_u8 *) dl + dlb);
 805    }
 806
 807  } else {
 808    mlib_u8  *sl;
 809    mlib_s32 *dl;
 810    mlib_d64 dtab[512];
 811    mlib_u32 *tab;
 812    mlib_u32 *tab0 = (mlib_u32*)table[0];
 813    mlib_u32 *tab1 = (mlib_u32*)table[1];
 814    mlib_u32 *tab2 = (mlib_u32*)table[2];
 815    mlib_s32 i, j;
 816    mlib_u32 s0, s1, s2;
 817
 818    tab = (mlib_u32*)dtab;
 819    s0 = tab0[0];
 820    s1 = tab1[0];
 821    s2 = tab2[0];
 822    for (i = 0; i < 255; i++) {
 823      tab[4*i] = s0;
 824      tab[4*i+1] = s1;
 825      tab[4*i+2] = s2;
 826      s0 = tab0[i+1];
 827      s1 = tab1[i+1];
 828      s2 = tab2[i+1];
 829    }
 830
 831    tab[1020] = s0;
 832    tab[1021] = s1;
 833    tab[1022] = s2;
 834
 835    sl = (void *)src;
 836    dl = dst;
 837
 838    /* row loop */
 839    for (j = 0; j < ysize; j ++) {
 840      mlib_u8  *sp = sl;
 841      mlib_u32 *dp = (mlib_u32*)dl;
 842      mlib_s32 off, size = xsize;
 843
 844      off = (mlib_s32)((4 - ((mlib_addr)sp & 3)) & 3);
 845
 846      off = (off < size) ? off : size;
 847
 848#pragma pipeloop(0)
 849      for (i = 0; i < off; i++) {
 850        dp[0] = tab0[sp[0]];
 851        dp[1] = tab1[sp[0]];
 852        dp[2] = tab2[sp[0]];
 853        dp += 3; sp++;
 854      }
 855
 856      size -= off;
 857
 858      if (size > 0) {
 859        if (((mlib_addr)dp & 7) == 0) {
 860          mlib_v_ImageLookUpSI_U8_S32_3_SrcOff0_D1(sp, (mlib_s32*)dp, size, dtab);
 861        } else {
 862          mlib_v_ImageLookUpSI_U8_S32_3_DstNonAl_D1(sp, (mlib_s32*)dp, size, dtab);
 863        }
 864      }
 865
 866      sl = (mlib_u8 *) ((mlib_u8 *) sl + slb);
 867      dl = (mlib_s32 *) ((mlib_u8 *) dl + dlb);
 868    }
 869  }
 870}
 871
 872/***************************************************************/
 873void mlib_v_ImageLookUpSI_U8_S32_4_SrcOff0_D1(const mlib_u8  *src,
 874                                              mlib_s32       *dst,
 875                                              mlib_s32       xsize,
 876                                              const mlib_d64 *table)
 877{
 878  mlib_u32 *sa;            /* aligned pointer to source data */
 879  mlib_u8  *sp;            /* pointer to source data */
 880  mlib_u32 s0;             /* source data */
 881  mlib_d64 *dp;            /* aligned pointer to destination */
 882  mlib_d64 t0, t1, t2, t3; /* destination data */
 883  mlib_d64 t4, t5, t6, t7; /* destination data */
 884  mlib_s32 i;              /* loop variable */
 885  mlib_u32 s00, s01, s02, s03;
 886
 887  sa   = (mlib_u32*)src;
 888  dp   = (mlib_d64 *) dst;
 889
 890  i = 0;
 891
 892  if (xsize >= 4) {
 893
 894    s0 = *sa++;
 895    s00 = (s0 >> 20) & 0xFF0;
 896    s01 = (s0 >> 12) & 0xFF0;
 897
 898#pragma pipeloop(0)
 899    for(i = 0; i <= xsize - 8; i+=4, dp+=8) {
 900      s02 = (s0 >> 4) & 0xFF0;
 901      s03 = (s0 << 4) & 0xFF0;
 902      t0 = *(mlib_d64*)((mlib_u8*)table + s00);
 903      t1 = *(mlib_d64*)((mlib_u8*)table + s00 + 8);
 904      t2 = *(mlib_d64*)((mlib_u8*)table + s01);
 905      t3 = *(mlib_d64*)((mlib_u8*)table + s01 + 8);
 906      t4 = *(mlib_d64*)((mlib_u8*)table + s02);
 907      t5 = *(mlib_d64*)((mlib_u8*)table + s02 + 8);
 908      t6 = *(mlib_d64*)((mlib_u8*)table + s03);
 909      t7 = *(mlib_d64*)((mlib_u8*)table + s03 + 8);
 910      s0 = *sa++;
 911      s00 = (s0 >> 20) & 0xFF0;
 912      s01 = (s0 >> 12) & 0xFF0;
 913      dp[0] = t0;
 914      dp[1] = t1;
 915      dp[2] = t2;
 916      dp[3] = t3;
 917      dp[4] = t4;
 918      dp[5] = t5;
 919      dp[6] = t6;
 920      dp[7] = t7;
 921    }
 922
 923    s02 = (s0 >> 4) & 0xFF0;
 924    s03 = (s0 << 4) & 0xFF0;
 925    t0 = *(mlib_d64*)((mlib_u8*)table + s00);
 926    t1 = *(mlib_d64*)((mlib_u8*)table + s00 + 8);
 927    t2 = *(mlib_d64*)((mlib_u8*)table + s01);
 928    t3 = *(mlib_d64*)((mlib_u8*)table + s01 + 8);
 929    t4 = *(mlib_d64*)((mlib_u8*)table + s02);
 930    t5 = *(mlib_d64*)((mlib_u8*)table + s02 + 8);
 931    t6 = *(mlib_d64*)((mlib_u8*)table + s03);
 932    t7 = *(mlib_d64*)((mlib_u8*)table + s03 + 8);
 933    dp[0] = t0;
 934    dp[1] = t1;
 935    dp[2] = t2;
 936    dp[3] = t3;
 937    dp[4] = t4;
 938    dp[5] = t5;
 939    dp[6] = t6;
 940    dp[7] = t7;
 941    dp += 8;
 942    i += 4;
 943  }
 944
 945  sp = (mlib_u8*)sa;
 946
 947  if ( i < xsize ) {
 948    *dp++ = table[2*sp[0]];
 949    *dp++ = table[2*sp[0] + 1];
 950    i++; sp++;
 951  }
 952
 953  if ( i < xsize ) {
 954    *dp++ = table[2*sp[0]];
 955    *dp++ = table[2*sp[0] + 1];
 956    i++; sp++;
 957  }
 958
 959  if ( i < xsize ) {
 960    *dp++ = table[2*sp[0]];
 961    *dp++ = table[2*sp[0] + 1];
 962  }
 963}
 964
 965/***************************************************************/
 966void mlib_v_ImageLookUpSI_U8_S32_4_DstNonAl_D1(const mlib_u8  *src,
 967                                               mlib_s32       *dst,
 968                                               mlib_s32       xsize,
 969                                               const mlib_d64 *table)
 970{
 971  mlib_u32 *sa;                /* aligned pointer to source data */
 972  mlib_u8  *sp;                /* pointer to source data */
 973  mlib_u32 s0;                 /* source data */
 974  mlib_s32 *dl;                /* pointer to start of destination */
 975  mlib_d64 *dp;                /* aligned pointer to destination */
 976  mlib_d64 t0, t1, t2, t3;     /* destination data */
 977  mlib_d64 t4, t5, t6, t7, t8; /* destination data */
 978  mlib_s32 i;                  /* loop variable */
 979  mlib_u32 s00, s01, s02, s03;
 980
 981  sa = (mlib_u32*)src;
 982  dl = dst;
 983  dp   = (mlib_d64 *) ((mlib_addr) dl & (~7)) + 1;
 984  vis_alignaddr(dp, 4);
 985  s0 = *sa++;
 986  s00 = (s0 >> 20) & 0xFF0;
 987  t0 = *(mlib_d64*)((mlib_u8*)table + s00);
 988  t1 = *(mlib_d64*)((mlib_u8*)table + s00 + 8);
 989  *(mlib_f32*)dl = vis_read_hi(t0);
 990  dp[0] = vis_faligndata(t0, t1);
 991  t0 = t1;
 992  xsize--; dp++;
 993  sp = (mlib_u8*)sa - 3;
 994
 995  if (xsize >= 3) {
 996    s01 = (s0 >> 12) & 0xFF0;
 997    s02 = (s0 >> 4) & 0xFF0;
 998    s03 = (s0 << 4) & 0xFF0;
 999    t1 = *(mlib_d64*)((mlib_u8*)table + s01);
1000    t2 = *(mlib_d64*)((mlib_u8*)table + s01 + 8);
1001    t3 = *(mlib_d64*)((mlib_u8*)table + s02);
1002    t4 = *(mlib_d64*)((mlib_u8*)table + s02 + 8);
1003    t5 = *(mlib_d64*)((mlib_u8*)table + s03);
1004    t6 = *(mlib_d64*)((mlib_u8*)table + s03 + 8);
1005    dp[0] = vis_faligndata(t0, t1);
1006    dp[1] = vis_faligndata(t1, t2);
1007    dp[2] = vis_faligndata(t2, t3);
1008    dp[3] = vis_faligndata(t3, t4);
1009    dp[4] = vis_faligndata(t4, t5);
1010    dp[5] = vis_faligndata(t5, t6);
1011    t0 = t6; dp += 6; xsize -= 3;
1012    sp = (mlib_u8*)sa;
1013  }
1014
1015  i = 0;
1016
1017  if (xsize >= 4) {
1018
1019    s0 = *sa++;
1020    s00 = (s0 >> 20) & 0xFF0;
1021    s01 = (s0 >> 12) & 0xFF0;
1022
1023#pragma pipeloop(0)
1024    for(i = 0; i <= xsize - 8; i+=4, dp += 8) {
1025      s02 = (s0 >> 4) & 0xFF0;
1026      s03 = (s0 << 4) & 0xFF0;
1027      t1 = *(mlib_d64*)((mlib_u8*)table + s00);
1028      t2 = *(mlib_d64*)((mlib_u8*)table + s00 + 8);
1029      t3 = *(mlib_d64*)((mlib_u8*)table + s01);
1030      t4 = *(mlib_d64*)((mlib_u8*)table + s01 + 8);
1031      t5 = *(mlib_d64*)((mlib_u8*)table + s02);
1032      t6 = *(mlib_d64*)((mlib_u8*)table + s02 + 8);
1033      t7 = *(mlib_d64*)((mlib_u8*)table + s03);
1034      t8 = *(mlib_d64*)((mlib_u8*)table + s03 + 8);
1035      s0 = *sa++;
1036      s00 = (s0 >> 20) & 0xFF0;
1037      s01 = (s0 >> 12) & 0xFF0;
1038      dp[0] = vis_faligndata(t0, t1);
1039      dp[1] = vis_faligndata(t1, t2);
1040      dp[2] = vis_faligndata(t2, t3);
1041      dp[3] = vis_faligndata(t3, t4);
1042      dp[4] = vis_faligndata(t4, t5);
1043      dp[5] = vis_faligndata(t5, t6);
1044      dp[6] = vis_faligndata(t6, t7);
1045      dp[7] = vis_faligndata(t7, t8);
1046      t0 = t8;
1047    }
1048
1049    s02 = (s0 >> 4) & 0xFF0;
1050    s03 = (s0 << 4) & 0xFF0;
1051    t1 = *(mlib_d64*)((mlib_u8*)table + s00);
1052    t2 = *(mlib_d64*)((mlib_u8*)table + s00 + 8);
1053    t3 = *(mlib_d64*)((mlib_u8*)table + s01);
1054    t4 = *(mlib_d64*)((mlib_u8*)table + s01 + 8);
1055    t5 = *(mlib_d64*)((mlib_u8*)table + s02);
1056    t6 = *(mlib_d64*)((mlib_u8*)table + s02 + 8);
1057    t7 = *(mlib_d64*)((mlib_u8*)table + s03);
1058    t8 = *(mlib_d64*)((mlib_u8*)table + s03 + 8);
1059    dp[0] = vis_faligndata(t0, t1);
1060    dp[1] = vis_faligndata(t1, t2);
1061    dp[2] = vis_faligndata(t2, t3);
1062    dp[3] = vis_faligndata(t3, t4);
1063    dp[4] = vis_faligndata(t4, t5);
1064    dp[5] = vis_faligndata(t5, t6);
1065    dp[6] = vis_faligndata(t6, t7);
1066    dp[7] = vis_faligndata(t7, t8);
1067    t0 = t8;
1068    dp += 8;
1069    i += 4;
1070    sp = (mlib_u8*)sa;
1071  }
1072
1073  if ( i < xsize ) {
1074    t1 = table[2*sp[0]];
1075    t2 = table[2*sp[0] + 1];
1076    *dp++ = vis_faligndata(t0, t1);
1077    *dp++ = vis_faligndata(t1, t2);
1078    i++; sp++;
1079    t0 = t2;
1080  }
1081
1082  if ( i < xsize ) {
1083    t1 = table[2*sp[0]];
1084    t2 = table[2*sp[0] + 1];
1085    *dp++ = vis_faligndata(t0, t1);
1086    *dp++ = vis_faligndata(t1, t2);
1087    i++; sp++;
1088    t0 = t2;
1089  }
1090
1091  if ( i < xsize ) {
1092    t1 = table[2*sp[0]];
1093    t2 = table[2*sp[0] + 1];
1094    *dp++ = vis_faligndata(t0, t1);
1095    *dp++ = vis_faligndata(t1, t2);
1096    t0 = t2;
1097  }
1098
1099  *(mlib_f32*) dp = vis_read_lo(t0);
1100}
1101
1102/***************************************************************/
1103void mlib_v_ImageLookUpSI_U8_S32_4_SMALL(const mlib_u8  *src,
1104                                         mlib_s32       *dst,
1105                                         mlib_s32       xsize,
1106                                         const mlib_s32 **table)
1107{
1108  mlib_u32 *sa;          /* aligned pointer to source data */
1109  mlib_u8  *sp;          /* pointer to source data */
1110  mlib_u32 s0;           /* source data */
1111  mlib_f32 *dp;          /* aligned pointer to destination */
1112  mlib_f32 acc0, acc1;   /* destination data */
1113  mlib_f32 acc2, acc3;   /* destination data */
1114  mlib_f32 acc4, acc5;   /* destination data */
1115  mlib_f32 acc6, acc7;   /* destination data */
1116  mlib_f32 acc8, acc9;   /* destination data */
1117  mlib_f32 acc10, acc11; /* destination data */
1118  mlib_f32 acc12, acc13; /* destination data */
1119  mlib_f32 acc14, acc15; /* destination data */
1120  mlib_f32 *table0 = (mlib_f32*)table[0];
1121  mlib_f32 *table1 = (mlib_f32*)table[1];
1122  mlib_f32 *table2 = (mlib_f32*)table[2];
1123  mlib_f32 *table3 = (mlib_f32*)table[3];
1124  mlib_s32 i;            /* loop variable */
1125  mlib_u32 s00, s01, s02, s03;
1126
1127  sa   = (mlib_u32*)src;
1128  dp   = (mlib_f32*)dst;
1129
1130  i = 0;
1131
1132  if (xsize >= 4) {
1133
1134    s0 = *sa++;
1135    s00 = (s0 >> 22) & 0x3FC;
1136    s01 = (s0 >> 14) & 0x3FC;
1137
1138#pragma pipeloop(0)
1139    for(i = 0; i <= xsize - 8; i+=4, dp += 16) {
1140      s02 = (s0 >> 6) & 0x3FC;
1141      s03 = (s0 << 2) & 0x3FC;
1142      acc0 = *(mlib_f32*)((mlib_u8*)table0 + s00);
1143      acc1 = *(mlib_f32*)((mlib_u8*)table1 + s00);
1144      acc2 = *(mlib_f32*)((mlib_u8*)table2 + s00);
1145      acc3 = *(mlib_f32*)((mlib_u8*)table3 + s00);
1146      acc4 = *(mlib_f32*)((mlib_u8*)table0 + s01);
1147      acc5 = *(mlib_f32*)((mlib_u8*)table1 + s01);
1148      acc6 = *(mlib_f32*)((mlib_u8*)table2 + s01);
1149      acc7 = *(mlib_f32*)((mlib_u8*)table3 + s01);
1150      acc8 = *(mlib_f32*)((mlib_u8*)table0 + s02);
1151      acc9 = *(mlib_f32*)((mlib_u8*)table1 + s02);
1152      acc10 = *(mlib_f32*)((mlib_u8*)table2 + s02);
1153      acc11 = *(mlib_f32*)((mlib_u8*)table3 + s02);
1154      acc12 = *(mlib_f32*)((mlib_u8*)table0 + s03);
1155      acc13 = *(mlib_f32*)((mlib_u8*)table1 + s03);
1156      acc14 = *(mlib_f32*)((mlib_u8*)table2 + s03);
1157      acc15 = *(mlib_f32*)((mlib_u8*)table3 + s03);
1158      s0 = *sa++;
1159      s00 = (s0 >> 22) & 0x3FC;
1160      s01 = (s0 >> 14) & 0x3FC;
1161      dp[0] = acc0;
1162      dp[1] = acc1;
1163      dp[2] = acc2;
1164      dp[3] = acc3;
1165      dp[4] = acc4;
1166      dp[5] = acc5;
1167      dp[6] = acc6;
1168      dp[7] = acc7;
1169      dp[8] = acc8;
1170      dp[9] = acc9;
1171      dp[10] = acc10;
1172      dp[11] = acc11;
1173      dp[12] = acc12;
1174      dp[13] = acc13;
1175      dp[14] = acc14;
1176      dp[15] = acc15;
1177    }
1178
1179    s02 = (s0 >> 6) & 0x3FC;
1180    s03 = (s0 << 2) & 0x3FC;
1181    acc0 = *(mlib_f32*)((mlib_u8*)table0 + s00);
1182    acc1 = *(mlib_f32*)((mlib_u8*)table1 + s00);
1183    acc2 = *(mlib_f32*)((mlib_u8*)table2 + s00);
1184    acc3 = *(mlib_f32*)((mlib_u8*)table3 + s00);
1185    acc4 = *(mlib_f32*)((mlib_u8*)table0 + s01);
1186    acc5 = *(mlib_f32*)((mlib_u8*)table1 + s01);
1187    acc6 = *(mlib_f32*)((mlib_u8*)table2 + s01);
1188    acc7 = *(mlib_f32*)((mlib_u8*)table3 + s01);
1189    acc8 = *(mlib_f32*)((mlib_u8*)table0 + s02);
1190    acc9 = *(mlib_f32*)((mlib_u8*)table1 + s02);
1191    acc10 = *(mlib_f32*)((mlib_u8*)table2 + s02);
1192    acc11 = *(mlib_f32*)((mlib_u8*)table3 + s02);
1193    acc12 = *(mlib_f32*)((mlib_u8*)table0 + s03);
1194    acc13 = *(mlib_f32*)((mlib_u8*)table1 + s03);
1195    acc14 = *(mlib_f32*)((mlib_u8*)table2 + s03);
1196    acc15 = *(mlib_f32*)((mlib_u8*)table3 + s03);
1197    dp[0] = acc0;
1198    dp[1] = acc1;
1199    dp[2] = acc2;
1200    dp[3] = acc3;
1201    dp[4] = acc4;
1202    dp[5] = acc5;
1203    dp[6] = acc6;
1204    dp[7] = acc7;
1205    dp[8] = acc8;
1206    dp[9] = acc9;
1207    dp[10] = acc10;
1208    dp[11] = acc11;
1209    dp[12] = acc12;
1210    dp[13] = acc13;
1211    dp[14] = acc14;
1212    dp[15] = acc15;
1213    dp += 16;
1214    i += 4;
1215  }
1216
1217  sp = (mlib_u8*)sa;
1218
1219  if ( i < xsize ) {
1220    *dp++ = table0[sp[0]];
1221    *dp++ = table1[sp[0]];
1222    *dp++ = table2[sp[0]];
1223    *dp++ = table3[sp[0]];
1224    i++; sp++;
1225  }
1226
1227  if ( i < xsize ) {
1228    *dp++ = table0[sp[0]];
1229    *dp++ = table1[sp[0]];
1230    *dp++ = table2[sp[0]];
1231    *dp++ = table3[sp[0]];
1232    i++; sp++;
1233  }
1234
1235  if ( i < xsize ) {
1236    *dp++ = table0[sp[0]];
1237    *dp++ = table1[sp[0]];
1238    *dp++ = table2[sp[0]];
1239    *dp++ = table3[sp[0]];
1240  }
1241}
1242
1243/***************************************************************/
1244void mlib_v_ImageLookUpSI_U8_S32_4(const mlib_u8  *src,
1245                                   mlib_s32       slb,
1246                                   mlib_s32       *dst,
1247                                   mlib_s32       dlb,
1248                                   mlib_s32       xsize,
1249                                   mlib_s32       ysize,
1250                                   const mlib_s32 **table)
1251{
1252  if ((xsize * ysize) < 600) {
1253    mlib_u8  *sl;
1254    mlib_s32 *dl;
1255    mlib_s32 j, i;
1256    const mlib_s32 *tab0 = table[0];
1257    const mlib_s32 *tab1 = table[1];
1258    const mlib_s32 *tab2 = table[2];
1259    const mlib_s32 *tab3 = table[3];
1260
1261    sl = (void *)src;
1262    dl = dst;
1263
1264    /* row loop */
1265    for (j = 0; j < ysize; j ++) {
1266      mlib_u8  *sp = sl;
1267      mlib_s32 *dp = dl;
1268      mlib_s32 off, size = xsize;
1269
1270      off = (mlib_s32)((4 - ((mlib_addr)sp & 3)) & 3);
1271
1272      off = (off < size) ? off : size;
1273
1274      for (i = 0; i < off; i++) {
1275        *dp++ = tab0[sp[0]];
1276        *dp++ = tab1[sp[0]];
1277        *dp++ = tab2[sp[0]];
1278        *dp++ = tab3[sp[0]];
1279        size--; sp++;
1280      }
1281
1282      if (size > 0) {
1283        mlib_v_ImageLookUpSI_U8_S32_4_SMALL(sp, (mlib_s32*)dp, size, table);
1284      }
1285
1286      sl = (mlib_u8 *) ((mlib_u8 *) sl + slb);
1287      dl = (mlib_s32 *) ((mlib_u8 *) dl + dlb);
1288    }
1289
1290  } else {
1291    mlib_u8  *sl;
1292    mlib_s32 *dl;
1293    mlib_d64 dtab[512];
1294    mlib_u32 *tab;
1295    mlib_u32 *tab0 = (mlib_u32*)table[0];
1296    mlib_u32 *tab1 = (mlib_u32*)table[1];
1297    mlib_u32 *tab2 = (mlib_u32*)table[2];
1298    mlib_u32 *tab3 = (mlib_u32*)table[3];
1299    mlib_s32 i, j;
1300    mlib_u32 s0, s1, s2, s3;
1301
1302    tab = (mlib_u32*)dtab;
1303    s0 = tab0[0];
1304    s1 = tab1[0];
1305    s2 = tab2[0];
1306    s3 = tab3[0];
1307    for (i = 0; i < 255; i++) {
1308      tab[4*i] = s0;
1309      tab[4*i+1] = s1;
1310      tab[4*i+2] = s2;
1311      tab[4*i+3] = s3;
1312      s0 = tab0[i+1];
1313      s1 = tab1[i+1];
1314      s2 = tab2[i+1];
1315      s3 = tab3[i+1];
1316    }
1317
1318    tab[1020] = s0;
1319    tab[1021] = s1;
1320    tab[1022] = s2;
1321    tab[1023] = s3;
1322
1323    sl = (void *)src;
1324    dl = dst;
1325
1326    /* row loop */
1327    for (j = 0; j < ysize; j ++) {
1328      mlib_u8  *sp = sl;
1329      mlib_u32 *dp = (mlib_u32*)dl;
1330      mlib_s32 off, size = xsize;
1331
1332      off = (mlib_s32)((4 - ((mlib_addr)sp & 3)) & 3);
1333
1334      off = (off < size) ? off : size;
1335
1336#pragma pipeloop(0)
1337      for (i = 0; i < off; i++) {
1338        dp[0] = tab0[sp[0]];
1339        dp[1] = tab1[sp[0]];
1340        dp[2] = tab2[sp[0]];
1341        dp[3] = tab3[sp[0]];
1342        dp += 4; sp++;
1343      }
1344
1345      size -= off;
1346
1347      if (size > 0) {
1348        if (((mlib_addr)dp & 7) == 0) {
1349          mlib_v_ImageLookUpSI_U8_S32_4_SrcOff0_D1(sp, (mlib_s32*)dp, size, dtab);
1350        } else {
1351          mlib_v_ImageLookUpSI_U8_S32_4_DstNonAl_D1(sp, (mlib_s32*)dp, size, dtab);
1352        }
1353      }
1354
1355      sl = (mlib_u8 *) ((mlib_u8 *) sl + slb);
1356      dl = (mlib_s32 *) ((mlib_u8 *) dl + dlb);
1357    }
1358  }
1359}
1360
1361/***************************************************************/