h264bsd_reconstruct.c - This C code implements a function f…

/H264Dec/source/h264bsd_reconstruct.c

http://github.com/mbebenita/Broadway · C · 2315 lines · 1666 code · 259 blank · 390 comment · 110 complexity · a243c534b0997850d53f65736fd8ffbe MD5 · raw file
Large files are truncated click here to view the full file

/*
 * Copyright (C) 2009 The Android Open Source Project
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *      http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

/*------------------------------------------------------------------------------

    Table of contents

     1. Include headers
     2. External compiler flags
     3. Module defines
     4. Local function prototypes
     5. Functions

------------------------------------------------------------------------------*/

/*------------------------------------------------------------------------------
    1. Include headers
------------------------------------------------------------------------------*/

#include "basetype.h"
#include "h264bsd_reconstruct.h"
#include "h264bsd_macroblock_layer.h"
#include "h264bsd_image.h"
#include "h264bsd_util.h"

#ifdef H264DEC_OMXDL
#include "omxtypes.h"
#include "omxVC.h"
#include "armVC.h"
#endif /* H264DEC_OMXDL */

/*------------------------------------------------------------------------------
    2. External compiler flags
--------------------------------------------------------------------------------

--------------------------------------------------------------------------------
    3. Module defines
------------------------------------------------------------------------------*/

/* Switch off the following Lint messages for this file:
 * Info 701: Shift left of signed quantity (int)
 * Info 702: Shift right of signed quantity (int)
 */
/*lint -e701 -e702 */

/* Luma fractional-sample positions
 *
 *  G a b c H
 *  d e f g
 *  h i j k m
 *  n p q r
 *  M   s   N
 *
 *  G, H, M and N are integer sample positions
 *  a-s are fractional samples that need to be interpolated.
 */
#ifndef H264DEC_OMXDL
static const u32 lumaFracPos[4][4] = {
  /* G  d  h  n    a  e  i  p    b  f  j   q     c   g   k   r */
    {0, 1, 2, 3}, {4, 5, 6, 7}, {8, 9, 10, 11}, {12, 13, 14, 15}};
#endif /* H264DEC_OMXDL */

/* clipping table, defined in h264bsd_intra_prediction.c */
extern const u8 h264bsdClip[];

/*------------------------------------------------------------------------------
    4. Local function prototypes
------------------------------------------------------------------------------*/

#ifndef H264DEC_OMXDL

/*------------------------------------------------------------------------------

    Function: h264bsdInterpolateChromaHor

        Functional description:
          This function performs chroma interpolation in horizontal direction.
          Overfilling is done only if needed. Reference image (pRef) is
          read at correct position and the predicted part is written to
          macroblock's chrominance (predPartChroma)
        Inputs:
          pRef              pointer to reference frame Cb top-left corner
          x0                integer x-coordinate for prediction
          y0                integer y-coordinate for prediction
          width             width of the reference frame chrominance in pixels
          height            height of the reference frame chrominance in pixels
          xFrac             horizontal fraction for prediction in 1/8 pixels
          chromaPartWidth   width of the predicted part in pixels
          chromaPartHeight  height of the predicted part in pixels
        Outputs:
          predPartChroma    pointer where predicted part is written

------------------------------------------------------------------------------*/
#ifndef H264DEC_ARM11
void h264bsdInterpolateChromaHor(
  u8 *pRef,
  u8 *predPartChroma,
  i32 x0,
  i32 y0,
  u32 width,
  u32 height,
  u32 xFrac,
  u32 chromaPartWidth,
  u32 chromaPartHeight)
{

/* Variables */

    u32 x, y, tmp1, tmp2, tmp3, tmp4, c, val;
    u8 *ptrA, *cbr;
    u32 comp;
    u8 block[9*8*2];

/* Code */

    ASSERT(predPartChroma);
    ASSERT(chromaPartWidth);
    ASSERT(chromaPartHeight);
    ASSERT(xFrac < 8);
    ASSERT(pRef);

    if ((x0 < 0) || ((u32)x0+chromaPartWidth+1 > width) ||
        (y0 < 0) || ((u32)y0+chromaPartHeight > height))
    {
        h264bsdFillBlock(pRef, block, x0, y0, width, height,
            chromaPartWidth + 1, chromaPartHeight, chromaPartWidth + 1);
        pRef += width * height;
        h264bsdFillBlock(pRef, block + (chromaPartWidth+1)*chromaPartHeight,
            x0, y0, width, height, chromaPartWidth + 1,
            chromaPartHeight, chromaPartWidth + 1);

        pRef = block;
        x0 = 0;
        y0 = 0;
        width = chromaPartWidth+1;
        height = chromaPartHeight;
    }

    val = 8 - xFrac;

    for (comp = 0; comp <= 1; comp++)
    {

        ptrA = pRef + (comp * height + (u32)y0) * width + x0;
        cbr = predPartChroma + comp * 8 * 8;

        /* 2x2 pels per iteration
         * bilinear horizontal interpolation */
        for (y = (chromaPartHeight >> 1); y; y--)
        {
            for (x = (chromaPartWidth >> 1); x; x--)
            {
                tmp1 = ptrA[width];
                tmp2 = *ptrA++;
                tmp3 = ptrA[width];
                tmp4 = *ptrA++;
                c = ((val * tmp1 + xFrac * tmp3) << 3) + 32;
                c >>= 6;
                cbr[8] = (u8)c;
                c = ((val * tmp2 + xFrac * tmp4) << 3) + 32;
                c >>= 6;
                *cbr++ = (u8)c;
                tmp1 = ptrA[width];
                tmp2 = *ptrA;
                c = ((val * tmp3 + xFrac * tmp1) << 3) + 32;
                c >>= 6;
                cbr[8] = (u8)c;
                c = ((val * tmp4 + xFrac * tmp2) << 3) + 32;
                c >>= 6;
                *cbr++ = (u8)c;
            }
            cbr += 2*8 - chromaPartWidth;
            ptrA += 2*width - chromaPartWidth;
        }
    }

}

/*------------------------------------------------------------------------------

    Function: h264bsdInterpolateChromaVer

        Functional description:
          This function performs chroma interpolation in vertical direction.
          Overfilling is done only if needed. Reference image (pRef) is
          read at correct position and the predicted part is written to
          macroblock's chrominance (predPartChroma)

------------------------------------------------------------------------------*/

void h264bsdInterpolateChromaVer(
  u8 *pRef,
  u8 *predPartChroma,
  i32 x0,
  i32 y0,
  u32 width,
  u32 height,
  u32 yFrac,
  u32 chromaPartWidth,
  u32 chromaPartHeight)
{

/* Variables */

    u32 x, y, tmp1, tmp2, tmp3, c, val;
    u8 *ptrA, *cbr;
    u32 comp;
    u8 block[9*8*2];

/* Code */

    ASSERT(predPartChroma);
    ASSERT(chromaPartWidth);
    ASSERT(chromaPartHeight);
    ASSERT(yFrac < 8);
    ASSERT(pRef);

    if ((x0 < 0) || ((u32)x0+chromaPartWidth > width) ||
        (y0 < 0) || ((u32)y0+chromaPartHeight+1 > height))
    {
        h264bsdFillBlock(pRef, block, x0, y0, width, height, chromaPartWidth,
            chromaPartHeight + 1, chromaPartWidth);
        pRef += width * height;
        h264bsdFillBlock(pRef, block + chromaPartWidth*(chromaPartHeight+1),
            x0, y0, width, height, chromaPartWidth,
            chromaPartHeight + 1, chromaPartWidth);

        pRef = block;
        x0 = 0;
        y0 = 0;
        width = chromaPartWidth;
        height = chromaPartHeight+1;
    }

    val = 8 - yFrac;

    for (comp = 0; comp <= 1; comp++)
    {

        ptrA = pRef + (comp * height + (u32)y0) * width + x0;
        cbr = predPartChroma + comp * 8 * 8;

        /* 2x2 pels per iteration
         * bilinear vertical interpolation */
        for (y = (chromaPartHeight >> 1); y; y--)
        {
            for (x = (chromaPartWidth >> 1); x; x--)
            {
                tmp3 = ptrA[width*2];
                tmp2 = ptrA[width];
                tmp1 = *ptrA++;
                c = ((val * tmp2 + yFrac * tmp3) << 3) + 32;
                c >>= 6;
                cbr[8] = (u8)c;
                c = ((val * tmp1 + yFrac * tmp2) << 3) + 32;
                c >>= 6;
                *cbr++ = (u8)c;
                tmp3 = ptrA[width*2];
                tmp2 = ptrA[width];
                tmp1 = *ptrA++;
                c = ((val * tmp2 + yFrac * tmp3) << 3) + 32;
                c >>= 6;
                cbr[8] = (u8)c;
                c = ((val * tmp1 + yFrac * tmp2) << 3) + 32;
                c >>= 6;
                *cbr++ = (u8)c;
            }
            cbr += 2*8 - chromaPartWidth;
            ptrA += 2*width - chromaPartWidth;
        }
    }

}
#endif
/*------------------------------------------------------------------------------

    Function: h264bsdInterpolateChromaHorVer

        Functional description:
          This function performs chroma interpolation in horizontal and
          vertical direction. Overfilling is done only if needed. Reference
          image (ref) is read at correct position and the predicted part
          is written to macroblock's chrominance (predPartChroma)

------------------------------------------------------------------------------*/

void h264bsdInterpolateChromaHorVer(
  u8 *ref,
  u8 *predPartChroma,
  i32 x0,
  i32 y0,
  u32 width,
  u32 height,
  u32 xFrac,
  u32 yFrac,
  u32 chromaPartWidth,
  u32 chromaPartHeight)
{
    u8 block[9*9*2];
    u32 x, y, tmp1, tmp2, tmp3, tmp4, tmp5, tmp6, valX, valY, plus32 = 32;
    u32 comp;
    u8 *ptrA, *cbr;

/* Code */

    ASSERT(predPartChroma);
    ASSERT(chromaPartWidth);
    ASSERT(chromaPartHeight);
    ASSERT(xFrac < 8);
    ASSERT(yFrac < 8);
    ASSERT(ref);

    if ((x0 < 0) || ((u32)x0+chromaPartWidth+1 > width) ||
        (y0 < 0) || ((u32)y0+chromaPartHeight+1 > height))
    {
        h264bsdFillBlock(ref, block, x0, y0, width, height,
            chromaPartWidth + 1, chromaPartHeight + 1, chromaPartWidth + 1);
        ref += width * height;
        h264bsdFillBlock(ref, block + (chromaPartWidth+1)*(chromaPartHeight+1),
            x0, y0, width, height, chromaPartWidth + 1,
            chromaPartHeight + 1, chromaPartWidth + 1);

        ref = block;
        x0 = 0;
        y0 = 0;
        width = chromaPartWidth+1;
        height = chromaPartHeight+1;
    }

    valX = 8 - xFrac;
    valY = 8 - yFrac;

    for (comp = 0; comp <= 1; comp++)
    {

        ptrA = ref + (comp * height + (u32)y0) * width + x0;
        cbr = predPartChroma + comp * 8 * 8;

        /* 2x2 pels per iteration
         * bilinear vertical and horizontal interpolation */
        for (y = (chromaPartHeight >> 1); y; y--)
        {
            tmp1 = *ptrA;
            tmp3 = ptrA[width];
            tmp5 = ptrA[width*2];
            tmp1 *= valY;
            tmp1 += tmp3 * yFrac;
            tmp3 *= valY;
            tmp3 += tmp5 * yFrac;
            for (x = (chromaPartWidth >> 1); x; x--)
            {
                tmp2 = *++ptrA;
                tmp4 = ptrA[width];
                tmp6 = ptrA[width*2];
                tmp2 *= valY;
                tmp2 += tmp4 * yFrac;
                tmp4 *= valY;
                tmp4 += tmp6 * yFrac;
                tmp1 = tmp1 * valX + plus32;
                tmp3 = tmp3 * valX + plus32;
                tmp1 += tmp2 * xFrac;
                tmp1 >>= 6;
                tmp3 += tmp4 * xFrac;
                tmp3 >>= 6;
                cbr[8] = (u8)tmp3;
                *cbr++ = (u8)tmp1;

                tmp1 = *++ptrA;
                tmp3 = ptrA[width];
                tmp5 = ptrA[width*2];
                tmp1 *= valY;
                tmp1 += tmp3 * yFrac;
                tmp3 *= valY;
                tmp3 += tmp5 * yFrac;
                tmp2 = tmp2 * valX + plus32;
                tmp4 = tmp4 * valX + plus32;
                tmp2 += tmp1 * xFrac;
                tmp2 >>= 6;
                tmp4 += tmp3 * xFrac;
                tmp4 >>= 6;
                cbr[8] = (u8)tmp4;
                *cbr++ = (u8)tmp2;
            }
            cbr += 2*8 - chromaPartWidth;
            ptrA += 2*width - chromaPartWidth;
        }
    }

}

/*------------------------------------------------------------------------------

    Function: PredictChroma

        Functional description:
          Top level chroma prediction function that calls the appropriate
          interpolation function. The output is written to macroblock array.

------------------------------------------------------------------------------*/

static void PredictChroma(
  u8 *mbPartChroma,
  u32 xAL,
  u32 yAL,
  u32 partWidth,
  u32 partHeight,
  mv_t *mv,
  image_t *refPic)
{

/* Variables */

    u32 xFrac, yFrac, width, height, chromaPartWidth, chromaPartHeight;
    i32 xInt, yInt;
    u8 *ref;

/* Code */

    ASSERT(mv);
    ASSERT(refPic);
    ASSERT(refPic->data);
    ASSERT(refPic->width);
    ASSERT(refPic->height);

    width  = 8 * refPic->width;
    height = 8 * refPic->height;

    xInt = (xAL >> 1) + (mv->hor >> 3);
    yInt = (yAL >> 1) + (mv->ver >> 3);
    xFrac = mv->hor & 0x7;
    yFrac = mv->ver & 0x7;

    chromaPartWidth  = partWidth >> 1;
    chromaPartHeight = partHeight >> 1;
    ref = refPic->data + 256 * refPic->width * refPic->height;

    if (xFrac && yFrac)
    {
        h264bsdInterpolateChromaHorVer(ref, mbPartChroma, xInt, yInt, width,
                height, xFrac, yFrac, chromaPartWidth, chromaPartHeight);
    }
    else if (xFrac)
    {
        h264bsdInterpolateChromaHor(ref, mbPartChroma, xInt, yInt, width,
                height, xFrac, chromaPartWidth, chromaPartHeight);
    }
    else if (yFrac)
    {
        h264bsdInterpolateChromaVer(ref, mbPartChroma, xInt, yInt, width,
                height, yFrac, chromaPartWidth, chromaPartHeight);
    }
    else
    {
        h264bsdFillBlock(ref, mbPartChroma, xInt, yInt, width, height,
            chromaPartWidth, chromaPartHeight, 8);
        ref += width * height;
        h264bsdFillBlock(ref, mbPartChroma + 8*8, xInt, yInt, width, height,
            chromaPartWidth, chromaPartHeight, 8);
    }

}


/*------------------------------------------------------------------------------

    Function: h264bsdInterpolateVerHalf

        Functional description:
          Function to perform vertical interpolation of pixel position 'h'
          for a block. Overfilling is done only if needed. Reference
          image (ref) is read at correct position and the predicted part
          is written to macroblock array (mb)

------------------------------------------------------------------------------*/
#ifndef H264DEC_ARM11
void h264bsdInterpolateVerHalf(
  u8 *ref,
  u8 *mb,
  i32 x0,
  i32 y0,
  u32 width,
  u32 height,
  u32 partWidth,
  u32 partHeight)
{
    u32 p1[21*21/4+1];
    u32 i, j;
    i32 tmp1, tmp2, tmp3, tmp4, tmp5, tmp6, tmp7;
    u8 *ptrC, *ptrV;
    const u8 *clp = h264bsdClip + 512;

    /* Code */

    ASSERT(ref);
    ASSERT(mb);

    if ((x0 < 0) || ((u32)x0+partWidth > width) ||
        (y0 < 0) || ((u32)y0+partHeight+5 > height))
    {
        h264bsdFillBlock(ref, (u8*)p1, x0, y0, width, height,
                partWidth, partHeight+5, partWidth);

        x0 = 0;
        y0 = 0;
        ref = (u8*)p1;
        width = partWidth;
    }

    ref += (u32)y0 * width + (u32)x0;

    ptrC = ref + width;
    ptrV = ptrC + 5*width;

    /* 4 pixels per iteration, interpolate using 5 vertical samples */
    for (i = (partHeight >> 2); i; i--)
    {
        /* h1 = (16 + A + 16(G+M) + 4(G+M) - 4(C+R) - (C+R) + T) >> 5 */
        for (j = partWidth; j; j--)
        {
            tmp4 = ptrV[-(i32)width*2];
            tmp5 = ptrV[-(i32)width];
            tmp1 = ptrV[width];
            tmp2 = ptrV[width*2];
            tmp6 = *ptrV++;

            tmp7 = tmp4 + tmp1;
            tmp2 -= (tmp7 << 2);
            tmp2 -= tmp7;
            tmp2 += 16;
            tmp7 = tmp5 + tmp6;
            tmp3 = ptrC[width*2];
            tmp2 += (tmp7 << 4);
            tmp2 += (tmp7 << 2);
            tmp2 += tmp3;
            tmp2 = clp[tmp2>>5];
            tmp1 += 16;
            mb[48] = (u8)tmp2;

            tmp7 = tmp3 + tmp6;
            tmp1 -= (tmp7 << 2);
            tmp1 -= tmp7;
            tmp7 = tmp4 + tmp5;
            tmp2 = ptrC[width];
            tmp1 += (tmp7 << 4);
            tmp1 += (tmp7 << 2);
            tmp1 += tmp2;
            tmp1 = clp[tmp1>>5];
            tmp6 += 16;
            mb[32] = (u8)tmp1;

            tmp7 = tmp2 + tmp5;
            tmp6 -= (tmp7 << 2);
            tmp6 -= tmp7;
            tmp7 = tmp4 + tmp3;
            tmp1 = *ptrC;
            tmp6 += (tmp7 << 4);
            tmp6 += (tmp7 << 2);
            tmp6 += tmp1;
            tmp6 = clp[tmp6>>5];
            tmp5 += 16;
            mb[16] = (u8)tmp6;

            tmp1 += tmp4;
            tmp5 -= (tmp1 << 2);
            tmp5 -= tmp1;
            tmp3 += tmp2;
            tmp6 = ptrC[-(i32)width];
            tmp5 += (tmp3 << 4);
            tmp5 += (tmp3 << 2);
            tmp5 += tmp6;
            tmp5 = clp[tmp5>>5];
            *mb++ = (u8)tmp5;
            ptrC++;
        }
        ptrC += 4*width - partWidth;
        ptrV += 4*width - partWidth;
        mb += 4*16 - partWidth;
    }

}

/*------------------------------------------------------------------------------

    Function: h264bsdInterpolateVerQuarter

        Functional description:
          Function to perform vertical interpolation of pixel position 'd'
          or 'n' for a block. Overfilling is done only if needed. Reference
          image (ref) is read at correct position and the predicted part
          is written to macroblock array (mb)

------------------------------------------------------------------------------*/

void h264bsdInterpolateVerQuarter(
  u8 *ref,
  u8 *mb,
  i32 x0,
  i32 y0,
  u32 width,
  u32 height,
  u32 partWidth,
  u32 partHeight,
  u32 verOffset)    /* 0 for pixel d, 1 for pixel n */
{
    u32 p1[21*21/4+1];
    u32 i, j;
    i32 tmp1, tmp2, tmp3, tmp4, tmp5, tmp6, tmp7;
    u8 *ptrC, *ptrV, *ptrInt;
    const u8 *clp = h264bsdClip + 512;

    /* Code */

    ASSERT(ref);
    ASSERT(mb);

    if ((x0 < 0) || ((u32)x0+partWidth > width) ||
        (y0 < 0) || ((u32)y0+partHeight+5 > height))
    {
        h264bsdFillBlock(ref, (u8*)p1, x0, y0, width, height,
                partWidth, partHeight+5, partWidth);

        x0 = 0;
        y0 = 0;
        ref = (u8*)p1;
        width = partWidth;
    }

    ref += (u32)y0 * width + (u32)x0;

    ptrC = ref + width;
    ptrV = ptrC + 5*width;

    /* Pointer to integer sample position, either M or R */
    ptrInt = ptrC + (2+verOffset)*width;

    /* 4 pixels per iteration
     * interpolate using 5 vertical samples and average between
     * interpolated value and integer sample value */
    for (i = (partHeight >> 2); i; i--)
    {
        /* h1 = (16 + A + 16(G+M) + 4(G+M) - 4(C+R) - (C+R) + T) >> 5 */
        for (j = partWidth; j; j--)
        {
            tmp4 = ptrV[-(i32)width*2];
            tmp5 = ptrV[-(i32)width];
            tmp1 = ptrV[width];
            tmp2 = ptrV[width*2];
            tmp6 = *ptrV++;

            tmp7 = tmp4 + tmp1;
            tmp2 -= (tmp7 << 2);
            tmp2 -= tmp7;
            tmp2 += 16;
            tmp7 = tmp5 + tmp6;
            tmp3 = ptrC[width*2];
            tmp2 += (tmp7 << 4);
            tmp2 += (tmp7 << 2);
            tmp2 += tmp3;
            tmp2 = clp[tmp2>>5];
            tmp7 = ptrInt[width*2];
            tmp1 += 16;
            tmp2++;
            mb[48] = (u8)((tmp2 + tmp7) >> 1);

            tmp7 = tmp3 + tmp6;
            tmp1 -= (tmp7 << 2);
            tmp1 -= tmp7;
            tmp7 = tmp4 + tmp5;
            tmp2 = ptrC[width];
            tmp1 += (tmp7 << 4);
            tmp1 += (tmp7 << 2);
            tmp1 += tmp2;
            tmp1 = clp[tmp1>>5];
            tmp7 = ptrInt[width];
            tmp6 += 16;
            tmp1++;
            mb[32] = (u8)((tmp1 + tmp7) >> 1);

            tmp7 = tmp2 + tmp5;
            tmp6 -= (tmp7 << 2);
            tmp6 -= tmp7;
            tmp7 = tmp4 + tmp3;
            tmp1 = *ptrC;
            tmp6 += (tmp7 << 4);
            tmp6 += (tmp7 << 2);
            tmp6 += tmp1;
            tmp6 = clp[tmp6>>5];
            tmp7 = *ptrInt;
            tmp5 += 16;
            tmp6++;
            mb[16] = (u8)((tmp6 + tmp7) >> 1);

            tmp1 += tmp4;
            tmp5 -= (tmp1 << 2);
            tmp5 -= tmp1;
            tmp3 += tmp2;
            tmp6 = ptrC[-(i32)width];
            tmp5 += (tmp3 << 4);
            tmp5 += (tmp3 << 2);
            tmp5 += tmp6;
            tmp5 = clp[tmp5>>5];
            tmp7 = ptrInt[-(i32)width];
            tmp5++;
            *mb++ = (u8)((tmp5 + tmp7) >> 1);
            ptrC++;
            ptrInt++;
        }
        ptrC += 4*width - partWidth;
        ptrV += 4*width - partWidth;
        ptrInt += 4*width - partWidth;
        mb += 4*16 - partWidth;
    }

}

/*------------------------------------------------------------------------------

    Function: h264bsdInterpolateHorHalf

        Functional description:
          Function to perform horizontal interpolation of pixel position 'b'
          for a block. Overfilling is done only if needed. Reference
          image (ref) is read at correct position and the predicted part
          is written to macroblock array (mb)

------------------------------------------------------------------------------*/

void h264bsdInterpolateHorHalf(
  u8 *ref,
  u8 *mb,
  i32 x0,
  i32 y0,
  u32 width,
  u32 height,
  u32 partWidth,
  u32 partHeight)
{
    u32 p1[21*21/4+1];
    u8 *ptrJ;
    u32 x, y;
    i32 tmp1, tmp2, tmp3, tmp4, tmp5, tmp6, tmp7;
    const u8 *clp = h264bsdClip + 512;

    /* Code */

    ASSERT(ref);
    ASSERT(mb);
    ASSERT((partWidth&0x3) == 0);
    ASSERT((partHeight&0x3) == 0);

    if ((x0 < 0) || ((u32)x0+partWidth+5 > width) ||
        (y0 < 0) || ((u32)y0+partHeight > height))
    {
        h264bsdFillBlock(ref, (u8*)p1, x0, y0, width, height,
                partWidth+5, partHeight, partWidth+5);

        x0 = 0;
        y0 = 0;
        ref = (u8*)p1;
        width = partWidth + 5;
    }

    ref += (u32)y0 * width + (u32)x0;

    ptrJ = ref + 5;

    for (y = partHeight; y; y--)
    {
        tmp6 = *(ptrJ - 5);
        tmp5 = *(ptrJ - 4);
        tmp4 = *(ptrJ - 3);
        tmp3 = *(ptrJ - 2);
        tmp2 = *(ptrJ - 1);

        /* calculate 4 pels per iteration */
        for (x = (partWidth >> 2); x; x--)
        {
            /* First pixel */
            tmp6 += 16;
            tmp7 = tmp3 + tmp4;
            tmp6 += (tmp7 << 4);
            tmp6 += (tmp7 << 2);
            tmp7 = tmp2 + tmp5;
            tmp1 = *ptrJ++;
            tmp6 -= (tmp7 << 2);
            tmp6 -= tmp7;
            tmp6 += tmp1;
            tmp6 = clp[tmp6>>5];
            /* Second pixel */
            tmp5 += 16;
            tmp7 = tmp2 + tmp3;
            *mb++ = (u8)tmp6;
            tmp5 += (tmp7 << 4);
            tmp5 += (tmp7 << 2);
            tmp7 = tmp1 + tmp4;
            tmp6 = *ptrJ++;
            tmp5 -= (tmp7 << 2);
            tmp5 -= tmp7;
            tmp5 += tmp6;
            tmp5 = clp[tmp5>>5];
            /* Third pixel */
            tmp4 += 16;
            tmp7 = tmp1 + tmp2;
            *mb++ = (u8)tmp5;
            tmp4 += (tmp7 << 4);
            tmp4 += (tmp7 << 2);
            tmp7 = tmp6 + tmp3;
            tmp5 = *ptrJ++;
            tmp4 -= (tmp7 << 2);
            tmp4 -= tmp7;
            tmp4 += tmp5;
            tmp4 = clp[tmp4>>5];
            /* Fourth pixel */
            tmp3 += 16;
            tmp7 = tmp6 + tmp1;
            *mb++ = (u8)tmp4;
            tmp3 += (tmp7 << 4);
            tmp3 += (tmp7 << 2);
            tmp7 = tmp5 + tmp2;
            tmp4 = *ptrJ++;
            tmp3 -= (tmp7 << 2);
            tmp3 -= tmp7;
            tmp3 += tmp4;
            tmp3 = clp[tmp3>>5];
            tmp7 = tmp4;
            tmp4 = tmp6;
            tmp6 = tmp2;
            tmp2 = tmp7;
            *mb++ = (u8)tmp3;
            tmp3 = tmp5;
            tmp5 = tmp1;
        }
        ptrJ += width - partWidth;
        mb += 16 - partWidth;
    }

}

/*------------------------------------------------------------------------------

    Function: h264bsdInterpolateHorQuarter

        Functional description:
          Function to perform horizontal interpolation of pixel position 'a'
          or 'c' for a block. Overfilling is done only if needed. Reference
          image (ref) is read at correct position and the predicted part
          is written to macroblock array (mb)

------------------------------------------------------------------------------*/

void h264bsdInterpolateHorQuarter(
  u8 *ref,
  u8 *mb,
  i32 x0,
  i32 y0,
  u32 width,
  u32 height,
  u32 partWidth,
  u32 partHeight,
  u32 horOffset) /* 0 for pixel a, 1 for pixel c */
{
    u32 p1[21*21/4+1];
    u8 *ptrJ;
    u32 x, y;
    i32 tmp1, tmp2, tmp3, tmp4, tmp5, tmp6, tmp7;
    const u8 *clp = h264bsdClip + 512;

    /* Code */

    ASSERT(ref);
    ASSERT(mb);

    if ((x0 < 0) || ((u32)x0+partWidth+5 > width) ||
        (y0 < 0) || ((u32)y0+partHeight > height))
    {
        h264bsdFillBlock(ref, (u8*)p1, x0, y0, width, height,
                partWidth+5, partHeight, partWidth+5);

        x0 = 0;
        y0 = 0;
        ref = (u8*)p1;
        width = partWidth + 5;
    }

    ref += (u32)y0 * width + (u32)x0;

    ptrJ = ref + 5;

    for (y = partHeight; y; y--)
    {
        tmp6 = *(ptrJ - 5);
        tmp5 = *(ptrJ - 4);
        tmp4 = *(ptrJ - 3);
        tmp3 = *(ptrJ - 2);
        tmp2 = *(ptrJ - 1);

        /* calculate 4 pels per iteration */
        for (x = (partWidth >> 2); x; x--)
        {
            /* First pixel */
            tmp6 += 16;
            tmp7 = tmp3 + tmp4;
            tmp6 += (tmp7 << 4);
            tmp6 += (tmp7 << 2);
            tmp7 = tmp2 + tmp5;
            tmp1 = *ptrJ++;
            tmp6 -= (tmp7 << 2);
            tmp6 -= tmp7;
            tmp6 += tmp1;
            tmp6 = clp[tmp6>>5];
            tmp5 += 16;
            if (!horOffset)
                tmp6 += tmp4;
            else
                tmp6 += tmp3;
            *mb++ = (u8)((tmp6 + 1) >> 1);
            /* Second pixel */
            tmp7 = tmp2 + tmp3;
            tmp5 += (tmp7 << 4);
            tmp5 += (tmp7 << 2);
            tmp7 = tmp1 + tmp4;
            tmp6 = *ptrJ++;
            tmp5 -= (tmp7 << 2);
            tmp5 -= tmp7;
            tmp5 += tmp6;
            tmp5 = clp[tmp5>>5];
            tmp4 += 16;
            if (!horOffset)
                tmp5 += tmp3;
            else
                tmp5 += tmp2;
            *mb++ = (u8)((tmp5 + 1) >> 1);
            /* Third pixel */
            tmp7 = tmp1 + tmp2;
            tmp4 += (tmp7 << 4);
            tmp4 += (tmp7 << 2);
            tmp7 = tmp6 + tmp3;
            tmp5 = *ptrJ++;
            tmp4 -= (tmp7 << 2);
            tmp4 -= tmp7;
            tmp4 += tmp5;
            tmp4 = clp[tmp4>>5];
            tmp3 += 16;
            if (!horOffset)
                tmp4 += tmp2;
            else
                tmp4 += tmp1;
            *mb++ = (u8)((tmp4 + 1) >> 1);
            /* Fourth pixel */
            tmp7 = tmp6 + tmp1;
            tmp3 += (tmp7 << 4);
            tmp3 += (tmp7 << 2);
            tmp7 = tmp5 + tmp2;
            tmp4 = *ptrJ++;
            tmp3 -= (tmp7 << 2);
            tmp3 -= tmp7;
            tmp3 += tmp4;
            tmp3 = clp[tmp3>>5];
            if (!horOffset)
                tmp3 += tmp1;
            else
                tmp3 += tmp6;
            *mb++ = (u8)((tmp3 + 1) >> 1);
            tmp3 = tmp5;
            tmp5 = tmp1;
            tmp7 = tmp4;
            tmp4 = tmp6;
            tmp6 = tmp2;
            tmp2 = tmp7;
        }
        ptrJ += width - partWidth;
        mb += 16 - partWidth;
    }

}

/*------------------------------------------------------------------------------

    Function: h264bsdInterpolateHorVerQuarter

        Functional description:
          Function to perform horizontal and vertical interpolation of pixel
          position 'e', 'g', 'p' or 'r' for a block. Overfilling is done only
          if needed. Reference image (ref) is read at correct position and
          the predicted part is written to macroblock array (mb)

------------------------------------------------------------------------------*/

void h264bsdInterpolateHorVerQuarter(
  u8 *ref,
  u8 *mb,
  i32 x0,
  i32 y0,
  u32 width,
  u32 height,
  u32 partWidth,
  u32 partHeight,
  u32 horVerOffset) /* 0 for pixel e, 1 for pixel g,
                       2 for pixel p, 3 for pixel r */
{
    u32 p1[21*21/4+1];
    u8 *ptrC, *ptrJ, *ptrV;
    u32 x, y;
    i32 tmp1, tmp2, tmp3, tmp4, tmp5, tmp6, tmp7;
    const u8 *clp = h264bsdClip + 512;

    /* Code */

    ASSERT(ref);
    ASSERT(mb);

    if ((x0 < 0) || ((u32)x0+partWidth+5 > width) ||
        (y0 < 0) || ((u32)y0+partHeight+5 > height))
    {
        h264bsdFillBlock(ref, (u8*)p1, x0, y0, width, height,
                partWidth+5, partHeight+5, partWidth+5);

        x0 = 0;
        y0 = 0;
        ref = (u8*)p1;
        width = partWidth+5;
    }

    /* Ref points to G + (-2, -2) */
    ref += (u32)y0 * width + (u32)x0;

    /* ptrJ points to either J or Q, depending on vertical offset */
    ptrJ = ref + (((horVerOffset & 0x2) >> 1) + 2) * width + 5;

    /* ptrC points to either C or D, depending on horizontal offset */
    ptrC = ref + width + 2 + (horVerOffset & 0x1);

    for (y = partHeight; y; y--)
    {
        tmp6 = *(ptrJ - 5);
        tmp5 = *(ptrJ - 4);
        tmp4 = *(ptrJ - 3);
        tmp3 = *(ptrJ - 2);
        tmp2 = *(ptrJ - 1);

        /* Horizontal interpolation, calculate 4 pels per iteration */
        for (x = (partWidth >> 2); x; x--)
        {
            /* First pixel */
            tmp6 += 16;
            tmp7 = tmp3 + tmp4;
            tmp6 += (tmp7 << 4);
            tmp6 += (tmp7 << 2);
            tmp7 = tmp2 + tmp5;
            tmp1 = *ptrJ++;
            tmp6 -= (tmp7 << 2);
            tmp6 -= tmp7;
            tmp6 += tmp1;
            tmp6 = clp[tmp6>>5];
            /* Second pixel */
            tmp5 += 16;
            tmp7 = tmp2 + tmp3;
            *mb++ = (u8)tmp6;
            tmp5 += (tmp7 << 4);
            tmp5 += (tmp7 << 2);
            tmp7 = tmp1 + tmp4;
            tmp6 = *ptrJ++;
            tmp5 -= (tmp7 << 2);
            tmp5 -= tmp7;
            tmp5 += tmp6;
            tmp5 = clp[tmp5>>5];
            /* Third pixel */
            tmp4 += 16;
            tmp7 = tmp1 + tmp2;
            *mb++ = (u8)tmp5;
            tmp4 += (tmp7 << 4);
            tmp4 += (tmp7 << 2);
            tmp7 = tmp6 + tmp3;
            tmp5 = *ptrJ++;
            tmp4 -= (tmp7 << 2);
            tmp4 -= tmp7;
            tmp4 += tmp5;
            tmp4 = clp[tmp4>>5];
            /* Fourth pixel */
            tmp3 += 16;
            tmp7 = tmp6 + tmp1;
            *mb++ = (u8)tmp4;
            tmp3 += (tmp7 << 4);
            tmp3 += (tmp7 << 2);
            tmp7 = tmp5 + tmp2;
            tmp4 = *ptrJ++;
            tmp3 -= (tmp7 << 2);
            tmp3 -= tmp7;
            tmp3 += tmp4;
            tmp3 = clp[tmp3>>5];
            tmp7 = tmp4;
            tmp4 = tmp6;
            tmp6 = tmp2;
            tmp2 = tmp7;
            *mb++ = (u8)tmp3;
            tmp3 = tmp5;
            tmp5 = tmp1;
        }
        ptrJ += width - partWidth;
        mb += 16 - partWidth;
    }

    mb -= 16*partHeight;
    ptrV = ptrC + 5*width;

    for (y = (partHeight >> 2); y; y--)
    {
        /* Vertical interpolation and averaging, 4 pels per iteration */
        for (x = partWidth; x; x--)
        {
            tmp4 = ptrV[-(i32)width*2];
            tmp5 = ptrV[-(i32)width];
            tmp1 = ptrV[width];
            tmp2 = ptrV[width*2];
            tmp6 = *ptrV++;

            tmp7 = tmp4 + tmp1;
            tmp2 -= (tmp7 << 2);
            tmp2 -= tmp7;
            tmp2 += 16;
            tmp7 = tmp5 + tmp6;
            tmp3 = ptrC[width*2];
            tmp2 += (tmp7 << 4);
            tmp2 += (tmp7 << 2);
            tmp2 += tmp3;
            tmp7 = clp[tmp2>>5];
            tmp2 = mb[48];
            tmp1 += 16;
            tmp7++;
            mb[48] = (u8)((tmp2 + tmp7) >> 1);

            tmp7 = tmp3 + tmp6;
            tmp1 -= (tmp7 << 2);
            tmp1 -= tmp7;
            tmp7 = tmp4 + tmp5;
            tmp2 = ptrC[width];
            tmp1 += (tmp7 << 4);
            tmp1 += (tmp7 << 2);
            tmp1 += tmp2;
            tmp7 = clp[tmp1>>5];
            tmp1 = mb[32];
            tmp6 += 16;
            tmp7++;
            mb[32] = (u8)((tmp1 + tmp7) >> 1);

            tmp1 = *ptrC;
            tmp7 = tmp2 + tmp5;
            tmp6 -= (tmp7 << 2);
            tmp6 -= tmp7;
            tmp7 = tmp4 + tmp3;
            tmp6 += (tmp7 << 4);
            tmp6 += (tmp7 << 2);
            tmp6 += tmp1;
            tmp7 = clp[tmp6>>5];
            tmp6 = mb[16];
            tmp5 += 16;
            tmp7++;
            mb[16] = (u8)((tmp6 + tmp7) >> 1);

            tmp6 = ptrC[-(i32)width];
            tmp1 += tmp4;
            tmp5 -= (tmp1 << 2);
            tmp5 -= tmp1;
            tmp3 += tmp2;
            tmp5 += (tmp3 << 4);
            tmp5 += (tmp3 << 2);
            tmp5 += tmp6;
            tmp7 = clp[tmp5>>5];
            tmp5 = *mb;
            tmp7++;
            *mb++ = (u8)((tmp5 + tmp7) >> 1);
            ptrC++;

        }
        ptrC += 4*width - partWidth;
        ptrV += 4*width - partWidth;
        mb += 4*16 - partWidth;
    }

}
#endif

/*------------------------------------------------------------------------------

    Function: h264bsdInterpolateMidHalf

        Functional description:
          Function to perform horizontal and vertical interpolation of pixel
          position 'j' for a block. Overfilling is done only if needed.
          Reference image (ref) is read at correct position and the predicted
          part is written to macroblock array (mb)

------------------------------------------------------------------------------*/

void h264bsdInterpolateMidHalf(
  u8 *ref,
  u8 *mb,
  i32 x0,
  i32 y0,
  u32 width,
  u32 height,
  u32 partWidth,
  u32 partHeight)
{
    u32 p1[21*21/4+1];
    u32 x, y;
    i32 tmp1, tmp2, tmp3, tmp4, tmp5, tmp6, tmp7;
    i32 *ptrC, *ptrV, *b1;
    u8  *ptrJ;
    i32 table[21*16];
    const u8 *clp = h264bsdClip + 512;

    /* Code */

    ASSERT(ref);
    ASSERT(mb);

    if ((x0 < 0) || ((u32)x0+partWidth+5 > width) ||
        (y0 < 0) || ((u32)y0+partHeight+5 > height))
    {
        h264bsdFillBlock(ref, (u8*)p1, x0, y0, width, height,
                partWidth+5, partHeight+5, partWidth+5);

        x0 = 0;
        y0 = 0;
        ref = (u8*)p1;
        width = partWidth+5;
    }

    ref += (u32)y0 * width + (u32)x0;

    b1 = table;
    ptrJ = ref + 5;

    /* First step: calculate intermediate values for
     * horizontal interpolation */
    for (y = partHeight + 5; y; y--)
    {
        tmp6 = *(ptrJ - 5);
        tmp5 = *(ptrJ - 4);
        tmp4 = *(ptrJ - 3);
        tmp3 = *(ptrJ - 2);
        tmp2 = *(ptrJ - 1);

        /* 4 pels per iteration */
        for (x = (partWidth >> 2); x; x--)
        {
            /* First pixel */
            tmp7 = tmp3 + tmp4;
            tmp6 += (tmp7 << 4);
            tmp6 += (tmp7 << 2);
            tmp7 = tmp2 + tmp5;
            tmp1 = *ptrJ++;
            tmp6 -= (tmp7 << 2);
            tmp6 -= tmp7;
            tmp6 += tmp1;
            *b1++ = tmp6;
            /* Second pixel */
            tmp7 = tmp2 + tmp3;
            tmp5 += (tmp7 << 4);
            tmp5 += (tmp7 << 2);
            tmp7 = tmp1 + tmp4;
            tmp6 = *ptrJ++;
            tmp5 -= (tmp7 << 2);
            tmp5 -= tmp7;
            tmp5 += tmp6;
            *b1++ = tmp5;
            /* Third pixel */
            tmp7 = tmp1 + tmp2;
            tmp4 += (tmp7 << 4);
            tmp4 += (tmp7 << 2);
            tmp7 = tmp6 + tmp3;
            tmp5 = *ptrJ++;
            tmp4 -= (tmp7 << 2);
            tmp4 -= tmp7;
            tmp4 += tmp5;
            *b1++ = tmp4;
            /* Fourth pixel */
            tmp7 = tmp6 + tmp1;
            tmp3 += (tmp7 << 4);
            tmp3 += (tmp7 << 2);
            tmp7 = tmp5 + tmp2;
            tmp4 = *ptrJ++;
            tmp3 -= (tmp7 << 2);
            tmp3 -= tmp7;
            tmp3 += tmp4;
            *b1++ = tmp3;
            tmp7 = tmp4;
            tmp4 = tmp6;
            tmp6 = tmp2;
            tmp2 = tmp7;
            tmp3 = tmp5;
            tmp5 = tmp1;
        }
        ptrJ += width - partWidth;
    }

    /* Second step: calculate vertical interpolation */
    ptrC = table + partWidth;
    ptrV = ptrC + 5*partWidth;
    for (y = (partHeight >> 2); y; y--)
    {
        /* 4 pels per iteration */
        for (x = partWidth; x; x--)
        {
            tmp4 = ptrV[-(i32)partWidth*2];
            tmp5 = ptrV[-(i32)partWidth];
            tmp1 = ptrV[partWidth];
            tmp2 = ptrV[partWidth*2];
            tmp6 = *ptrV++;

            tmp7 = tmp4 + tmp1;
            tmp2 -= (tmp7 << 2);
            tmp2 -= tmp7;
            tmp2 += 512;
            tmp7 = tmp5 + tmp6;
            tmp3 = ptrC[partWidth*2];
            tmp2 += (tmp7 << 4);
            tmp2 += (tmp7 << 2);
            tmp2 += tmp3;
            tmp7 = clp[tmp2>>10];
            tmp1 += 512;
            mb[48] = (u8)tmp7;

            tmp7 = tmp3 + tmp6;
            tmp1 -= (tmp7 << 2);
            tmp1 -= tmp7;
            tmp7 = tmp4 + tmp5;
            tmp2 = ptrC[partWidth];
            tmp1 += (tmp7 << 4);
            tmp1 += (tmp7 << 2);
            tmp1 += tmp2;
            tmp7 = clp[tmp1>>10];
            tmp6 += 512;
            mb[32] = (u8)tmp7;

            tmp1 = *ptrC;
            tmp7 = tmp2 + tmp5;
            tmp6 -= (tmp7 << 2);
            tmp6 -= tmp7;
            tmp7 = tmp4 + tmp3;
            tmp6 += (tmp7 << 4);
            tmp6 += (tmp7 << 2);
            tmp6 += tmp1;
            tmp7 = clp[tmp6>>10];
            tmp5 += 512;
            mb[16] = (u8)tmp7;

            tmp6 = ptrC[-(i32)partWidth];
            tmp1 += tmp4;
            tmp5 -= (tmp1 << 2);
            tmp5 -= tmp1;
            tmp3 += tmp2;
            tmp5 += (tmp3 << 4);
            tmp5 += (tmp3 << 2);
            tmp5 += tmp6;
            tmp7 = clp[tmp5>>10];
            *mb++ = (u8)tmp7;
            ptrC++;
        }
        mb += 4*16 - partWidth;
        ptrC += 3*partWidth;
        ptrV += 3*partWidth;
    }

}


/*------------------------------------------------------------------------------

    Function: h264bsdInterpolateMidVerQuarter

        Functional description:
          Function to perform horizontal and vertical interpolation of pixel
          position 'f' or 'q' for a block. Overfilling is done only if needed.
          Reference image (ref) is read at correct position and the predicted
          part is written to macroblock array (mb)

------------------------------------------------------------------------------*/

void h264bsdInterpolateMidVerQuarter(
  u8 *ref,
  u8 *mb,
  i32 x0,
  i32 y0,
  u32 width,
  u32 height,
  u32 partWidth,
  u32 partHeight,
  u32 verOffset)    /* 0 for pixel f, 1 for pixel q */
{
    u32 p1[21*21/4+1];
    u32 x, y;
    i32 tmp1, tmp2, tmp3, tmp4, tmp5, tmp6, tmp7;
    i32 *ptrC, *ptrV, *ptrInt, *b1;
    u8  *ptrJ;
    i32 table[21*16];
    const u8 *clp = h264bsdClip + 512;

    /* Code */

    ASSERT(ref);
    ASSERT(mb);

    if ((x0 < 0) || ((u32)x0+partWidth+5 > width) ||
        (y0 < 0) || ((u32)y0+partHeight+5 > height))
    {
        h264bsdFillBlock(ref, (u8*)p1, x0, y0, width, height,
                partWidth+5, partHeight+5, partWidth+5);

        x0 = 0;
        y0 = 0;
        ref = (u8*)p1;
        width = partWidth+5;
    }

    ref += (u32)y0 * width + (u32)x0;

    b1 = table;
    ptrJ = ref + 5;

    /* First step: calculate intermediate values for
     * horizontal interpolation */
    for (y = partHeight + 5; y; y--)
    {
        tmp6 = *(ptrJ - 5);
        tmp5 = *(ptrJ - 4);
        tmp4 = *(ptrJ - 3);
        tmp3 = *(ptrJ - 2);
        tmp2 = *(ptrJ - 1);
        for (x = (partWidth >> 2); x; x--)
        {
            /* First pixel */
            tmp7 = tmp3 + tmp4;
            tmp6 += (tmp7 << 4);
            tmp6 += (tmp7 << 2);
            tmp7 = tmp2 + tmp5;
            tmp1 = *ptrJ++;
            tmp6 -= (tmp7 << 2);
            tmp6 -= tmp7;
            tmp6 += tmp1;
            *b1++ = tmp6;
            /* Second pixel */
            tmp7 = tmp2 + tmp3;
            tmp5 += (tmp7 << 4);
            tmp5 += (tmp7 << 2);
            tmp7 = tmp1 + tmp4;
            tmp6 = *ptrJ++;
            tmp5 -= (tmp7 << 2);
            tmp5 -= tmp7;
            tmp5 += tmp6;
            *b1++ = tmp5;
            /* Third pixel */
            tmp7 = tmp1 + tmp2;
            tmp4 += (tmp7 << 4);
            tmp4 += (tmp7 << 2);
            tmp7 = tmp6 + tmp3;
            tmp5 = *ptrJ++;
            tmp4 -= (tmp7 << 2);
            tmp4 -= tmp7;
            tmp4 += tmp5;
            *b1++ = tmp4;
            /* Fourth pixel */
            tmp7 = tmp6 + tmp1;
            tmp3 += (tmp7 << 4);
            tmp3 += (tmp7 << 2);
            tmp7 = tmp5 + tmp2;
            tmp4 = *ptrJ++;
            tmp3 -= (tmp7 << 2);
            tmp3 -= tmp7;
            tmp3 += tmp4;
            *b1++ = tmp3;
            tmp7 = tmp4;
            tmp4 = tmp6;
            tmp6 = tmp2;
            tmp2 = tmp7;
            tmp3 = tmp5;
            tmp5 = tmp1;
        }
        ptrJ += width - partWidth;
    }

    /* Second step: calculate vertical interpolation and average */
    ptrC = table + partWidth;
    ptrV = ptrC + 5*partWidth;
    /* Pointer to integer sample position, either M or R */
    ptrInt = ptrC + (2+verOffset)*partWidth;
    for (y = (partHeight >> 2); y; y--)
    {
        for (x = partWidth; x; x--)
        {
            tmp4 = ptrV[-(i32)partWidth*2];
            tmp5 = ptrV[-(i32)partWidth];
            tmp1 = ptrV[partWidth];
            tmp2 = ptrV[partWidth*2];
            tmp6 = *ptrV++;

            tmp7 = tmp4 + tmp1;
            tmp2 -= (tmp7 << 2);
            tmp2 -= tmp7;
            tmp2 += 512;
            tmp7 = tmp5 + tmp6;
            tmp3 = ptrC[partWidth*2];
            tmp2 += (tmp7 << 4);
            tmp2 += (tmp7 << 2);
            tmp7 = ptrInt[partWidth*2];
            tmp2 += tmp3;
            tmp2 = clp[tmp2>>10];
            tmp7 += 16;
            tmp7 = clp[tmp7>>5];
            tmp1 += 512;
            tmp2++;
            mb[48] = (u8)((tmp7 + tmp2) >> 1);

            tmp7 = tmp3 + tmp6;
            tmp1 -= (tmp7 << 2);
            tmp1 -= tmp7;
            tmp7 = tmp4 + tmp5;
            tmp2 = ptrC[partWidth];
            tmp1 += (tmp7 << 4);
            tmp1 += (tmp7 << 2);
            tmp7 = ptrInt[partWidth];
            tmp1 += tmp2;
            tmp1 = clp[tmp1>>10];
            tmp7 += 16;
            tmp7 = clp[tmp7>>5];
            tmp6 += 512;
            tmp1++;
            mb[32] = (u8)((tmp7 + tmp1) >> 1);

            tmp1 = *ptrC;
            tmp7 = tmp2 + tmp5;
            tmp6 -= (tmp7 << 2);
            tmp6 -= tmp7;
            tmp7 = tmp4 + tmp3;
            tmp6 += (tmp7 << 4);
            tmp6 += (tmp7 << 2);
            tmp7 = *ptrInt;
            tmp6 += tmp1;
            tmp6 = clp[tmp6>>10];
            tmp7 += 16;
            tmp7 = clp[tmp7>>5];
            tmp5 += 512;
            tmp6++;
            mb[16] = (u8)((tmp7 + tmp6) >> 1);

            tmp6 = ptrC[-(i32)partWidth];
            tmp1 += tmp4;
            tmp5 -= (tmp1 << 2);
            tmp5 -= tmp1;
            tmp3 += tmp2;
            tmp5 += (tmp3 << 4);
            tmp5 += (tmp3 << 2);
            tmp7 = ptrInt[-(i32)partWidth];
            tmp5 += tmp6;
            tmp5 = clp[tmp5>>10];
            tmp7 += 16;
            tmp7 = clp[tmp7>>5];
            tmp5++;
            *mb++ = (u8)((tmp7 + tmp5) >> 1);
            ptrC++;
            ptrInt++;
        }
        mb += 4*16 - partWidth;
        ptrC += 3*partWidth;
        ptrV += 3*partWidth;
        ptrInt += 3*partWidth;
    }

}


/*------------------------------------------------------------------------------

    Function: h264bsdInterpolateMidHorQuarter

        Functional description:
          Function to perform horizontal and vertical interpolation of pixel
          position 'i' or 'k' for a block. Overfilling is done only if needed.
          Reference image (ref) is read at correct position and the predicted
          part is written to macroblock array (mb)

------------------------------------------------------------------------------*/

void h264bsdInterpolateMidHorQuarter(
  u8 *ref,
  u8 *mb,
  i32 x0,
  i32 y0,
  u32 width,
  u32 height,
  u32 partWidth,
  u32 partHeight,
  u32 horOffset)    /* 0 for pixel i, 1 for pixel k */
{
    u32 p1[21*21/4+1];
    u32 x, y;
    i32 tmp1, tmp2, tmp3, tmp4, tmp5, tmp6, tmp7;
    i32 *ptrJ, *ptrInt, *h1;
    u8  *ptrC, *ptrV;
    i32 table[21*16];
    i32 tableWidth = (i32)partWidth+5;
    const u8 *clp = h264bsdClip + 512;

    /* Code */

    ASSERT(ref);
    ASSERT(mb);

    if ((x0 < 0) || ((u32)x0+partWidth+5 > width) ||
        (y0 < 0) || ((u32)y0+partHeight+5 > height))
    {
        h264bsdFillBlock(ref, (u8*)p1, x0, y0, width, height,
                partWidth+5, partHeight+5, partWidth+5);

        x0 = 0;
        y0 = 0;
        ref = (u8*)p1;
        width = partWidth+5;
    }

    ref += (u32)y0 * width + (u32)x0;

    h1 = table + tableWidth;
    ptrC = ref + width;
    ptrV = ptrC + 5*width;

    /* First step: calculate intermediate values for
     * vertical interpolation */
    for (y = (partHeight >> 2); y; y--)
    {
        for (x = (u32)tableWidth; x; x--)
        {
            tmp4 = ptrV[-(i32)width*2];
            tmp5 = ptrV[-(i32)width];
            tmp1 = ptrV[width];
            tmp2 = ptrV[width*2];
            tmp6 = *ptrV++;

            tmp7 = tmp4 + tmp1;
            tmp2 -= (tmp7 << 2);
            tmp2 -= tmp7;
            tmp7 = tmp5 + tmp6;
            tmp3 = ptrC[width*2];
            tmp2 += (tmp7 << 4);
            tmp2 += (tmp7 << 2);
            tmp2 += tmp3;
            h1[tableWidth*2] = tmp2;

            tmp7 = tmp3 + tmp6;
            tmp1 -= (tmp7 << 2);
            tmp1 -= tmp7;
            tmp7 = tmp4 + tmp5;
            tmp2 = ptrC[width];
            tmp1 += (tmp7 << 4);
            tmp1 += (tmp7 << 2);
            tmp1 += tmp2;
            h1[tableWidth] = tmp1;

            tmp1 = *ptrC;
            tmp7 = tmp2 + tmp5;
            tmp6 -= (tmp7 << 2);
            tmp6 -= tmp7;
            tmp7 = tmp4 + tmp3;
            tmp6 += (tmp7 << 4);
            tmp6 += (tmp7 << 2);
            tmp6 += tmp1;
            *h1 = tmp6;

            tmp6 = ptrC[-(i32)width];
            tmp1 += tmp4;
            tmp5 -= (tmp1 << 2);
            tmp5 -= tmp1;
            tmp3 += tmp2;
            tmp5 += (tmp3 << 4);
            tmp5 += (tmp3 << 2);
            tmp5 += tmp6;
            h1[-tableWidth] = tmp5;
            h1++;
            ptrC++;
        }
        ptrC += 4*width - partWidth - 5;
        ptrV += 4*width - partWidth - 5;
        h1 += 3*tableWidth;
    }

    /* Second step: calculate horizontal interpolation and average */
    ptrJ = table + 5;
    /* Pointer to integer sample position, either G or H */
    ptrInt = table + 2 + horOffset;
    for (y = partHeight; y; y--)
    {
        tmp6 = *(ptrJ - 5);
        tmp5 = *(ptrJ - 4);
        tmp4 = *(ptrJ - 3);
        tmp3 = *(ptrJ - 2);
        tmp2 = *(ptrJ - 1);
        for (x = (partWidth>>2); x; x--)
        {
            /* First pixel */
            tmp6 += 512;
            tmp7 = tmp3 + tmp4;
            tmp6 += (tmp7 << 4);
            tmp6 += (tmp7 << 2);
            tmp7 = tmp2 + tmp5;
            tmp1 = *ptrJ++;
            tmp6 -= (tmp7 << 2);
            tmp6 -= tmp7;
            tmp7 = *ptrInt++;
            tmp6 += tmp1;
            tmp6 = clp[tmp6 >> 10];
            tmp7 += 16;
            tmp7 = clp[tmp7 >> 5];
            tmp5 += 512;
            tmp6++;
            *mb++ = (u8)((tmp6 + tmp7) >> 1);
            /* Second pixel */
            tmp7 = tmp2 + tmp3;
            tmp5 += (tmp7 << 4);
            tmp5 += (tmp7 << 2);
            tmp7 = tmp1 + tmp4;
            tmp6 = *ptrJ++;
            tmp5 -= (tmp7 << 2);
            tmp5 -= tmp7;
            tmp7 = *ptrInt++;
            tmp5 += tmp6;
            tmp5 = clp[tmp5 >> 10];
            tmp7 += 16;
            tmp7 = clp[tmp7 >> 5];
            tmp4 += 512;
            tmp5++;
            *mb++ = (u8)((tmp5 + tmp7) >> 1);
            /* Third pixel */
            tmp7 = tmp1 + tmp2;
            tmp4 += (tmp7 << 4);
            tmp4 += (tmp7 << 2);
            tmp7 = tmp6 + tmp3;
            tmp5 = *ptrJ++;
            tmp4 -= (tmp7 << 2);
            tmp4 -= tmp7;
            tmp7 = *ptrInt++;
            tmp4 += tmp5;
            tmp4 = clp[tmp4 >> 10];
            tmp7 += 16;
            tmp7 = clp[tmp7 >> 5];
            tmp3 += 512;
            tmp4++;
            *mb++ = (u8)((tmp4 + tmp7) >> 1);
            /* Fourth pixel */
            tmp7 = tmp6 + tmp1;
            tmp3 += (tmp7 << 4);
            tmp3 += (tmp7 << 2);
            tmp7 = tmp5 + tmp2;
            tmp4 = *ptrJ++;
            tmp3 -= (tmp7 << 2);
            tmp3 -= tmp7;
            tmp7 = *ptrInt++;
            tmp3 += tmp4;
            tmp3 = clp[tmp3 >> 10];
            tmp7 += 16;
            tmp7 = clp[tmp7 >> 5];
            tmp3++;
            *mb++ = (u8)((tmp3 + tmp7) >> 1);
            tmp3 = tmp5;
            tmp5…
Summary ✨

This C code implements a function for filling blocks of reference frames with data from another frame, handling overfilling on edges and vertical lines within the block. It uses a pointer to a function (fp) that determines which row-filling function to use based on the block’s position in the frame. The h264bsdFillBlock function iterates through rows, calling the chosen function for each line, and handles edge cases where overfilling is necessary.
Alerts (22)

Complexity hotspot; lines 135 to 136 (total complexity: 3)
135 136
Complexity hotspot; lines 231 to 232 (total complexity: 3)
231 232
Complexity hotspot; lines 326 to 327 (total complexity: 3)
326 327
Complexity hotspot; lines 510 to 511 (total complexity: 3)
510 511
Complexity hotspot; lines 629 to 630 (total complexity: 3)
629 630
Complexity hotspot; lines 764 to 765 (total complexity: 3)
764 765
Complexity hotspot; lines 886 to 887 (total complexity: 3)
886 887
Complexity hotspot; lines 1025 to 1026 (total complexity: 3)
1025 1026
Complexity hotspot; lines 1231 to 1232 (total complexity: 3)
1231 1232
Complexity hotspot; lines 1418 to 1419 (total complexity: 3)
1418 1419
Complexity hotspot; lines 1623 to 1624 (total complexity: 3)
1623 1624