
/media/libvpx/vp8/encoder/mcomp.c

http://github.com/zpao/v8monkey
C | 2017 lines | 1587 code | 328 blank | 102 comment | 193 complexity | 9d17ed43bd5fc633103567d6787ee8aa MD5


   1/*
   2 *  Copyright (c) 2010 The WebM project authors. All Rights Reserved.
   3 *
   4 *  Use of this source code is governed by a BSD-style license
   5 *  that can be found in the LICENSE file in the root of the source
   6 *  tree. An additional intellectual property rights grant can be found
   7 *  in the file PATENTS.  All contributing project authors may
   8 *  be found in the AUTHORS file in the root of the source tree.
   9 */
  10
  11
  12#include "mcomp.h"
  13#include "vpx_mem/vpx_mem.h"
  14#include "vpx_ports/config.h"
  15#include <stdio.h>
  16#include <limits.h>
  17#include <math.h>
  18#include "vp8/common/findnearmv.h"
  19
  20#ifdef ENTROPY_STATS
  21static int mv_ref_ct [31] [4] [2];
  22static int mv_mode_cts [4] [2];
  23#endif
  24
  25int vp8_mv_bit_cost(int_mv *mv, int_mv *ref, int *mvcost[2], int Weight)
  26{
  27    // MV costing is based on the distribution of vectors in the previous frame and as such will tend to
  28    // overstate the cost of vectors. In addition, coding a new vector can have a knock-on effect on the
  29    // cost of subsequent vectors and on the quality of prediction from NEAR and NEAREST for subsequent blocks.
  30    // The "Weight" parameter allows, to a limited extent, for some account to be taken of these factors.
  31    return ((mvcost[0][(mv->as_mv.row - ref->as_mv.row) >> 1] + mvcost[1][(mv->as_mv.col - ref->as_mv.col) >> 1]) * Weight) >> 7;
  32}
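/* Illustrative sketch (not part of the upstream file): a minimal example, with
 * hypothetical table values, of how the bit cost above combines the two
 * component costs. Only the arithmetic is taken from the code.
 */
#if 0
static int example_mv_bit_cost(void)
{
    int row_cost = 40;   /* hypothetical mvcost[0][(row diff) >> 1] */
    int col_cost = 30;   /* hypothetical mvcost[1][(col diff) >> 1] */
    int Weight   = 96;   /* 128 would leave the summed table cost unscaled */
    return ((row_cost + col_cost) * Weight) >> 7;   /* (70 * 96) >> 7 == 52 */
}
#endif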
  33
  34static int mv_err_cost(int_mv *mv, int_mv *ref, int *mvcost[2], int error_per_bit)
  35{
  36    return ((mvcost[0][(mv->as_mv.row - ref->as_mv.row) >> 1] +
  37        mvcost[1][(mv->as_mv.col - ref->as_mv.col) >> 1])
  38        * error_per_bit + 128) >> 8;
  39}
  40
  41static int mvsad_err_cost(int_mv *mv, int_mv *ref, int *mvsadcost[2], int error_per_bit)
  42{
  43    /* Calculate sad error cost on full pixel basis. */
  44    return ((mvsadcost[0][(mv->as_mv.row - ref->as_mv.row)] +
  45        mvsadcost[1][(mv->as_mv.col - ref->as_mv.col)])
  46        * error_per_bit + 128) >> 8;
  47}
  48
  49void vp8_init_dsmotion_compensation(MACROBLOCK *x, int stride)
  50{
  51    int Len;
  52    int search_site_count = 0;
  53
  54
  55    // Generate offsets for 4 search sites per step.
  56    Len = MAX_FIRST_STEP;
  57    x->ss[search_site_count].mv.col = 0;
  58    x->ss[search_site_count].mv.row = 0;
  59    x->ss[search_site_count].offset = 0;
  60    search_site_count++;
  61
  62    while (Len > 0)
  63    {
  64
  65        // Compute offsets for search sites.
  66        x->ss[search_site_count].mv.col = 0;
  67        x->ss[search_site_count].mv.row = -Len;
  68        x->ss[search_site_count].offset = -Len * stride;
  69        search_site_count++;
  70
  71        // Compute offsets for search sites.
  72        x->ss[search_site_count].mv.col = 0;
  73        x->ss[search_site_count].mv.row = Len;
  74        x->ss[search_site_count].offset = Len * stride;
  75        search_site_count++;
  76
  77        // Compute offsets for search sites.
  78        x->ss[search_site_count].mv.col = -Len;
  79        x->ss[search_site_count].mv.row = 0;
  80        x->ss[search_site_count].offset = -Len;
  81        search_site_count++;
  82
  83        // Compute offsets for search sites.
  84        x->ss[search_site_count].mv.col = Len;
  85        x->ss[search_site_count].mv.row = 0;
  86        x->ss[search_site_count].offset = Len;
  87        search_site_count++;
  88
  89        // Contract.
  90        Len /= 2;
  91    }
  92
  93    x->ss_count = search_site_count;
  94    x->searches_per_step = 4;
  95}
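/* Illustrative note (not in the upstream file): for a hypothetical stride of
 * 32 and a step length Len of 16, the loop above records the four compass
 * points at each scale, e.g. for Len == 16:
 *     (row, col) = (-16,   0)  offset = -512
 *     (row, col) = ( 16,   0)  offset =  512
 *     (row, col) = (  0, -16)  offset =  -16
 *     (row, col) = (  0,  16)  offset =   16
 * Len then halves on every pass, so ss[] holds four sites per step after the
 * zero-vector site stored first.
 */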
  96
  97void vp8_init3smotion_compensation(MACROBLOCK *x, int stride)
  98{
  99    int Len;
 100    int search_site_count = 0;
 101
 102    // Generate offsets for 8 search sites per step.
 103    Len = MAX_FIRST_STEP;
 104    x->ss[search_site_count].mv.col = 0;
 105    x->ss[search_site_count].mv.row = 0;
 106    x->ss[search_site_count].offset = 0;
 107    search_site_count++;
 108
 109    while (Len > 0)
 110    {
 111
 112        // Compute offsets for search sites.
 113        x->ss[search_site_count].mv.col = 0;
 114        x->ss[search_site_count].mv.row = -Len;
 115        x->ss[search_site_count].offset = -Len * stride;
 116        search_site_count++;
 117
 118        // Compute offsets for search sites.
 119        x->ss[search_site_count].mv.col = 0;
 120        x->ss[search_site_count].mv.row = Len;
 121        x->ss[search_site_count].offset = Len * stride;
 122        search_site_count++;
 123
 124        // Compute offsets for search sites.
 125        x->ss[search_site_count].mv.col = -Len;
 126        x->ss[search_site_count].mv.row = 0;
 127        x->ss[search_site_count].offset = -Len;
 128        search_site_count++;
 129
 130        // Compute offsets for search sites.
 131        x->ss[search_site_count].mv.col = Len;
 132        x->ss[search_site_count].mv.row = 0;
 133        x->ss[search_site_count].offset = Len;
 134        search_site_count++;
 135
 136        // Compute offsets for search sites.
 137        x->ss[search_site_count].mv.col = -Len;
 138        x->ss[search_site_count].mv.row = -Len;
 139        x->ss[search_site_count].offset = -Len * stride - Len;
 140        search_site_count++;
 141
 142        // Compute offsets for search sites.
 143        x->ss[search_site_count].mv.col = Len;
 144        x->ss[search_site_count].mv.row = -Len;
 145        x->ss[search_site_count].offset = -Len * stride + Len;
 146        search_site_count++;
 147
 148        // Compute offsets for search sites.
 149        x->ss[search_site_count].mv.col = -Len;
 150        x->ss[search_site_count].mv.row = Len;
 151        x->ss[search_site_count].offset = Len * stride - Len;
 152        search_site_count++;
 153
 154        // Compute offsets for search sites.
 155        x->ss[search_site_count].mv.col = Len;
 156        x->ss[search_site_count].mv.row = Len;
 157        x->ss[search_site_count].offset = Len * stride + Len;
 158        search_site_count++;
 159
 160
 161        // Contract.
 162        Len /= 2;
 163    }
 164
 165    x->ss_count = search_site_count;
 166    x->searches_per_step = 8;
 167}
 168
 169/*
 170 * To avoid the penalty of cache-line-crossing reads, preload the reference
 171 * area into a small buffer that is aligned so reads from it never cross a
 172 * cache line. This reduces the CPU cycles spent reading reference data in
 173 * the sub-pixel filter functions.
 174 * TODO: Currently, since the sub-pixel search range here is -3 ~ 3, copy a
 175 * 22-row x 32-col area, which is enough for a 16x16 macroblock. Later, for
 176 * SPLITMV, we could reduce the area.
 177 */
 178#define MVC(r,c) (((mvcost[0][(r)-rr] + mvcost[1][(c) - rc]) * error_per_bit + 128 )>>8 ) // estimated cost of a motion vector (r,c)
 179#define PRE(r,c) (y + (((r)>>2) * y_stride + ((c)>>2) -(offset))) // pointer to predictor base for a motion vector
 180#define SP(x) (((x)&3)<<1) // convert motion vector component to offset for svf calc
 181#define DIST(r,c) vfp->svf( PRE(r,c), y_stride, SP(c),SP(r), z,b->src_stride,&sse) // sub-pixel variance (distortion) at (r,c)
 182#define IFMVCV(r,c,s,e) if ( c >= minc && c <= maxc && r >= minr && r <= maxr) s else e;
 183#define ERR(r,c) (MVC(r,c)+DIST(r,c)) // returns distortion + motion vector cost
 184#define CHECK_BETTER(v,r,c) IFMVCV(r,c,{thismse = DIST(r,c); if((v = (MVC(r,c)+thismse)) < besterr) { besterr = v; br=r; bc=c; *distortion = thismse; *sse1 = sse; }}, v=INT_MAX;)// checks if (r,c) has better score than previous best
 185#define MIN(x,y) (((x)<(y))?(x):(y))
 186#define MAX(x,y) (((x)>(y))?(x):(y))
 187
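/* Illustrative note (not in the upstream file): for an in-range (r,c),
 * CHECK_BETTER(v,r,c) expands to roughly
 *     thismse = DIST(r,c);
 *     if ((v = MVC(r,c) + thismse) < besterr)
 *         { besterr = v; br = r; bc = c; *distortion = thismse; *sse1 = sse; }
 * and to "v = INT_MAX;" when (r,c) falls outside [minr,maxr] x [minc,maxc],
 * so out-of-range candidates can never win the left/right/up/down comparison.
 */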
 188int vp8_find_best_sub_pixel_step_iteratively(MACROBLOCK *x, BLOCK *b, BLOCKD *d,
 189                                             int_mv *bestmv, int_mv *ref_mv,
 190                                             int error_per_bit,
 191                                             const vp8_variance_fn_ptr_t *vfp,
 192                                             int *mvcost[2], int *distortion,
 193                                             unsigned int *sse1)
 194{
 195    unsigned char *z = (*(b->base_src) + b->src);
 196
 197    int rr = ref_mv->as_mv.row >> 1, rc = ref_mv->as_mv.col >> 1;
 198    int br = bestmv->as_mv.row << 2, bc = bestmv->as_mv.col << 2;
 199    int tr = br, tc = bc;
 200    unsigned int besterr = INT_MAX;
 201    unsigned int left, right, up, down, diag;
 202    unsigned int sse;
 203    unsigned int whichdir;
 204    unsigned int halfiters = 4;
 205    unsigned int quarteriters = 4;
 206    int thismse;
 207
 208    int minc = MAX(x->mv_col_min << 2, (ref_mv->as_mv.col >> 1) - ((1 << mvlong_width) - 1));
 209    int maxc = MIN(x->mv_col_max << 2, (ref_mv->as_mv.col >> 1) + ((1 << mvlong_width) - 1));
 210    int minr = MAX(x->mv_row_min << 2, (ref_mv->as_mv.row >> 1) - ((1 << mvlong_width) - 1));
 211    int maxr = MIN(x->mv_row_max << 2, (ref_mv->as_mv.row >> 1) + ((1 << mvlong_width) - 1));
 212
 213    int y_stride;
 214    int offset;
 215
 216#if ARCH_X86 || ARCH_X86_64
 217    MACROBLOCKD *xd = &x->e_mbd;
 218    unsigned char *y0 = *(d->base_pre) + d->pre + (bestmv->as_mv.row) * d->pre_stride + bestmv->as_mv.col;
 219    unsigned char *y;
 220    int buf_r1, buf_r2, buf_c1, buf_c2;
 221
 222    // Clamping to avoid out-of-range data access
 223    buf_r1 = ((bestmv->as_mv.row - 3) < x->mv_row_min)?(bestmv->as_mv.row - x->mv_row_min):3;
 224    buf_r2 = ((bestmv->as_mv.row + 3) > x->mv_row_max)?(x->mv_row_max - bestmv->as_mv.row):3;
 225    buf_c1 = ((bestmv->as_mv.col - 3) < x->mv_col_min)?(bestmv->as_mv.col - x->mv_col_min):3;
 226    buf_c2 = ((bestmv->as_mv.col + 3) > x->mv_col_max)?(x->mv_col_max - bestmv->as_mv.col):3;
 227    y_stride = 32;
 228
 229    /* Copy to intermediate buffer before searching. */
 230    vfp->copymem(y0 - buf_c1 - d->pre_stride*buf_r1, d->pre_stride, xd->y_buf, y_stride, 16+buf_r1+buf_r2);
 231    y = xd->y_buf + y_stride*buf_r1 +buf_c1;
 232#else
 233    unsigned char *y = *(d->base_pre) + d->pre + (bestmv->as_mv.row) * d->pre_stride + bestmv->as_mv.col;
 234    y_stride = d->pre_stride;
 235#endif
 236
 237    offset = (bestmv->as_mv.row) * y_stride + bestmv->as_mv.col;
 238
 239    // central mv
 240    bestmv->as_mv.row <<= 3;
 241    bestmv->as_mv.col <<= 3;
 242
 243    // calculate central point error
 244    besterr = vfp->vf(y, y_stride, z, b->src_stride, sse1);
 245    *distortion = besterr;
 246    besterr += mv_err_cost(bestmv, ref_mv, mvcost, error_per_bit);
 247
 248    // TODO: Each subsequent iteration checks at least one point in common with the last iteration (it could be two if the diagonal was selected).
 249    while (--halfiters)
 250    {
 251        // 1/2 pel
 252        CHECK_BETTER(left, tr, tc - 2);
 253        CHECK_BETTER(right, tr, tc + 2);
 254        CHECK_BETTER(up, tr - 2, tc);
 255        CHECK_BETTER(down, tr + 2, tc);
 256
 257        whichdir = (left < right ? 0 : 1) + (up < down ? 0 : 2);
 258
 259        switch (whichdir)
 260        {
 261        case 0:
 262            CHECK_BETTER(diag, tr - 2, tc - 2);
 263            break;
 264        case 1:
 265            CHECK_BETTER(diag, tr - 2, tc + 2);
 266            break;
 267        case 2:
 268            CHECK_BETTER(diag, tr + 2, tc - 2);
 269            break;
 270        case 3:
 271            CHECK_BETTER(diag, tr + 2, tc + 2);
 272            break;
 273        }
 274
 275        // no reason to check the same one again.
 276        if (tr == br && tc == bc)
 277            break;
 278
 279        tr = br;
 280        tc = bc;
 281    }
 282
 283    // TODO: Each subsequent iteration checks at least one point in common with the last iteration (it could be two if the diagonal was selected).
 284    // 1/4 pel
 285    while (--quarteriters)
 286    {
 287        CHECK_BETTER(left, tr, tc - 1);
 288        CHECK_BETTER(right, tr, tc + 1);
 289        CHECK_BETTER(up, tr - 1, tc);
 290        CHECK_BETTER(down, tr + 1, tc);
 291
 292        whichdir = (left < right ? 0 : 1) + (up < down ? 0 : 2);
 293
 294        switch (whichdir)
 295        {
 296        case 0:
 297            CHECK_BETTER(diag, tr - 1, tc - 1);
 298            break;
 299        case 1:
 300            CHECK_BETTER(diag, tr - 1, tc + 1);
 301            break;
 302        case 2:
 303            CHECK_BETTER(diag, tr + 1, tc - 1);
 304            break;
 305        case 3:
 306            CHECK_BETTER(diag, tr + 1, tc + 1);
 307            break;
 308        }
 309
 310        // no reason to check the same one again.
 311        if (tr == br && tc == bc)
 312            break;
 313
 314        tr = br;
 315        tc = bc;
 316    }
 317
 318    bestmv->as_mv.row = br << 1;
 319    bestmv->as_mv.col = bc << 1;
 320
 321    if ((abs(bestmv->as_mv.col - ref_mv->as_mv.col) > (MAX_FULL_PEL_VAL<<3)) ||
 322        (abs(bestmv->as_mv.row - ref_mv->as_mv.row) > (MAX_FULL_PEL_VAL<<3)))
 323        return INT_MAX;
 324
 325    return besterr;
 326}
 327#undef MVC
 328#undef PRE
 329#undef SP
 330#undef DIST
 331#undef IFMVCV
 332#undef ERR
 333#undef CHECK_BETTER
 334#undef MIN
 335#undef MAX
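/* Illustrative note (not in the upstream file): the refinement above runs on
 * a quarter-pel grid. A full-pel best MV of (3, -1) enters as
 * (br, bc) = (12, -4) via "<< 2"; the first loop probes half-pel offsets
 * (+/-2 grid units), the second probes quarter-pel offsets (+/-1), and the
 * winner is written back in eighth-pel units via "<< 1", matching the
 * "bestmv <<= 3" scaling of the central point.
 */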
 336int vp8_find_best_sub_pixel_step(MACROBLOCK *x, BLOCK *b, BLOCKD *d,
 337                                 int_mv *bestmv, int_mv *ref_mv,
 338                                 int error_per_bit,
 339                                 const vp8_variance_fn_ptr_t *vfp,
 340                                 int *mvcost[2], int *distortion,
 341                                 unsigned int *sse1)
 342{
 343    int bestmse = INT_MAX;
 344    int_mv startmv;
 345    int_mv this_mv;
 346    unsigned char *z = (*(b->base_src) + b->src);
 347    int left, right, up, down, diag;
 348    unsigned int sse;
 349    int whichdir;
 350    int thismse;
 351    int y_stride;
 352
 353#if ARCH_X86 || ARCH_X86_64
 354    MACROBLOCKD *xd = &x->e_mbd;
 355    unsigned char *y0 = *(d->base_pre) + d->pre + (bestmv->as_mv.row) * d->pre_stride + bestmv->as_mv.col;
 356    unsigned char *y;
 357
 358    y_stride = 32;
 359    /* Copy 18 rows x 32 cols area to intermediate buffer before searching. */
 360    vfp->copymem(y0 - 1 - d->pre_stride, d->pre_stride, xd->y_buf, y_stride, 18);
 361    y = xd->y_buf + y_stride + 1;
 362#else
 363    unsigned char *y = *(d->base_pre) + d->pre + (bestmv->as_mv.row) * d->pre_stride + bestmv->as_mv.col;
 364    y_stride = d->pre_stride;
 365#endif
 366
 367    // central mv
 368    bestmv->as_mv.row <<= 3;
 369    bestmv->as_mv.col <<= 3;
 370    startmv = *bestmv;
 371
 372    // calculate central point error
 373    bestmse = vfp->vf(y, y_stride, z, b->src_stride, sse1);
 374    *distortion = bestmse;
 375    bestmse += mv_err_cost(bestmv, ref_mv, mvcost, error_per_bit);
 376
 377    // go left then right and check error
 378    this_mv.as_mv.row = startmv.as_mv.row;
 379    this_mv.as_mv.col = ((startmv.as_mv.col - 8) | 4);
 380    thismse = vfp->svf_halfpix_h(y - 1, y_stride, z, b->src_stride, &sse);
 381    left = thismse + mv_err_cost(&this_mv, ref_mv, mvcost, error_per_bit);
 382
 383    if (left < bestmse)
 384    {
 385        *bestmv = this_mv;
 386        bestmse = left;
 387        *distortion = thismse;
 388        *sse1 = sse;
 389    }
 390
 391    this_mv.as_mv.col += 8;
 392    thismse = vfp->svf_halfpix_h(y, y_stride, z, b->src_stride, &sse);
 393    right = thismse + mv_err_cost(&this_mv, ref_mv, mvcost, error_per_bit);
 394
 395    if (right < bestmse)
 396    {
 397        *bestmv = this_mv;
 398        bestmse = right;
 399        *distortion = thismse;
 400        *sse1 = sse;
 401    }
 402
 403    // go up then down and check error
 404    this_mv.as_mv.col = startmv.as_mv.col;
 405    this_mv.as_mv.row = ((startmv.as_mv.row - 8) | 4);
 406    thismse =  vfp->svf_halfpix_v(y - y_stride, y_stride, z, b->src_stride, &sse);
 407    up = thismse + mv_err_cost(&this_mv, ref_mv, mvcost, error_per_bit);
 408
 409    if (up < bestmse)
 410    {
 411        *bestmv = this_mv;
 412        bestmse = up;
 413        *distortion = thismse;
 414        *sse1 = sse;
 415    }
 416
 417    this_mv.as_mv.row += 8;
 418    thismse = vfp->svf_halfpix_v(y, y_stride, z, b->src_stride, &sse);
 419    down = thismse + mv_err_cost(&this_mv, ref_mv, mvcost, error_per_bit);
 420
 421    if (down < bestmse)
 422    {
 423        *bestmv = this_mv;
 424        bestmse = down;
 425        *distortion = thismse;
 426        *sse1 = sse;
 427    }
 428
 429
 430    // now check 1 more diagonal
 431    whichdir = (left < right ? 0 : 1) + (up < down ? 0 : 2);
 432    //for(whichdir =0;whichdir<4;whichdir++)
 433    //{
 434    this_mv = startmv;
 435
 436    switch (whichdir)
 437    {
 438    case 0:
 439        this_mv.as_mv.col = (this_mv.as_mv.col - 8) | 4;
 440        this_mv.as_mv.row = (this_mv.as_mv.row - 8) | 4;
 441        thismse = vfp->svf_halfpix_hv(y - 1 - y_stride, y_stride, z, b->src_stride, &sse);
 442        break;
 443    case 1:
 444        this_mv.as_mv.col += 4;
 445        this_mv.as_mv.row = (this_mv.as_mv.row - 8) | 4;
 446        thismse = vfp->svf_halfpix_hv(y - y_stride, y_stride, z, b->src_stride, &sse);
 447        break;
 448    case 2:
 449        this_mv.as_mv.col = (this_mv.as_mv.col - 8) | 4;
 450        this_mv.as_mv.row += 4;
 451        thismse = vfp->svf_halfpix_hv(y - 1, y_stride, z, b->src_stride, &sse);
 452        break;
 453    case 3:
 454    default:
 455        this_mv.as_mv.col += 4;
 456        this_mv.as_mv.row += 4;
 457        thismse = vfp->svf_halfpix_hv(y, y_stride, z, b->src_stride, &sse);
 458        break;
 459    }
 460
 461    diag = thismse + mv_err_cost(&this_mv, ref_mv, mvcost, error_per_bit);
 462
 463    if (diag < bestmse)
 464    {
 465        *bestmv = this_mv;
 466        bestmse = diag;
 467        *distortion = thismse;
 468        *sse1 = sse;
 469    }
 470
 471//  }
 472
 473
 474    // time to check quarter pels.
 475    if (bestmv->as_mv.row < startmv.as_mv.row)
 476        y -= y_stride;
 477
 478    if (bestmv->as_mv.col < startmv.as_mv.col)
 479        y--;
 480
 481    startmv = *bestmv;
 482
 483
 484
 485    // go left then right and check error
 486    this_mv.as_mv.row = startmv.as_mv.row;
 487
 488    if (startmv.as_mv.col & 7)
 489    {
 490        this_mv.as_mv.col = startmv.as_mv.col - 2;
 491        thismse = vfp->svf(y, y_stride, this_mv.as_mv.col & 7, this_mv.as_mv.row & 7, z, b->src_stride, &sse);
 492    }
 493    else
 494    {
 495        this_mv.as_mv.col = (startmv.as_mv.col - 8) | 6;
 496        thismse = vfp->svf(y - 1, y_stride, 6, this_mv.as_mv.row & 7, z, b->src_stride, &sse);
 497    }
 498
 499    left = thismse + mv_err_cost(&this_mv, ref_mv, mvcost, error_per_bit);
 500
 501    if (left < bestmse)
 502    {
 503        *bestmv = this_mv;
 504        bestmse = left;
 505        *distortion = thismse;
 506        *sse1 = sse;
 507    }
 508
 509    this_mv.as_mv.col += 4;
 510    thismse = vfp->svf(y, y_stride, this_mv.as_mv.col & 7, this_mv.as_mv.row & 7, z, b->src_stride, &sse);
 511    right = thismse + mv_err_cost(&this_mv, ref_mv, mvcost, error_per_bit);
 512
 513    if (right < bestmse)
 514    {
 515        *bestmv = this_mv;
 516        bestmse = right;
 517        *distortion = thismse;
 518        *sse1 = sse;
 519    }
 520
 521    // go up then down and check error
 522    this_mv.as_mv.col = startmv.as_mv.col;
 523
 524    if (startmv.as_mv.row & 7)
 525    {
 526        this_mv.as_mv.row = startmv.as_mv.row - 2;
 527        thismse = vfp->svf(y, y_stride, this_mv.as_mv.col & 7, this_mv.as_mv.row & 7, z, b->src_stride, &sse);
 528    }
 529    else
 530    {
 531        this_mv.as_mv.row = (startmv.as_mv.row - 8) | 6;
 532        thismse = vfp->svf(y - y_stride, y_stride, this_mv.as_mv.col & 7, 6, z, b->src_stride, &sse);
 533    }
 534
 535    up = thismse + mv_err_cost(&this_mv, ref_mv, mvcost, error_per_bit);
 536
 537    if (up < bestmse)
 538    {
 539        *bestmv = this_mv;
 540        bestmse = up;
 541        *distortion = thismse;
 542        *sse1 = sse;
 543    }
 544
 545    this_mv.as_mv.row += 4;
 546    thismse = vfp->svf(y, y_stride, this_mv.as_mv.col & 7, this_mv.as_mv.row & 7, z, b->src_stride, &sse);
 547    down = thismse + mv_err_cost(&this_mv, ref_mv, mvcost, error_per_bit);
 548
 549    if (down < bestmse)
 550    {
 551        *bestmv = this_mv;
 552        bestmse = down;
 553        *distortion = thismse;
 554        *sse1 = sse;
 555    }
 556
 557
 558    // now check 1 more diagonal
 559    whichdir = (left < right ? 0 : 1) + (up < down ? 0 : 2);
 560
 561//  for(whichdir=0;whichdir<4;whichdir++)
 562//  {
 563    this_mv = startmv;
 564
 565    switch (whichdir)
 566    {
 567    case 0:
 568
 569        if (startmv.as_mv.row & 7)
 570        {
 571            this_mv.as_mv.row -= 2;
 572
 573            if (startmv.as_mv.col & 7)
 574            {
 575                this_mv.as_mv.col -= 2;
 576                thismse = vfp->svf(y, y_stride, this_mv.as_mv.col & 7, this_mv.as_mv.row & 7, z, b->src_stride, &sse);
 577            }
 578            else
 579            {
 580                this_mv.as_mv.col = (startmv.as_mv.col - 8) | 6;
 581                thismse = vfp->svf(y - 1, y_stride, 6, this_mv.as_mv.row & 7, z, b->src_stride, &sse);
 582            }
 583        }
 584        else
 585        {
 586            this_mv.as_mv.row = (startmv.as_mv.row - 8) | 6;
 587
 588            if (startmv.as_mv.col & 7)
 589            {
 590                this_mv.as_mv.col -= 2;
 591                thismse = vfp->svf(y - y_stride, y_stride, this_mv.as_mv.col & 7, 6, z, b->src_stride, &sse);
 592            }
 593            else
 594            {
 595                this_mv.as_mv.col = (startmv.as_mv.col - 8) | 6;
 596                thismse = vfp->svf(y - y_stride - 1, y_stride, 6, 6, z, b->src_stride, &sse);
 597            }
 598        }
 599
 600        break;
 601    case 1:
 602        this_mv.as_mv.col += 2;
 603
 604        if (startmv.as_mv.row & 7)
 605        {
 606            this_mv.as_mv.row -= 2;
 607            thismse = vfp->svf(y, y_stride, this_mv.as_mv.col & 7, this_mv.as_mv.row & 7, z, b->src_stride, &sse);
 608        }
 609        else
 610        {
 611            this_mv.as_mv.row = (startmv.as_mv.row - 8) | 6;
 612            thismse = vfp->svf(y - y_stride, y_stride, this_mv.as_mv.col & 7, 6, z, b->src_stride, &sse);
 613        }
 614
 615        break;
 616    case 2:
 617        this_mv.as_mv.row += 2;
 618
 619        if (startmv.as_mv.col & 7)
 620        {
 621            this_mv.as_mv.col -= 2;
 622            thismse = vfp->svf(y, y_stride, this_mv.as_mv.col & 7, this_mv.as_mv.row & 7, z, b->src_stride, &sse);
 623        }
 624        else
 625        {
 626            this_mv.as_mv.col = (startmv.as_mv.col - 8) | 6;
 627            thismse = vfp->svf(y - 1, y_stride, 6, this_mv.as_mv.row & 7, z, b->src_stride, &sse);
 628        }
 629
 630        break;
 631    case 3:
 632        this_mv.as_mv.col += 2;
 633        this_mv.as_mv.row += 2;
 634        thismse = vfp->svf(y, y_stride,  this_mv.as_mv.col & 7, this_mv.as_mv.row & 7, z, b->src_stride, &sse);
 635        break;
 636    }
 637
 638    diag = thismse + mv_err_cost(&this_mv, ref_mv, mvcost, error_per_bit);
 639
 640    if (diag < bestmse)
 641    {
 642        *bestmv = this_mv;
 643        bestmse = diag;
 644        *distortion = thismse;
 645        *sse1 = sse;
 646    }
 647
 648    return bestmse;
 649}
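/* Illustrative note (not in the upstream file): with MVs stored in eighth-pel
 * units, "(col - 8) | 4" lands on the half-pel position immediately to the
 * left of a full-pel centre. For a hypothetical centre column of 24 (3.0 pel):
 * (24 - 8) | 4 = 20 = 2.5 pel, and the later "col += 8" gives 28 = 3.5 pel,
 * the half-pel to the right. Rows are handled the same way.
 */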
 650
 651int vp8_find_best_half_pixel_step(MACROBLOCK *x, BLOCK *b, BLOCKD *d,
 652                                  int_mv *bestmv, int_mv *ref_mv,
 653                                  int error_per_bit,
 654                                  const vp8_variance_fn_ptr_t *vfp,
 655                                  int *mvcost[2], int *distortion,
 656                                  unsigned int *sse1)
 657{
 658    int bestmse = INT_MAX;
 659    int_mv startmv;
 660    int_mv this_mv;
 661    unsigned char *z = (*(b->base_src) + b->src);
 662    int left, right, up, down, diag;
 663    unsigned int sse;
 664    int thismse;
 665    int y_stride;
 666
 667#if ARCH_X86 || ARCH_X86_64
 668    MACROBLOCKD *xd = &x->e_mbd;
 669    unsigned char *y0 = *(d->base_pre) + d->pre + (bestmv->as_mv.row) * d->pre_stride + bestmv->as_mv.col;
 670    unsigned char *y;
 671
 672    y_stride = 32;
 673    /* Copy 18 rows x 32 cols area to intermediate buffer before searching. */
 674    vfp->copymem(y0 - 1 - d->pre_stride, d->pre_stride, xd->y_buf, y_stride, 18);
 675    y = xd->y_buf + y_stride + 1;
 676#else
 677    unsigned char *y = *(d->base_pre) + d->pre + (bestmv->as_mv.row) * d->pre_stride + bestmv->as_mv.col;
 678    y_stride = d->pre_stride;
 679#endif
 680
 681    // central mv
 682    bestmv->as_mv.row <<= 3;
 683    bestmv->as_mv.col <<= 3;
 684    startmv = *bestmv;
 685
 686    // calculate central point error
 687    bestmse = vfp->vf(y, y_stride, z, b->src_stride, sse1);
 688    *distortion = bestmse;
 689    bestmse += mv_err_cost(bestmv, ref_mv, mvcost, error_per_bit);
 690
 691    // go left then right and check error
 692    this_mv.as_mv.row = startmv.as_mv.row;
 693    this_mv.as_mv.col = ((startmv.as_mv.col - 8) | 4);
 694    thismse = vfp->svf_halfpix_h(y - 1, y_stride, z, b->src_stride, &sse);
 695    left = thismse + mv_err_cost(&this_mv, ref_mv, mvcost, error_per_bit);
 696
 697    if (left < bestmse)
 698    {
 699        *bestmv = this_mv;
 700        bestmse = left;
 701        *distortion = thismse;
 702        *sse1 = sse;
 703    }
 704
 705    this_mv.as_mv.col += 8;
 706    thismse = vfp->svf_halfpix_h(y, y_stride, z, b->src_stride, &sse);
 707    right = thismse + mv_err_cost(&this_mv, ref_mv, mvcost, error_per_bit);
 708
 709    if (right < bestmse)
 710    {
 711        *bestmv = this_mv;
 712        bestmse = right;
 713        *distortion = thismse;
 714        *sse1 = sse;
 715    }
 716
 717    // go up then down and check error
 718    this_mv.as_mv.col = startmv.as_mv.col;
 719    this_mv.as_mv.row = ((startmv.as_mv.row - 8) | 4);
 720    thismse = vfp->svf_halfpix_v(y - y_stride, y_stride, z, b->src_stride, &sse);
 721    up = thismse + mv_err_cost(&this_mv, ref_mv, mvcost, error_per_bit);
 722
 723    if (up < bestmse)
 724    {
 725        *bestmv = this_mv;
 726        bestmse = up;
 727        *distortion = thismse;
 728        *sse1 = sse;
 729    }
 730
 731    this_mv.as_mv.row += 8;
 732    thismse = vfp->svf_halfpix_v(y, y_stride, z, b->src_stride, &sse);
 733    down = thismse + mv_err_cost(&this_mv, ref_mv, mvcost, error_per_bit);
 734
 735    if (down < bestmse)
 736    {
 737        *bestmv = this_mv;
 738        bestmse = down;
 739        *distortion = thismse;
 740        *sse1 = sse;
 741    }
 742
 743    // Somewhat strangely, checking only one diagonal for half-pel is slower than checking all of them.
 744#if 0
 745    // now check 1 more diagonal -
 746    whichdir = (left < right ? 0 : 1) + (up < down ? 0 : 2);
 747    this_mv = startmv;
 748
 749    switch (whichdir)
 750    {
 751    case 0:
 752        this_mv.col = (this_mv.col - 8) | 4;
 753        this_mv.row = (this_mv.row - 8) | 4;
 754        diag = vfp->svf(y - 1 - y_stride, y_stride, 4, 4, z, b->src_stride, &sse);
 755        break;
 756    case 1:
 757        this_mv.col += 4;
 758        this_mv.row = (this_mv.row - 8) | 4;
 759        diag = vfp->svf(y - y_stride, y_stride, 4, 4, z, b->src_stride, &sse);
 760        break;
 761    case 2:
 762        this_mv.col = (this_mv.col - 8) | 4;
 763        this_mv.row += 4;
 764        diag = vfp->svf(y - 1, y_stride, 4, 4, z, b->src_stride, &sse);
 765        break;
 766    case 3:
 767        this_mv.col += 4;
 768        this_mv.row += 4;
 769        diag = vfp->svf(y, y_stride, 4, 4, z, b->src_stride, &sse);
 770        break;
 771    }
 772
 773    diag += mv_err_cost(&this_mv, ref_mv, mvcost, error_per_bit);
 774
 775    if (diag < bestmse)
 776    {
 777        *bestmv = this_mv;
 778        bestmse = diag;
 779    }
 780
 781#else
 782    this_mv.as_mv.col = (this_mv.as_mv.col - 8) | 4;
 783    this_mv.as_mv.row = (this_mv.as_mv.row - 8) | 4;
 784    thismse = vfp->svf_halfpix_hv(y - 1 - y_stride, y_stride, z, b->src_stride, &sse);
 785    diag = thismse + mv_err_cost(&this_mv, ref_mv, mvcost, error_per_bit);
 786
 787    if (diag < bestmse)
 788    {
 789        *bestmv = this_mv;
 790        bestmse = diag;
 791        *distortion = thismse;
 792        *sse1 = sse;
 793    }
 794
 795    this_mv.as_mv.col += 8;
 796    thismse = vfp->svf_halfpix_hv(y - y_stride, y_stride, z, b->src_stride, &sse);
 797    diag = thismse + mv_err_cost(&this_mv, ref_mv, mvcost, error_per_bit);
 798
 799    if (diag < bestmse)
 800    {
 801        *bestmv = this_mv;
 802        bestmse = diag;
 803        *distortion = thismse;
 804        *sse1 = sse;
 805    }
 806
 807    this_mv.as_mv.col = (this_mv.as_mv.col - 8) | 4;
 808    this_mv.as_mv.row = startmv.as_mv.row + 4;
 809    thismse = vfp->svf_halfpix_hv(y - 1, y_stride, z, b->src_stride, &sse);
 810    diag = thismse + mv_err_cost(&this_mv, ref_mv, mvcost, error_per_bit);
 811
 812    if (diag < bestmse)
 813    {
 814        *bestmv = this_mv;
 815        bestmse = diag;
 816        *distortion = thismse;
 817        *sse1 = sse;
 818    }
 819
 820    this_mv.as_mv.col += 8;
 821    thismse = vfp->svf_halfpix_hv(y, y_stride, z, b->src_stride, &sse);
 822    diag = thismse + mv_err_cost(&this_mv, ref_mv, mvcost, error_per_bit);
 823
 824    if (diag < bestmse)
 825    {
 826        *bestmv = this_mv;
 827        bestmse = diag;
 828        *distortion = thismse;
 829        *sse1 = sse;
 830    }
 831
 832#endif
 833    return bestmse;
 834}
 835
 836#define CHECK_BOUNDS(range) \
 837{\
 838    all_in = 1;\
 839    all_in &= ((br-range) >= x->mv_row_min);\
 840    all_in &= ((br+range) <= x->mv_row_max);\
 841    all_in &= ((bc-range) >= x->mv_col_min);\
 842    all_in &= ((bc+range) <= x->mv_col_max);\
 843}
 844
 845#define CHECK_POINT \
 846{\
 847    if (this_mv.as_mv.col < x->mv_col_min) continue;\
 848    if (this_mv.as_mv.col > x->mv_col_max) continue;\
 849    if (this_mv.as_mv.row < x->mv_row_min) continue;\
 850    if (this_mv.as_mv.row > x->mv_row_max) continue;\
 851}
 852
 853#define CHECK_BETTER \
 854{\
 855    if (thissad < bestsad)\
 856    {\
 857        thissad += mvsad_err_cost(&this_mv, &fcenter_mv, mvsadcost, sad_per_bit);\
 858        if (thissad < bestsad)\
 859        {\
 860            bestsad = thissad;\
 861            best_site = i;\
 862        }\
 863    }\
 864}
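/* Illustrative note (not in the upstream file): CHECK_BETTER here compares
 * the raw SAD against bestsad first and only adds the motion-vector cost and
 * compares again if that test passes, so the mvsad_err_cost() lookup is
 * skipped for candidates that already lose on SAD alone.
 */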
 865
 866static const MV next_chkpts[6][3] =
 867{
 868    {{ -2, 0}, { -1, -2}, {1, -2}},
 869    {{ -1, -2}, {1, -2}, {2, 0}},
 870    {{1, -2}, {2, 0}, {1, 2}},
 871    {{2, 0}, {1, 2}, { -1, 2}},
 872    {{1, 2}, { -1, 2}, { -2, 0}},
 873    {{ -1, 2}, { -2, 0}, { -1, -2}}
 874};
 875
 876int vp8_hex_search
 877(
 878    MACROBLOCK *x,
 879    BLOCK *b,
 880    BLOCKD *d,
 881    int_mv *ref_mv,
 882    int_mv *best_mv,
 883    int search_param,
 884    int sad_per_bit,
 885    const vp8_variance_fn_ptr_t *vfp,
 886    int *mvsadcost[2],
 887    int *mvcost[2],
 888    int_mv *center_mv
 889)
 890{
 891    MV hex[6] = { { -1, -2}, {1, -2}, {2, 0}, {1, 2}, { -1, 2}, { -2, 0} } ;
 892    MV neighbors[4] = {{0, -1}, { -1, 0}, {1, 0}, {0, 1}} ;
 893    int i, j;
 894
 895    unsigned char *what = (*(b->base_src) + b->src);
 896    int what_stride = b->src_stride;
 897    int in_what_stride = d->pre_stride;
 898    int br, bc;
 899    int_mv this_mv;
 900    unsigned int bestsad = 0x7fffffff;
 901    unsigned int thissad;
 902    unsigned char *base_offset;
 903    unsigned char *this_offset;
 904    int k = -1;
 905    int all_in;
 906    int best_site = -1;
 907
 908    int_mv fcenter_mv;
 909    fcenter_mv.as_mv.row = center_mv->as_mv.row >> 3;
 910    fcenter_mv.as_mv.col = center_mv->as_mv.col >> 3;
 911
 912    // adjust ref_mv to make sure it is within MV range
 913    vp8_clamp_mv(ref_mv, x->mv_col_min, x->mv_col_max, x->mv_row_min, x->mv_row_max);
 914    br = ref_mv->as_mv.row;
 915    bc = ref_mv->as_mv.col;
 916
 917    // Work out the start point for the search
 918    base_offset = (unsigned char *)(*(d->base_pre) + d->pre);
 919    this_offset = base_offset + (br * (d->pre_stride)) + bc;
 920    this_mv.as_mv.row = br;
 921    this_mv.as_mv.col = bc;
 922    bestsad = vfp->sdf( what, what_stride, this_offset,
 923                        in_what_stride, 0x7fffffff)
 924            + mvsad_err_cost(&this_mv, &fcenter_mv, mvsadcost, sad_per_bit);
 925
 926    // hex search
 927    //j=0
 928    CHECK_BOUNDS(2)
 929
 930    if(all_in)
 931    {
 932        for (i = 0; i < 6; i++)
 933        {
 934            this_mv.as_mv.row = br + hex[i].row;
 935            this_mv.as_mv.col = bc + hex[i].col;
 936            this_offset = base_offset + (this_mv.as_mv.row * in_what_stride) + this_mv.as_mv.col;
 937            thissad=vfp->sdf( what, what_stride, this_offset, in_what_stride, bestsad);
 938            CHECK_BETTER
 939        }
 940    }else
 941    {
 942        for (i = 0; i < 6; i++)
 943        {
 944            this_mv.as_mv.row = br + hex[i].row;
 945            this_mv.as_mv.col = bc + hex[i].col;
 946            CHECK_POINT
 947            this_offset = base_offset + (this_mv.as_mv.row * in_what_stride) + this_mv.as_mv.col;
 948            thissad=vfp->sdf( what, what_stride, this_offset, in_what_stride, bestsad);
 949            CHECK_BETTER
 950        }
 951    }
 952
 953    if (best_site == -1)
 954        goto cal_neighbors;
 955    else
 956    {
 957        br += hex[best_site].row;
 958        bc += hex[best_site].col;
 959        k = best_site;
 960    }
 961
 962    for (j = 1; j < 127; j++)
 963    {
 964        best_site = -1;
 965        CHECK_BOUNDS(2)
 966
 967        if(all_in)
 968        {
 969            for (i = 0; i < 3; i++)
 970            {
 971                this_mv.as_mv.row = br + next_chkpts[k][i].row;
 972                this_mv.as_mv.col = bc + next_chkpts[k][i].col;
 973                this_offset = base_offset + (this_mv.as_mv.row * (in_what_stride)) + this_mv.as_mv.col;
 974                thissad = vfp->sdf( what, what_stride, this_offset, in_what_stride, bestsad);
 975                CHECK_BETTER
 976            }
 977        }else
 978        {
 979            for (i = 0; i < 3; i++)
 980            {
 981                this_mv.as_mv.row = br + next_chkpts[k][i].row;
 982                this_mv.as_mv.col = bc + next_chkpts[k][i].col;
 983                CHECK_POINT
 984                this_offset = base_offset + (this_mv.as_mv.row * (in_what_stride)) + this_mv.as_mv.col;
 985                thissad = vfp->sdf( what, what_stride, this_offset, in_what_stride, bestsad);
 986                CHECK_BETTER
 987            }
 988        }
 989
 990        if (best_site == -1)
 991            break;
 992        else
 993        {
 994            br += next_chkpts[k][best_site].row;
 995            bc += next_chkpts[k][best_site].col;
 996            k += 5 + best_site;
 997            if (k >= 12) k -= 12;
 998            else if (k >= 6) k -= 6;
 999        }
1000    }
1001
1002    // check 4 1-away neighbors
1003cal_neighbors:
1004    for (j = 0; j < 32; j++)
1005    {
1006        best_site = -1;
1007        CHECK_BOUNDS(1)
1008
1009        if(all_in)
1010        {
1011            for (i = 0; i < 4; i++)
1012            {
1013                this_mv.as_mv.row = br + neighbors[i].row;
1014                this_mv.as_mv.col = bc + neighbors[i].col;
1015                this_offset = base_offset + (this_mv.as_mv.row * (in_what_stride)) + this_mv.as_mv.col;
1016                thissad = vfp->sdf( what, what_stride, this_offset, in_what_stride, bestsad);
1017                CHECK_BETTER
1018            }
1019        }else
1020        {
1021            for (i = 0; i < 4; i++)
1022            {
1023                this_mv.as_mv.row = br + neighbors[i].row;
1024                this_mv.as_mv.col = bc + neighbors[i].col;
1025                CHECK_POINT
1026                this_offset = base_offset + (this_mv.as_mv.row * (in_what_stride)) + this_mv.as_mv.col;
1027                thissad = vfp->sdf( what, what_stride, this_offset, in_what_stride, bestsad);
1028                CHECK_BETTER
1029            }
1030        }
1031
1032        if (best_site == -1)
1033            break;
1034        else
1035        {
1036            br += neighbors[best_site].row;
1037            bc += neighbors[best_site].col;
1038        }
1039    }
1040
1041    best_mv->as_mv.row = br;
1042    best_mv->as_mv.col = bc;
1043
1044    return bestsad;
1045}
1046#undef CHECK_BOUNDS
1047#undef CHECK_POINT
1048#undef CHECK_BETTER
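/* Illustrative note (not in the upstream file): in the hex refinement above,
 * "k += 5 + best_site" followed by the two conditional subtractions is just
 * (k + best_site - 1) modulo 6: best_site is 0..2 and k is 0..5, so
 * k + 5 + best_site lies in 5..12, and folding by 6 or 12 brings it back into
 * 0..5, keeping k pointing at the next_chkpts[] row for the new search centre.
 */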
1049
1050int vp8_diamond_search_sad
1051(
1052    MACROBLOCK *x,
1053    BLOCK *b,
1054    BLOCKD *d,
1055    int_mv *ref_mv,
1056    int_mv *best_mv,
1057    int search_param,
1058    int sad_per_bit,
1059    int *num00,
1060    vp8_variance_fn_ptr_t *fn_ptr,
1061    int *mvcost[2],
1062    int_mv *center_mv
1063)
1064{
1065    int i, j, step;
1066
1067    unsigned char *what = (*(b->base_src) + b->src);
1068    int what_stride = b->src_stride;
1069    unsigned char *in_what;
1070    int in_what_stride = d->pre_stride;
1071    unsigned char *best_address;
1072
1073    int tot_steps;
1074    int_mv this_mv;
1075
1076    int bestsad = INT_MAX;
1077    int best_site = 0;
1078    int last_site = 0;
1079
1080    int ref_row;
1081    int ref_col;
1082    int this_row_offset;
1083    int this_col_offset;
1084    search_site *ss;
1085
1086    unsigned char *check_here;
1087    int thissad;
1088
1089    int *mvsadcost[2] = {x->mvsadcost[0], x->mvsadcost[1]};
1090    int_mv fcenter_mv;
1091    fcenter_mv.as_mv.row = center_mv->as_mv.row >> 3;
1092    fcenter_mv.as_mv.col = center_mv->as_mv.col >> 3;
1093
1094    vp8_clamp_mv(ref_mv, x->mv_col_min, x->mv_col_max, x->mv_row_min, x->mv_row_max);
1095    ref_row = ref_mv->as_mv.row;
1096    ref_col = ref_mv->as_mv.col;
1097    *num00 = 0;
1098    best_mv->as_mv.row = ref_row;
1099    best_mv->as_mv.col = ref_col;
1100
1101    // Work out the start point for the search
1102    in_what = (unsigned char *)(*(d->base_pre) + d->pre + (ref_row * (d->pre_stride)) + ref_col);
1103    best_address = in_what;
1104
1105    // Check the starting position
1106    bestsad = fn_ptr->sdf(what, what_stride, in_what,
1107                          in_what_stride, 0x7fffffff)
1108              + mvsad_err_cost(best_mv, &fcenter_mv, mvsadcost, sad_per_bit);
1109
1110    // search_param determines the length of the initial step and hence the number of iterations
1111    // 0 = initial step (MAX_FIRST_STEP) pel : 1 = (MAX_FIRST_STEP/2) pel, 2 = (MAX_FIRST_STEP/4) pel... etc.
1112    ss = &x->ss[search_param * x->searches_per_step];
1113    tot_steps = (x->ss_count / x->searches_per_step) - search_param;
1114
1115    i = 1;
1116
1117    for (step = 0; step < tot_steps ; step++)
1118    {
1119        for (j = 0 ; j < x->searches_per_step ; j++)
1120        {
1121            // Trap illegal vectors
1122            this_row_offset = best_mv->as_mv.row + ss[i].mv.row;
1123            this_col_offset = best_mv->as_mv.col + ss[i].mv.col;
1124
1125            if ((this_col_offset > x->mv_col_min) && (this_col_offset < x->mv_col_max) &&
1126            (this_row_offset > x->mv_row_min) && (this_row_offset < x->mv_row_max))
1127
1128            {
1129                check_here = ss[i].offset + best_address;
1130                thissad = fn_ptr->sdf(what, what_stride, check_here , in_what_stride, bestsad);
1131
1132                if (thissad < bestsad)
1133                {
1134                    this_mv.as_mv.row = this_row_offset;
1135                    this_mv.as_mv.col = this_col_offset;
1136                    thissad += mvsad_err_cost(&this_mv, &fcenter_mv,
1137                                                mvsadcost, sad_per_bit);
1138
1139                    if (thissad < bestsad)
1140                    {
1141                        bestsad = thissad;
1142                        best_site = i;
1143                    }
1144                }
1145            }
1146
1147            i++;
1148        }
1149
1150        if (best_site != last_site)
1151        {
1152            best_mv->as_mv.row += ss[best_site].mv.row;
1153            best_mv->as_mv.col += ss[best_site].mv.col;
1154            best_address += ss[best_site].offset;
1155            last_site = best_site;
1156        }
1157        else if (best_address == in_what)
1158            (*num00)++;
1159    }
1160
1161    this_mv.as_mv.row = best_mv->as_mv.row << 3;
1162    this_mv.as_mv.col = best_mv->as_mv.col << 3;
1163
1164    if (bestsad == INT_MAX)
1165        return INT_MAX;
1166
1167    return fn_ptr->vf(what, what_stride, best_address, in_what_stride, (unsigned int *)(&thissad))
1168        + mv_err_cost(&this_mv, center_mv, mvcost, x->errorperbit);
1169}
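/* Illustrative note (not in the upstream file): ss points at
 * x->ss[search_param * x->searches_per_step] and tot_steps shrinks by
 * search_param, so search_param 0 starts at the widest ring of the
 * precomputed pattern while each increment skips one outer ring (halving the
 * starting radius) and drops one iteration.
 */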
1170
1171int vp8_diamond_search_sadx4
1172(
1173    MACROBLOCK *x,
1174    BLOCK *b,
1175    BLOCKD *d,
1176    int_mv *ref_mv,
1177    int_mv *best_mv,
1178    int search_param,
1179    int sad_per_bit,
1180    int *num00,
1181    vp8_variance_fn_ptr_t *fn_ptr,
1182    int *mvcost[2],
1183    int_mv *center_mv
1184)
1185{
1186    int i, j, step;
1187
1188    unsigned char *what = (*(b->base_src) + b->src);
1189    int what_stride = b->src_stride;
1190    unsigned char *in_what;
1191    int in_what_stride = d->pre_stride;
1192    unsigned char *best_address;
1193
1194    int tot_steps;
1195    int_mv this_mv;
1196
1197    int bestsad = INT_MAX;
1198    int best_site = 0;
1199    int last_site = 0;
1200
1201    int ref_row;
1202    int ref_col;
1203    int this_row_offset;
1204    int this_col_offset;
1205    search_site *ss;
1206
1207    unsigned char *check_here;
1208    unsigned int thissad;
1209
1210    int *mvsadcost[2] = {x->mvsadcost[0], x->mvsadcost[1]};
1211    int_mv fcenter_mv;
1212    fcenter_mv.as_mv.row = center_mv->as_mv.row >> 3;
1213    fcenter_mv.as_mv.col = center_mv->as_mv.col >> 3;
1214
1215    vp8_clamp_mv(ref_mv, x->mv_col_min, x->mv_col_max, x->mv_row_min, x->mv_row_max);
1216    ref_row = ref_mv->as_mv.row;
1217    ref_col = ref_mv->as_mv.col;
1218    *num00 = 0;
1219    best_mv->as_mv.row = ref_row;
1220    best_mv->as_mv.col = ref_col;
1221
1222    // Work out the start point for the search
1223    in_what = (unsigned char *)(*(d->base_pre) + d->pre + (ref_row * (d->pre_stride)) + ref_col);
1224    best_address = in_what;
1225
1226    // Check the starting position
1227    bestsad = fn_ptr->sdf(what, what_stride,
1228                          in_what, in_what_stride, 0x7fffffff)
1229              + mvsad_err_cost(best_mv, &fcenter_mv, mvsadcost, sad_per_bit);
1230
1231    // search_param determines the length of the initial step and hence the number of iterations
1232    // 0 = initial step (MAX_FIRST_STEP) pel : 1 = (MAX_FIRST_STEP/2) pel, 2 = (MAX_FIRST_STEP/4) pel... etc.
1233    ss = &x->ss[search_param * x->searches_per_step];
1234    tot_steps = (x->ss_count / x->searches_per_step) - search_param;
1235
1236    i = 1;
1237
1238    for (step = 0; step < tot_steps ; step++)
1239    {
1240        int all_in = 1, t;
1241
1242        // To know whether all neighboring points are within the bounds, four bounds checks are enough instead of
1243        // checking four bounds for each point.
1244        all_in &= ((best_mv->as_mv.row + ss[i].mv.row)> x->mv_row_min);
1245        all_in &= ((best_mv->as_mv.row + ss[i+1].mv.row) < x->mv_row_max);
1246        all_in &= ((best_mv->as_mv.col + ss[i+2].mv.col) > x->mv_col_min);
1247        all_in &= ((best_mv->as_mv.col + ss[i+3].mv.col) < x->mv_col_max);
1248
1249        if (all_in)
1250        {
1251            unsigned int sad_array[4];
1252
1253            for (j = 0 ; j < x->searches_per_step ; j += 4)
1254            {
1255                unsigned char *block_offset[4];
1256
1257                for (t = 0; t < 4; t++)
1258                    block_offset[t] = ss[i+t].offset + best_address;
1259
1260                fn_ptr->sdx4df(what, what_stride, block_offset, in_what_stride, sad_array);
1261
1262                for (t = 0; t < 4; t++, i++)
1263                {
1264                    if (sad_array[t] < bestsad)
1265                    {
1266                        this_mv.as_mv.row = best_mv->as_mv.row + ss[i].mv.row;
1267                        this_mv.as_mv.col = best_mv->as_mv.col + ss[i].mv.col;
1268                        sad_array[t] += mvsad_err_cost(&this_mv, &fcenter_mv,
1269                                                       mvsadcost, sad_per_bit);
1270
1271                        if (sad_array[t] < bestsad)
1272                        {
1273                            bestsad = sad_array[t];
1274                            best_site = i;
1275                        }
1276                    }
1277                }
1278            }
1279        }
1280        else
1281        {
1282            for (j = 0 ; j < x->searches_per_step ; j++)
1283            {
1284                // Trap illegal vectors
1285                this_row_offset = best_mv->as_mv.row + ss[i].mv.row;
1286                this_col_offset = best_mv->as_mv.col + ss[i].mv.col;
1287
1288                if ((this_col_offset > x->mv_col_min) && (this_col_offset < x->mv_col_max) &&
1289                (this_row_offset > x->mv_row_min) && (this_row_offset < x->mv_row_max))
1290                {
1291                    check_here = ss[i].offset + best_address;
1292                    thissad = fn_ptr->sdf(what, what_stride, check_here , in_what_stride, bestsad);
1293
1294                    if (thissad < bestsad)
1295                    {
1296                        this_mv.as_mv.row = this_row_offset;
1297                        this_mv.as_mv.col = this_col_offset;
1298                        thissad += mvsad_err_cost(&this_mv, &fcenter_mv,
1299                                                   mvsadcost, sad_per_bit);
1300
1301                        if (thissad < bestsad)
1302                        {
1303                            bestsad = thissad;
1304                            best_site = i;
1305                        }
1306                    }
1307                }
1308                i++;
1309            }
1310        }
1311
1312        if (best_site != last_site)
1313        {
1314            best_mv->as_mv.row += ss[best_site].mv.row;
1315            best_mv->as_mv.col += ss[best_site].mv.col;
1316            best_address += ss[best_site].offset;
1317            last_site = best_site;
1318        }
1319        else if (best_address == in_what)
1320            (*num00)++;
1321    }
1322
1323    this_mv.as_mv.row = best_mv->as_mv.row << 3;
1324    this_mv.as_mv.col = best_mv->as_mv.col << 3;
1325
1326    if (bestsad == INT_MAX)
1327        return INT_MAX;
1328
1329    return fn_ptr->vf(what, what_stride, best_address, in_what_stride, (unsigned int *)(&thissad))
1330        + mv_err_cost(&this_mv, center_mv, mvcost, x->errorperbit);
1331}
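/* Illustrative note (not in the upstream file): the sadx4 variant leans on
 * the site ordering set up in the init functions above: sites i..i+3 are the
 * pure up/down/left/right moves at the current radius, and any remaining
 * sites in the step stay inside the same +/-Len box, so the four comparisons
 * against the MV limits cover every candidate in the step and
 * fn_ptr->sdx4df() can compute four SADs per batched call.
 */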
1332
1333int vp8_full_search_sad(MACROBLOCK *x, BLOCK *b, BLOCKD *d, int_mv *ref_mv,
1334                        int sad_per_bit, int distance,
1335                        vp8_variance_fn_ptr_t *fn_ptr, int *mvcost[2],
1336                        int_mv *center_mv)
1337{
1338    unsigned char *what = (*(b->base_src) + b->src);
1339    int what_stride = b->src_stride;
1340    unsigned char *in_what;
1341    int in_what_stride = d->pre_stride;
1342    int mv_stride = d->pre_stride;
1343    unsigned char *bestaddress;
1344    int_mv *best_mv = &d->bmi.mv;
1345    int_mv this_mv;
1346    int bestsad = INT_MAX;
1347    int r, c;
1348
1349    unsigned char *check_here;
1350    int thissad;
1351
1352    int ref_row = ref_mv->as_mv.row;
1353    int ref_col = ref_mv->as_mv.col;
1354
1355    int row_min = ref_row - distance;
1356    int row_max = ref_row + distance;
1357    int col_min = ref_col - distance;
1358    int col_max = ref_col + distance;
1359
1360    int *mvsadcost[2] = {x->mvsadcost[0], x->mvsadcost[1]};
1361    int_mv fcenter_mv;
1362    fcenter_mv.as_mv.row = center_mv->as_mv.row >> 3;
1363    fcenter_mv.as_mv.col = center_mv->as_mv.col >> 3;
1364
1365    // Work out the mid point for the search
1366    in_what = *(d->base_pre) + d->pre;
1367    bestaddress = in_what + (ref_row * d->pre_stride) + ref_col;
1368
1369    best_mv->as_mv.row = ref_row;
1370    best_mv->as_mv.col = ref_col;
1371
1372    // Baseline value at the centre
1373    bestsad = fn_ptr->sdf(what, what_stride, bestaddress,
1374                          in_what_stride, 0x7fffffff)
1375              + mvsad_err_cost(best_mv, &fcenter_mv, mvsadcost, sad_per_bit);
1376
1377    // Apply further limits to prevent us searching with vectors that stretch beyond the UMV border
1378    if (col_min < x->mv_col_min)
1379        col_min = x->mv_col_min;
1380
1381    if (col_max > x->mv_col_max)
1382        col_max = x->mv_col_max;
1383
1384    if (row_min < x->mv_row_min)
1385        row_min = x->mv_row_min;
1386
1387    if (row_max > x->mv_row_max)
1388        row_max = x->mv_row_max;
1389
1390    for (r = row_min; r < row_max ; r++)
1391    {
1392        this_mv.as_mv.row = r;
1393        check_here = r * mv_stride + in_what + col_min;
1394
1395        for (c = col_min; c < col_max; c++)
1396        {
1397            thissad = fn_ptr->sdf(what, what_stride, check_here , in_what_stride, bestsad);
1398
1399            this_mv.as_mv.col = c;
1400            thissad  += mvsad_err_cost(&this_mv, &fcenter_mv,
1401                        mvsadcost, sad_per_bit);
1402
1403            if (thissad < bestsad)
1404            {
1405                bestsad = thissad;
1406                best_mv->as_mv.row = r;
1407                best_mv->as_mv.col = c;
1408                bestaddress = check_here;
1409            }
1410
1411            check_here++;
1412        }
1413    }
1414
1415    this_mv.as_mv.row = best_mv->as_mv.row << 3;
1416    this_mv.as_mv.col = best_mv->as_mv.col << 3;
1417
1418    if (bestsad < INT_MAX)
1419        return fn_ptr->vf(what, what_stride, bestaddress, in_what_stride, (unsigned int *)(&thissad))
1420        + mv_err_cost(&this_mv, center_mv, mvcost, x->errorperbit);
1421    else
1422        return INT_MAX;
1423}
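/* Illustrative note (not in the upstream file): for a hypothetical "distance"
 * of 16 with no clamping against the MV range, the raster scan above
 * evaluates a SAD at every integer position in a 32x32 window around ref_mv
 * (r in [ref_row - 16, ref_row + 16), c likewise), i.e. 1024 candidate
 * vectors, before the single variance call on the winner.
 */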
1424
1425int vp8_full_search_sadx3(MACROBLOCK *x, BLOCK *b, BLOCKD *d, int_mv *ref_mv,
1426                          int sad_per_bit, int distance,
1427                          vp8_variance_fn_ptr_t *fn_ptr, int *mvcost[2],
1428                          int_mv *center_mv)
1429{
1430    unsigned char *what = (*(b->base_src) + b->src);
1431    int what_stride = b->src_stride;
1432    unsigned char *in_what;
1433    int in_what_stride = d->pre_stride;
1434    int mv_stride = d->pre_stride;
1435    unsigned char *bestaddress;
1436    int_mv *best_mv = &d->bmi.mv;
1437    int_mv this_mv;
1438    int bestsad = INT_MAX;
1439    int r, c;
1440
1441    unsigned char *check_here;
1442    unsigned int thissad;
1443
1444    int ref_row = ref_mv->as_mv.row;
1445    int ref_col = ref_mv->as_mv.col;
1446
1447    int row_min = ref_row - distance;
1448    int row_max = ref_row + distance;
1449    int col_min = ref_col - distance;
1450    int col_max = ref_col + distance;
1451
1452    unsigned int sad_array[3];
1453
1454    int *mvsadcost[2] = {x->mvsadcost[0], x->mvsadcost[1]};
1455    int_mv fcenter_mv;
1456    fcenter_mv.as_mv.row = center_mv->as_mv.row >> 3;
1457    fcenter_mv.as_mv.col = center_mv->as_mv.col >> 3;
1458
1459    // Work out the mid point for the search
1460    in_what = *(d->base_pre) + d->pre;
1461    bestaddress = in_what + (ref_row * d->pre_stride) + ref_col;
1462
1463    best_mv->as_mv.row = ref_row;
1464    best_mv->as_mv.col = ref_col;
1465
1466    // Baseline value at the centre
1467    bestsad = fn_ptr->sdf(what, what_stride,
1468                          bestaddress, in_what_stride, 0x7fffffff)
1469              + mvsad_err_cost(best_mv, &fcenter_mv, mvsadcost, sad_per_bit);
1470
1471    // Apply further limits to prevent us searching with vectors that stretch beyond the UMV border
1472    if (col_min < x->mv_col_min)
1473        col_min = x->mv_col_min;
1474
1475    if (col_max > x->mv_col_max)
1476        col_max = x->mv_col_max;
1477
1478    if (row_min < x->mv_row_min)
1479        row_min = x->mv_row_min;
1480
1481    if (row_max > x->mv_row_max)
1482        row_max = x->mv_row_max;
1483
1484    for (r = row_min; r < row_max ; r++)
1485    {
1486        this_mv.as_mv.row = r;
1487        check_here = r * mv_stride + in_what + col_min;
1488        c = col_min;
1489
1490        while ((c + 2) < col_max)
1491        {
1492            int i;
1493
1494            fn_ptr->sdx3f(what, what_stride, check_here , in_what_stride, sad_array);
1495
1496            for (i = 0; i < 3; i++)
1497            {
1498                thissad = sad_array[i];
1499
1500                if (thissad < bestsad)
1501                {
1502                    this_mv.as_mv.col = c;
1503                    thissad  += mvsad_err_cost(&this_mv, &fcenter_mv,
1504                                                mvsadcost, sad_per_bit);
1505
1506                    if (thissad < bestsad)
1507                    {
1508                        bestsad = thissad;
1509                        best_mv->as_mv.row = r;
1510                        best_mv->as_mv.col = c;
1511                        bestaddress = check_here;
1512                    }
1513                }
1514
1515                check_here++;
1516                c++;
1517            }
1518        }
1519
1520        while (c < col_max)
1521        {
1522            thissad = fn_ptr->sdf(what, what_stride, check_here , in_what_stride, bestsad);
1523
1524            if (thissad < bestsad)
1525            {
1526                this_mv.as_mv.col = c;
1527                thissad  += mvsad_err_cost(&this_mv, &fcenter_mv,
1528                                            mvsadcost, sad_per_bit);
1529
1530                if (thissad < bestsad)
1531                {
1532                    bestsad = thissad;
1533                    best_mv->as_mv.row = r;
1534                    best_mv->as_mv.col = c;
1535                    bestaddress = check_here;
1536                }
1537            }
1538
1539            check_here ++;
1540            c ++;
1541        }
1542
1543    }
1544
1545    this_mv.as_mv.row = best_mv->as_mv.row << 3;
1546    this_mv.as_mv.col = best_mv->as_mv.col << 3;
1547
1548    if (bestsad < INT_MAX)
1549        return fn_ptr->vf(what, what_stride, bestaddress, in_what_stride, (unsigned int *)(&thissad))
1550        + mv_err_cost(&this_mv, center_mv, mvcost, x->errorperbit);
1551    else
1552        return INT_MAX;
1553}
1554
1555int vp8_full_search_sadx8(MACROBLOCK *x, BLOCK *b, BLOCKD *d, int_mv *ref_mv,
1556                          int sad_per_bit, int distance,
1557                          vp8_variance_fn_ptr_t *fn_ptr, int *mvcost[2],
1558                          int_mv *center_mv)
1559{
1560    unsigned char *what = (*(b->base_src) + b->src);
1561    int what_stride = b->src_stride;
1562    unsigned char *in_what;
1563    int in_what_stride = d->pre_stride;
1564    int mv_stride = d->pre_stride;
1565    unsigned char *bestaddress;
1566    int_mv *best_mv = &d->bmi.mv;
1567    int_mv this_mv;
1568    int bestsad = INT_MAX;
1569    int r, c;
1570
1571    unsigned char *check_here;
1572    unsigned int thissad;
1573
1574    int ref_row = ref_mv->as_mv.row;
1575    int ref_col = ref_mv->as_mv.col;
1576
1577    int row_min = ref_row - distance;
1578    int row_max = ref_row + distance;
1579    int col_min = ref_col - distance;
1580    int col_max = ref_col + distance;
1581
1582    DECLARE_ALIGNED_ARRAY(16, unsigned short, sad_array8, 8);
1583    unsigned int sad_array[3];
1584
1585    int *mvsadcost[2] = {x->mvsadcost[0], x->mvsadcost[1]};
1586    int_mv fcenter_mv;
1587    fcenter_mv.as_mv.row = center_mv->as_mv.row >> 3;
1588    fcenter_mv.as_mv.col = center_mv->as_mv.col >> 3;
1589
1590    // Work out the mid point for the search
1591    in_what = *(d->base_pre) + d->pre;
1592    bestaddress = in_what + (ref_row * d->pre_stride) + ref_col;
1593
1594    best_mv->as_mv.row = ref_row;
1595    best_mv->as_mv.col = ref_col;
1596
1597    // Baseline value at the centre
1598   

Large files are truncated in this view; the file continues beyond this point.