PageRenderTime 137ms CodeModel.GetById 26ms app.highlight 96ms RepoModel.GetById 2ms app.codeStats 0ms

/media/libvpx/vp8/encoder/rdopt.c

http://github.com/zpao/v8monkey
C | 2440 lines | 1814 code | 453 blank | 173 comment | 283 complexity | b514ae315fad58f484ed2f377e680084 MD5 | raw file

Large files are truncated, but you can click here to view the full file

   1/*
   2 *  Copyright (c) 2010 The WebM project authors. All Rights Reserved.
   3 *
   4 *  Use of this source code is governed by a BSD-style license
   5 *  that can be found in the LICENSE file in the root of the source
   6 *  tree. An additional intellectual property rights grant can be found
   7 *  in the file PATENTS.  All contributing project authors may
   8 *  be found in the AUTHORS file in the root of the source tree.
   9 */
  10
  11
  12#include <stdio.h>
  13#include <math.h>
  14#include <limits.h>
  15#include <assert.h>
  16#include "vp8/common/pragmas.h"
  17
  18#include "tokenize.h"
  19#include "treewriter.h"
  20#include "onyx_int.h"
  21#include "modecosts.h"
  22#include "encodeintra.h"
  23#include "vp8/common/entropymode.h"
  24#include "vp8/common/reconinter.h"
  25#include "vp8/common/reconintra.h"
  26#include "vp8/common/reconintra4x4.h"
  27#include "vp8/common/findnearmv.h"
  28#include "encodemb.h"
  29#include "quantize.h"
  30#include "vp8/common/idct.h"
  31#include "vp8/common/g_common.h"
  32#include "variance.h"
  33#include "mcomp.h"
  34#include "rdopt.h"
  35#include "vpx_mem/vpx_mem.h"
  36#include "dct.h"
  37#include "vp8/common/systemdependent.h"
  38
  39#if CONFIG_RUNTIME_CPU_DETECT
  40#define IF_RTCD(x)  (x)
  41#else
  42#define IF_RTCD(x)  NULL
  43#endif
  44
  45
  46extern void vp8cx_mb_init_quantizer(VP8_COMP *cpi, MACROBLOCK *x);
  47extern void vp8_update_zbin_extra(VP8_COMP *cpi, MACROBLOCK *x);
  48
  49#define MAXF(a,b)            (((a) > (b)) ? (a) : (b))
  50
  51static const int auto_speed_thresh[17] =
  52{
  53    1000,
  54    200,
  55    150,
  56    130,
  57    150,
  58    125,
  59    120,
  60    115,
  61    115,
  62    115,
  63    115,
  64    115,
  65    115,
  66    115,
  67    115,
  68    115,
  69    105
  70};
  71
  72const MB_PREDICTION_MODE vp8_mode_order[MAX_MODES] =
  73{
  74    ZEROMV,
  75    DC_PRED,
  76
  77    NEARESTMV,
  78    NEARMV,
  79
  80    ZEROMV,
  81    NEARESTMV,
  82
  83    ZEROMV,
  84    NEARESTMV,
  85
  86    NEARMV,
  87    NEARMV,
  88
  89    V_PRED,
  90    H_PRED,
  91    TM_PRED,
  92
  93    NEWMV,
  94    NEWMV,
  95    NEWMV,
  96
  97    SPLITMV,
  98    SPLITMV,
  99    SPLITMV,
 100
 101    B_PRED,
 102};
 103
 104const MV_REFERENCE_FRAME vp8_ref_frame_order[MAX_MODES] =
 105{
 106    LAST_FRAME,
 107    INTRA_FRAME,
 108
 109    LAST_FRAME,
 110    LAST_FRAME,
 111
 112    GOLDEN_FRAME,
 113    GOLDEN_FRAME,
 114
 115    ALTREF_FRAME,
 116    ALTREF_FRAME,
 117
 118    GOLDEN_FRAME,
 119    ALTREF_FRAME,
 120
 121    INTRA_FRAME,
 122    INTRA_FRAME,
 123    INTRA_FRAME,
 124
 125    LAST_FRAME,
 126    GOLDEN_FRAME,
 127    ALTREF_FRAME,
 128
 129    LAST_FRAME,
 130    GOLDEN_FRAME,
 131    ALTREF_FRAME,
 132
 133    INTRA_FRAME,
 134};
 135
 136static void fill_token_costs(
 137    unsigned int c      [BLOCK_TYPES] [COEF_BANDS] [PREV_COEF_CONTEXTS] [MAX_ENTROPY_TOKENS],
 138    const vp8_prob p    [BLOCK_TYPES] [COEF_BANDS] [PREV_COEF_CONTEXTS] [ENTROPY_NODES]
 139)
 140{
 141    int i, j, k;
 142
 143
 144    for (i = 0; i < BLOCK_TYPES; i++)
 145        for (j = 0; j < COEF_BANDS; j++)
 146            for (k = 0; k < PREV_COEF_CONTEXTS; k++)
 147
 148                vp8_cost_tokens((int *)(c [i][j][k]), p [i][j][k], vp8_coef_tree);
 149
 150}
 151
 152static int rd_iifactor [ 32 ] =  {    4,   4,   3,   2,   1,   0,   0,   0,
 153                                      0,   0,   0,   0,   0,   0,   0,   0,
 154                                      0,   0,   0,   0,   0,   0,   0,   0,
 155                                      0,   0,   0,   0,   0,   0,   0,   0,
 156                                 };
 157
 158/* values are now correlated to quantizer */
 159static int sad_per_bit16lut[QINDEX_RANGE] =
 160{
 161    2,  2,  2,  2,  2,  2,  2,  2,
 162    2,  2,  2,  2,  2,  2,  2,  2,
 163    3,  3,  3,  3,  3,  3,  3,  3,
 164    3,  3,  3,  3,  3,  3,  4,  4,
 165    4,  4,  4,  4,  4,  4,  4,  4,
 166    4,  4,  5,  5,  5,  5,  5,  5,
 167    5,  5,  5,  5,  5,  5,  6,  6,
 168    6,  6,  6,  6,  6,  6,  6,  6,
 169    6,  6,  7,  7,  7,  7,  7,  7,
 170    7,  7,  7,  7,  7,  7,  8,  8,
 171    8,  8,  8,  8,  8,  8,  8,  8,
 172    8,  8,  9,  9,  9,  9,  9,  9,
 173    9,  9,  9,  9,  9,  9,  10, 10,
 174    10, 10, 10, 10, 10, 10, 11, 11,
 175    11, 11, 11, 11, 12, 12, 12, 12,
 176    12, 12, 13, 13, 13, 13, 14, 14
 177};
 178static int sad_per_bit4lut[QINDEX_RANGE] =
 179{
 180    2,  2,  2,  2,  2,  2,  3,  3,
 181    3,  3,  3,  3,  3,  3,  3,  3,
 182    3,  3,  3,  3,  4,  4,  4,  4,
 183    4,  4,  4,  4,  4,  4,  5,  5,
 184    5,  5,  5,  5,  6,  6,  6,  6,
 185    6,  6,  6,  6,  6,  6,  6,  6,
 186    7,  7,  7,  7,  7,  7,  7,  7,
 187    7,  7,  7,  7,  7,  8,  8,  8,
 188    8,  8,  9,  9,  9,  9,  9,  9,
 189    10, 10, 10, 10, 10, 10, 10, 10,
 190    11, 11, 11, 11, 11, 11, 11, 11,
 191    12, 12, 12, 12, 12, 12, 12, 12,
 192    13, 13, 13, 13, 13, 13, 13, 14,
 193    14, 14, 14, 14, 15, 15, 15, 15,
 194    16, 16, 16, 16, 17, 17, 17, 18,
 195    18, 18, 19, 19, 19, 20, 20, 20,
 196};
 197
 198void vp8cx_initialize_me_consts(VP8_COMP *cpi, int QIndex)
 199{
 200    cpi->mb.sadperbit16 =  sad_per_bit16lut[QIndex];
 201    cpi->mb.sadperbit4  =  sad_per_bit4lut[QIndex];
 202}
 203
 204void vp8_initialize_rd_consts(VP8_COMP *cpi, int Qvalue)
 205{
 206    int q;
 207    int i;
 208    double capped_q = (Qvalue < 160) ? (double)Qvalue : 160.0;
 209    double rdconst = 2.70;
 210
 211    vp8_clear_system_state();  //__asm emms;
 212
 213    // Further tests required to see if optimum is different
 214    // for key frames, golden frames and arf frames.
 215    // if (cpi->common.refresh_golden_frame ||
 216    //     cpi->common.refresh_alt_ref_frame)
 217    cpi->RDMULT = (int)(rdconst * (capped_q * capped_q));
 218
 219    // Extend rate multiplier along side quantizer zbin increases
 220    if (cpi->zbin_over_quant  > 0)
 221    {
 222        double oq_factor;
 223        double modq;
 224
 225        // Experimental code using the same basic equation as used for Q above
 226        // The units of cpi->zbin_over_quant are 1/128 of Q bin size
 227        oq_factor = 1.0 + ((double)0.0015625 * cpi->zbin_over_quant);
 228        modq = (int)((double)capped_q * oq_factor);
 229        cpi->RDMULT = (int)(rdconst * (modq * modq));
 230    }
 231
 232    if (cpi->pass == 2 && (cpi->common.frame_type != KEY_FRAME))
 233    {
 234        if (cpi->twopass.next_iiratio > 31)
 235            cpi->RDMULT += (cpi->RDMULT * rd_iifactor[31]) >> 4;
 236        else
 237            cpi->RDMULT +=
 238                (cpi->RDMULT * rd_iifactor[cpi->twopass.next_iiratio]) >> 4;
 239    }
 240
 241    cpi->mb.errorperbit = (cpi->RDMULT / 110);
 242    cpi->mb.errorperbit += (cpi->mb.errorperbit==0);
 243
 244    vp8_set_speed_features(cpi);
 245
 246    q = (int)pow(Qvalue, 1.25);
 247
 248    if (q < 8)
 249        q = 8;
 250
 251    if (cpi->RDMULT > 1000)
 252    {
 253        cpi->RDDIV = 1;
 254        cpi->RDMULT /= 100;
 255
 256        for (i = 0; i < MAX_MODES; i++)
 257        {
 258            if (cpi->sf.thresh_mult[i] < INT_MAX)
 259            {
 260                cpi->rd_threshes[i] = cpi->sf.thresh_mult[i] * q / 100;
 261            }
 262            else
 263            {
 264                cpi->rd_threshes[i] = INT_MAX;
 265            }
 266
 267            cpi->rd_baseline_thresh[i] = cpi->rd_threshes[i];
 268        }
 269    }
 270    else
 271    {
 272        cpi->RDDIV = 100;
 273
 274        for (i = 0; i < MAX_MODES; i++)
 275        {
 276            if (cpi->sf.thresh_mult[i] < (INT_MAX / q))
 277            {
 278                cpi->rd_threshes[i] = cpi->sf.thresh_mult[i] * q;
 279            }
 280            else
 281            {
 282                cpi->rd_threshes[i] = INT_MAX;
 283            }
 284
 285            cpi->rd_baseline_thresh[i] = cpi->rd_threshes[i];
 286        }
 287    }
 288
 289    fill_token_costs(
 290        cpi->mb.token_costs,
 291        (const vp8_prob( *)[8][3][11]) cpi->common.fc.coef_probs
 292    );
 293
 294    vp8_init_mode_costs(cpi);
 295
 296}
 297
 298void vp8_auto_select_speed(VP8_COMP *cpi)
 299{
 300    int milliseconds_for_compress = (int)(1000000 / cpi->oxcf.frame_rate);
 301
 302    milliseconds_for_compress = milliseconds_for_compress * (16 - cpi->oxcf.cpu_used) / 16;
 303
 304#if 0
 305
 306    if (0)
 307    {
 308        FILE *f;
 309
 310        f = fopen("speed.stt", "a");
 311        fprintf(f, " %8ld %10ld %10ld %10ld\n",
 312                cpi->common.current_video_frame, cpi->Speed, milliseconds_for_compress, cpi->avg_pick_mode_time);
 313        fclose(f);
 314    }
 315
 316#endif
 317
 318    /*
 319    // this is done during parameter valid check
 320    if( cpi->oxcf.cpu_used > 16)
 321        cpi->oxcf.cpu_used = 16;
 322    if( cpi->oxcf.cpu_used < -16)
 323        cpi->oxcf.cpu_used = -16;
 324    */
 325
 326    if (cpi->avg_pick_mode_time < milliseconds_for_compress && (cpi->avg_encode_time - cpi->avg_pick_mode_time) < milliseconds_for_compress)
 327    {
 328        if (cpi->avg_pick_mode_time == 0)
 329        {
 330            cpi->Speed = 4;
 331        }
 332        else
 333        {
 334            if (milliseconds_for_compress * 100 < cpi->avg_encode_time * 95)
 335            {
 336                cpi->Speed          += 2;
 337                cpi->avg_pick_mode_time = 0;
 338                cpi->avg_encode_time = 0;
 339
 340                if (cpi->Speed > 16)
 341                {
 342                    cpi->Speed = 16;
 343                }
 344            }
 345
 346            if (milliseconds_for_compress * 100 > cpi->avg_encode_time * auto_speed_thresh[cpi->Speed])
 347            {
 348                cpi->Speed          -= 1;
 349                cpi->avg_pick_mode_time = 0;
 350                cpi->avg_encode_time = 0;
 351
 352                // In real-time mode, cpi->speed is in [4, 16].
 353                if (cpi->Speed < 4)        //if ( cpi->Speed < 0 )
 354                {
 355                    cpi->Speed = 4;        //cpi->Speed = 0;
 356                }
 357            }
 358        }
 359    }
 360    else
 361    {
 362        cpi->Speed += 4;
 363
 364        if (cpi->Speed > 16)
 365            cpi->Speed = 16;
 366
 367
 368        cpi->avg_pick_mode_time = 0;
 369        cpi->avg_encode_time = 0;
 370    }
 371}
 372
 373int vp8_block_error_c(short *coeff, short *dqcoeff)
 374{
 375    int i;
 376    int error = 0;
 377
 378    for (i = 0; i < 16; i++)
 379    {
 380        int this_diff = coeff[i] - dqcoeff[i];
 381        error += this_diff * this_diff;
 382    }
 383
 384    return error;
 385}
 386
 387int vp8_mbblock_error_c(MACROBLOCK *mb, int dc)
 388{
 389    BLOCK  *be;
 390    BLOCKD *bd;
 391    int i, j;
 392    int berror, error = 0;
 393
 394    for (i = 0; i < 16; i++)
 395    {
 396        be = &mb->block[i];
 397        bd = &mb->e_mbd.block[i];
 398
 399        berror = 0;
 400
 401        for (j = dc; j < 16; j++)
 402        {
 403            int this_diff = be->coeff[j] - bd->dqcoeff[j];
 404            berror += this_diff * this_diff;
 405        }
 406
 407        error += berror;
 408    }
 409
 410    return error;
 411}
 412
 413int vp8_mbuverror_c(MACROBLOCK *mb)
 414{
 415
 416    BLOCK  *be;
 417    BLOCKD *bd;
 418
 419
 420    int i;
 421    int error = 0;
 422
 423    for (i = 16; i < 24; i++)
 424    {
 425        be = &mb->block[i];
 426        bd = &mb->e_mbd.block[i];
 427
 428        error += vp8_block_error_c(be->coeff, bd->dqcoeff);
 429    }
 430
 431    return error;
 432}
 433
 434int VP8_UVSSE(MACROBLOCK *x, const vp8_variance_rtcd_vtable_t *rtcd)
 435{
 436    unsigned char *uptr, *vptr;
 437    unsigned char *upred_ptr = (*(x->block[16].base_src) + x->block[16].src);
 438    unsigned char *vpred_ptr = (*(x->block[20].base_src) + x->block[20].src);
 439    int uv_stride = x->block[16].src_stride;
 440
 441    unsigned int sse1 = 0;
 442    unsigned int sse2 = 0;
 443    int mv_row;
 444    int mv_col;
 445    int offset;
 446    int pre_stride = x->e_mbd.block[16].pre_stride;
 447
 448    vp8_build_uvmvs(&x->e_mbd, 0);
 449    mv_row = x->e_mbd.block[16].bmi.mv.as_mv.row;
 450    mv_col = x->e_mbd.block[16].bmi.mv.as_mv.col;
 451
 452    offset = (mv_row >> 3) * pre_stride + (mv_col >> 3);
 453    uptr = x->e_mbd.pre.u_buffer + offset;
 454    vptr = x->e_mbd.pre.v_buffer + offset;
 455
 456    if ((mv_row | mv_col) & 7)
 457    {
 458        VARIANCE_INVOKE(rtcd, subpixvar8x8)(uptr, pre_stride,
 459            mv_col & 7, mv_row & 7, upred_ptr, uv_stride, &sse2);
 460        VARIANCE_INVOKE(rtcd, subpixvar8x8)(vptr, pre_stride,
 461            mv_col & 7, mv_row & 7, vpred_ptr, uv_stride, &sse1);
 462        sse2 += sse1;
 463    }
 464    else
 465    {
 466        VARIANCE_INVOKE(rtcd, var8x8)(uptr, pre_stride,
 467            upred_ptr, uv_stride, &sse2);
 468        VARIANCE_INVOKE(rtcd, var8x8)(vptr, pre_stride,
 469            vpred_ptr, uv_stride, &sse1);
 470        sse2 += sse1;
 471    }
 472    return sse2;
 473
 474}
 475
 476static int cost_coeffs(MACROBLOCK *mb, BLOCKD *b, int type, ENTROPY_CONTEXT *a, ENTROPY_CONTEXT *l)
 477{
 478    int c = !type;              /* start at coef 0, unless Y with Y2 */
 479    int eob = b->eob;
 480    int pt ;    /* surrounding block/prev coef predictor */
 481    int cost = 0;
 482    short *qcoeff_ptr = b->qcoeff;
 483
 484    VP8_COMBINEENTROPYCONTEXTS(pt, *a, *l);
 485
 486# define QC( I)  ( qcoeff_ptr [vp8_default_zig_zag1d[I]] )
 487
 488    for (; c < eob; c++)
 489    {
 490        int v = QC(c);
 491        int t = vp8_dct_value_tokens_ptr[v].Token;
 492        cost += mb->token_costs [type] [vp8_coef_bands[c]] [pt] [t];
 493        cost += vp8_dct_value_cost_ptr[v];
 494        pt = vp8_prev_token_class[t];
 495    }
 496
 497# undef QC
 498
 499    if (c < 16)
 500        cost += mb->token_costs [type] [vp8_coef_bands[c]] [pt] [DCT_EOB_TOKEN];
 501
 502    pt = (c != !type); // is eob first coefficient;
 503    *a = *l = pt;
 504
 505    return cost;
 506}
 507
 508static int vp8_rdcost_mby(MACROBLOCK *mb)
 509{
 510    int cost = 0;
 511    int b;
 512    MACROBLOCKD *x = &mb->e_mbd;
 513    ENTROPY_CONTEXT_PLANES t_above, t_left;
 514    ENTROPY_CONTEXT *ta;
 515    ENTROPY_CONTEXT *tl;
 516
 517    vpx_memcpy(&t_above, mb->e_mbd.above_context, sizeof(ENTROPY_CONTEXT_PLANES));
 518    vpx_memcpy(&t_left, mb->e_mbd.left_context, sizeof(ENTROPY_CONTEXT_PLANES));
 519
 520    ta = (ENTROPY_CONTEXT *)&t_above;
 521    tl = (ENTROPY_CONTEXT *)&t_left;
 522
 523    for (b = 0; b < 16; b++)
 524        cost += cost_coeffs(mb, x->block + b, PLANE_TYPE_Y_NO_DC,
 525                    ta + vp8_block2above[b], tl + vp8_block2left[b]);
 526
 527    cost += cost_coeffs(mb, x->block + 24, PLANE_TYPE_Y2,
 528                ta + vp8_block2above[24], tl + vp8_block2left[24]);
 529
 530    return cost;
 531}
 532
 533static void macro_block_yrd( MACROBLOCK *mb,
 534                             int *Rate,
 535                             int *Distortion,
 536                             const vp8_encodemb_rtcd_vtable_t *rtcd)
 537{
 538    int b;
 539    MACROBLOCKD *const x = &mb->e_mbd;
 540    BLOCK   *const mb_y2 = mb->block + 24;
 541    BLOCKD *const x_y2  = x->block + 24;
 542    short *Y2DCPtr = mb_y2->src_diff;
 543    BLOCK *beptr;
 544    int d;
 545
 546    ENCODEMB_INVOKE(rtcd, submby)( mb->src_diff, *(mb->block[0].base_src),
 547                                   mb->e_mbd.predictor, mb->block[0].src_stride );
 548
 549    // Fdct and building the 2nd order block
 550    for (beptr = mb->block; beptr < mb->block + 16; beptr += 2)
 551    {
 552        mb->vp8_short_fdct8x4(beptr->src_diff, beptr->coeff, 32);
 553        *Y2DCPtr++ = beptr->coeff[0];
 554        *Y2DCPtr++ = beptr->coeff[16];
 555    }
 556
 557    // 2nd order fdct
 558    mb->short_walsh4x4(mb_y2->src_diff, mb_y2->coeff, 8);
 559
 560    // Quantization
 561    for (b = 0; b < 16; b++)
 562    {
 563        mb->quantize_b(&mb->block[b], &mb->e_mbd.block[b]);
 564    }
 565
 566    // DC predication and Quantization of 2nd Order block
 567    mb->quantize_b(mb_y2, x_y2);
 568
 569    // Distortion
 570    d = ENCODEMB_INVOKE(rtcd, mberr)(mb, 1) << 2;
 571    d += ENCODEMB_INVOKE(rtcd, berr)(mb_y2->coeff, x_y2->dqcoeff);
 572
 573    *Distortion = (d >> 4);
 574
 575    // rate
 576    *Rate = vp8_rdcost_mby(mb);
 577}
 578
 579static void copy_predictor(unsigned char *dst, const unsigned char *predictor)
 580{
 581    const unsigned int *p = (const unsigned int *)predictor;
 582    unsigned int *d = (unsigned int *)dst;
 583    d[0] = p[0];
 584    d[4] = p[4];
 585    d[8] = p[8];
 586    d[12] = p[12];
 587}
 588static int rd_pick_intra4x4block(
 589    VP8_COMP *cpi,
 590    MACROBLOCK *x,
 591    BLOCK *be,
 592    BLOCKD *b,
 593    B_PREDICTION_MODE *best_mode,
 594    unsigned int *bmode_costs,
 595    ENTROPY_CONTEXT *a,
 596    ENTROPY_CONTEXT *l,
 597
 598    int *bestrate,
 599    int *bestratey,
 600    int *bestdistortion)
 601{
 602    B_PREDICTION_MODE mode;
 603    int best_rd = INT_MAX;
 604    int rate = 0;
 605    int distortion;
 606
 607    ENTROPY_CONTEXT ta = *a, tempa = *a;
 608    ENTROPY_CONTEXT tl = *l, templ = *l;
 609    /*
 610     * The predictor buffer is a 2d buffer with a stride of 16.  Create
 611     * a temp buffer that meets the stride requirements, but we are only
 612     * interested in the left 4x4 block
 613     * */
 614    DECLARE_ALIGNED_ARRAY(16, unsigned char,  best_predictor, 16*4);
 615    DECLARE_ALIGNED_ARRAY(16, short, best_dqcoeff, 16);
 616
 617    for (mode = B_DC_PRED; mode <= B_HU_PRED; mode++)
 618    {
 619        int this_rd;
 620        int ratey;
 621
 622        rate = bmode_costs[mode];
 623
 624        RECON_INVOKE(&cpi->rtcd.common->recon, intra4x4_predict)
 625                     (b, mode, b->predictor);
 626        ENCODEMB_INVOKE(IF_RTCD(&cpi->rtcd.encodemb), subb)(be, b, 16);
 627        x->vp8_short_fdct4x4(be->src_diff, be->coeff, 32);
 628        x->quantize_b(be, b);
 629
 630        tempa = ta;
 631        templ = tl;
 632
 633        ratey = cost_coeffs(x, b, PLANE_TYPE_Y_WITH_DC, &tempa, &templ);
 634        rate += ratey;
 635        distortion = ENCODEMB_INVOKE(IF_RTCD(&cpi->rtcd.encodemb), berr)(be->coeff, b->dqcoeff) >> 2;
 636
 637        this_rd = RDCOST(x->rdmult, x->rddiv, rate, distortion);
 638
 639        if (this_rd < best_rd)
 640        {
 641            *bestrate = rate;
 642            *bestratey = ratey;
 643            *bestdistortion = distortion;
 644            best_rd = this_rd;
 645            *best_mode = mode;
 646            *a = tempa;
 647            *l = templ;
 648            copy_predictor(best_predictor, b->predictor);
 649            vpx_memcpy(best_dqcoeff, b->dqcoeff, 32);
 650        }
 651    }
 652    b->bmi.as_mode = (B_PREDICTION_MODE)(*best_mode);
 653
 654    IDCT_INVOKE(IF_RTCD(&cpi->rtcd.common->idct), idct16)(best_dqcoeff, b->diff, 32);
 655    RECON_INVOKE(IF_RTCD(&cpi->rtcd.common->recon), recon)(best_predictor, b->diff, *(b->base_dst) + b->dst, b->dst_stride);
 656
 657    return best_rd;
 658}
 659
 660static int rd_pick_intra4x4mby_modes(VP8_COMP *cpi, MACROBLOCK *mb, int *Rate,
 661                                     int *rate_y, int *Distortion, int best_rd)
 662{
 663    MACROBLOCKD *const xd = &mb->e_mbd;
 664    int i;
 665    int cost = mb->mbmode_cost [xd->frame_type] [B_PRED];
 666    int distortion = 0;
 667    int tot_rate_y = 0;
 668    int64_t total_rd = 0;
 669    ENTROPY_CONTEXT_PLANES t_above, t_left;
 670    ENTROPY_CONTEXT *ta;
 671    ENTROPY_CONTEXT *tl;
 672    unsigned int *bmode_costs;
 673
 674    vpx_memcpy(&t_above, mb->e_mbd.above_context, sizeof(ENTROPY_CONTEXT_PLANES));
 675    vpx_memcpy(&t_left, mb->e_mbd.left_context, sizeof(ENTROPY_CONTEXT_PLANES));
 676
 677    ta = (ENTROPY_CONTEXT *)&t_above;
 678    tl = (ENTROPY_CONTEXT *)&t_left;
 679
 680    vp8_intra_prediction_down_copy(xd);
 681
 682    bmode_costs = mb->inter_bmode_costs;
 683
 684    for (i = 0; i < 16; i++)
 685    {
 686        MODE_INFO *const mic = xd->mode_info_context;
 687        const int mis = xd->mode_info_stride;
 688        B_PREDICTION_MODE UNINITIALIZED_IS_SAFE(best_mode);
 689        int UNINITIALIZED_IS_SAFE(r), UNINITIALIZED_IS_SAFE(ry), UNINITIALIZED_IS_SAFE(d);
 690
 691        if (mb->e_mbd.frame_type == KEY_FRAME)
 692        {
 693            const B_PREDICTION_MODE A = above_block_mode(mic, i, mis);
 694            const B_PREDICTION_MODE L = left_block_mode(mic, i);
 695
 696            bmode_costs  = mb->bmode_costs[A][L];
 697        }
 698
 699        total_rd += rd_pick_intra4x4block(
 700            cpi, mb, mb->block + i, xd->block + i, &best_mode, bmode_costs,
 701            ta + vp8_block2above[i],
 702            tl + vp8_block2left[i], &r, &ry, &d);
 703
 704        cost += r;
 705        distortion += d;
 706        tot_rate_y += ry;
 707
 708        mic->bmi[i].as_mode = best_mode;
 709
 710        if(total_rd >= (int64_t)best_rd)
 711            break;
 712    }
 713
 714    if(total_rd >= (int64_t)best_rd)
 715        return INT_MAX;
 716
 717    *Rate = cost;
 718    *rate_y += tot_rate_y;
 719    *Distortion = distortion;
 720
 721    return RDCOST(mb->rdmult, mb->rddiv, cost, distortion);
 722}
 723
 724
 725static int rd_pick_intra16x16mby_mode(VP8_COMP *cpi,
 726                                      MACROBLOCK *x,
 727                                      int *Rate,
 728                                      int *rate_y,
 729                                      int *Distortion)
 730{
 731    MB_PREDICTION_MODE mode;
 732    MB_PREDICTION_MODE UNINITIALIZED_IS_SAFE(mode_selected);
 733    int rate, ratey;
 734    int distortion;
 735    int best_rd = INT_MAX;
 736    int this_rd;
 737
 738    //Y Search for 16x16 intra prediction mode
 739    for (mode = DC_PRED; mode <= TM_PRED; mode++)
 740    {
 741        x->e_mbd.mode_info_context->mbmi.mode = mode;
 742
 743        RECON_INVOKE(&cpi->common.rtcd.recon, build_intra_predictors_mby)
 744            (&x->e_mbd);
 745
 746        macro_block_yrd(x, &ratey, &distortion, IF_RTCD(&cpi->rtcd.encodemb));
 747        rate = ratey + x->mbmode_cost[x->e_mbd.frame_type]
 748                                     [x->e_mbd.mode_info_context->mbmi.mode];
 749
 750        this_rd = RDCOST(x->rdmult, x->rddiv, rate, distortion);
 751
 752        if (this_rd < best_rd)
 753        {
 754            mode_selected = mode;
 755            best_rd = this_rd;
 756            *Rate = rate;
 757            *rate_y = ratey;
 758            *Distortion = distortion;
 759        }
 760    }
 761
 762    x->e_mbd.mode_info_context->mbmi.mode = mode_selected;
 763    return best_rd;
 764}
 765
 766static int rd_cost_mbuv(MACROBLOCK *mb)
 767{
 768    int b;
 769    int cost = 0;
 770    MACROBLOCKD *x = &mb->e_mbd;
 771    ENTROPY_CONTEXT_PLANES t_above, t_left;
 772    ENTROPY_CONTEXT *ta;
 773    ENTROPY_CONTEXT *tl;
 774
 775    vpx_memcpy(&t_above, mb->e_mbd.above_context, sizeof(ENTROPY_CONTEXT_PLANES));
 776    vpx_memcpy(&t_left, mb->e_mbd.left_context, sizeof(ENTROPY_CONTEXT_PLANES));
 777
 778    ta = (ENTROPY_CONTEXT *)&t_above;
 779    tl = (ENTROPY_CONTEXT *)&t_left;
 780
 781    for (b = 16; b < 24; b++)
 782        cost += cost_coeffs(mb, x->block + b, PLANE_TYPE_UV,
 783                    ta + vp8_block2above[b], tl + vp8_block2left[b]);
 784
 785    return cost;
 786}
 787
 788
 789static int vp8_rd_inter_uv(VP8_COMP *cpi, MACROBLOCK *x, int *rate, int *distortion, int fullpixel)
 790{
 791    vp8_build_uvmvs(&x->e_mbd, fullpixel);
 792    vp8_encode_inter16x16uvrd(IF_RTCD(&cpi->rtcd), x);
 793
 794
 795    *rate       = rd_cost_mbuv(x);
 796    *distortion = ENCODEMB_INVOKE(&cpi->rtcd.encodemb, mbuverr)(x) / 4;
 797
 798    return RDCOST(x->rdmult, x->rddiv, *rate, *distortion);
 799}
 800
 801static void rd_pick_intra_mbuv_mode(VP8_COMP *cpi, MACROBLOCK *x, int *rate, int *rate_tokenonly, int *distortion)
 802{
 803    MB_PREDICTION_MODE mode;
 804    MB_PREDICTION_MODE UNINITIALIZED_IS_SAFE(mode_selected);
 805    int best_rd = INT_MAX;
 806    int UNINITIALIZED_IS_SAFE(d), UNINITIALIZED_IS_SAFE(r);
 807    int rate_to;
 808
 809    for (mode = DC_PRED; mode <= TM_PRED; mode++)
 810    {
 811        int rate;
 812        int distortion;
 813        int this_rd;
 814
 815        x->e_mbd.mode_info_context->mbmi.uv_mode = mode;
 816        RECON_INVOKE(&cpi->rtcd.common->recon, build_intra_predictors_mbuv)
 817                     (&x->e_mbd);
 818        ENCODEMB_INVOKE(IF_RTCD(&cpi->rtcd.encodemb), submbuv)(x->src_diff,
 819                      x->src.u_buffer, x->src.v_buffer, x->e_mbd.predictor,
 820                      x->src.uv_stride);
 821        vp8_transform_mbuv(x);
 822        vp8_quantize_mbuv(x);
 823
 824        rate_to = rd_cost_mbuv(x);
 825        rate = rate_to + x->intra_uv_mode_cost[x->e_mbd.frame_type][x->e_mbd.mode_info_context->mbmi.uv_mode];
 826
 827        distortion = ENCODEMB_INVOKE(&cpi->rtcd.encodemb, mbuverr)(x) / 4;
 828
 829        this_rd = RDCOST(x->rdmult, x->rddiv, rate, distortion);
 830
 831        if (this_rd < best_rd)
 832        {
 833            best_rd = this_rd;
 834            d = distortion;
 835            r = rate;
 836            *rate_tokenonly = rate_to;
 837            mode_selected = mode;
 838        }
 839    }
 840
 841    *rate = r;
 842    *distortion = d;
 843
 844    x->e_mbd.mode_info_context->mbmi.uv_mode = mode_selected;
 845}
 846
 847int vp8_cost_mv_ref(MB_PREDICTION_MODE m, const int near_mv_ref_ct[4])
 848{
 849    vp8_prob p [VP8_MVREFS-1];
 850    assert(NEARESTMV <= m  &&  m <= SPLITMV);
 851    vp8_mv_ref_probs(p, near_mv_ref_ct);
 852    return vp8_cost_token(vp8_mv_ref_tree, p,
 853                          vp8_mv_ref_encoding_array - NEARESTMV + m);
 854}
 855
 856void vp8_set_mbmode_and_mvs(MACROBLOCK *x, MB_PREDICTION_MODE mb, int_mv *mv)
 857{
 858    x->e_mbd.mode_info_context->mbmi.mode = mb;
 859    x->e_mbd.mode_info_context->mbmi.mv.as_int = mv->as_int;
 860}
 861
 862static int labels2mode(
 863    MACROBLOCK *x,
 864    int const *labelings, int which_label,
 865    B_PREDICTION_MODE this_mode,
 866    int_mv *this_mv, int_mv *best_ref_mv,
 867    int *mvcost[2]
 868)
 869{
 870    MACROBLOCKD *const xd = & x->e_mbd;
 871    MODE_INFO *const mic = xd->mode_info_context;
 872    const int mis = xd->mode_info_stride;
 873
 874    int cost = 0;
 875    int thismvcost = 0;
 876
 877    /* We have to be careful retrieving previously-encoded motion vectors.
 878       Ones from this macroblock have to be pulled from the BLOCKD array
 879       as they have not yet made it to the bmi array in our MB_MODE_INFO. */
 880
 881    int i = 0;
 882
 883    do
 884    {
 885        BLOCKD *const d = xd->block + i;
 886        const int row = i >> 2,  col = i & 3;
 887
 888        B_PREDICTION_MODE m;
 889
 890        if (labelings[i] != which_label)
 891            continue;
 892
 893        if (col  &&  labelings[i] == labelings[i-1])
 894            m = LEFT4X4;
 895        else if (row  &&  labelings[i] == labelings[i-4])
 896            m = ABOVE4X4;
 897        else
 898        {
 899            // the only time we should do costing for new motion vector or mode
 900            // is when we are on a new label  (jbb May 08, 2007)
 901            switch (m = this_mode)
 902            {
 903            case NEW4X4 :
 904                thismvcost  = vp8_mv_bit_cost(this_mv, best_ref_mv, mvcost, 102);
 905                break;
 906            case LEFT4X4:
 907                this_mv->as_int = col ? d[-1].bmi.mv.as_int : left_block_mv(mic, i);
 908                break;
 909            case ABOVE4X4:
 910                this_mv->as_int = row ? d[-4].bmi.mv.as_int : above_block_mv(mic, i, mis);
 911                break;
 912            case ZERO4X4:
 913                this_mv->as_int = 0;
 914                break;
 915            default:
 916                break;
 917            }
 918
 919            if (m == ABOVE4X4)  // replace above with left if same
 920            {
 921                int_mv left_mv;
 922
 923                left_mv.as_int = col ? d[-1].bmi.mv.as_int :
 924                                        left_block_mv(mic, i);
 925
 926                if (left_mv.as_int == this_mv->as_int)
 927                    m = LEFT4X4;
 928            }
 929
 930            cost = x->inter_bmode_costs[ m];
 931        }
 932
 933        d->bmi.mv.as_int = this_mv->as_int;
 934
 935        x->partition_info->bmi[i].mode = m;
 936        x->partition_info->bmi[i].mv.as_int = this_mv->as_int;
 937
 938    }
 939    while (++i < 16);
 940
 941    cost += thismvcost ;
 942    return cost;
 943}
 944
 945static int rdcost_mbsegment_y(MACROBLOCK *mb, const int *labels,
 946                              int which_label, ENTROPY_CONTEXT *ta,
 947                              ENTROPY_CONTEXT *tl)
 948{
 949    int cost = 0;
 950    int b;
 951    MACROBLOCKD *x = &mb->e_mbd;
 952
 953    for (b = 0; b < 16; b++)
 954        if (labels[ b] == which_label)
 955            cost += cost_coeffs(mb, x->block + b, PLANE_TYPE_Y_WITH_DC,
 956                                ta + vp8_block2above[b],
 957                                tl + vp8_block2left[b]);
 958
 959    return cost;
 960
 961}
 962static unsigned int vp8_encode_inter_mb_segment(MACROBLOCK *x, int const *labels, int which_label, const vp8_encodemb_rtcd_vtable_t *rtcd)
 963{
 964    int i;
 965    unsigned int distortion = 0;
 966
 967    for (i = 0; i < 16; i++)
 968    {
 969        if (labels[i] == which_label)
 970        {
 971            BLOCKD *bd = &x->e_mbd.block[i];
 972            BLOCK *be = &x->block[i];
 973
 974
 975            vp8_build_inter_predictors_b(bd, 16, x->e_mbd.subpixel_predict);
 976            ENCODEMB_INVOKE(rtcd, subb)(be, bd, 16);
 977            x->vp8_short_fdct4x4(be->src_diff, be->coeff, 32);
 978
 979            // set to 0 no way to account for 2nd order DC so discount
 980            //be->coeff[0] = 0;
 981            x->quantize_b(be, bd);
 982
 983            distortion += ENCODEMB_INVOKE(rtcd, berr)(be->coeff, bd->dqcoeff);
 984        }
 985    }
 986
 987    return distortion;
 988}
 989
 990
 991static const unsigned int segmentation_to_sseshift[4] = {3, 3, 2, 0};
 992
 993
 994typedef struct
 995{
 996  int_mv *ref_mv;
 997  int_mv mvp;
 998
 999  int segment_rd;
1000  int segment_num;
1001  int r;
1002  int d;
1003  int segment_yrate;
1004  B_PREDICTION_MODE modes[16];
1005  int_mv mvs[16];
1006  unsigned char eobs[16];
1007
1008  int mvthresh;
1009  int *mdcounts;
1010
1011  int_mv sv_mvp[4];     // save 4 mvp from 8x8
1012  int sv_istep[2];  // save 2 initial step_param for 16x8/8x16
1013
1014} BEST_SEG_INFO;
1015
1016
1017static void rd_check_segment(VP8_COMP *cpi, MACROBLOCK *x,
1018                             BEST_SEG_INFO *bsi, unsigned int segmentation)
1019{
1020    int i;
1021    int const *labels;
1022    int br = 0;
1023    int bd = 0;
1024    B_PREDICTION_MODE this_mode;
1025
1026
1027    int label_count;
1028    int this_segment_rd = 0;
1029    int label_mv_thresh;
1030    int rate = 0;
1031    int sbr = 0;
1032    int sbd = 0;
1033    int segmentyrate = 0;
1034
1035    vp8_variance_fn_ptr_t *v_fn_ptr;
1036
1037    ENTROPY_CONTEXT_PLANES t_above, t_left;
1038    ENTROPY_CONTEXT *ta;
1039    ENTROPY_CONTEXT *tl;
1040    ENTROPY_CONTEXT_PLANES t_above_b, t_left_b;
1041    ENTROPY_CONTEXT *ta_b;
1042    ENTROPY_CONTEXT *tl_b;
1043
1044    vpx_memcpy(&t_above, x->e_mbd.above_context, sizeof(ENTROPY_CONTEXT_PLANES));
1045    vpx_memcpy(&t_left, x->e_mbd.left_context, sizeof(ENTROPY_CONTEXT_PLANES));
1046
1047    ta = (ENTROPY_CONTEXT *)&t_above;
1048    tl = (ENTROPY_CONTEXT *)&t_left;
1049    ta_b = (ENTROPY_CONTEXT *)&t_above_b;
1050    tl_b = (ENTROPY_CONTEXT *)&t_left_b;
1051
1052    br = 0;
1053    bd = 0;
1054
1055    v_fn_ptr = &cpi->fn_ptr[segmentation];
1056    labels = vp8_mbsplits[segmentation];
1057    label_count = vp8_mbsplit_count[segmentation];
1058
1059    // 64 makes this threshold really big effectively
1060    // making it so that we very rarely check mvs on
1061    // segments.   setting this to 1 would make mv thresh
1062    // roughly equal to what it is for macroblocks
1063    label_mv_thresh = 1 * bsi->mvthresh / label_count ;
1064
1065    // Segmentation method overheads
1066    rate = vp8_cost_token(vp8_mbsplit_tree, vp8_mbsplit_probs, vp8_mbsplit_encodings + segmentation);
1067    rate += vp8_cost_mv_ref(SPLITMV, bsi->mdcounts);
1068    this_segment_rd += RDCOST(x->rdmult, x->rddiv, rate, 0);
1069    br += rate;
1070
1071    for (i = 0; i < label_count; i++)
1072    {
1073        int_mv mode_mv[B_MODE_COUNT];
1074        int best_label_rd = INT_MAX;
1075        B_PREDICTION_MODE mode_selected = ZERO4X4;
1076        int bestlabelyrate = 0;
1077
1078        // search for the best motion vector on this segment
1079        for (this_mode = LEFT4X4; this_mode <= NEW4X4 ; this_mode ++)
1080        {
1081            int this_rd;
1082            int distortion;
1083            int labelyrate;
1084            ENTROPY_CONTEXT_PLANES t_above_s, t_left_s;
1085            ENTROPY_CONTEXT *ta_s;
1086            ENTROPY_CONTEXT *tl_s;
1087
1088            vpx_memcpy(&t_above_s, &t_above, sizeof(ENTROPY_CONTEXT_PLANES));
1089            vpx_memcpy(&t_left_s, &t_left, sizeof(ENTROPY_CONTEXT_PLANES));
1090
1091            ta_s = (ENTROPY_CONTEXT *)&t_above_s;
1092            tl_s = (ENTROPY_CONTEXT *)&t_left_s;
1093
1094            if (this_mode == NEW4X4)
1095            {
1096                int sseshift;
1097                int num00;
1098                int step_param = 0;
1099                int further_steps;
1100                int n;
1101                int thissme;
1102                int bestsme = INT_MAX;
1103                int_mv  temp_mv;
1104                BLOCK *c;
1105                BLOCKD *e;
1106
1107                // Is the best so far sufficiently good that we cant justify doing and new motion search.
1108                if (best_label_rd < label_mv_thresh)
1109                    break;
1110
1111                if(cpi->compressor_speed)
1112                {
1113                    if (segmentation == BLOCK_8X16 || segmentation == BLOCK_16X8)
1114                    {
1115                        bsi->mvp.as_int = bsi->sv_mvp[i].as_int;
1116                        if (i==1 && segmentation == BLOCK_16X8)
1117                          bsi->mvp.as_int = bsi->sv_mvp[2].as_int;
1118
1119                        step_param = bsi->sv_istep[i];
1120                    }
1121
1122                    // use previous block's result as next block's MV predictor.
1123                    if (segmentation == BLOCK_4X4 && i>0)
1124                    {
1125                        bsi->mvp.as_int = x->e_mbd.block[i-1].bmi.mv.as_int;
1126                        if (i==4 || i==8 || i==12)
1127                            bsi->mvp.as_int = x->e_mbd.block[i-4].bmi.mv.as_int;
1128                        step_param = 2;
1129                    }
1130                }
1131
1132                further_steps = (MAX_MVSEARCH_STEPS - 1) - step_param;
1133
1134                {
1135                    int sadpb = x->sadperbit4;
1136                    int_mv mvp_full;
1137
1138                    mvp_full.as_mv.row = bsi->mvp.as_mv.row >>3;
1139                    mvp_full.as_mv.col = bsi->mvp.as_mv.col >>3;
1140
1141                    // find first label
1142                    n = vp8_mbsplit_offset[segmentation][i];
1143
1144                    c = &x->block[n];
1145                    e = &x->e_mbd.block[n];
1146
1147                    {
1148                        bestsme = cpi->diamond_search_sad(x, c, e, &mvp_full,
1149                                                &mode_mv[NEW4X4], step_param,
1150                                                sadpb, &num00, v_fn_ptr,
1151                                                x->mvcost, bsi->ref_mv);
1152
1153                        n = num00;
1154                        num00 = 0;
1155
1156                        while (n < further_steps)
1157                        {
1158                            n++;
1159
1160                            if (num00)
1161                                num00--;
1162                            else
1163                            {
1164                                thissme = cpi->diamond_search_sad(x, c, e,
1165                                                    &mvp_full, &temp_mv,
1166                                                    step_param + n, sadpb,
1167                                                    &num00, v_fn_ptr,
1168                                                    x->mvcost, bsi->ref_mv);
1169
1170                                if (thissme < bestsme)
1171                                {
1172                                    bestsme = thissme;
1173                                    mode_mv[NEW4X4].as_int = temp_mv.as_int;
1174                                }
1175                            }
1176                        }
1177                    }
1178
1179                    sseshift = segmentation_to_sseshift[segmentation];
1180
1181                    // Should we do a full search (best quality only)
1182                    if ((cpi->compressor_speed == 0) && (bestsme >> sseshift) > 4000)
1183                    {
1184                        /* Check if mvp_full is within the range. */
1185                        vp8_clamp_mv(&mvp_full, x->mv_col_min, x->mv_col_max, x->mv_row_min, x->mv_row_max);
1186
1187                        thissme = cpi->full_search_sad(x, c, e, &mvp_full,
1188                                                       sadpb, 16, v_fn_ptr,
1189                                                       x->mvcost, bsi->ref_mv);
1190
1191                        if (thissme < bestsme)
1192                        {
1193                            bestsme = thissme;
1194                            mode_mv[NEW4X4].as_int = e->bmi.mv.as_int;
1195                        }
1196                        else
1197                        {
1198                            // The full search result is actually worse so re-instate the previous best vector
1199                            e->bmi.mv.as_int = mode_mv[NEW4X4].as_int;
1200                        }
1201                    }
1202                }
1203
1204                if (bestsme < INT_MAX)
1205                {
1206                    int distortion;
1207                    unsigned int sse;
1208                    cpi->find_fractional_mv_step(x, c, e, &mode_mv[NEW4X4],
1209                        bsi->ref_mv, x->errorperbit, v_fn_ptr, x->mvcost,
1210                        &distortion, &sse);
1211
1212                }
1213            } /* NEW4X4 */
1214
1215            rate = labels2mode(x, labels, i, this_mode, &mode_mv[this_mode],
1216                               bsi->ref_mv, x->mvcost);
1217
1218            // Trap vectors that reach beyond the UMV borders
1219            if (((mode_mv[this_mode].as_mv.row >> 3) < x->mv_row_min) || ((mode_mv[this_mode].as_mv.row >> 3) > x->mv_row_max) ||
1220                ((mode_mv[this_mode].as_mv.col >> 3) < x->mv_col_min) || ((mode_mv[this_mode].as_mv.col >> 3) > x->mv_col_max))
1221            {
1222                continue;
1223            }
1224
1225            distortion = vp8_encode_inter_mb_segment(x, labels, i, IF_RTCD(&cpi->rtcd.encodemb)) / 4;
1226
1227            labelyrate = rdcost_mbsegment_y(x, labels, i, ta_s, tl_s);
1228            rate += labelyrate;
1229
1230            this_rd = RDCOST(x->rdmult, x->rddiv, rate, distortion);
1231
1232            if (this_rd < best_label_rd)
1233            {
1234                sbr = rate;
1235                sbd = distortion;
1236                bestlabelyrate = labelyrate;
1237                mode_selected = this_mode;
1238                best_label_rd = this_rd;
1239
1240                vpx_memcpy(ta_b, ta_s, sizeof(ENTROPY_CONTEXT_PLANES));
1241                vpx_memcpy(tl_b, tl_s, sizeof(ENTROPY_CONTEXT_PLANES));
1242
1243            }
1244        } /*for each 4x4 mode*/
1245
1246        vpx_memcpy(ta, ta_b, sizeof(ENTROPY_CONTEXT_PLANES));
1247        vpx_memcpy(tl, tl_b, sizeof(ENTROPY_CONTEXT_PLANES));
1248
1249        labels2mode(x, labels, i, mode_selected, &mode_mv[mode_selected],
1250                    bsi->ref_mv, x->mvcost);
1251
1252        br += sbr;
1253        bd += sbd;
1254        segmentyrate += bestlabelyrate;
1255        this_segment_rd += best_label_rd;
1256
1257        if (this_segment_rd >= bsi->segment_rd)
1258            break;
1259
1260    } /* for each label */
1261
1262    if (this_segment_rd < bsi->segment_rd)
1263    {
1264        bsi->r = br;
1265        bsi->d = bd;
1266        bsi->segment_yrate = segmentyrate;
1267        bsi->segment_rd = this_segment_rd;
1268        bsi->segment_num = segmentation;
1269
1270        // store everything needed to come back to this!!
1271        for (i = 0; i < 16; i++)
1272        {
1273            BLOCKD *bd = &x->e_mbd.block[i];
1274
1275            bsi->mvs[i].as_mv = x->partition_info->bmi[i].mv.as_mv;
1276            bsi->modes[i] = x->partition_info->bmi[i].mode;
1277            bsi->eobs[i] = bd->eob;
1278        }
1279    }
1280}
1281
1282static __inline
1283void vp8_cal_step_param(int sr, int *sp)
1284{
1285    int step = 0;
1286
1287    if (sr > MAX_FIRST_STEP) sr = MAX_FIRST_STEP;
1288    else if (sr < 1) sr = 1;
1289
1290    while (sr>>=1)
1291        step++;
1292
1293    *sp = MAX_MVSEARCH_STEPS - 1 - step;
1294}
1295
1296static int vp8_rd_pick_best_mbsegmentation(VP8_COMP *cpi, MACROBLOCK *x,
1297                                           int_mv *best_ref_mv, int best_rd,
1298                                           int *mdcounts, int *returntotrate,
1299                                           int *returnyrate, int *returndistortion,
1300                                           int mvthresh)
1301{
1302    int i;
1303    BEST_SEG_INFO bsi;
1304
1305    vpx_memset(&bsi, 0, sizeof(bsi));
1306
1307    bsi.segment_rd = best_rd;
1308    bsi.ref_mv = best_ref_mv;
1309    bsi.mvp.as_int = best_ref_mv->as_int;
1310    bsi.mvthresh = mvthresh;
1311    bsi.mdcounts = mdcounts;
1312
1313    for(i = 0; i < 16; i++)
1314    {
1315        bsi.modes[i] = ZERO4X4;
1316    }
1317
1318    if(cpi->compressor_speed == 0)
1319    {
1320        /* for now, we will keep the original segmentation order
1321           when in best quality mode */
1322        rd_check_segment(cpi, x, &bsi, BLOCK_16X8);
1323        rd_check_segment(cpi, x, &bsi, BLOCK_8X16);
1324        rd_check_segment(cpi, x, &bsi, BLOCK_8X8);
1325        rd_check_segment(cpi, x, &bsi, BLOCK_4X4);
1326    }
1327    else
1328    {
1329        int sr;
1330
1331        rd_check_segment(cpi, x, &bsi, BLOCK_8X8);
1332
1333        if (bsi.segment_rd < best_rd)
1334        {
1335            int col_min = (best_ref_mv->as_mv.col>>3) - MAX_FULL_PEL_VAL + ((best_ref_mv->as_mv.col & 7)?1:0);
1336            int row_min = (best_ref_mv->as_mv.row>>3) - MAX_FULL_PEL_VAL + ((best_ref_mv->as_mv.row & 7)?1:0);
1337            int col_max = (best_ref_mv->as_mv.col>>3) + MAX_FULL_PEL_VAL;
1338            int row_max = (best_ref_mv->as_mv.row>>3) + MAX_FULL_PEL_VAL;
1339
1340            int tmp_col_min = x->mv_col_min;
1341            int tmp_col_max = x->mv_col_max;
1342            int tmp_row_min = x->mv_row_min;
1343            int tmp_row_max = x->mv_row_max;
1344
1345            /* Get intersection of UMV window and valid MV window to reduce # of checks in diamond search. */
1346            if (x->mv_col_min < col_min )
1347                x->mv_col_min = col_min;
1348            if (x->mv_col_max > col_max )
1349                x->mv_col_max = col_max;
1350            if (x->mv_row_min < row_min )
1351                x->mv_row_min = row_min;
1352            if (x->mv_row_max > row_max )
1353                x->mv_row_max = row_max;
1354
1355            /* Get 8x8 result */
1356            bsi.sv_mvp[0].as_int = bsi.mvs[0].as_int;
1357            bsi.sv_mvp[1].as_int = bsi.mvs[2].as_int;
1358            bsi.sv_mvp[2].as_int = bsi.mvs[8].as_int;
1359            bsi.sv_mvp[3].as_int = bsi.mvs[10].as_int;
1360
1361            /* Use 8x8 result as 16x8/8x16's predictor MV. Adjust search range according to the closeness of 2 MV. */
1362            /* block 8X16 */
1363            {
1364                sr = MAXF((abs(bsi.sv_mvp[0].as_mv.row - bsi.sv_mvp[2].as_mv.row))>>3, (abs(bsi.sv_mvp[0].as_mv.col - bsi.sv_mvp[2].as_mv.col))>>3);
1365                vp8_cal_step_param(sr, &bsi.sv_istep[0]);
1366
1367                sr = MAXF((abs(bsi.sv_mvp[1].as_mv.row - bsi.sv_mvp[3].as_mv.row))>>3, (abs(bsi.sv_mvp[1].as_mv.col - bsi.sv_mvp[3].as_mv.col))>>3);
1368                vp8_cal_step_param(sr, &bsi.sv_istep[1]);
1369
1370                rd_check_segment(cpi, x, &bsi, BLOCK_8X16);
1371            }
1372
1373            /* block 16X8 */
1374            {
1375                sr = MAXF((abs(bsi.sv_mvp[0].as_mv.row - bsi.sv_mvp[1].as_mv.row))>>3, (abs(bsi.sv_mvp[0].as_mv.col - bsi.sv_mvp[1].as_mv.col))>>3);
1376                vp8_cal_step_param(sr, &bsi.sv_istep[0]);
1377
1378                sr = MAXF((abs(bsi.sv_mvp[2].as_mv.row - bsi.sv_mvp[3].as_mv.row))>>3, (abs(bsi.sv_mvp[2].as_mv.col - bsi.sv_mvp[3].as_mv.col))>>3);
1379                vp8_cal_step_param(sr, &bsi.sv_istep[1]);
1380
1381                rd_check_segment(cpi, x, &bsi, BLOCK_16X8);
1382            }
1383
1384            /* If 8x8 is better than 16x8/8x16, then do 4x4 search */
1385            /* Not skip 4x4 if speed=0 (good quality) */
1386            if (cpi->sf.no_skip_block4x4_search || bsi.segment_num == BLOCK_8X8)  /* || (sv_segment_rd8x8-bsi.segment_rd) < sv_segment_rd8x8>>5) */
1387            {
1388                bsi.mvp.as_int = bsi.sv_mvp[0].as_int;
1389                rd_check_segment(cpi, x, &bsi, BLOCK_4X4);
1390            }
1391
1392            /* restore UMV window */
1393            x->mv_col_min = tmp_col_min;
1394            x->mv_col_max = tmp_col_max;
1395            x->mv_row_min = tmp_row_min;
1396            x->mv_row_max = tmp_row_max;
1397        }
1398    }
1399
1400    /* set it to the best */
1401    for (i = 0; i < 16; i++)
1402    {
1403        BLOCKD *bd = &x->e_mbd.block[i];
1404
1405        bd->bmi.mv.as_int = bsi.mvs[i].as_int;
1406        bd->eob = bsi.eobs[i];
1407    }
1408
1409    *returntotrate = bsi.r;
1410    *returndistortion = bsi.d;
1411    *returnyrate = bsi.segment_yrate;
1412
1413    /* save partitions */
1414    x->e_mbd.mode_info_context->mbmi.partitioning = bsi.segment_num;
1415    x->partition_info->count = vp8_mbsplit_count[bsi.segment_num];
1416
1417    for (i = 0; i < x->partition_info->count; i++)
1418    {
1419        int j;
1420
1421        j = vp8_mbsplit_offset[bsi.segment_num][i];
1422
1423        x->partition_info->bmi[i].mode = bsi.modes[j];
1424        x->partition_info->bmi[i].mv.as_mv = bsi.mvs[j].as_mv;
1425    }
1426    /*
1427     * used to set x->e_mbd.mode_info_context->mbmi.mv.as_int
1428     */
1429    x->partition_info->bmi[15].mv.as_int = bsi.mvs[15].as_int;
1430
1431    return bsi.segment_rd;
1432}
1433
/*
 * Sort arr[0..len-1] into ascending order in place (insertion sort).
 * Nothing is done when len is less than 2.
 */
static void insertsortmv(int arr[], int len)
{
    int pos;

    for (pos = 1; pos < len; pos++)
    {
        const int key = arr[pos];
        int slot = pos;

        /* Move every larger element of the sorted prefix one place up. */
        while (slot > 0 && arr[slot - 1] > key)
        {
            arr[slot] = arr[slot - 1];
            slot--;
        }

        arr[slot] = key;
    }
}
1456
/*
 * Sort arr[0..len-1] into ascending order in place and apply the same
 * permutation to idx[], so idx[k] keeps naming the origin of arr[k].
 * Entries with equal arr values keep their relative order.
 */
static void insertsortsad(int arr[],int idx[], int len)
{
    int pos;

    for (pos = 1; pos < len; pos++)
    {
        const int key = arr[pos];
        const int key_idx = idx[pos];
        int slot = pos;

        /* Shift strictly larger entries (value and index together). */
        while (slot > 0 && arr[slot - 1] > key)
        {
            arr[slot] = arr[slot - 1];
            idx[slot] = idx[slot - 1];
            slot--;
        }

        arr[slot] = key;
        idx[slot] = key_idx;
    }
}
1484
1485//The improved MV prediction
1486void vp8_mv_pred
1487(
1488    VP8_COMP *cpi,
1489    MACROBLOCKD *xd,
1490    const MODE_INFO *here,
1491    int_mv *mvp,
1492    int refframe,
1493    int *ref_frame_sign_bias,
1494    int *sr,
1495    int near_sadidx[]
1496)
1497{
1498    const MODE_INFO *above = here - xd->mode_info_stride;
1499    const MODE_INFO *left = here - 1;
1500    const MODE_INFO *aboveleft = above - 1;
1501    int_mv           near_mvs[8];
1502    int              near_ref[8];
1503    int_mv           mv;
1504    int              vcnt=0;
1505    int              find=0;
1506    int              mb_offset;
1507
1508    int              mvx[8];
1509    int              mvy[8];
1510    int              i;
1511
1512    mv.as_int = 0;
1513
1514    if(here->mbmi.ref_frame != INTRA_FRAME)
1515    {
1516        near_mvs[0].as_int = near_mvs[1].as_int = near_mvs[2].as_int = near_mvs[3].as_int = near_mvs[4].as_int = near_mvs[5].as_int = near_mvs[6].as_int = near_mvs[7].as_int = 0;
1517        near_ref[0] = near_ref[1] = near_ref[2] = near_ref[3] = near_ref[4] = near_ref[5] = near_ref[6] = near_ref[7] = 0;
1518
1519        // read in 3 nearby block's MVs from current frame as prediction candidates.
1520        if (above->mbmi.ref_frame != INTRA_FRAME)
1521        {
1522            near_mvs[vcnt].as_int = above->mbmi.mv.as_int;
1523            mv_bias(ref_frame_sign_bias[above->mbmi.ref_frame], refframe, &near_mvs[vcnt], ref_frame_sign_bias);
1524            near_ref[vcnt] =  above->mbmi.ref_frame;
1525        }
1526        vcnt++;
1527        if (left->mbmi.ref_frame != INTRA_FRAME)
1528        {
1529            near_mvs[vcnt].as_int = left->mbmi.mv.as_int;
1530            mv_bias(ref_frame_sign_bias[left->mbmi.ref_frame], refframe, &near_mvs[vcnt], ref_frame_sign_bias);
1531            near_ref[vcnt] =  left->mbmi.ref_frame;
1532        }
1533        vcnt++;
1534        if (aboveleft->mbmi.ref_frame != INTRA_FRAME)
1535        {
1536            near_mvs[vcnt].as_int = aboveleft->mbmi.mv.as_int;
1537            mv_bias(ref_frame_sign_bias[aboveleft->mbmi.ref_frame], refframe, &near_mvs[vcnt], ref_frame_sign_bias);
1538            near_ref[vcnt] =  aboveleft->mbmi.ref_frame;
1539        }
1540        vcnt++;
1541
1542        // read in 5 nearby block's MVs from last frame.
1543        if(cpi->common.last_frame_type != KEY_FRAME)
1544        {
1545            mb_offset = (-xd->mb_to_top_edge/128 + 1) * (xd->mode_info_stride +1) + (-xd->mb_to_left_edge/128 +1) ;
1546
1547            // current in last frame
1548            if (cpi->lf_ref_frame[mb_offset] != INTRA_FRAME)
1549            {
1550                near_mvs[vcnt].as_int = cpi->lfmv[mb_offset].as_int;
1551                mv_bias(cpi->lf_ref_frame_sign_bias[mb_offset], refframe, &near_mvs[vcnt], ref_frame_sign_bias);
1552                near_ref[vcnt] =  cpi->lf_ref_frame[mb_offset];
1553            }
1554            vcnt++;
1555
1556            // above in last frame
1557            if (cpi->lf_ref_frame[mb_offset - xd->mode_info_stride-1] != INTRA_FRAME)
1558            {
1559                near_mvs[vcnt].as_int = cpi->lfmv[mb_offset - xd->mode_info_stride-1].as_int;
1560                mv_bias(cpi->lf_ref_frame_sign_bias[mb_offset - xd->mode_info_stride-1], refframe, &near_mvs[vcnt], ref_frame_sign_bias);
1561                near_ref[vcnt] =  cpi->lf_ref_frame[mb_offset - xd->mode_info_stride-1];
1562            }
1563            vcnt++;
1564
1565            // left in last frame
1566            if (cpi->lf_ref_frame[mb_offset-1] != INTRA_FRAME)
1567            {
1568                near_mvs[vcnt].as_int = cpi->lfmv[mb_offset -1].as_int;
1569                mv_bias(cpi->lf_ref_frame_sign_bias[mb_offset -1], refframe, &near_mvs[vcnt], ref_frame_sign_bias);
1570                near_ref[vcnt] =  cpi->lf_ref_frame[mb_offset - 1];
1571            }
1572            vcnt++;
1573
1574            // right in last frame
1575            if (cpi->lf_ref_frame[mb_offset +1] != INTRA_FRAME)
1576            {
1577                near_mvs[vcnt].as_int = cpi->lfmv[mb_offset +1].as_int;
1578                mv_bias(cpi->lf_ref_frame_sign_bias[mb_offset +1], refframe, &near_mvs[vcnt], ref_frame_sign_bias);
1579                near_ref[vcnt] =  cpi->lf_ref_frame[mb_offset +1];
1580            }
1581            vcnt++;
1582
1583            // below in last frame
1584            if (cpi->lf_ref_frame[mb_offset + xd->mode_info_stride +1] != INTRA_FRAME)
1585            {
1586                near_mvs[vcnt].as_int = cpi->lfmv[mb_offset + xd->mode_info_stride +1].as_int;
1587                mv_bias(cpi->lf_ref_frame_sign_bias[mb_offset + xd->mode_info_stride +1], refframe, &near_mvs[vcnt], ref_frame_sign_bias);
1588                near_ref[vcnt] =  cpi->lf_ref_frame[mb_offset + xd->mode_info_stride +1];
1589            }
1590            vcnt++;
1591        }
1592
1593        for(i=0; i< vcnt; i++)
1594        {
1595            if(near_ref[near_sadidx[i]] != INTRA_FRAME)
1596            {
1597                if(here->mbmi.ref_frame == near_ref[near_sadidx[i]])
1598                {
1599                    mv.as_int = near_mvs[near_sadidx[i]].as_int;
1600                    find = 1;
1601                    if (i < 3)
1602                        *sr = 3;
1603                    else
1604                        *sr = 2;
1605                    break;
1606                }
1607            }
1608        }
1609
1610        if(!find)
1611        {
1612            for(i=0; i<vcnt; i++)
1613            {
1614                mvx[i] = near_mvs[i].as_mv.row;
1615                mvy[i] = near_mvs[i].as_mv.col;
1616            }
1617
1618            insertsortmv(mvx, vcnt);
1619            insertsortmv(mvy, vcnt);
1620            mv.as_mv.row = mvx[vcnt/2];
1621            mv.as_mv.col = mvy[vcnt/2];
1622
1623            find = 1;
1624            //sr is set to 0 to allow calling function to decide the search range.
1625            *sr = 0;
1626        }
1627    }
1628
1629    /* Set up return values */
1630    mvp->as_int = mv.as_int;
1631    vp8_clamp_mv2(mvp, xd);
1632}
1633
1634void vp8_cal_sad(VP8_COMP *cpi, MACROBLOCKD *xd, MACROBLOCK *x, int recon_yoffset, int near_sadidx[])
1635{
1636
1637    int near_sad[8] = {0}; // 0-cf above, 1-cf left, 2-cf aboveleft, 3-lf current, 4-lf above, 5-lf left, 6-lf right, 7-lf below
1638    BLOCK *b = &x->block[0];
1639    unsigned char *src_y_ptr = *(b->base_src);
1640
1641    //calculate sad for current frame 3 nearby MBs.
1642    if( xd->mb_to_top_edge==0 && xd->mb_to_left_edge ==0)
1643    {
1644        near_sad[0] = near_sad[1] = near_sad[2] = INT_MAX;
1645    }else if(xd->mb_to_top_edge==0)
1646    {   //only has left MB for sad calculation.
1647        near_sad[0] = near_sad[2] = INT_MAX;
1648        near_sad[1] = cpi->fn_ptr[BLOCK_16X16].sdf(src_y_ptr, b->src_stride, xd->dst.y_buffer - 16,xd->dst.y_stride, 0x7fffffff);
1649    }else if(xd->mb_to_left_edge ==0)
1650    {   //only has left MB for sad calculation.
1651        near_sad[1] = near_sad[2] = INT_MAX;
1652        near_sad[0] = cpi->fn_ptr[BLOCK_16X16].sdf(src_y_ptr, b->src_stride, xd->dst.y_buffer - xd->dst.y_stride *16,xd->dst.y_stride, 0x7fffffff);
1653    }else
1654    {
1655        near_sad[0] = cpi->fn_ptr[BLOCK_16X16].sdf(src_y_ptr, b->src_stride, xd->dst.y_buffer - xd->dst.y_stride *16,xd->dst.y_stride, 0x7fffffff);
1656        near_sad[1] = cpi->fn_ptr[BLOCK_16X16].sdf(src_y_ptr, b->src_stride, xd->dst.y_buffer - 16,xd->dst.y_stride, 0x7fffffff);
1657        near_sad[2] = cpi->fn_ptr[BLOCK_16X16].sdf(src_y_ptr, b->src_stride, xd->dst.y_buffer - xd->dst.y_stride *16 -16,xd->dst.y_stride, 0x7fffffff);
1658    }
1659
1660    if(cpi->common.last_frame_type != KEY_FRAME)
1661    {
1662        //calculate sad for last frame 5 nearby MBs.
1663        unsigned char *pre_y_buffer = cpi->co

Large files files are truncated, but you can click here to view the full file