/H264Dec/source/h264bsd_reconstruct.c
C | 2315 lines | 1666 code | 259 blank | 390 comment | 110 complexity | a243c534b0997850d53f65736fd8ffbe MD5 | raw file
Possible License(s): BSD-3-Clause
1/*
2 * Copyright (C) 2009 The Android Open Source Project
3 *
4 * Licensed under the Apache License, Version 2.0 (the "License");
5 * you may not use this file except in compliance with the License.
6 * You may obtain a copy of the License at
7 *
8 * http://www.apache.org/licenses/LICENSE-2.0
9 *
10 * Unless required by applicable law or agreed to in writing, software
11 * distributed under the License is distributed on an "AS IS" BASIS,
12 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 * See the License for the specific language governing permissions and
14 * limitations under the License.
15 */
16
17/*------------------------------------------------------------------------------
18
19 Table of contents
20
21 1. Include headers
22 2. External compiler flags
23 3. Module defines
24 4. Local function prototypes
25 5. Functions
26
27------------------------------------------------------------------------------*/
28
29/*------------------------------------------------------------------------------
30 1. Include headers
31------------------------------------------------------------------------------*/
32
33#include "basetype.h"
34#include "h264bsd_reconstruct.h"
35#include "h264bsd_macroblock_layer.h"
36#include "h264bsd_image.h"
37#include "h264bsd_util.h"
38
39#ifdef H264DEC_OMXDL
40#include "omxtypes.h"
41#include "omxVC.h"
42#include "armVC.h"
43#endif /* H264DEC_OMXDL */
44
45/*------------------------------------------------------------------------------
46 2. External compiler flags
47--------------------------------------------------------------------------------
48
49--------------------------------------------------------------------------------
50 3. Module defines
51------------------------------------------------------------------------------*/
52
53/* Switch off the following Lint messages for this file:
54 * Info 701: Shift left of signed quantity (int)
55 * Info 702: Shift right of signed quantity (int)
56 */
57/*lint -e701 -e702 */
58
59/* Luma fractional-sample positions
60 *
61 * G a b c H
62 * d e f g
63 * h i j k m
64 * n p q r
65 * M s N
66 *
67 * G, H, M and N are integer sample positions
68 * a-s are fractional samples that need to be interpolated.
69 */
70#ifndef H264DEC_OMXDL
71static const u32 lumaFracPos[4][4] = {
72 /* G d h n a e i p b f j q c g k r */
73 {0, 1, 2, 3}, {4, 5, 6, 7}, {8, 9, 10, 11}, {12, 13, 14, 15}};
74#endif /* H264DEC_OMXDL */
75
76/* clipping table, defined in h264bsd_intra_prediction.c */
77extern const u8 h264bsdClip[];
78
79/*------------------------------------------------------------------------------
80 4. Local function prototypes
81------------------------------------------------------------------------------*/
82
83#ifndef H264DEC_OMXDL
84
85/*------------------------------------------------------------------------------
86
87 Function: h264bsdInterpolateChromaHor
88
89 Functional description:
90 This function performs chroma interpolation in horizontal direction.
91 Overfilling is done only if needed. Reference image (pRef) is
92 read at correct position and the predicted part is written to
93 macroblock's chrominance (predPartChroma)
94 Inputs:
95 pRef pointer to reference frame Cb top-left corner
96 x0 integer x-coordinate for prediction
97 y0 integer y-coordinate for prediction
98 width width of the reference frame chrominance in pixels
99 height height of the reference frame chrominance in pixels
100 xFrac horizontal fraction for prediction in 1/8 pixels
101 chromaPartWidth width of the predicted part in pixels
102 chromaPartHeight height of the predicted part in pixels
103 Outputs:
104 predPartChroma pointer where predicted part is written
105
106------------------------------------------------------------------------------*/
107#ifndef H264DEC_ARM11
108void h264bsdInterpolateChromaHor(
109 u8 *pRef,
110 u8 *predPartChroma,
111 i32 x0,
112 i32 y0,
113 u32 width,
114 u32 height,
115 u32 xFrac,
116 u32 chromaPartWidth,
117 u32 chromaPartHeight)
118{
119
120/* Variables */
121
122 u32 x, y, tmp1, tmp2, tmp3, tmp4, c, val;
123 u8 *ptrA, *cbr;
124 u32 comp;
125 u8 block[9*8*2];
126
127/* Code */
128
129 ASSERT(predPartChroma);
130 ASSERT(chromaPartWidth);
131 ASSERT(chromaPartHeight);
132 ASSERT(xFrac < 8);
133 ASSERT(pRef);
134
135 if ((x0 < 0) || ((u32)x0+chromaPartWidth+1 > width) ||
136 (y0 < 0) || ((u32)y0+chromaPartHeight > height))
137 {
138 h264bsdFillBlock(pRef, block, x0, y0, width, height,
139 chromaPartWidth + 1, chromaPartHeight, chromaPartWidth + 1);
140 pRef += width * height;
141 h264bsdFillBlock(pRef, block + (chromaPartWidth+1)*chromaPartHeight,
142 x0, y0, width, height, chromaPartWidth + 1,
143 chromaPartHeight, chromaPartWidth + 1);
144
145 pRef = block;
146 x0 = 0;
147 y0 = 0;
148 width = chromaPartWidth+1;
149 height = chromaPartHeight;
150 }
151
152 val = 8 - xFrac;
153
154 for (comp = 0; comp <= 1; comp++)
155 {
156
157 ptrA = pRef + (comp * height + (u32)y0) * width + x0;
158 cbr = predPartChroma + comp * 8 * 8;
159
160 /* 2x2 pels per iteration
161 * bilinear horizontal interpolation */
162 for (y = (chromaPartHeight >> 1); y; y--)
163 {
164 for (x = (chromaPartWidth >> 1); x; x--)
165 {
166 tmp1 = ptrA[width];
167 tmp2 = *ptrA++;
168 tmp3 = ptrA[width];
169 tmp4 = *ptrA++;
170 c = ((val * tmp1 + xFrac * tmp3) << 3) + 32;
171 c >>= 6;
172 cbr[8] = (u8)c;
173 c = ((val * tmp2 + xFrac * tmp4) << 3) + 32;
174 c >>= 6;
175 *cbr++ = (u8)c;
176 tmp1 = ptrA[width];
177 tmp2 = *ptrA;
178 c = ((val * tmp3 + xFrac * tmp1) << 3) + 32;
179 c >>= 6;
180 cbr[8] = (u8)c;
181 c = ((val * tmp4 + xFrac * tmp2) << 3) + 32;
182 c >>= 6;
183 *cbr++ = (u8)c;
184 }
185 cbr += 2*8 - chromaPartWidth;
186 ptrA += 2*width - chromaPartWidth;
187 }
188 }
189
190}
191
192/*------------------------------------------------------------------------------
193
194 Function: h264bsdInterpolateChromaVer
195
196 Functional description:
197 This function performs chroma interpolation in vertical direction.
198 Overfilling is done only if needed. Reference image (pRef) is
199 read at correct position and the predicted part is written to
200 macroblock's chrominance (predPartChroma)
201
202------------------------------------------------------------------------------*/
203
204void h264bsdInterpolateChromaVer(
205 u8 *pRef,
206 u8 *predPartChroma,
207 i32 x0,
208 i32 y0,
209 u32 width,
210 u32 height,
211 u32 yFrac,
212 u32 chromaPartWidth,
213 u32 chromaPartHeight)
214{
215
216/* Variables */
217
218 u32 x, y, tmp1, tmp2, tmp3, c, val;
219 u8 *ptrA, *cbr;
220 u32 comp;
221 u8 block[9*8*2];
222
223/* Code */
224
225 ASSERT(predPartChroma);
226 ASSERT(chromaPartWidth);
227 ASSERT(chromaPartHeight);
228 ASSERT(yFrac < 8);
229 ASSERT(pRef);
230
231 if ((x0 < 0) || ((u32)x0+chromaPartWidth > width) ||
232 (y0 < 0) || ((u32)y0+chromaPartHeight+1 > height))
233 {
234 h264bsdFillBlock(pRef, block, x0, y0, width, height, chromaPartWidth,
235 chromaPartHeight + 1, chromaPartWidth);
236 pRef += width * height;
237 h264bsdFillBlock(pRef, block + chromaPartWidth*(chromaPartHeight+1),
238 x0, y0, width, height, chromaPartWidth,
239 chromaPartHeight + 1, chromaPartWidth);
240
241 pRef = block;
242 x0 = 0;
243 y0 = 0;
244 width = chromaPartWidth;
245 height = chromaPartHeight+1;
246 }
247
248 val = 8 - yFrac;
249
250 for (comp = 0; comp <= 1; comp++)
251 {
252
253 ptrA = pRef + (comp * height + (u32)y0) * width + x0;
254 cbr = predPartChroma + comp * 8 * 8;
255
256 /* 2x2 pels per iteration
257 * bilinear vertical interpolation */
258 for (y = (chromaPartHeight >> 1); y; y--)
259 {
260 for (x = (chromaPartWidth >> 1); x; x--)
261 {
262 tmp3 = ptrA[width*2];
263 tmp2 = ptrA[width];
264 tmp1 = *ptrA++;
265 c = ((val * tmp2 + yFrac * tmp3) << 3) + 32;
266 c >>= 6;
267 cbr[8] = (u8)c;
268 c = ((val * tmp1 + yFrac * tmp2) << 3) + 32;
269 c >>= 6;
270 *cbr++ = (u8)c;
271 tmp3 = ptrA[width*2];
272 tmp2 = ptrA[width];
273 tmp1 = *ptrA++;
274 c = ((val * tmp2 + yFrac * tmp3) << 3) + 32;
275 c >>= 6;
276 cbr[8] = (u8)c;
277 c = ((val * tmp1 + yFrac * tmp2) << 3) + 32;
278 c >>= 6;
279 *cbr++ = (u8)c;
280 }
281 cbr += 2*8 - chromaPartWidth;
282 ptrA += 2*width - chromaPartWidth;
283 }
284 }
285
286}
287#endif
288/*------------------------------------------------------------------------------
289
290 Function: h264bsdInterpolateChromaHorVer
291
292 Functional description:
293 This function performs chroma interpolation in horizontal and
294 vertical direction. Overfilling is done only if needed. Reference
295 image (ref) is read at correct position and the predicted part
296 is written to macroblock's chrominance (predPartChroma)
297
298------------------------------------------------------------------------------*/
299
300void h264bsdInterpolateChromaHorVer(
301 u8 *ref,
302 u8 *predPartChroma,
303 i32 x0,
304 i32 y0,
305 u32 width,
306 u32 height,
307 u32 xFrac,
308 u32 yFrac,
309 u32 chromaPartWidth,
310 u32 chromaPartHeight)
311{
312 u8 block[9*9*2];
313 u32 x, y, tmp1, tmp2, tmp3, tmp4, tmp5, tmp6, valX, valY, plus32 = 32;
314 u32 comp;
315 u8 *ptrA, *cbr;
316
317/* Code */
318
319 ASSERT(predPartChroma);
320 ASSERT(chromaPartWidth);
321 ASSERT(chromaPartHeight);
322 ASSERT(xFrac < 8);
323 ASSERT(yFrac < 8);
324 ASSERT(ref);
325
326 if ((x0 < 0) || ((u32)x0+chromaPartWidth+1 > width) ||
327 (y0 < 0) || ((u32)y0+chromaPartHeight+1 > height))
328 {
329 h264bsdFillBlock(ref, block, x0, y0, width, height,
330 chromaPartWidth + 1, chromaPartHeight + 1, chromaPartWidth + 1);
331 ref += width * height;
332 h264bsdFillBlock(ref, block + (chromaPartWidth+1)*(chromaPartHeight+1),
333 x0, y0, width, height, chromaPartWidth + 1,
334 chromaPartHeight + 1, chromaPartWidth + 1);
335
336 ref = block;
337 x0 = 0;
338 y0 = 0;
339 width = chromaPartWidth+1;
340 height = chromaPartHeight+1;
341 }
342
343 valX = 8 - xFrac;
344 valY = 8 - yFrac;
345
346 for (comp = 0; comp <= 1; comp++)
347 {
348
349 ptrA = ref + (comp * height + (u32)y0) * width + x0;
350 cbr = predPartChroma + comp * 8 * 8;
351
352 /* 2x2 pels per iteration
353 * bilinear vertical and horizontal interpolation */
354 for (y = (chromaPartHeight >> 1); y; y--)
355 {
356 tmp1 = *ptrA;
357 tmp3 = ptrA[width];
358 tmp5 = ptrA[width*2];
359 tmp1 *= valY;
360 tmp1 += tmp3 * yFrac;
361 tmp3 *= valY;
362 tmp3 += tmp5 * yFrac;
363 for (x = (chromaPartWidth >> 1); x; x--)
364 {
365 tmp2 = *++ptrA;
366 tmp4 = ptrA[width];
367 tmp6 = ptrA[width*2];
368 tmp2 *= valY;
369 tmp2 += tmp4 * yFrac;
370 tmp4 *= valY;
371 tmp4 += tmp6 * yFrac;
372 tmp1 = tmp1 * valX + plus32;
373 tmp3 = tmp3 * valX + plus32;
374 tmp1 += tmp2 * xFrac;
375 tmp1 >>= 6;
376 tmp3 += tmp4 * xFrac;
377 tmp3 >>= 6;
378 cbr[8] = (u8)tmp3;
379 *cbr++ = (u8)tmp1;
380
381 tmp1 = *++ptrA;
382 tmp3 = ptrA[width];
383 tmp5 = ptrA[width*2];
384 tmp1 *= valY;
385 tmp1 += tmp3 * yFrac;
386 tmp3 *= valY;
387 tmp3 += tmp5 * yFrac;
388 tmp2 = tmp2 * valX + plus32;
389 tmp4 = tmp4 * valX + plus32;
390 tmp2 += tmp1 * xFrac;
391 tmp2 >>= 6;
392 tmp4 += tmp3 * xFrac;
393 tmp4 >>= 6;
394 cbr[8] = (u8)tmp4;
395 *cbr++ = (u8)tmp2;
396 }
397 cbr += 2*8 - chromaPartWidth;
398 ptrA += 2*width - chromaPartWidth;
399 }
400 }
401
402}
403
404/*------------------------------------------------------------------------------
405
406 Function: PredictChroma
407
408 Functional description:
409 Top level chroma prediction function that calls the appropriate
410 interpolation function. The output is written to macroblock array.
411
412------------------------------------------------------------------------------*/
413
414static void PredictChroma(
415 u8 *mbPartChroma,
416 u32 xAL,
417 u32 yAL,
418 u32 partWidth,
419 u32 partHeight,
420 mv_t *mv,
421 image_t *refPic)
422{
423
424/* Variables */
425
426 u32 xFrac, yFrac, width, height, chromaPartWidth, chromaPartHeight;
427 i32 xInt, yInt;
428 u8 *ref;
429
430/* Code */
431
432 ASSERT(mv);
433 ASSERT(refPic);
434 ASSERT(refPic->data);
435 ASSERT(refPic->width);
436 ASSERT(refPic->height);
437
438 width = 8 * refPic->width;
439 height = 8 * refPic->height;
440
441 xInt = (xAL >> 1) + (mv->hor >> 3);
442 yInt = (yAL >> 1) + (mv->ver >> 3);
443 xFrac = mv->hor & 0x7;
444 yFrac = mv->ver & 0x7;
445
446 chromaPartWidth = partWidth >> 1;
447 chromaPartHeight = partHeight >> 1;
448 ref = refPic->data + 256 * refPic->width * refPic->height;
449
450 if (xFrac && yFrac)
451 {
452 h264bsdInterpolateChromaHorVer(ref, mbPartChroma, xInt, yInt, width,
453 height, xFrac, yFrac, chromaPartWidth, chromaPartHeight);
454 }
455 else if (xFrac)
456 {
457 h264bsdInterpolateChromaHor(ref, mbPartChroma, xInt, yInt, width,
458 height, xFrac, chromaPartWidth, chromaPartHeight);
459 }
460 else if (yFrac)
461 {
462 h264bsdInterpolateChromaVer(ref, mbPartChroma, xInt, yInt, width,
463 height, yFrac, chromaPartWidth, chromaPartHeight);
464 }
465 else
466 {
467 h264bsdFillBlock(ref, mbPartChroma, xInt, yInt, width, height,
468 chromaPartWidth, chromaPartHeight, 8);
469 ref += width * height;
470 h264bsdFillBlock(ref, mbPartChroma + 8*8, xInt, yInt, width, height,
471 chromaPartWidth, chromaPartHeight, 8);
472 }
473
474}
475
476
477/*------------------------------------------------------------------------------
478
479 Function: h264bsdInterpolateVerHalf
480
481 Functional description:
482 Function to perform vertical interpolation of pixel position 'h'
483 for a block. Overfilling is done only if needed. Reference
484 image (ref) is read at correct position and the predicted part
485 is written to macroblock array (mb)
486
487------------------------------------------------------------------------------*/
488#ifndef H264DEC_ARM11
489void h264bsdInterpolateVerHalf(
490 u8 *ref,
491 u8 *mb,
492 i32 x0,
493 i32 y0,
494 u32 width,
495 u32 height,
496 u32 partWidth,
497 u32 partHeight)
498{
499 u32 p1[21*21/4+1];
500 u32 i, j;
501 i32 tmp1, tmp2, tmp3, tmp4, tmp5, tmp6, tmp7;
502 u8 *ptrC, *ptrV;
503 const u8 *clp = h264bsdClip + 512;
504
505 /* Code */
506
507 ASSERT(ref);
508 ASSERT(mb);
509
510 if ((x0 < 0) || ((u32)x0+partWidth > width) ||
511 (y0 < 0) || ((u32)y0+partHeight+5 > height))
512 {
513 h264bsdFillBlock(ref, (u8*)p1, x0, y0, width, height,
514 partWidth, partHeight+5, partWidth);
515
516 x0 = 0;
517 y0 = 0;
518 ref = (u8*)p1;
519 width = partWidth;
520 }
521
522 ref += (u32)y0 * width + (u32)x0;
523
524 ptrC = ref + width;
525 ptrV = ptrC + 5*width;
526
527 /* 4 pixels per iteration, interpolate using 5 vertical samples */
528 for (i = (partHeight >> 2); i; i--)
529 {
530 /* h1 = (16 + A + 16(G+M) + 4(G+M) - 4(C+R) - (C+R) + T) >> 5 */
531 for (j = partWidth; j; j--)
532 {
533 tmp4 = ptrV[-(i32)width*2];
534 tmp5 = ptrV[-(i32)width];
535 tmp1 = ptrV[width];
536 tmp2 = ptrV[width*2];
537 tmp6 = *ptrV++;
538
539 tmp7 = tmp4 + tmp1;
540 tmp2 -= (tmp7 << 2);
541 tmp2 -= tmp7;
542 tmp2 += 16;
543 tmp7 = tmp5 + tmp6;
544 tmp3 = ptrC[width*2];
545 tmp2 += (tmp7 << 4);
546 tmp2 += (tmp7 << 2);
547 tmp2 += tmp3;
548 tmp2 = clp[tmp2>>5];
549 tmp1 += 16;
550 mb[48] = (u8)tmp2;
551
552 tmp7 = tmp3 + tmp6;
553 tmp1 -= (tmp7 << 2);
554 tmp1 -= tmp7;
555 tmp7 = tmp4 + tmp5;
556 tmp2 = ptrC[width];
557 tmp1 += (tmp7 << 4);
558 tmp1 += (tmp7 << 2);
559 tmp1 += tmp2;
560 tmp1 = clp[tmp1>>5];
561 tmp6 += 16;
562 mb[32] = (u8)tmp1;
563
564 tmp7 = tmp2 + tmp5;
565 tmp6 -= (tmp7 << 2);
566 tmp6 -= tmp7;
567 tmp7 = tmp4 + tmp3;
568 tmp1 = *ptrC;
569 tmp6 += (tmp7 << 4);
570 tmp6 += (tmp7 << 2);
571 tmp6 += tmp1;
572 tmp6 = clp[tmp6>>5];
573 tmp5 += 16;
574 mb[16] = (u8)tmp6;
575
576 tmp1 += tmp4;
577 tmp5 -= (tmp1 << 2);
578 tmp5 -= tmp1;
579 tmp3 += tmp2;
580 tmp6 = ptrC[-(i32)width];
581 tmp5 += (tmp3 << 4);
582 tmp5 += (tmp3 << 2);
583 tmp5 += tmp6;
584 tmp5 = clp[tmp5>>5];
585 *mb++ = (u8)tmp5;
586 ptrC++;
587 }
588 ptrC += 4*width - partWidth;
589 ptrV += 4*width - partWidth;
590 mb += 4*16 - partWidth;
591 }
592
593}
594
595/*------------------------------------------------------------------------------
596
597 Function: h264bsdInterpolateVerQuarter
598
599 Functional description:
600 Function to perform vertical interpolation of pixel position 'd'
601 or 'n' for a block. Overfilling is done only if needed. Reference
602 image (ref) is read at correct position and the predicted part
603 is written to macroblock array (mb)
604
605------------------------------------------------------------------------------*/
606
607void h264bsdInterpolateVerQuarter(
608 u8 *ref,
609 u8 *mb,
610 i32 x0,
611 i32 y0,
612 u32 width,
613 u32 height,
614 u32 partWidth,
615 u32 partHeight,
616 u32 verOffset) /* 0 for pixel d, 1 for pixel n */
617{
618 u32 p1[21*21/4+1];
619 u32 i, j;
620 i32 tmp1, tmp2, tmp3, tmp4, tmp5, tmp6, tmp7;
621 u8 *ptrC, *ptrV, *ptrInt;
622 const u8 *clp = h264bsdClip + 512;
623
624 /* Code */
625
626 ASSERT(ref);
627 ASSERT(mb);
628
629 if ((x0 < 0) || ((u32)x0+partWidth > width) ||
630 (y0 < 0) || ((u32)y0+partHeight+5 > height))
631 {
632 h264bsdFillBlock(ref, (u8*)p1, x0, y0, width, height,
633 partWidth, partHeight+5, partWidth);
634
635 x0 = 0;
636 y0 = 0;
637 ref = (u8*)p1;
638 width = partWidth;
639 }
640
641 ref += (u32)y0 * width + (u32)x0;
642
643 ptrC = ref + width;
644 ptrV = ptrC + 5*width;
645
646 /* Pointer to integer sample position, either M or R */
647 ptrInt = ptrC + (2+verOffset)*width;
648
649 /* 4 pixels per iteration
650 * interpolate using 5 vertical samples and average between
651 * interpolated value and integer sample value */
652 for (i = (partHeight >> 2); i; i--)
653 {
654 /* h1 = (16 + A + 16(G+M) + 4(G+M) - 4(C+R) - (C+R) + T) >> 5 */
655 for (j = partWidth; j; j--)
656 {
657 tmp4 = ptrV[-(i32)width*2];
658 tmp5 = ptrV[-(i32)width];
659 tmp1 = ptrV[width];
660 tmp2 = ptrV[width*2];
661 tmp6 = *ptrV++;
662
663 tmp7 = tmp4 + tmp1;
664 tmp2 -= (tmp7 << 2);
665 tmp2 -= tmp7;
666 tmp2 += 16;
667 tmp7 = tmp5 + tmp6;
668 tmp3 = ptrC[width*2];
669 tmp2 += (tmp7 << 4);
670 tmp2 += (tmp7 << 2);
671 tmp2 += tmp3;
672 tmp2 = clp[tmp2>>5];
673 tmp7 = ptrInt[width*2];
674 tmp1 += 16;
675 tmp2++;
676 mb[48] = (u8)((tmp2 + tmp7) >> 1);
677
678 tmp7 = tmp3 + tmp6;
679 tmp1 -= (tmp7 << 2);
680 tmp1 -= tmp7;
681 tmp7 = tmp4 + tmp5;
682 tmp2 = ptrC[width];
683 tmp1 += (tmp7 << 4);
684 tmp1 += (tmp7 << 2);
685 tmp1 += tmp2;
686 tmp1 = clp[tmp1>>5];
687 tmp7 = ptrInt[width];
688 tmp6 += 16;
689 tmp1++;
690 mb[32] = (u8)((tmp1 + tmp7) >> 1);
691
692 tmp7 = tmp2 + tmp5;
693 tmp6 -= (tmp7 << 2);
694 tmp6 -= tmp7;
695 tmp7 = tmp4 + tmp3;
696 tmp1 = *ptrC;
697 tmp6 += (tmp7 << 4);
698 tmp6 += (tmp7 << 2);
699 tmp6 += tmp1;
700 tmp6 = clp[tmp6>>5];
701 tmp7 = *ptrInt;
702 tmp5 += 16;
703 tmp6++;
704 mb[16] = (u8)((tmp6 + tmp7) >> 1);
705
706 tmp1 += tmp4;
707 tmp5 -= (tmp1 << 2);
708 tmp5 -= tmp1;
709 tmp3 += tmp2;
710 tmp6 = ptrC[-(i32)width];
711 tmp5 += (tmp3 << 4);
712 tmp5 += (tmp3 << 2);
713 tmp5 += tmp6;
714 tmp5 = clp[tmp5>>5];
715 tmp7 = ptrInt[-(i32)width];
716 tmp5++;
717 *mb++ = (u8)((tmp5 + tmp7) >> 1);
718 ptrC++;
719 ptrInt++;
720 }
721 ptrC += 4*width - partWidth;
722 ptrV += 4*width - partWidth;
723 ptrInt += 4*width - partWidth;
724 mb += 4*16 - partWidth;
725 }
726
727}
728
729/*------------------------------------------------------------------------------
730
731 Function: h264bsdInterpolateHorHalf
732
733 Functional description:
734 Function to perform horizontal interpolation of pixel position 'b'
735 for a block. Overfilling is done only if needed. Reference
736 image (ref) is read at correct position and the predicted part
737 is written to macroblock array (mb)
738
739------------------------------------------------------------------------------*/
740
741void h264bsdInterpolateHorHalf(
742 u8 *ref,
743 u8 *mb,
744 i32 x0,
745 i32 y0,
746 u32 width,
747 u32 height,
748 u32 partWidth,
749 u32 partHeight)
750{
751 u32 p1[21*21/4+1];
752 u8 *ptrJ;
753 u32 x, y;
754 i32 tmp1, tmp2, tmp3, tmp4, tmp5, tmp6, tmp7;
755 const u8 *clp = h264bsdClip + 512;
756
757 /* Code */
758
759 ASSERT(ref);
760 ASSERT(mb);
761 ASSERT((partWidth&0x3) == 0);
762 ASSERT((partHeight&0x3) == 0);
763
764 if ((x0 < 0) || ((u32)x0+partWidth+5 > width) ||
765 (y0 < 0) || ((u32)y0+partHeight > height))
766 {
767 h264bsdFillBlock(ref, (u8*)p1, x0, y0, width, height,
768 partWidth+5, partHeight, partWidth+5);
769
770 x0 = 0;
771 y0 = 0;
772 ref = (u8*)p1;
773 width = partWidth + 5;
774 }
775
776 ref += (u32)y0 * width + (u32)x0;
777
778 ptrJ = ref + 5;
779
780 for (y = partHeight; y; y--)
781 {
782 tmp6 = *(ptrJ - 5);
783 tmp5 = *(ptrJ - 4);
784 tmp4 = *(ptrJ - 3);
785 tmp3 = *(ptrJ - 2);
786 tmp2 = *(ptrJ - 1);
787
788 /* calculate 4 pels per iteration */
789 for (x = (partWidth >> 2); x; x--)
790 {
791 /* First pixel */
792 tmp6 += 16;
793 tmp7 = tmp3 + tmp4;
794 tmp6 += (tmp7 << 4);
795 tmp6 += (tmp7 << 2);
796 tmp7 = tmp2 + tmp5;
797 tmp1 = *ptrJ++;
798 tmp6 -= (tmp7 << 2);
799 tmp6 -= tmp7;
800 tmp6 += tmp1;
801 tmp6 = clp[tmp6>>5];
802 /* Second pixel */
803 tmp5 += 16;
804 tmp7 = tmp2 + tmp3;
805 *mb++ = (u8)tmp6;
806 tmp5 += (tmp7 << 4);
807 tmp5 += (tmp7 << 2);
808 tmp7 = tmp1 + tmp4;
809 tmp6 = *ptrJ++;
810 tmp5 -= (tmp7 << 2);
811 tmp5 -= tmp7;
812 tmp5 += tmp6;
813 tmp5 = clp[tmp5>>5];
814 /* Third pixel */
815 tmp4 += 16;
816 tmp7 = tmp1 + tmp2;
817 *mb++ = (u8)tmp5;
818 tmp4 += (tmp7 << 4);
819 tmp4 += (tmp7 << 2);
820 tmp7 = tmp6 + tmp3;
821 tmp5 = *ptrJ++;
822 tmp4 -= (tmp7 << 2);
823 tmp4 -= tmp7;
824 tmp4 += tmp5;
825 tmp4 = clp[tmp4>>5];
826 /* Fourth pixel */
827 tmp3 += 16;
828 tmp7 = tmp6 + tmp1;
829 *mb++ = (u8)tmp4;
830 tmp3 += (tmp7 << 4);
831 tmp3 += (tmp7 << 2);
832 tmp7 = tmp5 + tmp2;
833 tmp4 = *ptrJ++;
834 tmp3 -= (tmp7 << 2);
835 tmp3 -= tmp7;
836 tmp3 += tmp4;
837 tmp3 = clp[tmp3>>5];
838 tmp7 = tmp4;
839 tmp4 = tmp6;
840 tmp6 = tmp2;
841 tmp2 = tmp7;
842 *mb++ = (u8)tmp3;
843 tmp3 = tmp5;
844 tmp5 = tmp1;
845 }
846 ptrJ += width - partWidth;
847 mb += 16 - partWidth;
848 }
849
850}
851
852/*------------------------------------------------------------------------------
853
854 Function: h264bsdInterpolateHorQuarter
855
856 Functional description:
857 Function to perform horizontal interpolation of pixel position 'a'
858 or 'c' for a block. Overfilling is done only if needed. Reference
859 image (ref) is read at correct position and the predicted part
860 is written to macroblock array (mb)
861
862------------------------------------------------------------------------------*/
863
864void h264bsdInterpolateHorQuarter(
865 u8 *ref,
866 u8 *mb,
867 i32 x0,
868 i32 y0,
869 u32 width,
870 u32 height,
871 u32 partWidth,
872 u32 partHeight,
873 u32 horOffset) /* 0 for pixel a, 1 for pixel c */
874{
875 u32 p1[21*21/4+1];
876 u8 *ptrJ;
877 u32 x, y;
878 i32 tmp1, tmp2, tmp3, tmp4, tmp5, tmp6, tmp7;
879 const u8 *clp = h264bsdClip + 512;
880
881 /* Code */
882
883 ASSERT(ref);
884 ASSERT(mb);
885
886 if ((x0 < 0) || ((u32)x0+partWidth+5 > width) ||
887 (y0 < 0) || ((u32)y0+partHeight > height))
888 {
889 h264bsdFillBlock(ref, (u8*)p1, x0, y0, width, height,
890 partWidth+5, partHeight, partWidth+5);
891
892 x0 = 0;
893 y0 = 0;
894 ref = (u8*)p1;
895 width = partWidth + 5;
896 }
897
898 ref += (u32)y0 * width + (u32)x0;
899
900 ptrJ = ref + 5;
901
902 for (y = partHeight; y; y--)
903 {
904 tmp6 = *(ptrJ - 5);
905 tmp5 = *(ptrJ - 4);
906 tmp4 = *(ptrJ - 3);
907 tmp3 = *(ptrJ - 2);
908 tmp2 = *(ptrJ - 1);
909
910 /* calculate 4 pels per iteration */
911 for (x = (partWidth >> 2); x; x--)
912 {
913 /* First pixel */
914 tmp6 += 16;
915 tmp7 = tmp3 + tmp4;
916 tmp6 += (tmp7 << 4);
917 tmp6 += (tmp7 << 2);
918 tmp7 = tmp2 + tmp5;
919 tmp1 = *ptrJ++;
920 tmp6 -= (tmp7 << 2);
921 tmp6 -= tmp7;
922 tmp6 += tmp1;
923 tmp6 = clp[tmp6>>5];
924 tmp5 += 16;
925 if (!horOffset)
926 tmp6 += tmp4;
927 else
928 tmp6 += tmp3;
929 *mb++ = (u8)((tmp6 + 1) >> 1);
930 /* Second pixel */
931 tmp7 = tmp2 + tmp3;
932 tmp5 += (tmp7 << 4);
933 tmp5 += (tmp7 << 2);
934 tmp7 = tmp1 + tmp4;
935 tmp6 = *ptrJ++;
936 tmp5 -= (tmp7 << 2);
937 tmp5 -= tmp7;
938 tmp5 += tmp6;
939 tmp5 = clp[tmp5>>5];
940 tmp4 += 16;
941 if (!horOffset)
942 tmp5 += tmp3;
943 else
944 tmp5 += tmp2;
945 *mb++ = (u8)((tmp5 + 1) >> 1);
946 /* Third pixel */
947 tmp7 = tmp1 + tmp2;
948 tmp4 += (tmp7 << 4);
949 tmp4 += (tmp7 << 2);
950 tmp7 = tmp6 + tmp3;
951 tmp5 = *ptrJ++;
952 tmp4 -= (tmp7 << 2);
953 tmp4 -= tmp7;
954 tmp4 += tmp5;
955 tmp4 = clp[tmp4>>5];
956 tmp3 += 16;
957 if (!horOffset)
958 tmp4 += tmp2;
959 else
960 tmp4 += tmp1;
961 *mb++ = (u8)((tmp4 + 1) >> 1);
962 /* Fourth pixel */
963 tmp7 = tmp6 + tmp1;
964 tmp3 += (tmp7 << 4);
965 tmp3 += (tmp7 << 2);
966 tmp7 = tmp5 + tmp2;
967 tmp4 = *ptrJ++;
968 tmp3 -= (tmp7 << 2);
969 tmp3 -= tmp7;
970 tmp3 += tmp4;
971 tmp3 = clp[tmp3>>5];
972 if (!horOffset)
973 tmp3 += tmp1;
974 else
975 tmp3 += tmp6;
976 *mb++ = (u8)((tmp3 + 1) >> 1);
977 tmp3 = tmp5;
978 tmp5 = tmp1;
979 tmp7 = tmp4;
980 tmp4 = tmp6;
981 tmp6 = tmp2;
982 tmp2 = tmp7;
983 }
984 ptrJ += width - partWidth;
985 mb += 16 - partWidth;
986 }
987
988}
989
990/*------------------------------------------------------------------------------
991
992 Function: h264bsdInterpolateHorVerQuarter
993
994 Functional description:
995 Function to perform horizontal and vertical interpolation of pixel
996 position 'e', 'g', 'p' or 'r' for a block. Overfilling is done only
997 if needed. Reference image (ref) is read at correct position and
998 the predicted part is written to macroblock array (mb)
999
1000------------------------------------------------------------------------------*/
1001
1002void h264bsdInterpolateHorVerQuarter(
1003 u8 *ref,
1004 u8 *mb,
1005 i32 x0,
1006 i32 y0,
1007 u32 width,
1008 u32 height,
1009 u32 partWidth,
1010 u32 partHeight,
1011 u32 horVerOffset) /* 0 for pixel e, 1 for pixel g,
1012 2 for pixel p, 3 for pixel r */
1013{
1014 u32 p1[21*21/4+1];
1015 u8 *ptrC, *ptrJ, *ptrV;
1016 u32 x, y;
1017 i32 tmp1, tmp2, tmp3, tmp4, tmp5, tmp6, tmp7;
1018 const u8 *clp = h264bsdClip + 512;
1019
1020 /* Code */
1021
1022 ASSERT(ref);
1023 ASSERT(mb);
1024
1025 if ((x0 < 0) || ((u32)x0+partWidth+5 > width) ||
1026 (y0 < 0) || ((u32)y0+partHeight+5 > height))
1027 {
1028 h264bsdFillBlock(ref, (u8*)p1, x0, y0, width, height,
1029 partWidth+5, partHeight+5, partWidth+5);
1030
1031 x0 = 0;
1032 y0 = 0;
1033 ref = (u8*)p1;
1034 width = partWidth+5;
1035 }
1036
1037 /* Ref points to G + (-2, -2) */
1038 ref += (u32)y0 * width + (u32)x0;
1039
1040 /* ptrJ points to either J or Q, depending on vertical offset */
1041 ptrJ = ref + (((horVerOffset & 0x2) >> 1) + 2) * width + 5;
1042
1043 /* ptrC points to either C or D, depending on horizontal offset */
1044 ptrC = ref + width + 2 + (horVerOffset & 0x1);
1045
1046 for (y = partHeight; y; y--)
1047 {
1048 tmp6 = *(ptrJ - 5);
1049 tmp5 = *(ptrJ - 4);
1050 tmp4 = *(ptrJ - 3);
1051 tmp3 = *(ptrJ - 2);
1052 tmp2 = *(ptrJ - 1);
1053
1054 /* Horizontal interpolation, calculate 4 pels per iteration */
1055 for (x = (partWidth >> 2); x; x--)
1056 {
1057 /* First pixel */
1058 tmp6 += 16;
1059 tmp7 = tmp3 + tmp4;
1060 tmp6 += (tmp7 << 4);
1061 tmp6 += (tmp7 << 2);
1062 tmp7 = tmp2 + tmp5;
1063 tmp1 = *ptrJ++;
1064 tmp6 -= (tmp7 << 2);
1065 tmp6 -= tmp7;
1066 tmp6 += tmp1;
1067 tmp6 = clp[tmp6>>5];
1068 /* Second pixel */
1069 tmp5 += 16;
1070 tmp7 = tmp2 + tmp3;
1071 *mb++ = (u8)tmp6;
1072 tmp5 += (tmp7 << 4);
1073 tmp5 += (tmp7 << 2);
1074 tmp7 = tmp1 + tmp4;
1075 tmp6 = *ptrJ++;
1076 tmp5 -= (tmp7 << 2);
1077 tmp5 -= tmp7;
1078 tmp5 += tmp6;
1079 tmp5 = clp[tmp5>>5];
1080 /* Third pixel */
1081 tmp4 += 16;
1082 tmp7 = tmp1 + tmp2;
1083 *mb++ = (u8)tmp5;
1084 tmp4 += (tmp7 << 4);
1085 tmp4 += (tmp7 << 2);
1086 tmp7 = tmp6 + tmp3;
1087 tmp5 = *ptrJ++;
1088 tmp4 -= (tmp7 << 2);
1089 tmp4 -= tmp7;
1090 tmp4 += tmp5;
1091 tmp4 = clp[tmp4>>5];
1092 /* Fourth pixel */
1093 tmp3 += 16;
1094 tmp7 = tmp6 + tmp1;
1095 *mb++ = (u8)tmp4;
1096 tmp3 += (tmp7 << 4);
1097 tmp3 += (tmp7 << 2);
1098 tmp7 = tmp5 + tmp2;
1099 tmp4 = *ptrJ++;
1100 tmp3 -= (tmp7 << 2);
1101 tmp3 -= tmp7;
1102 tmp3 += tmp4;
1103 tmp3 = clp[tmp3>>5];
1104 tmp7 = tmp4;
1105 tmp4 = tmp6;
1106 tmp6 = tmp2;
1107 tmp2 = tmp7;
1108 *mb++ = (u8)tmp3;
1109 tmp3 = tmp5;
1110 tmp5 = tmp1;
1111 }
1112 ptrJ += width - partWidth;
1113 mb += 16 - partWidth;
1114 }
1115
1116 mb -= 16*partHeight;
1117 ptrV = ptrC + 5*width;
1118
1119 for (y = (partHeight >> 2); y; y--)
1120 {
1121 /* Vertical interpolation and averaging, 4 pels per iteration */
1122 for (x = partWidth; x; x--)
1123 {
1124 tmp4 = ptrV[-(i32)width*2];
1125 tmp5 = ptrV[-(i32)width];
1126 tmp1 = ptrV[width];
1127 tmp2 = ptrV[width*2];
1128 tmp6 = *ptrV++;
1129
1130 tmp7 = tmp4 + tmp1;
1131 tmp2 -= (tmp7 << 2);
1132 tmp2 -= tmp7;
1133 tmp2 += 16;
1134 tmp7 = tmp5 + tmp6;
1135 tmp3 = ptrC[width*2];
1136 tmp2 += (tmp7 << 4);
1137 tmp2 += (tmp7 << 2);
1138 tmp2 += tmp3;
1139 tmp7 = clp[tmp2>>5];
1140 tmp2 = mb[48];
1141 tmp1 += 16;
1142 tmp7++;
1143 mb[48] = (u8)((tmp2 + tmp7) >> 1);
1144
1145 tmp7 = tmp3 + tmp6;
1146 tmp1 -= (tmp7 << 2);
1147 tmp1 -= tmp7;
1148 tmp7 = tmp4 + tmp5;
1149 tmp2 = ptrC[width];
1150 tmp1 += (tmp7 << 4);
1151 tmp1 += (tmp7 << 2);
1152 tmp1 += tmp2;
1153 tmp7 = clp[tmp1>>5];
1154 tmp1 = mb[32];
1155 tmp6 += 16;
1156 tmp7++;
1157 mb[32] = (u8)((tmp1 + tmp7) >> 1);
1158
1159 tmp1 = *ptrC;
1160 tmp7 = tmp2 + tmp5;
1161 tmp6 -= (tmp7 << 2);
1162 tmp6 -= tmp7;
1163 tmp7 = tmp4 + tmp3;
1164 tmp6 += (tmp7 << 4);
1165 tmp6 += (tmp7 << 2);
1166 tmp6 += tmp1;
1167 tmp7 = clp[tmp6>>5];
1168 tmp6 = mb[16];
1169 tmp5 += 16;
1170 tmp7++;
1171 mb[16] = (u8)((tmp6 + tmp7) >> 1);
1172
1173 tmp6 = ptrC[-(i32)width];
1174 tmp1 += tmp4;
1175 tmp5 -= (tmp1 << 2);
1176 tmp5 -= tmp1;
1177 tmp3 += tmp2;
1178 tmp5 += (tmp3 << 4);
1179 tmp5 += (tmp3 << 2);
1180 tmp5 += tmp6;
1181 tmp7 = clp[tmp5>>5];
1182 tmp5 = *mb;
1183 tmp7++;
1184 *mb++ = (u8)((tmp5 + tmp7) >> 1);
1185 ptrC++;
1186
1187 }
1188 ptrC += 4*width - partWidth;
1189 ptrV += 4*width - partWidth;
1190 mb += 4*16 - partWidth;
1191 }
1192
1193}
1194#endif
1195
1196/*------------------------------------------------------------------------------
1197
1198 Function: h264bsdInterpolateMidHalf
1199
1200 Functional description:
1201 Function to perform horizontal and vertical interpolation of pixel
1202 position 'j' for a block. Overfilling is done only if needed.
1203 Reference image (ref) is read at correct position and the predicted
1204 part is written to macroblock array (mb)
1205
1206------------------------------------------------------------------------------*/
1207
1208void h264bsdInterpolateMidHalf(
1209 u8 *ref,
1210 u8 *mb,
1211 i32 x0,
1212 i32 y0,
1213 u32 width,
1214 u32 height,
1215 u32 partWidth,
1216 u32 partHeight)
1217{
1218 u32 p1[21*21/4+1];
1219 u32 x, y;
1220 i32 tmp1, tmp2, tmp3, tmp4, tmp5, tmp6, tmp7;
1221 i32 *ptrC, *ptrV, *b1;
1222 u8 *ptrJ;
1223 i32 table[21*16];
1224 const u8 *clp = h264bsdClip + 512;
1225
1226 /* Code */
1227
1228 ASSERT(ref);
1229 ASSERT(mb);
1230
1231 if ((x0 < 0) || ((u32)x0+partWidth+5 > width) ||
1232 (y0 < 0) || ((u32)y0+partHeight+5 > height))
1233 {
1234 h264bsdFillBlock(ref, (u8*)p1, x0, y0, width, height,
1235 partWidth+5, partHeight+5, partWidth+5);
1236
1237 x0 = 0;
1238 y0 = 0;
1239 ref = (u8*)p1;
1240 width = partWidth+5;
1241 }
1242
1243 ref += (u32)y0 * width + (u32)x0;
1244
1245 b1 = table;
1246 ptrJ = ref + 5;
1247
1248 /* First step: calculate intermediate values for
1249 * horizontal interpolation */
1250 for (y = partHeight + 5; y; y--)
1251 {
1252 tmp6 = *(ptrJ - 5);
1253 tmp5 = *(ptrJ - 4);
1254 tmp4 = *(ptrJ - 3);
1255 tmp3 = *(ptrJ - 2);
1256 tmp2 = *(ptrJ - 1);
1257
1258 /* 4 pels per iteration */
1259 for (x = (partWidth >> 2); x; x--)
1260 {
1261 /* First pixel */
1262 tmp7 = tmp3 + tmp4;
1263 tmp6 += (tmp7 << 4);
1264 tmp6 += (tmp7 << 2);
1265 tmp7 = tmp2 + tmp5;
1266 tmp1 = *ptrJ++;
1267 tmp6 -= (tmp7 << 2);
1268 tmp6 -= tmp7;
1269 tmp6 += tmp1;
1270 *b1++ = tmp6;
1271 /* Second pixel */
1272 tmp7 = tmp2 + tmp3;
1273 tmp5 += (tmp7 << 4);
1274 tmp5 += (tmp7 << 2);
1275 tmp7 = tmp1 + tmp4;
1276 tmp6 = *ptrJ++;
1277 tmp5 -= (tmp7 << 2);
1278 tmp5 -= tmp7;
1279 tmp5 += tmp6;
1280 *b1++ = tmp5;
1281 /* Third pixel */
1282 tmp7 = tmp1 + tmp2;
1283 tmp4 += (tmp7 << 4);
1284 tmp4 += (tmp7 << 2);
1285 tmp7 = tmp6 + tmp3;
1286 tmp5 = *ptrJ++;
1287 tmp4 -= (tmp7 << 2);
1288 tmp4 -= tmp7;
1289 tmp4 += tmp5;
1290 *b1++ = tmp4;
1291 /* Fourth pixel */
1292 tmp7 = tmp6 + tmp1;
1293 tmp3 += (tmp7 << 4);
1294 tmp3 += (tmp7 << 2);
1295 tmp7 = tmp5 + tmp2;
1296 tmp4 = *ptrJ++;
1297 tmp3 -= (tmp7 << 2);
1298 tmp3 -= tmp7;
1299 tmp3 += tmp4;
1300 *b1++ = tmp3;
1301 tmp7 = tmp4;
1302 tmp4 = tmp6;
1303 tmp6 = tmp2;
1304 tmp2 = tmp7;
1305 tmp3 = tmp5;
1306 tmp5 = tmp1;
1307 }
1308 ptrJ += width - partWidth;
1309 }
1310
1311 /* Second step: calculate vertical interpolation */
1312 ptrC = table + partWidth;
1313 ptrV = ptrC + 5*partWidth;
1314 for (y = (partHeight >> 2); y; y--)
1315 {
1316 /* 4 pels per iteration */
1317 for (x = partWidth; x; x--)
1318 {
1319 tmp4 = ptrV[-(i32)partWidth*2];
1320 tmp5 = ptrV[-(i32)partWidth];
1321 tmp1 = ptrV[partWidth];
1322 tmp2 = ptrV[partWidth*2];
1323 tmp6 = *ptrV++;
1324
1325 tmp7 = tmp4 + tmp1;
1326 tmp2 -= (tmp7 << 2);
1327 tmp2 -= tmp7;
1328 tmp2 += 512;
1329 tmp7 = tmp5 + tmp6;
1330 tmp3 = ptrC[partWidth*2];
1331 tmp2 += (tmp7 << 4);
1332 tmp2 += (tmp7 << 2);
1333 tmp2 += tmp3;
1334 tmp7 = clp[tmp2>>10];
1335 tmp1 += 512;
1336 mb[48] = (u8)tmp7;
1337
1338 tmp7 = tmp3 + tmp6;
1339 tmp1 -= (tmp7 << 2);
1340 tmp1 -= tmp7;
1341 tmp7 = tmp4 + tmp5;
1342 tmp2 = ptrC[partWidth];
1343 tmp1 += (tmp7 << 4);
1344 tmp1 += (tmp7 << 2);
1345 tmp1 += tmp2;
1346 tmp7 = clp[tmp1>>10];
1347 tmp6 += 512;
1348 mb[32] = (u8)tmp7;
1349
1350 tmp1 = *ptrC;
1351 tmp7 = tmp2 + tmp5;
1352 tmp6 -= (tmp7 << 2);
1353 tmp6 -= tmp7;
1354 tmp7 = tmp4 + tmp3;
1355 tmp6 += (tmp7 << 4);
1356 tmp6 += (tmp7 << 2);
1357 tmp6 += tmp1;
1358 tmp7 = clp[tmp6>>10];
1359 tmp5 += 512;
1360 mb[16] = (u8)tmp7;
1361
1362 tmp6 = ptrC[-(i32)partWidth];
1363 tmp1 += tmp4;
1364 tmp5 -= (tmp1 << 2);
1365 tmp5 -= tmp1;
1366 tmp3 += tmp2;
1367 tmp5 += (tmp3 << 4);
1368 tmp5 += (tmp3 << 2);
1369 tmp5 += tmp6;
1370 tmp7 = clp[tmp5>>10];
1371 *mb++ = (u8)tmp7;
1372 ptrC++;
1373 }
1374 mb += 4*16 - partWidth;
1375 ptrC += 3*partWidth;
1376 ptrV += 3*partWidth;
1377 }
1378
1379}
1380
1381
1382/*------------------------------------------------------------------------------
1383
1384 Function: h264bsdInterpolateMidVerQuarter
1385
1386 Functional description:
1387 Function to perform horizontal and vertical interpolation of pixel
1388 position 'f' or 'q' for a block. Overfilling is done only if needed.
1389 Reference image (ref) is read at correct position and the predicted
1390 part is written to macroblock array (mb)
1391
1392------------------------------------------------------------------------------*/
1393
1394void h264bsdInterpolateMidVerQuarter(
1395 u8 *ref,
1396 u8 *mb,
1397 i32 x0,
1398 i32 y0,
1399 u32 width,
1400 u32 height,
1401 u32 partWidth,
1402 u32 partHeight,
1403 u32 verOffset) /* 0 for pixel f, 1 for pixel q */
1404{
1405 u32 p1[21*21/4+1];
1406 u32 x, y;
1407 i32 tmp1, tmp2, tmp3, tmp4, tmp5, tmp6, tmp7;
1408 i32 *ptrC, *ptrV, *ptrInt, *b1;
1409 u8 *ptrJ;
1410 i32 table[21*16];
1411 const u8 *clp = h264bsdClip + 512;
1412
1413 /* Code */
1414
1415 ASSERT(ref);
1416 ASSERT(mb);
1417
1418 if ((x0 < 0) || ((u32)x0+partWidth+5 > width) ||
1419 (y0 < 0) || ((u32)y0+partHeight+5 > height))
1420 {
1421 h264bsdFillBlock(ref, (u8*)p1, x0, y0, width, height,
1422 partWidth+5, partHeight+5, partWidth+5);
1423
1424 x0 = 0;
1425 y0 = 0;
1426 ref = (u8*)p1;
1427 width = partWidth+5;
1428 }
1429
1430 ref += (u32)y0 * width + (u32)x0;
1431
1432 b1 = table;
1433 ptrJ = ref + 5;
1434
1435 /* First step: calculate intermediate values for
1436 * horizontal interpolation */
1437 for (y = partHeight + 5; y; y--)
1438 {
1439 tmp6 = *(ptrJ - 5);
1440 tmp5 = *(ptrJ - 4);
1441 tmp4 = *(ptrJ - 3);
1442 tmp3 = *(ptrJ - 2);
1443 tmp2 = *(ptrJ - 1);
1444 for (x = (partWidth >> 2); x; x--)
1445 {
1446 /* First pixel */
1447 tmp7 = tmp3 + tmp4;
1448 tmp6 += (tmp7 << 4);
1449 tmp6 += (tmp7 << 2);
1450 tmp7 = tmp2 + tmp5;
1451 tmp1 = *ptrJ++;
1452 tmp6 -= (tmp7 << 2);
1453 tmp6 -= tmp7;
1454 tmp6 += tmp1;
1455 *b1++ = tmp6;
1456 /* Second pixel */
1457 tmp7 = tmp2 + tmp3;
1458 tmp5 += (tmp7 << 4);
1459 tmp5 += (tmp7 << 2);
1460 tmp7 = tmp1 + tmp4;
1461 tmp6 = *ptrJ++;
1462 tmp5 -= (tmp7 << 2);
1463 tmp5 -= tmp7;
1464 tmp5 += tmp6;
1465 *b1++ = tmp5;
1466 /* Third pixel */
1467 tmp7 = tmp1 + tmp2;
1468 tmp4 += (tmp7 << 4);
1469 tmp4 += (tmp7 << 2);
1470 tmp7 = tmp6 + tmp3;
1471 tmp5 = *ptrJ++;
1472 tmp4 -= (tmp7 << 2);
1473 tmp4 -= tmp7;
1474 tmp4 += tmp5;
1475 *b1++ = tmp4;
1476 /* Fourth pixel */
1477 tmp7 = tmp6 + tmp1;
1478 tmp3 += (tmp7 << 4);
1479 tmp3 += (tmp7 << 2);
1480 tmp7 = tmp5 + tmp2;
1481 tmp4 = *ptrJ++;
1482 tmp3 -= (tmp7 << 2);
1483 tmp3 -= tmp7;
1484 tmp3 += tmp4;
1485 *b1++ = tmp3;
1486 tmp7 = tmp4;
1487 tmp4 = tmp6;
1488 tmp6 = tmp2;
1489 tmp2 = tmp7;
1490 tmp3 = tmp5;
1491 tmp5 = tmp1;
1492 }
1493 ptrJ += width - partWidth;
1494 }
1495
1496 /* Second step: calculate vertical interpolation and average */
1497 ptrC = table + partWidth;
1498 ptrV = ptrC + 5*partWidth;
1499 /* Pointer to integer sample position, either M or R */
1500 ptrInt = ptrC + (2+verOffset)*partWidth;
1501 for (y = (partHeight >> 2); y; y--)
1502 {
1503 for (x = partWidth; x; x--)
1504 {
1505 tmp4 = ptrV[-(i32)partWidth*2];
1506 tmp5 = ptrV[-(i32)partWidth];
1507 tmp1 = ptrV[partWidth];
1508 tmp2 = ptrV[partWidth*2];
1509 tmp6 = *ptrV++;
1510
1511 tmp7 = tmp4 + tmp1;
1512 tmp2 -= (tmp7 << 2);
1513 tmp2 -= tmp7;
1514 tmp2 += 512;
1515 tmp7 = tmp5 + tmp6;
1516 tmp3 = ptrC[partWidth*2];
1517 tmp2 += (tmp7 << 4);
1518 tmp2 += (tmp7 << 2);
1519 tmp7 = ptrInt[partWidth*2];
1520 tmp2 += tmp3;
1521 tmp2 = clp[tmp2>>10];
1522 tmp7 += 16;
1523 tmp7 = clp[tmp7>>5];
1524 tmp1 += 512;
1525 tmp2++;
1526 mb[48] = (u8)((tmp7 + tmp2) >> 1);
1527
1528 tmp7 = tmp3 + tmp6;
1529 tmp1 -= (tmp7 << 2);
1530 tmp1 -= tmp7;
1531 tmp7 = tmp4 + tmp5;
1532 tmp2 = ptrC[partWidth];
1533 tmp1 += (tmp7 << 4);
1534 tmp1 += (tmp7 << 2);
1535 tmp7 = ptrInt[partWidth];
1536 tmp1 += tmp2;
1537 tmp1 = clp[tmp1>>10];
1538 tmp7 += 16;
1539 tmp7 = clp[tmp7>>5];
1540 tmp6 += 512;
1541 tmp1++;
1542 mb[32] = (u8)((tmp7 + tmp1) >> 1);
1543
1544 tmp1 = *ptrC;
1545 tmp7 = tmp2 + tmp5;
1546 tmp6 -= (tmp7 << 2);
1547 tmp6 -= tmp7;
1548 tmp7 = tmp4 + tmp3;
1549 tmp6 += (tmp7 << 4);
1550 tmp6 += (tmp7 << 2);
1551 tmp7 = *ptrInt;
1552 tmp6 += tmp1;
1553 tmp6 = clp[tmp6>>10];
1554 tmp7 += 16;
1555 tmp7 = clp[tmp7>>5];
1556 tmp5 += 512;
1557 tmp6++;
1558 mb[16] = (u8)((tmp7 + tmp6) >> 1);
1559
1560 tmp6 = ptrC[-(i32)partWidth];
1561 tmp1 += tmp4;
1562 tmp5 -= (tmp1 << 2);
1563 tmp5 -= tmp1;
1564 tmp3 += tmp2;
1565 tmp5 += (tmp3 << 4);
1566 tmp5 += (tmp3 << 2);
1567 tmp7 = ptrInt[-(i32)partWidth];
1568 tmp5 += tmp6;
1569 tmp5 = clp[tmp5>>10];
1570 tmp7 += 16;
1571 tmp7 = clp[tmp7>>5];
1572 tmp5++;
1573 *mb++ = (u8)((tmp7 + tmp5) >> 1);
1574 ptrC++;
1575 ptrInt++;
1576 }
1577 mb += 4*16 - partWidth;
1578 ptrC += 3*partWidth;
1579 ptrV += 3*partWidth;
1580 ptrInt += 3*partWidth;
1581 }
1582
1583}
1584
1585
1586/*------------------------------------------------------------------------------
1587
1588 Function: h264bsdInterpolateMidHorQuarter
1589
1590 Functional description:
1591 Function to perform horizontal and vertical interpolation of pixel
1592 position 'i' or 'k' for a block. Overfilling is done only if needed.
1593 Reference image (ref) is read at correct position and the predicted
1594 part is written to macroblock array (mb)
1595
1596------------------------------------------------------------------------------*/
1597
1598void h264bsdInterpolateMidHorQuarter(
1599 u8 *ref,
1600 u8 *mb,
1601 i32 x0,
1602 i32 y0,
1603 u32 width,
1604 u32 height,
1605 u32 partWidth,
1606 u32 partHeight,
1607 u32 horOffset) /* 0 for pixel i, 1 for pixel k */
1608{
1609 u32 p1[21*21/4+1];
1610 u32 x, y;
1611 i32 tmp1, tmp2, tmp3, tmp4, tmp5, tmp6, tmp7;
1612 i32 *ptrJ, *ptrInt, *h1;
1613 u8 *ptrC, *ptrV;
1614 i32 table[21*16];
1615 i32 tableWidth = (i32)partWidth+5;
1616 const u8 *clp = h264bsdClip + 512;
1617
1618 /* Code */
1619
1620 ASSERT(ref);
1621 ASSERT(mb);
1622
1623 if ((x0 < 0) || ((u32)x0+partWidth+5 > width) ||
1624 (y0 < 0) || ((u32)y0+partHeight+5 > height))
1625 {
1626 h264bsdFillBlock(ref, (u8*)p1, x0, y0, width, height,
1627 partWidth+5, partHeight+5, partWidth+5);
1628
1629 x0 = 0;
1630 y0 = 0;
1631 ref = (u8*)p1;
1632 width = partWidth+5;
1633 }
1634
1635 ref += (u32)y0 * width + (u32)x0;
1636
1637 h1 = table + tableWidth;
1638 ptrC = ref + width;
1639 ptrV = ptrC + 5*width;
1640
1641 /* First step: calculate intermediate values for
1642 * vertical interpolation */
1643 for (y = (partHeight >> 2); y; y--)
1644 {
1645 for (x = (u32)tableWidth; x; x--)
1646 {
1647 tmp4 = ptrV[-(i32)width*2];
1648 tmp5 = ptrV[-(i32)width];
1649 tmp1 = ptrV[width];
1650 tmp2 = ptrV[width*2];
1651 tmp6 = *ptrV++;
1652
1653 tmp7 = tmp4 + tmp1;
1654 tmp2 -= (tmp7 << 2);
1655 tmp2 -= tmp7;
1656 tmp7 = tmp5 + tmp6;
1657 tmp3 = ptrC[width*2];
1658 tmp2 += (tmp7 << 4);
1659 tmp2 += (tmp7 << 2);
1660 tmp2 += tmp3;
1661 h1[tableWidth*2] = tmp2;
1662
1663 tmp7 = tmp3 + tmp6;
1664 tmp1 -= (tmp7 << 2);
1665 tmp1 -= tmp7;
1666 tmp7 = tmp4 + tmp5;
1667 tmp2 = ptrC[width];
1668 tmp1 += (tmp7 << 4);
1669 tmp1 += (tmp7 << 2);
1670 tmp1 += tmp2;
1671 h1[tableWidth] = tmp1;
1672
1673 tmp1 = *ptrC;
1674 tmp7 = tmp2 + tmp5;
1675 tmp6 -= (tmp7 << 2);
1676 tmp6 -= tmp7;
1677 tmp7 = tmp4 + tmp3;
1678 tmp6 += (tmp7 << 4);
1679 tmp6 += (tmp7 << 2);
1680 tmp6 += tmp1;
1681 *h1 = tmp6;
1682
1683 tmp6 = ptrC[-(i32)width];
1684 tmp1 += tmp4;
1685 tmp5 -= (tmp1 << 2);
1686 tmp5 -= tmp1;
1687 tmp3 += tmp2;
1688 tmp5 += (tmp3 << 4);
1689 tmp5 += (tmp3 << 2);
1690 tmp5 += tmp6;
1691 h1[-tableWidth] = tmp5;
1692 h1++;
1693 ptrC++;
1694 }
1695 ptrC += 4*width - partWidth - 5;
1696 ptrV += 4*width - partWidth - 5;
1697 h1 += 3*tableWidth;
1698 }
1699
1700 /* Second step: calculate horizontal interpolation and average */
1701 ptrJ = table + 5;
1702 /* Pointer to integer sample position, either G or H */
1703 ptrInt = table + 2 + horOffset;
1704 for (y = partHeight; y; y--)
1705 {
1706 tmp6 = *(ptrJ - 5);
1707 tmp5 = *(ptrJ - 4);
1708 tmp4 = *(ptrJ - 3);
1709 tmp3 = *(ptrJ - 2);
1710 tmp2 = *(ptrJ - 1);
1711 for (x = (partWidth>>2); x; x--)
1712 {
1713 /* First pixel */
1714 tmp6 += 512;
1715 tmp7 = tmp3 + tmp4;
1716 tmp6 += (tmp7 << 4);
1717 tmp6 += (tmp7 << 2);
1718 tmp7 = tmp2 + tmp5;
1719 tmp1 = *ptrJ++;
1720 tmp6 -= (tmp7 << 2);
1721 tmp6 -= tmp7;
1722 tmp7 = *ptrInt++;
1723 tmp6 += tmp1;
1724 tmp6 = clp[tmp6 >> 10];
1725 tmp7 += 16;
1726 tmp7 = clp[tmp7 >> 5];
1727 tmp5 += 512;
1728 tmp6++;
1729 *mb++ = (u8)((tmp6 + tmp7) >> 1);
1730 /* Second pixel */
1731 tmp7 = tmp2 + tmp3;
1732 tmp5 += (tmp7 << 4);
1733 tmp5 += (tmp7 << 2);
1734 tmp7 = tmp1 + tmp4;
1735 tmp6 = *ptrJ++;
1736 tmp5 -= (tmp7 << 2);
1737 tmp5 -= tmp7;
1738 tmp7 = *ptrInt++;
1739 tmp5 += tmp6;
1740 tmp5 = clp[tmp5 >> 10];
1741 tmp7 += 16;
1742 tmp7 = clp[tmp7 >> 5];
1743 tmp4 += 512;
1744 tmp5++;
1745 *mb++ = (u8)((tmp5 + tmp7) >> 1);
1746 /* Third pixel */
1747 tmp7 = tmp1 + tmp2;
1748 tmp4 += (tmp7 << 4);
1749 tmp4 += (tmp7 << 2);
1750 tmp7 = tmp6 + tmp3;
1751 tmp5 = *ptrJ++;
1752 tmp4 -= (tmp7 << 2);
1753 tmp4 -= tmp7;
1754 tmp7 = *ptrInt++;
1755 tmp4 += tmp5;
1756 tmp4 = clp[tmp4 >> 10];
1757 tmp7 += 16;
1758 tmp7 = clp[tmp7 >> 5];
1759 tmp3 += 512;
1760 tmp4++;
1761 *mb++ = (u8)((tmp4 + tmp7) >> 1);
1762 /* Fourth pixel */
1763 tmp7 = tmp6 + tmp1;
1764 tmp3 += (tmp7 << 4);
1765 tmp3 += (tmp7 << 2);
1766 tmp7 = tmp5 + tmp2;
1767 tmp4 = *ptrJ++;
1768 tmp3 -= (tmp7 << 2);
1769 tmp3 -= tmp7;
1770 tmp7 = *ptrInt++;
1771 tmp3 += tmp4;
1772 tmp3 = clp[tmp3 >> 10];
1773 tmp7 += 16;
1774 tmp7 = clp[tmp7 >> 5];
1775 tmp3++;
1776 *mb++ = (u8)((tmp3 + tmp7) >> 1);
1777 tmp3 = tmp5;
1778 tmp5 = tmp1;
1779 tmp7 = tmp4;
1780 tmp4 = tmp6;
1781 tmp6 = tmp2;
1782 tmp2 = tmp7;
1783 }
1784 ptrJ += 5;
1785 ptrInt += 5;
1786 mb += 16 - partWidth;
1787 }
1788
1789}
1790
1791
1792/*------------------------------------------------------------------------------
1793
1794 Function: h264bsdPredictSamples
1795
1796 Functional description:
1797 This function reconstructs a prediction for a macroblock partition.
1798 The prediction is either copied or interpolated using the reference
1799 frame and the motion vector. Both luminance and chrominance parts are
1800 predicted. The prediction is stored in given macroblock array (data).
1801 Inputs:
1802 data pointer to macroblock array (384 bytes) for output
1803 mv pointer to motion vector used for prediction
1804 refPic pointer to reference picture structure
1805 xA x-coordinate for current macroblock
1806 yA y-coordinate for current macroblock
1807 partX x-offset for partition in macroblock
1808 partY y-offset for partition in macroblock
1809 partWidth width of partition
1810 partHeight height of partition
1811 Outputs:
1812 data macroblock array (16x16+8x8+8x8) where predicted
1813 partition is stored at correct position
1814
1815------------------------------------------------------------------------------*/
1816
1817void h264bsdPredictSamples(
1818 u8 *data,
1819 mv_t *mv,
1820 image_t *refPic,
1821 u32 xA,
1822 u32 yA,
1823 u32 partX,
1824 u32 partY,
1825 u32 partWidth,
1826 u32 partHeight)
1827
1828{
1829
1830/* Variables */
1831
1832 u32 xFrac, yFrac, width, height;
1833 i32 xInt, yInt;
1834 u8 *lumaPartData;
1835
1836/* Code */
1837
1838 ASSERT(data);
1839 ASSERT(mv);
1840 ASSERT(partWidth);
1841 ASSERT(partHeight);
1842 ASSERT(refPic);
1843 ASSERT(refPic->data);
1844 ASSERT(refPic->width);
1845 ASSERT(refPic->height);
1846
1847 /* luma */
1848 lumaPartData = data + 16*partY + partX;
1849
1850 xFrac = mv->hor & 0x3;
1851 yFrac = mv->ver & 0x3;
1852
1853 width = 16 * refPic->width;
1854 height = 16 * refPic->height;
1855
1856 xInt = (i32)xA + (i32)partX + (mv->hor >> 2);
1857 yInt = (i32)yA + (i32)partY + (mv->ver >> 2);
1858
1859 ASSERT(lumaFracPos[xFrac][yFrac] < 16);
1860
1861 switch (lumaFracPos[xFrac][yFrac])
1862 {
1863 case 0: /* G */
1864 h264bsdFillBlock(refPic->data, lumaPartData,
1865 xInt,yInt,width,height,partWidth,partHeight,16);
1866 break;
1867 case 1: /* d */
1868 h264bsdInterpolateVerQuarter(refPic->data, lumaPartData,
1869 xInt, yInt-2, width, height, partWidth, partHeight, 0);
1870 break;
1871 case 2: /* h */
1872 h264bsdInterpolateVerHalf(refPic->data, lumaPartData,
1873 xInt, yInt-2, width, height, partWidth, partHeight);
1874 break;
1875 case 3: /* n */
1876 h264bsdInterpolateVerQuarter(refPic->data, lumaPartData,
1877 xInt, yInt-2, width, height, partWidth, partHeight, 1);
1878 break;
1879 case 4: /* a */
1880 h264bsdInterpolateHorQuarter(refPic->data, lumaPartData,
1881 xInt-2, yInt, width, height, partWidth, partHeight, 0);
1882 break;
1883 case 5: /* e */
1884 h264bsdInterpolateHorVerQuarter(refPic->data, lumaPartData,
1885 xInt-2, yInt-2, width, height, partWidth, partHeight, 0);
1886 break;
1887 case 6: /* i */
1888 h264bsdInterpolateMidHorQuarter(refPic->data, lumaPartData,
1889 xInt-2, yInt-2, width, height, partWidth, partHeight, 0);
1890 break;
1891 case 7: /* p */
1892 h264bsdInterpolateHorVerQuarter(refPic->data, lumaPartData,
1893 xInt-2, yInt-2, width, height, partWidth, partHeight, 2);
1894 break;
1895 case 8: /* b */
1896 h264bsdInterpolateHorHalf(refPic->data, lumaPartData,
1897 xInt-2, yInt, width, height, partWidth, partHeight);
1898 break;
1899 case 9: /* f */
1900 h264bsdInterpolateMidVerQuarter(refPic->data, lumaPartData,
1901 xInt-2, yInt-2, width, height, partWidth, partHeight, 0);
1902 break;
1903 case 10: /* j */
1904 h264bsdInterpolateMidHalf(refPic->data, lumaPartData,
1905 xInt-2, yInt-2, width, height, partWidth, partHeight);
1906 break;
1907 case 11: /* q */
1908 h264bsdInterpolateMidVerQuarter(refPic->data, lumaPartData,
1909 xInt-2, yInt-2, width, height, partWidth, partHeight, 1);
1910 break;
1911 case 12: /* c */
1912 h264bsdInterpolateHorQuarter(refPic->data, lumaPartData,
1913 xInt-2, yInt, width, height, partWidth, partHeight, 1);
1914 break;
1915 case 13: /* g */
1916 h264bsdInterpolateHorVerQuarter(refPic->data, lumaPartData,
1917 xInt-2, yInt-2, width, height, partWidth, partHeight, 1);
1918 break;
1919 case 14: /* k */
1920 h264bsdInterpolateMidHorQuarter(refPic->data, lumaPartData,
1921 xInt-2, yInt-2, width, height, partWidth, partHeight, 1);
1922 break;
1923 default: /* case 15, r */
1924 h264bsdInterpolateHorVerQuarter(refPic->data, lumaPartData,
1925 xInt-2, yInt-2, width, height, partWidth, partHeight, 3);
1926 break;
1927 }
1928
1929 /* chroma */
1930 PredictChroma(
1931 data + 16*16 + (partY>>1)*8 + (partX>>1),
1932 xA + partX,
1933 yA + partY,
1934 partWidth,
1935 partHeight,
1936 mv,
1937 refPic);
1938
1939}
1940
1941#else /* H264DEC_OMXDL */
1942/*------------------------------------------------------------------------------
1943
1944 Function: h264bsdPredictSamples
1945
1946 Functional description:
1947 This function reconstructs a prediction for a macroblock partition.
1948 The prediction is either copied or interpolated using the reference
1949 frame and the motion vector. Both luminance and chrominance parts are
1950 predicted. The prediction is stored in given macroblock array (data).
1951 Inputs:
1952 data pointer to macroblock array (384 bytes) for output
1953 mv pointer to motion vector used for prediction
1954 refPic pointer to reference picture structure
1955 xA x-coordinate for current macroblock
1956 yA y-coordinate for current macroblock
1957 partX x-offset for partition in macroblock
1958 partY y-offset for partition in macroblock
1959 partWidth width of partition
1960 partHeight height of partition
1961 Outputs:
1962 data macroblock array (16x16+8x8+8x8) where predicted
1963 partition is stored at correct position
1964
1965------------------------------------------------------------------------------*/
1966
1967/*lint -e{550} Symbol 'res' not accessed */
1968void h264bsdPredictSamples(
1969 u8 *data,
1970 mv_t *mv,
1971 image_t *refPic,
1972 u32 colAndRow,
1973 u32 part,
1974 u8 *pFill)
1975
1976{
1977
1978/* Variables */
1979
1980 u32 xFrac, yFrac;
1981 u32 width, height;
1982 i32 xInt, yInt, x0, y0;
1983 u8 *partData, *ref;
1984 OMXSize roi;
1985 u32 fillWidth;
1986 u32 fillHeight;
1987 OMXResult res;
1988 u32 xA, yA;
1989 u32 partX, partY;
1990 u32 partWidth, partHeight;
1991
1992/* Code */
1993
1994 ASSERT(data);
1995 ASSERT(mv);
1996 ASSERT(refPic);
1997 ASSERT(refPic->data);
1998 ASSERT(refPic->width);
1999 ASSERT(refPic->height);
2000
2001 xA = (colAndRow & 0xFFFF0000) >> 16;
2002 yA = (colAndRow & 0x0000FFFF);
2003
2004 partX = (part & 0xFF000000) >> 24;
2005 partY = (part & 0x00FF0000) >> 16;
2006 partWidth = (part & 0x0000FF00) >> 8;
2007 partHeight = (part & 0x000000FF);
2008
2009 ASSERT(partWidth);
2010 ASSERT(partHeight);
2011
2012 /* luma */
2013 partData = data + 16*partY + partX;
2014
2015 xFrac = mv->hor & 0x3;
2016 yFrac = mv->ver & 0x3;
2017
2018 width = 16 * refPic->width;
2019 height = 16 * refPic->height;
2020
2021 xInt = (i32)xA + (i32)partX + (mv->hor >> 2);
2022 yInt = (i32)yA + (i32)partY + (mv->ver >> 2);
2023
2024 x0 = (xFrac) ? xInt-2 : xInt;
2025 y0 = (yFrac) ? yInt-2 : yInt;
2026
2027 if (xFrac)
2028 {
2029 if (partWidth == 16)
2030 fillWidth = 32;
2031 else
2032 fillWidth = 16;
2033 }
2034 else
2035 fillWidth = (partWidth*2);
2036 if (yFrac)
2037 fillHeight = partHeight+5;
2038 else
2039 fillHeight = partHeight;
2040
2041
2042 if ((x0 < 0) || ((u32)x0+fillWidth > width) ||
2043 (y0 < 0) || ((u32)y0+fillHeight > height))
2044 {
2045 h264bsdFillBlock(refPic->data, (u8*)pFill, x0, y0, width, height,
2046 fillWidth, fillHeight, fillWidth);
2047
2048 x0 = 0;
2049 y0 = 0;
2050 ref = pFill;
2051 width = fillWidth;
2052 if (yFrac)
2053 ref += 2*width;
2054 if (xFrac)
2055 ref += 2;
2056 }
2057 else
2058 {
2059 /*lint --e(737) Loss of sign */
2060 ref = refPic->data + yInt*width + xInt;
2061 }
2062 /* Luma interpolation */
2063 roi.width = (i32)partWidth;
2064 roi.height = (i32)partHeight;
2065
2066 res = omxVCM4P10_InterpolateLuma(ref, (i32)width, partData, 16,
2067 (i32)xFrac, (i32)yFrac, roi);
2068 ASSERT(res == 0);
2069
2070 /* Chroma */
2071 width = 8 * refPic->width;
2072 height = 8 * refPic->height;
2073
2074 x0 = ((xA + partX) >> 1) + (mv->hor >> 3);
2075 y0 = ((yA + partY) >> 1) + (mv->ver >> 3);
2076 xFrac = mv->hor & 0x7;
2077 yFrac = mv->ver & 0x7;
2078
2079 ref = refPic->data + 256 * refPic->width * refPic->height;
2080
2081 roi.width = (i32)(partWidth >> 1);
2082 fillWidth = ((partWidth >> 1) + 8) & ~0x7;
2083 roi.height = (i32)(partHeight >> 1);
2084 fillHeight = (partHeight >> 1) + 1;
2085
2086 if ((x0 < 0) || ((u32)x0+fillWidth > width) ||
2087 (y0 < 0) || ((u32)y0+fillHeight > height))
2088 {
2089 h264bsdFillBlock(ref, pFill, x0, y0, width, height,
2090 fillWidth, fillHeight, fillWidth);
2091 ref += width * height;
2092 h264bsdFillBlock(ref, pFill + fillWidth*fillHeight,
2093 x0, y0, width, height, fillWidth,
2094 fillHeight, fillWidth);
2095
2096 ref = pFill;
2097 x0 = 0;
2098 y0 = 0;
2099 width = fillWidth;
2100 height = fillHeight;
2101 }
2102
2103 partData = data + 16*16 + (partY>>1)*8 + (partX>>1);
2104
2105 /* Chroma interpolation */
2106 /*lint --e(737) Loss of sign */
2107 ref += y0 * width + x0;
2108 res = armVCM4P10_Interpolate_Chroma(ref, width, partData, 8,
2109 (u32)roi.width, (u32)roi.height, xFrac, yFrac);
2110 ASSERT(res == 0);
2111 partData += 8 * 8;
2112 ref += height * width;
2113 res = armVCM4P10_Interpolate_Chroma(ref, width, partData, 8,
2114 (u32)roi.width, (u32)roi.height, xFrac, yFrac);
2115 ASSERT(res == 0);
2116
2117}
2118
2119#endif /* H264DEC_OMXDL */
2120
2121
2122/*------------------------------------------------------------------------------
2123
2124 Function: FillRow1
2125
2126 Functional description:
2127 This function gets a row of reference pels in a 'normal' case when no
2128 overfilling is necessary.
2129
2130------------------------------------------------------------------------------*/
2131
2132static void FillRow1(
2133 u8 *ref,
2134 u8 *fill,
2135 i32 left,
2136 i32 center,
2137 i32 right)
2138{
2139
2140 ASSERT(ref);
2141 ASSERT(fill);
2142
2143 H264SwDecMemcpy(fill, ref, (u32)center);
2144
2145 /*lint -e(715) */
2146}
2147
2148
2149/*------------------------------------------------------------------------------
2150
2151 Function: h264bsdFillRow7
2152
2153 Functional description:
2154 This function gets a row of reference pels when horizontal coordinate
2155 is partly negative or partly greater than reference picture width
2156 (overfilling some pels on left and/or right edge).
2157 Inputs:
2158 ref pointer to reference samples
2159 left amount of pixels to overfill on left-edge
2160 center amount of pixels to copy
2161 right amount of pixels to overfill on right-edge
2162 Outputs:
2163 fill pointer where samples are stored
2164
2165------------------------------------------------------------------------------*/
2166#ifndef H264DEC_NEON
2167void h264bsdFillRow7(
2168 u8 *ref,
2169 u8 *fill,
2170 i32 left,
2171 i32 center,
2172 i32 right)
2173{
2174 u8 tmp;
2175
2176 ASSERT(ref);
2177 ASSERT(fill);
2178
2179 if (left)
2180 tmp = *ref;
2181
2182 for ( ; left; left--)
2183 /*lint -esym(644,tmp) tmp is initialized if used */
2184 *fill++ = tmp;
2185
2186 for ( ; center; center--)
2187 *fill++ = *ref++;
2188
2189 if (right)
2190 tmp = ref[-1];
2191
2192 for ( ; right; right--)
2193 /*lint -esym(644,tmp) tmp is initialized if used */
2194 *fill++ = tmp;
2195}
2196#endif
2197/*------------------------------------------------------------------------------
2198
2199 Function: h264bsdFillBlock
2200
2201 Functional description:
2202 This function gets a block of reference pels. It determines whether
2203 overfilling is needed or not and repeatedly calls an appropriate
2204 function (by using a function pointer) that fills one row the block.
2205 Inputs:
2206 ref pointer to reference frame
2207 x0 x-coordinate for block
2208 y0 y-coordinate for block
2209 width width of reference frame
2210 height height of reference frame
2211 blockWidth width of block
2212 blockHeight height of block
2213 fillScanLength length of a line in output array (pixels)
2214 Outputs:
2215 fill pointer to array where output block is written
2216
2217------------------------------------------------------------------------------*/
2218
2219void h264bsdFillBlock(
2220 u8 *ref,
2221 u8 *fill,
2222 i32 x0,
2223 i32 y0,
2224 u32 width,
2225 u32 height,
2226 u32 blockWidth,
2227 u32 blockHeight,
2228 u32 fillScanLength)
2229
2230{
2231
2232/* Variables */
2233
2234 i32 xstop, ystop;
2235 void (*fp)(u8*, u8*, i32, i32, i32);
2236 i32 left, x, right;
2237 i32 top, y, bottom;
2238
2239/* Code */
2240
2241 ASSERT(ref);
2242 ASSERT(fill);
2243 ASSERT(width);
2244 ASSERT(height);
2245 ASSERT(fill);
2246 ASSERT(blockWidth);
2247 ASSERT(blockHeight);
2248
2249 xstop = x0 + (i32)blockWidth;
2250 ystop = y0 + (i32)blockHeight;
2251
2252 /* Choose correct function whether overfilling on left-edge or right-edge
2253 * is needed or not */
2254 if (x0 >= 0 && xstop <= (i32)width)
2255 fp = FillRow1;
2256 else
2257 fp = h264bsdFillRow7;
2258
2259 if (ystop < 0)
2260 y0 = -(i32)blockHeight;
2261
2262 if (xstop < 0)
2263 x0 = -(i32)blockWidth;
2264
2265 if (y0 > (i32)height)
2266 y0 = (i32)height;
2267
2268 if (x0 > (i32)width)
2269 x0 = (i32)width;
2270
2271 xstop = x0 + (i32)blockWidth;
2272 ystop = y0 + (i32)blockHeight;
2273
2274 if (x0 > 0)
2275 ref += x0;
2276
2277 if (y0 > 0)
2278 ref += y0 * (i32)width;
2279
2280 left = x0 < 0 ? -x0 : 0;
2281 right = xstop > (i32)width ? xstop - (i32)width : 0;
2282 x = (i32)blockWidth - left - right;
2283
2284 top = y0 < 0 ? -y0 : 0;
2285 bottom = ystop > (i32)height ? ystop - (i32)height : 0;
2286 y = (i32)blockHeight - top - bottom;
2287
2288 /* Top-overfilling */
2289 for ( ; top; top-- )
2290 {
2291 (*fp)(ref, fill, left, x, right);
2292 fill += fillScanLength;
2293 }
2294
2295 /* Lines inside reference image */
2296 for ( ; y; y-- )
2297 {
2298 (*fp)(ref, fill, left, x, right);
2299 ref += width;
2300 fill += fillScanLength;
2301 }
2302
2303 ref -= width;
2304
2305 /* Bottom-overfilling */
2306 for ( ; bottom; bottom-- )
2307 {
2308 (*fp)(ref, fill, left, x, right);
2309 fill += fillScanLength;
2310 }
2311}
2312
2313/*lint +e701 +e702 */
2314
2315