
/libavcodec/x86/h264dsp_mmx.c

http://github.com/FFmpeg/FFmpeg
/*
 * Copyright (c) 2004-2005 Michael Niedermayer, Loren Merritt
 *
 * This file is part of FFmpeg.
 *
 * FFmpeg is free software; you can redistribute it and/or
 * modify it under the terms of the GNU Lesser General Public
 * License as published by the Free Software Foundation; either
 * version 2.1 of the License, or (at your option) any later version.
 *
 * FFmpeg is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
 * Lesser General Public License for more details.
 *
 * You should have received a copy of the GNU Lesser General Public
 * License along with FFmpeg; if not, write to the Free Software
 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
 */

#include "libavutil/cpu.h"
#include "libavutil/x86_cpu.h"
#include "libavcodec/h264dsp.h"
#include "dsputil_mmx.h"

DECLARE_ALIGNED(8, static const uint64_t, ff_pb_3_1  ) = 0x0103010301030103ULL;

/***********************************/
/* IDCT */
#define IDCT_ADD_FUNC(NUM, DEPTH, OPT) \
void ff_h264_idct ## NUM ## _add_ ## DEPTH ## _ ## OPT (uint8_t *dst, int16_t *block, int stride);

IDCT_ADD_FUNC(, 8, mmx)
IDCT_ADD_FUNC(, 10, sse2)
IDCT_ADD_FUNC(_dc, 8, mmx2)
IDCT_ADD_FUNC(_dc, 10, mmx2)
IDCT_ADD_FUNC(8_dc, 8, mmx2)
IDCT_ADD_FUNC(8_dc, 10, sse2)
IDCT_ADD_FUNC(8, 8, mmx)
IDCT_ADD_FUNC(8, 8, sse2)
IDCT_ADD_FUNC(8, 10, sse2)
#if HAVE_AVX
IDCT_ADD_FUNC(, 10, avx)
IDCT_ADD_FUNC(8_dc, 10, avx)
IDCT_ADD_FUNC(8, 10, avx)
#endif
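
/* For reference: each IDCT_ADD_FUNC(NUM, DEPTH, OPT) line above generates one
 * prototype for an assembly implementation living elsewhere; e.g.
 * IDCT_ADD_FUNC(8, 10, sse2) expands to
 *
 *     void ff_h264_idct8_add_10_sse2(uint8_t *dst, int16_t *block, int stride);
 *
 * one declaration per block size / bit depth / instruction-set combination. */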


#define IDCT_ADD_REP_FUNC(NUM, REP, DEPTH, OPT) \
void ff_h264_idct ## NUM ## _add ## REP ## _ ## DEPTH ## _ ## OPT \
                              (uint8_t *dst, const int *block_offset, \
                              DCTELEM *block, int stride, const uint8_t nnzc[6*8]);

IDCT_ADD_REP_FUNC(8, 4, 8, mmx)
IDCT_ADD_REP_FUNC(8, 4, 8, mmx2)
IDCT_ADD_REP_FUNC(8, 4, 8, sse2)
IDCT_ADD_REP_FUNC(8, 4, 10, sse2)
IDCT_ADD_REP_FUNC(8, 4, 10, avx)
IDCT_ADD_REP_FUNC(, 16, 8, mmx)
IDCT_ADD_REP_FUNC(, 16, 8, mmx2)
IDCT_ADD_REP_FUNC(, 16, 8, sse2)
IDCT_ADD_REP_FUNC(, 16, 10, sse2)
IDCT_ADD_REP_FUNC(, 16intra, 8, mmx)
IDCT_ADD_REP_FUNC(, 16intra, 8, mmx2)
IDCT_ADD_REP_FUNC(, 16intra, 8, sse2)
IDCT_ADD_REP_FUNC(, 16intra, 10, sse2)
#if HAVE_AVX
IDCT_ADD_REP_FUNC(, 16, 10, avx)
IDCT_ADD_REP_FUNC(, 16intra, 10, avx)
#endif


#define IDCT_ADD_REP_FUNC2(NUM, REP, DEPTH, OPT) \
void ff_h264_idct ## NUM ## _add ## REP ## _ ## DEPTH ## _ ## OPT \
                              (uint8_t **dst, const int *block_offset, \
                              DCTELEM *block, int stride, const uint8_t nnzc[6*8]);
IDCT_ADD_REP_FUNC2(, 8, 8, mmx)
IDCT_ADD_REP_FUNC2(, 8, 8, mmx2)
IDCT_ADD_REP_FUNC2(, 8, 8, sse2)
IDCT_ADD_REP_FUNC2(, 8, 10, sse2)
#if HAVE_AVX
IDCT_ADD_REP_FUNC2(, 8, 10, avx)
#endif
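
/* For reference: IDCT_ADD_REP_FUNC(, 16, 8, sse2) expands to
 *
 *     void ff_h264_idct_add16_8_sse2(uint8_t *dst, const int *block_offset,
 *                                    DCTELEM *block, int stride,
 *                                    const uint8_t nnzc[6*8]);
 *
 * The REP variants take a single destination plane plus per-block offsets, while
 * the REP2 variant used for idct_add8 takes an array of plane pointers
 * (uint8_t **dst), which presumably matches the two-plane chroma call made through
 * H264DSPContext. */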

void ff_h264_luma_dc_dequant_idct_mmx (DCTELEM *output, DCTELEM *input, int qmul);
void ff_h264_luma_dc_dequant_idct_sse2(DCTELEM *output, DCTELEM *input, int qmul);

/***********************************/
/* deblocking */

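/* The helper below is a macro rather than a function so that d_idx (and the
 * offsets derived from it) are compile-time constants, which the inline asm needs
 * for its "i" (immediate) operand constraints. The do { ... } while (0) wrapper
 * makes the multi-statement body behave as a single statement at the call site. */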
#define h264_loop_filter_strength_iteration_mmx2(bS, nz, ref, mv, bidir, edges, step, mask_mv, dir, d_idx, mask_dir) \
    do { \
        x86_reg b_idx; \
        mask_mv <<= 3; \
        for( b_idx=0; b_idx<edges; b_idx+=step ) { \
            if (!mask_dir) \
            __asm__ volatile( \
                    "pxor %%mm0, %%mm0 \n\t" \
                    :: \
            ); \
            if(!(mask_mv & b_idx)) { \
                if(bidir) { \
                    __asm__ volatile( \
                        "movd         %a3(%0,%2), %%mm2 \n" \
                        "punpckldq    %a4(%0,%2), %%mm2 \n" /* { ref0[bn], ref1[bn] } */ \
                        "pshufw $0x44, 12(%0,%2), %%mm0 \n" /* { ref0[b], ref0[b] } */ \
                        "pshufw $0x44, 52(%0,%2), %%mm1 \n" /* { ref1[b], ref1[b] } */ \
                        "pshufw $0x4E, %%mm2, %%mm3 \n" \
                        "psubb         %%mm2, %%mm0 \n" /* { ref0[b]!=ref0[bn], ref0[b]!=ref1[bn] } */ \
                        "psubb         %%mm3, %%mm1 \n" /* { ref1[b]!=ref1[bn], ref1[b]!=ref0[bn] } */ \
 \
                        "por           %%mm1, %%mm0 \n" \
                        "movq   %a5(%1,%2,4), %%mm1 \n" \
                        "movq   %a6(%1,%2,4), %%mm2 \n" \
                        "movq          %%mm1, %%mm3 \n" \
                        "movq          %%mm2, %%mm4 \n" \
                        "psubw   48(%1,%2,4), %%mm1 \n" \
                        "psubw   56(%1,%2,4), %%mm2 \n" \
                        "psubw  208(%1,%2,4), %%mm3 \n" \
                        "psubw  216(%1,%2,4), %%mm4 \n" \
                        "packsswb      %%mm2, %%mm1 \n" \
                        "packsswb      %%mm4, %%mm3 \n" \
                        "paddb         %%mm6, %%mm1 \n" \
                        "paddb         %%mm6, %%mm3 \n" \
                        "psubusb       %%mm5, %%mm1 \n" /* abs(mv[b] - mv[bn]) >= limit */ \
                        "psubusb       %%mm5, %%mm3 \n" \
                        "packsswb      %%mm3, %%mm1 \n" \
 \
                        "por           %%mm1, %%mm0 \n" \
                        "movq   %a7(%1,%2,4), %%mm1 \n" \
                        "movq   %a8(%1,%2,4), %%mm2 \n" \
                        "movq          %%mm1, %%mm3 \n" \
                        "movq          %%mm2, %%mm4 \n" \
                        "psubw   48(%1,%2,4), %%mm1 \n" \
                        "psubw   56(%1,%2,4), %%mm2 \n" \
                        "psubw  208(%1,%2,4), %%mm3 \n" \
                        "psubw  216(%1,%2,4), %%mm4 \n" \
                        "packsswb      %%mm2, %%mm1 \n" \
                        "packsswb      %%mm4, %%mm3 \n" \
                        "paddb         %%mm6, %%mm1 \n" \
                        "paddb         %%mm6, %%mm3 \n" \
                        "psubusb       %%mm5, %%mm1 \n" /* abs(mv[b] - mv[bn]) >= limit */ \
                        "psubusb       %%mm5, %%mm3 \n" \
                        "packsswb      %%mm3, %%mm1 \n" \
 \
                        "pshufw $0x4E, %%mm1, %%mm1 \n" \
                        "por           %%mm1, %%mm0 \n" \
                        "pshufw $0x4E, %%mm0, %%mm1 \n" \
                        "pminub        %%mm1, %%mm0 \n" \
                        ::"r"(ref), \
                          "r"(mv), \
                          "r"(b_idx), \
                          "i"(d_idx+12), \
                          "i"(d_idx+52), \
                          "i"(d_idx*4+48), \
                          "i"(d_idx*4+56), \
                          "i"(d_idx*4+208), \
                          "i"(d_idx*4+216) \
                    ); \
                } else { \
                    __asm__ volatile( \
                        "movd   12(%0,%2), %%mm0 \n" \
                        "psubb %a3(%0,%2), %%mm0 \n" /* ref[b] != ref[bn] */ \
                        "movq   48(%1,%2,4), %%mm1 \n" \
                        "movq   56(%1,%2,4), %%mm2 \n" \
                        "psubw %a4(%1,%2,4), %%mm1 \n" \
                        "psubw %a5(%1,%2,4), %%mm2 \n" \
                        "packsswb   %%mm2, %%mm1 \n" \
                        "paddb      %%mm6, %%mm1 \n" \
                        "psubusb    %%mm5, %%mm1 \n" /* abs(mv[b] - mv[bn]) >= limit */ \
                        "packsswb   %%mm1, %%mm1 \n" \
                        "por        %%mm1, %%mm0 \n" \
                        ::"r"(ref), \
                          "r"(mv), \
                          "r"(b_idx), \
                          "i"(d_idx+12), \
                          "i"(d_idx*4+48), \
                          "i"(d_idx*4+56) \
                    ); \
                } \
            } \
            __asm__ volatile( \
                "movd 12(%0,%1), %%mm1 \n" \
                "por %a2(%0,%1), %%mm1 \n" /* nnz[b] || nnz[bn] */ \
                ::"r"(nnz), \
                  "r"(b_idx), \
                  "i"(d_idx+12) \
            ); \
            __asm__ volatile( \
                "pminub    %%mm7, %%mm1 \n" \
                "pminub    %%mm7, %%mm0 \n" \
                "psllw        $1, %%mm1 \n" \
                "pxor      %%mm2, %%mm2 \n" \
                "pmaxub    %%mm0, %%mm1 \n" \
                "punpcklbw %%mm2, %%mm1 \n" \
                "movq      %%mm1, %a1(%0,%2) \n" \
                ::"r"(bS), \
                  "i"(32*dir), \
                  "r"(b_idx) \
                :"memory" \
            ); \
        } \
    } while (0)

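/* h264_loop_filter_strength_mmx2() below keeps a few constants live in MMX
 * registers across both iteration passes: mm7 = ff_pb_1, mm6 = ff_pb_3 (or
 * ff_pb_3_1 for field macroblocks, where the vertical MV threshold differs) and
 * mm5 = 2*mm6, used by the psubusb-based |mv delta| >= limit tests in the macro
 * above. The TRANSPOSE4 at the end appears to rearrange the dir==0 strengths in
 * bS[0] back into the edge order the caller expects. */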
static void h264_loop_filter_strength_mmx2( int16_t bS[2][4][4], uint8_t nnz[40], int8_t ref[2][40], int16_t mv[2][40][2],
                                            int bidir, int edges, int step, int mask_mv0, int mask_mv1, int field ) {
    __asm__ volatile(
        "movq %0, %%mm7 \n"
        "movq %1, %%mm6 \n"
        ::"m"(ff_pb_1), "m"(ff_pb_3)
    );
    if(field)
        __asm__ volatile(
            "movq %0, %%mm6 \n"
            ::"m"(ff_pb_3_1)
        );
    __asm__ volatile(
        "movq  %%mm6, %%mm5 \n"
        "paddb %%mm5, %%mm5 \n"
    :);

    // could do a special case for dir==0 && edges==1, but it only reduces the
    // average filter time by 1.2%
    step  <<= 3;
    edges <<= 3;
    h264_loop_filter_strength_iteration_mmx2(bS, nnz, ref, mv, bidir, edges, step, mask_mv1, 1, -8,  0);
    h264_loop_filter_strength_iteration_mmx2(bS, nnz, ref, mv, bidir,    32,    8, mask_mv0, 0, -1, -1);

    __asm__ volatile(
        "movq   (%0), %%mm0 \n\t"
        "movq  8(%0), %%mm1 \n\t"
        "movq 16(%0), %%mm2 \n\t"
        "movq 24(%0), %%mm3 \n\t"
        TRANSPOSE4(%%mm0, %%mm1, %%mm2, %%mm3, %%mm4)
        "movq %%mm0,   (%0) \n\t"
        "movq %%mm3,  8(%0) \n\t"
        "movq %%mm4, 16(%0) \n\t"
        "movq %%mm2, 24(%0) \n\t"
        ::"r"(bS[0])
        :"memory"
    );
}

#define LF_FUNC(DIR, TYPE, DEPTH, OPT) \
void ff_deblock_ ## DIR ## _ ## TYPE ## _ ## DEPTH ## _ ## OPT (uint8_t *pix, int stride, \
                                                                int alpha, int beta, int8_t *tc0);
#define LF_IFUNC(DIR, TYPE, DEPTH, OPT) \
void ff_deblock_ ## DIR ## _ ## TYPE ## _ ## DEPTH ## _ ## OPT (uint8_t *pix, int stride, \
                                                                int alpha, int beta);

#define LF_FUNCS(type, depth)\
LF_FUNC (h,  chroma,       depth, mmxext)\
LF_IFUNC(h,  chroma_intra, depth, mmxext)\
LF_FUNC (v,  chroma,       depth, mmxext)\
LF_IFUNC(v,  chroma_intra, depth, mmxext)\
LF_FUNC (h,  luma,         depth, mmxext)\
LF_IFUNC(h,  luma_intra,   depth, mmxext)\
LF_FUNC (h,  luma,         depth, sse2)\
LF_IFUNC(h,  luma_intra,   depth, sse2)\
LF_FUNC (v,  luma,         depth, sse2)\
LF_IFUNC(v,  luma_intra,   depth, sse2)\
LF_FUNC (h,  chroma,       depth, sse2)\
LF_IFUNC(h,  chroma_intra, depth, sse2)\
LF_FUNC (v,  chroma,       depth, sse2)\
LF_IFUNC(v,  chroma_intra, depth, sse2)\
LF_FUNC (h,  luma,         depth,  avx)\
LF_IFUNC(h,  luma_intra,   depth,  avx)\
LF_FUNC (v,  luma,         depth,  avx)\
LF_IFUNC(v,  luma_intra,   depth,  avx)\
LF_FUNC (h,  chroma,       depth,  avx)\
LF_IFUNC(h,  chroma_intra, depth,  avx)\
LF_FUNC (v,  chroma,       depth,  avx)\
LF_IFUNC(v,  chroma_intra, depth,  avx)

LF_FUNCS( uint8_t,  8)
LF_FUNCS(uint16_t, 10)

#if ARCH_X86_32
LF_FUNC (v8, luma,             8, mmxext)
static void ff_deblock_v_luma_8_mmxext(uint8_t *pix, int stride, int alpha, int beta, int8_t *tc0)
{
    if((tc0[0] & tc0[1]) >= 0)
        ff_deblock_v8_luma_8_mmxext(pix+0, stride, alpha, beta, tc0);
    if((tc0[2] & tc0[3]) >= 0)
        ff_deblock_v8_luma_8_mmxext(pix+8, stride, alpha, beta, tc0+2);
}
LF_IFUNC(v8, luma_intra,        8, mmxext)
static void ff_deblock_v_luma_intra_8_mmxext(uint8_t *pix, int stride, int alpha, int beta)
{
    ff_deblock_v8_luma_intra_8_mmxext(pix+0, stride, alpha, beta);
    ff_deblock_v8_luma_intra_8_mmxext(pix+8, stride, alpha, beta);
}
#endif /* ARCH_X86_32 */
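
/* The two wrappers above exist because the mmxext kernel only filters an
 * 8-pixel-wide half of the 16-pixel luma edge per call, so the full edge is done
 * as two halves. The (tc0[0] & tc0[1]) >= 0 test skips a half only when both tc0
 * values are negative (tc0 == -1 meaning "do not filter"): the bitwise AND of two
 * signed bytes is negative exactly when both operands are. */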

LF_FUNC (v,  luma,            10, mmxext)
LF_IFUNC(v,  luma_intra,      10, mmxext)

/***********************************/
/* weighted prediction */

#define H264_WEIGHT(W, OPT) \
void ff_h264_weight_ ## W ## _ ## OPT(uint8_t *dst, \
    int stride, int height, int log2_denom, int weight, int offset);

#define H264_BIWEIGHT(W, OPT) \
void ff_h264_biweight_ ## W ## _ ## OPT(uint8_t *dst, \
    uint8_t *src, int stride, int height, int log2_denom, int weightd, \
    int weights, int offset);

#define H264_BIWEIGHT_MMX(W) \
H264_WEIGHT  (W, mmx2) \
H264_BIWEIGHT(W, mmx2)

#define H264_BIWEIGHT_MMX_SSE(W) \
H264_BIWEIGHT_MMX(W) \
H264_WEIGHT      (W, sse2) \
H264_BIWEIGHT    (W, sse2) \
H264_BIWEIGHT    (W, ssse3)

H264_BIWEIGHT_MMX_SSE(16)
H264_BIWEIGHT_MMX_SSE( 8)
H264_BIWEIGHT_MMX    ( 4)
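
/* For reference: H264_BIWEIGHT_MMX_SSE(16) declares the mmx2 and sse2
 * weight/biweight prototypes plus an ssse3 biweight for 16-pixel-wide blocks;
 * the mmx2 pair, for example, expands to
 *
 *     void ff_h264_weight_16_mmx2  (uint8_t *dst, int stride, int height,
 *                                   int log2_denom, int weight, int offset);
 *     void ff_h264_biweight_16_mmx2(uint8_t *dst, uint8_t *src, int stride,
 *                                   int height, int log2_denom, int weightd,
 *                                   int weights, int offset);
 *
 * matching the entries installed into weight_h264_pixels_tab[] and
 * biweight_h264_pixels_tab[] in ff_h264dsp_init_x86() below. */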

#define H264_WEIGHT_10(W, DEPTH, OPT) \
void ff_h264_weight_ ## W ## _ ## DEPTH ## _ ## OPT(uint8_t *dst, \
    int stride, int height, int log2_denom, int weight, int offset);

#define H264_BIWEIGHT_10(W, DEPTH, OPT) \
void ff_h264_biweight_ ## W ## _ ## DEPTH ## _ ## OPT \
    (uint8_t *dst, uint8_t *src, int stride, int height, int log2_denom, \
     int weightd, int weights, int offset);

#define H264_BIWEIGHT_10_SSE(W, DEPTH) \
H264_WEIGHT_10  (W, DEPTH, sse2) \
H264_WEIGHT_10  (W, DEPTH, sse4) \
H264_BIWEIGHT_10(W, DEPTH, sse2) \
H264_BIWEIGHT_10(W, DEPTH, sse4)

H264_BIWEIGHT_10_SSE(16, 10)
H264_BIWEIGHT_10_SSE( 8, 10)
H264_BIWEIGHT_10_SSE( 4, 10)

void ff_h264dsp_init_x86(H264DSPContext *c, const int bit_depth, const int chroma_format_idc)
{
    int mm_flags = av_get_cpu_flags();

    if (chroma_format_idc == 1 && mm_flags & AV_CPU_FLAG_MMX2) {
        c->h264_loop_filter_strength= h264_loop_filter_strength_mmx2;
    }

    if (bit_depth == 8) {
#if HAVE_YASM
    if (mm_flags & AV_CPU_FLAG_MMX) {
        c->h264_idct_dc_add         =
        c->h264_idct_add            = ff_h264_idct_add_8_mmx;
        c->h264_idct8_dc_add        =
        c->h264_idct8_add           = ff_h264_idct8_add_8_mmx;

        c->h264_idct_add16          = ff_h264_idct_add16_8_mmx;
        c->h264_idct8_add4          = ff_h264_idct8_add4_8_mmx;
        if (chroma_format_idc == 1)
            c->h264_idct_add8       = ff_h264_idct_add8_8_mmx;
        c->h264_idct_add16intra     = ff_h264_idct_add16intra_8_mmx;
        c->h264_luma_dc_dequant_idct= ff_h264_luma_dc_dequant_idct_mmx;

        if (mm_flags & AV_CPU_FLAG_MMX2) {
            c->h264_idct_dc_add    = ff_h264_idct_dc_add_8_mmx2;
            c->h264_idct8_dc_add   = ff_h264_idct8_dc_add_8_mmx2;
            c->h264_idct_add16     = ff_h264_idct_add16_8_mmx2;
            c->h264_idct8_add4     = ff_h264_idct8_add4_8_mmx2;
            if (chroma_format_idc == 1)
                c->h264_idct_add8  = ff_h264_idct_add8_8_mmx2;
            c->h264_idct_add16intra= ff_h264_idct_add16intra_8_mmx2;

            c->h264_v_loop_filter_chroma= ff_deblock_v_chroma_8_mmxext;
            c->h264_v_loop_filter_chroma_intra= ff_deblock_v_chroma_intra_8_mmxext;
            if (chroma_format_idc == 1) {
                c->h264_h_loop_filter_chroma= ff_deblock_h_chroma_8_mmxext;
                c->h264_h_loop_filter_chroma_intra= ff_deblock_h_chroma_intra_8_mmxext;
            }
#if ARCH_X86_32
            c->h264_v_loop_filter_luma= ff_deblock_v_luma_8_mmxext;
            c->h264_h_loop_filter_luma= ff_deblock_h_luma_8_mmxext;
            c->h264_v_loop_filter_luma_intra = ff_deblock_v_luma_intra_8_mmxext;
            c->h264_h_loop_filter_luma_intra = ff_deblock_h_luma_intra_8_mmxext;
#endif
            c->weight_h264_pixels_tab[0]= ff_h264_weight_16_mmx2;
            c->weight_h264_pixels_tab[1]= ff_h264_weight_8_mmx2;
            c->weight_h264_pixels_tab[2]= ff_h264_weight_4_mmx2;

            c->biweight_h264_pixels_tab[0]= ff_h264_biweight_16_mmx2;
            c->biweight_h264_pixels_tab[1]= ff_h264_biweight_8_mmx2;
            c->biweight_h264_pixels_tab[2]= ff_h264_biweight_4_mmx2;

            if (mm_flags&AV_CPU_FLAG_SSE2) {
                c->h264_idct8_add           = ff_h264_idct8_add_8_sse2;

                c->h264_idct_add16          = ff_h264_idct_add16_8_sse2;
                c->h264_idct8_add4          = ff_h264_idct8_add4_8_sse2;
                if (chroma_format_idc == 1)
                    c->h264_idct_add8       = ff_h264_idct_add8_8_sse2;
                c->h264_idct_add16intra     = ff_h264_idct_add16intra_8_sse2;
                c->h264_luma_dc_dequant_idct= ff_h264_luma_dc_dequant_idct_sse2;

                c->weight_h264_pixels_tab[0]= ff_h264_weight_16_sse2;
                c->weight_h264_pixels_tab[1]= ff_h264_weight_8_sse2;

                c->biweight_h264_pixels_tab[0]= ff_h264_biweight_16_sse2;
                c->biweight_h264_pixels_tab[1]= ff_h264_biweight_8_sse2;

#if HAVE_ALIGNED_STACK
                c->h264_v_loop_filter_luma = ff_deblock_v_luma_8_sse2;
                c->h264_h_loop_filter_luma = ff_deblock_h_luma_8_sse2;
                c->h264_v_loop_filter_luma_intra = ff_deblock_v_luma_intra_8_sse2;
                c->h264_h_loop_filter_luma_intra = ff_deblock_h_luma_intra_8_sse2;
#endif
            }
            if (mm_flags&AV_CPU_FLAG_SSSE3) {
                c->biweight_h264_pixels_tab[0]= ff_h264_biweight_16_ssse3;
                c->biweight_h264_pixels_tab[1]= ff_h264_biweight_8_ssse3;
            }
            if (HAVE_AVX && mm_flags&AV_CPU_FLAG_AVX) {
#if HAVE_ALIGNED_STACK
                c->h264_v_loop_filter_luma = ff_deblock_v_luma_8_avx;
                c->h264_h_loop_filter_luma = ff_deblock_h_luma_8_avx;
                c->h264_v_loop_filter_luma_intra = ff_deblock_v_luma_intra_8_avx;
                c->h264_h_loop_filter_luma_intra = ff_deblock_h_luma_intra_8_avx;
#endif
            }
        }
    }
#endif
    } else if (bit_depth == 10) {
#if HAVE_YASM
    if (mm_flags & AV_CPU_FLAG_MMX) {
        if (mm_flags & AV_CPU_FLAG_MMX2) {
#if ARCH_X86_32
            c->h264_v_loop_filter_chroma= ff_deblock_v_chroma_10_mmxext;
            c->h264_v_loop_filter_chroma_intra= ff_deblock_v_chroma_intra_10_mmxext;
            c->h264_v_loop_filter_luma= ff_deblock_v_luma_10_mmxext;
            c->h264_h_loop_filter_luma= ff_deblock_h_luma_10_mmxext;
            c->h264_v_loop_filter_luma_intra = ff_deblock_v_luma_intra_10_mmxext;
            c->h264_h_loop_filter_luma_intra = ff_deblock_h_luma_intra_10_mmxext;
#endif
            c->h264_idct_dc_add= ff_h264_idct_dc_add_10_mmx2;
            if (mm_flags&AV_CPU_FLAG_SSE2) {
                c->h264_idct_add       = ff_h264_idct_add_10_sse2;
                c->h264_idct8_dc_add   = ff_h264_idct8_dc_add_10_sse2;

                c->h264_idct_add16     = ff_h264_idct_add16_10_sse2;
                if (chroma_format_idc == 1)
                    c->h264_idct_add8  = ff_h264_idct_add8_10_sse2;
                c->h264_idct_add16intra= ff_h264_idct_add16intra_10_sse2;
#if HAVE_ALIGNED_STACK
                c->h264_idct8_add      = ff_h264_idct8_add_10_sse2;
                c->h264_idct8_add4     = ff_h264_idct8_add4_10_sse2;
#endif

                c->weight_h264_pixels_tab[0] = ff_h264_weight_16_10_sse2;
                c->weight_h264_pixels_tab[1] = ff_h264_weight_8_10_sse2;
                c->weight_h264_pixels_tab[2] = ff_h264_weight_4_10_sse2;

                c->biweight_h264_pixels_tab[0] = ff_h264_biweight_16_10_sse2;
                c->biweight_h264_pixels_tab[1] = ff_h264_biweight_8_10_sse2;
                c->biweight_h264_pixels_tab[2] = ff_h264_biweight_4_10_sse2;

                c->h264_v_loop_filter_chroma= ff_deblock_v_chroma_10_sse2;
                c->h264_v_loop_filter_chroma_intra= ff_deblock_v_chroma_intra_10_sse2;
#if HAVE_ALIGNED_STACK
                c->h264_v_loop_filter_luma = ff_deblock_v_luma_10_sse2;
                c->h264_h_loop_filter_luma = ff_deblock_h_luma_10_sse2;
                c->h264_v_loop_filter_luma_intra = ff_deblock_v_luma_intra_10_sse2;
                c->h264_h_loop_filter_luma_intra = ff_deblock_h_luma_intra_10_sse2;
#endif
            }
            if (mm_flags&AV_CPU_FLAG_SSE4) {
                c->weight_h264_pixels_tab[0] = ff_h264_weight_16_10_sse4;
                c->weight_h264_pixels_tab[1] = ff_h264_weight_8_10_sse4;
                c->weight_h264_pixels_tab[2] = ff_h264_weight_4_10_sse4;

                c->biweight_h264_pixels_tab[0] = ff_h264_biweight_16_10_sse4;
                c->biweight_h264_pixels_tab[1] = ff_h264_biweight_8_10_sse4;
                c->biweight_h264_pixels_tab[2] = ff_h264_biweight_4_10_sse4;
            }
#if HAVE_AVX
            if (mm_flags&AV_CPU_FLAG_AVX) {
                c->h264_idct_dc_add    =
                c->h264_idct_add       = ff_h264_idct_add_10_avx;
                c->h264_idct8_dc_add   = ff_h264_idct8_dc_add_10_avx;

                c->h264_idct_add16     = ff_h264_idct_add16_10_avx;
                if (chroma_format_idc == 1)
                    c->h264_idct_add8  = ff_h264_idct_add8_10_avx;
                c->h264_idct_add16intra= ff_h264_idct_add16intra_10_avx;
#if HAVE_ALIGNED_STACK
                c->h264_idct8_add      = ff_h264_idct8_add_10_avx;
                c->h264_idct8_add4     = ff_h264_idct8_add4_10_avx;
#endif

                c->h264_v_loop_filter_chroma= ff_deblock_v_chroma_10_avx;
                c->h264_v_loop_filter_chroma_intra= ff_deblock_v_chroma_intra_10_avx;
#if HAVE_ALIGNED_STACK
                c->h264_v_loop_filter_luma = ff_deblock_v_luma_10_avx;
                c->h264_h_loop_filter_luma = ff_deblock_h_luma_10_avx;
                c->h264_v_loop_filter_luma_intra = ff_deblock_v_luma_intra_10_avx;
                c->h264_h_loop_filter_luma_intra = ff_deblock_h_luma_intra_10_avx;
#endif
            }
#endif /* HAVE_AVX */
        }
    }
#endif
    }
}
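
/* A minimal sketch of how this init function is reached (the exact call site lives
 * in libavcodec/h264dsp.c and may differ between FFmpeg versions):
 *
 *     void ff_h264dsp_init(H264DSPContext *c, const int bit_depth,
 *                          const int chroma_format_idc)
 *     {
 *         ... // install the C fallbacks first
 *         if (HAVE_MMX)
 *             ff_h264dsp_init_x86(c, bit_depth, chroma_format_idc);
 *     }
 *
 * so every pointer set here replaces a C implementation only when the matching
 * AV_CPU_FLAG_* bit is reported by av_get_cpu_flags() at runtime. */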