/*
 * Source file: libavcodec/arm/ac3dsp_neon.S
 * Repository:  http://github.com/FFmpeg/FFmpeg
 */
/*
 * Copyright (c) 2011 Mans Rullgard <mans@mansr.com>
 *
 * This file is part of FFmpeg.
 *
 * FFmpeg is free software; you can redistribute it and/or
 * modify it under the terms of the GNU Lesser General Public
 * License as published by the Free Software Foundation; either
 * version 2.1 of the License, or (at your option) any later version.
 *
 * FFmpeg is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
 * Lesser General Public License for more details.
 *
 * You should have received a copy of the GNU Lesser General Public
 * License along with FFmpeg; if not, write to the Free Software
 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
 */
 20
 21#include "libavutil/arm/asm.S"
 22
 23function ff_ac3_max_msb_abs_int16_neon, export=1
 24        vmov.i16        q0,  #0
 25        vmov.i16        q2,  #0
 261:      vld1.16         {q1},     [r0,:128]!
 27        vabs.s16        q1,  q1
 28        vld1.16         {q3},     [r0,:128]!
 29        vabs.s16        q3,  q3
 30        vorr            q0,  q0,  q1
 31        vorr            q2,  q2,  q3
 32        subs            r1,  r1,  #16
 33        bgt             1b
 34        vorr            q0,  q0,  q2
 35        vorr            d0,  d0,  d1
 36        vpmax.u16       d0,  d0,  d0
 37        vpmax.u16       d0,  d0,  d0
 38        vmov.u16        r0,  d0[0]
 39        bx              lr
 40endfunc
 41
 42function ff_ac3_exponent_min_neon, export=1
 43        cmp             r1,  #0
 44        it              eq
 45        bxeq            lr
 46        push            {lr}
 47        mov             r12, #256
 481:
 49        vld1.8          {q0},     [r0,:128]
 50        mov             lr,  r1
 51        add             r3,  r0,  #256
 522:      vld1.8          {q1},     [r3,:128], r12
 53        subs            lr,  lr,  #1
 54        vmin.u8         q0,  q0,  q1
 55        bgt             2b
 56        subs            r2,  r2,  #16
 57        vst1.8          {q0},     [r0,:128]!
 58        bgt             1b
 59        pop             {pc}
 60endfunc
 61
 62function ff_ac3_lshift_int16_neon, export=1
 63        vdup.16         q0,  r2
 641:      vld1.16         {q1},     [r0,:128]
 65        vshl.s16        q1,  q1,  q0
 66        vst1.16         {q1},     [r0,:128]!
 67        subs            r1,  r1,  #8
 68        bgt             1b
 69        bx              lr
 70endfunc
 71
 72function ff_ac3_rshift_int32_neon, export=1
 73        rsb             r2,  r2,  #0
 74        vdup.32         q0,  r2
 751:      vld1.32         {q1},     [r0,:128]
 76        vshl.s32        q1,  q1,  q0
 77        vst1.32         {q1},     [r0,:128]!
 78        subs            r1,  r1,  #4
 79        bgt             1b
 80        bx              lr
 81endfunc
 82
 83function ff_float_to_fixed24_neon, export=1
 841:      vld1.32         {q0-q1},  [r1,:128]!
 85        vcvt.s32.f32    q0,  q0,  #24
 86        vld1.32         {q2-q3},  [r1,:128]!
 87        vcvt.s32.f32    q1,  q1,  #24
 88        vcvt.s32.f32    q2,  q2,  #24
 89        vst1.32         {q0-q1},  [r0,:128]!
 90        vcvt.s32.f32    q3,  q3,  #24
 91        vst1.32         {q2-q3},  [r0,:128]!
 92        subs            r2,  r2,  #16
 93        bgt             1b
 94        bx              lr
 95endfunc
 96
 97function ff_ac3_extract_exponents_neon, export=1
 98        vmov.i32        q15, #8
 991:
100        vld1.32         {q0},     [r1,:128]!
101        vabs.s32        q1,  q0
102        vclz.i32        q3,  q1
103        vsub.i32        q3,  q3,  q15
104        vmovn.i32       d6,  q3
105        vmovn.i16       d6,  q3
106        vst1.32         {d6[0]},  [r0,:32]!
107        subs            r2,  r2,  #4
108        bgt             1b
109        bx              lr
110endfunc
111
/*
 * ff_apply_window_int16_neon
 *
 * Multiplies an int16 input by a symmetric window using a saturating,
 * rounding, doubling high-half multiply (VQRDMULH). Only the first half of
 * the window is read: it is applied forwards to the front of the input and
 * lane-reversed to the matching block at the back.
 *
 * In:   r0 = output pointer, int16 elements, 16-byte aligned
 *       r1 = input pointer, int16 elements, 16-byte aligned
 *       r2 = window pointer, int16 elements, 16-byte aligned
 *       r3 = total element count; 16 elements are produced per iteration
 *            (8 at the front, 8 at the back), the loop body always runs at
 *            least once
 * Out:  none (output written)
 * Clob: r0-r3, r12, q0-q3, flags (r4 and lr are saved and restored)
 */
function ff_apply_window_int16_neon, export=1
        push            {r4,lr}
        add             r4,  r1,  r3,  lsl #1   @ r4 = input  + len elements
        add             lr,  r0,  r3,  lsl #1   @ lr = output + len elements
        sub             r4,  r4,  #16           @ back up to the last 8 inputs
        sub             lr,  lr,  #16           @ back up to the last 8 outputs
        mov             r12, #-16               @ backward step, in bytes
1:
        vld1.16         {q0},     [r1,:128]!    @ 8 inputs from the front
        vld1.16         {q2},     [r2,:128]!    @ 8 window coefficients
        vld1.16         {q1},     [r4,:128], r12 @ 8 inputs from the back
        vrev64.16       q3,  q2                 @ reverse lanes in each half
        vqrdmulh.s16    q0,  q0,  q2
        vqrdmulh.s16    d2,  d2,  d7            @ halves swapped (d7 with d2,
        vqrdmulh.s16    d3,  d3,  d6            @ d6 with d3): full reversal
        vst1.16         {q0},     [r0,:128]!
        vst1.16         {q1},     [lr,:128], r12
        subs            r3,  r3,  #16
        bgt             1b

        pop             {r4,pc}
endfunc
134
/*
 * ff_ac3_sum_square_butterfly_int32_neon
 *
 * Accumulates four 64-bit sums of squares over two int32 arrays a and b:
 *   out[0] = sum of a[i]^2
 *   out[1] = sum of b[i]^2
 *   out[2] = sum of (a[i] + b[i])^2
 *   out[3] = sum of (a[i] - b[i])^2
 * The sum and difference are formed in 32 bits before the widening
 * multiply-accumulate.
 *
 * In:   r0 = output pointer, four int64 values (no alignment hint used)
 *       r1 = pointer to a, int32 elements (no alignment hint used)
 *       r2 = pointer to b, int32 elements (no alignment hint used)
 *       r3 = element count; 2 per iteration, the loop body always runs at
 *            least once
 * Out:  none (four sums written to r0)
 * Clob: r1, r2, r3, q0-q3, q8, q9 (d16-d19), flags
 */
function ff_ac3_sum_square_butterfly_int32_neon, export=1
        vmov.i64        q0,  #0                 @ sum of a^2      (2 lanes)
        vmov.i64        q1,  #0                 @ sum of b^2      (2 lanes)
        vmov.i64        q2,  #0                 @ sum of (a+b)^2  (2 lanes)
        vmov.i64        q3,  #0                 @ sum of (a-b)^2  (2 lanes)
1:
        vld1.32         {d16},    [r1]!
        vld1.32         {d17},    [r2]!
        vadd.s32        d18, d16, d17
        vsub.s32        d19, d16, d17
        vmlal.s32       q0,  d16, d16
        vmlal.s32       q1,  d17, d17
        vmlal.s32       q2,  d18, d18
        vmlal.s32       q3,  d19, d19
        subs            r3,  r3,  #2
        bgt             1b
        vadd.s64        d0,  d0,  d1            @ fold each pair of lanes and
        vadd.s64        d1,  d2,  d3            @ pack the four totals into
        vadd.s64        d2,  d4,  d5            @ d0-d3, in output order
        vadd.s64        d3,  d6,  d7
        vst1.64         {q0-q1},  [r0]
        bx              lr
endfunc
158
/*
 * ff_ac3_sum_square_butterfly_float_neon
 *
 * Accumulates four single-precision sums of squares over two float arrays
 * a and b:
 *   out[0] = sum of a[i]^2
 *   out[1] = sum of b[i]^2
 *   out[2] = sum of (a[i] + b[i])^2
 *   out[3] = sum of (a[i] - b[i])^2
 *
 * In:   r0 = output pointer, four floats (no alignment hint used)
 *       r1 = pointer to a, float elements (no alignment hint used)
 *       r2 = pointer to b, float elements (no alignment hint used)
 *       r3 = element count; 2 per iteration, the loop body always runs at
 *            least once
 * Out:  none (four sums written to r0)
 * Clob: r1, r2, r3, q0, q1, q8, q9 (d16-d19), flags
 */
function ff_ac3_sum_square_butterfly_float_neon, export=1
        vmov.f32        q0,  #0.0               @ d0: a^2      d1: b^2
        vmov.f32        q1,  #0.0               @ d2: (a+b)^2  d3: (a-b)^2
1:
        vld1.32         {d16},    [r1]!
        vld1.32         {d17},    [r2]!
        vadd.f32        d18, d16, d17
        vsub.f32        d19, d16, d17
        vmla.f32        d0,  d16, d16
        vmla.f32        d1,  d17, d17
        vmla.f32        d2,  d18, d18
        vmla.f32        d3,  d19, d19
        subs            r3,  r3,  #2
        bgt             1b
        vpadd.f32       d0,  d0,  d1            @ d0 = { out[0], out[1] }
        vpadd.f32       d1,  d2,  d3            @ d1 = { out[2], out[3] }
        vst1.32         {q0},     [r0]
        bx              lr
endfunc