/media/libvpx/vp8/decoder/arm/armv6/dequant_idct_v6.asm
http://github.com/zpao/v8monkey · Assembly · 196 lines · 154 code · 23 blank · 19 comment · 0 complexity · d2ff014a10c7d05e6433e72b32fbc91d MD5 · raw file
- ;
- ; Copyright (c) 2010 The WebM project authors. All Rights Reserved.
- ;
- ; Use of this source code is governed by a BSD-style license and patent
- ; grant that can be found in the LICENSE file in the root of the source
- ; tree. All contributing project authors may be found in the AUTHORS
- ; file in the root of the source tree.
- ;
- EXPORT |vp8_dequant_idct_add_v6|
- AREA |.text|, CODE, READONLY
- ;void vp8_dequant_idct_add_v6(short *input, short *dq, unsigned char *pred,
- ; unsigned char *dest, int pitch, int stride)
- ; Scales the 16 shorts at input by dq in place, runs the two-pass 4x4 IDCT on them, adds pred,
- ; stores the clamped bytes to dest and finally zeroes the 16 input shorts.
- ; r0 = input (read/write), r1 = dq, r2 = pred (rows pitch bytes apart), r3 = dest (rows stride bytes apart)
- ; Stack args sit at [sp] and [sp, #4] on entry; offsets below are after the 36-byte push, then after the 4-byte local:
- ; sp + 36 = pitch ; +4 = 40
- ; sp + 40 = stride ; +4 = 44
- |vp8_dequant_idct_add_v6| PROC
- stmdb sp!, {r4-r11, lr} ; push 9 registers (36 bytes); r4-r11 are restored by the ldmia at the end
- ldr r4, [r0] ; first pair of input shorts, one per halfword
- ldr r5, [r1], #4 ; first pair of dq shorts; dq pointer post-incremented
- sub sp, sp, #4 ; reserve one word of local storage
- str r3, [sp] ; [sp] = running dest pointer, so r3 can be reused for an IDCT constant
- mov r12, #4 ; 4 iterations x 4 coefficients = 16
- vp8_dequant_add_loop ; dequantize in place: input[i] = low 16 bits of input[i] * dq[i]
- smulbb r6, r4, r5 ; product of the two bottom halfwords
- smultt r7, r4, r5 ; product of the two top halfwords
- ldr r4, [r0, #4] ; next input pair, fetched before the stores below advance r0
- ldr r5, [r1], #4 ; next dq pair
- strh r6, [r0], #2 ; write the first product back over its coefficient
- strh r7, [r0], #2 ; write the second product; r0 has now advanced one word
- smulbb r6, r4, r5 ; second pair of this iteration, bottom halfwords
- smultt r7, r4, r5 ; second pair, top halfwords
- subs r12, r12, #1 ; Z set on the last iteration; nothing changes N/Z before the bne
- ldrne r4, [r0, #4] ; preload for the next iteration; skipped on the last one so nothing past the 16th coefficient is read
- ldrne r5, [r1], #4 ; likewise for dq
- strh r6, [r0], #2 ; write back the third product of this iteration
- strh r7, [r0], #2 ; write back the fourth; r0 advanced 8 bytes per iteration
- bne vp8_dequant_add_loop
- sub r0, r0, #32 ; rewind r0 to the first coefficient (4 iterations x 8 bytes)
- mov r1, r0 ; r1 = pass-1 write pointer; results overwrite the coefficients
- ; short_idct4x4llm_v6_dual: pass 1 combines the words at byte offsets 0/8/16/24 (rows 0-3), two columns at a time, one column per halfword (.hi/.lo below)
- ldr r3, cospi8sqrt2minus1 ; r3 = first multiplier, written "cos" in the comments below
- ldr r4, sinpi8sqrt2 ; r4 = second multiplier, written "sin" in the comments below
- ldr r6, [r0, #8] ; r6 = row 1 of the first column pair
- mov r5, #2 ; two iterations: first column pair, then second column pair
- vp8_dequant_idct_loop1_v6 ; smulw* keeps the top 32 bits of the 48-bit product, i.e. (constant * halfword) >> 16
- ldr r12, [r0, #24] ; r12 = row 3
- ldr r14, [r0, #16] ; r14 = row 2
- smulwt r9, r3, r6 ; cos * row1.hi
- smulwb r7, r3, r6 ; cos * row1.lo
- smulwt r10, r4, r6 ; sin * row1.hi
- smulwb r8, r4, r6 ; sin * row1.lo
- pkhbt r7, r7, r9, lsl #16 ; r7 = cos*row1, both columns repacked
- smulwt r11, r3, r12 ; cos * row3.hi
- pkhbt r8, r8, r10, lsl #16 ; r8 = sin*row1, both columns repacked
- uadd16 r6, r6, r7 ; r6 = row1 + cos*row1
- smulwt r7, r4, r12 ; sin * row3.hi
- smulwb r9, r3, r12 ; cos * row3.lo
- smulwb r10, r4, r12 ; sin * row3.lo
- subs r5, r5, #1 ; loop counter; the instructions up to the bne leave N/Z alone
- pkhbt r9, r9, r11, lsl #16 ; r9 = cos*row3, both columns repacked
- ldr r11, [r0], #4 ; r11 = row 0; read pointer moves to the next column pair
- pkhbt r10, r10, r7, lsl #16 ; r10 = sin*row3, both columns repacked
- uadd16 r7, r12, r9 ; row3 + cos*row3
- usub16 r7, r8, r7 ; c = sin*row1 - (row3 + cos*row3)
- uadd16 r6, r6, r10 ; d = (row1 + cos*row1) + sin*row3
- uadd16 r10, r11, r14 ; a = row0 + row2
- usub16 r8, r11, r14 ; b = row0 - row2
- uadd16 r9, r10, r6 ; a + d, new row 0
- usub16 r10, r10, r6 ; a - d, new row 3
- uadd16 r6, r8, r7 ; b + c, new row 1
- usub16 r7, r8, r7 ; b - c, new row 2
- str r6, [r1, #8] ; store new row 1
- ldrne r6, [r0, #8] ; row 1 of the next column pair (r0 was already advanced); skipped on the last iteration
- str r7, [r1, #16] ; store new row 2
- str r10, [r1, #24] ; store new row 3
- str r9, [r1], #4 ; store new row 0; write pointer moves to the next column pair
- bne vp8_dequant_idct_loop1_v6
- mov r5, #2 ; pass 2: two iterations, two rows each
- sub r0, r1, #8 ; r1 advanced 8 bytes during pass 1, so r0 = start of the block again
- vp8_dequant_idct_loop2_v6 ; pass 2 transforms along rows A and B together: A in the top halfwords, B in the bottom. NOTE(review): element order within a word assumes little-endian loads - confirm
- ldr r6, [r0], #4 ; r6 = A[1]:A[0]
- ldr r7, [r0], #4 ; r7 = A[3]:A[2]
- ldr r8, [r0], #4 ; r8 = B[1]:B[0]
- ldr r9, [r0], #4 ; r9 = B[3]:B[2]
- smulwt r1, r3, r6 ; cos * A[1]
- smulwt r12, r4, r6 ; sin * A[1]
- smulwt lr, r3, r8 ; cos * B[1]
- smulwt r10, r4, r8 ; sin * B[1]
- pkhbt r11, r8, r6, lsl #16 ; r11 = A[0]:B[0]
- pkhbt r1, lr, r1, lsl #16 ; r1 = cos*A[1] : cos*B[1]
- pkhbt r12, r10, r12, lsl #16 ; r12 = sin*A[1] : sin*B[1]
- pkhtb r6, r6, r8, asr #16 ; r6 = A[1]:B[1]
- uadd16 r6, r1, r6 ; x1 + cos*x1 (x stands for A in the top lane, B in the bottom lane)
- pkhbt lr, r9, r7, lsl #16 ; lr = A[2]:B[2]
- uadd16 r10, r11, lr ; a = x0 + x2
- usub16 lr, r11, lr ; b = x0 - x2 (kept in lr, read back below as r14)
- pkhtb r8, r7, r9, asr #16 ; r8 = A[3]:B[3]
- subs r5, r5, #1 ; loop counter; N/Z stay untouched until the bne at the end of the loop
- smulwt r1, r3, r8 ; cos * A[3]
- smulwb r7, r3, r8 ; cos * B[3]
- smulwt r11, r4, r8 ; sin * A[3]
- smulwb r9, r4, r8 ; sin * B[3]
- pkhbt r1, r7, r1, lsl #16 ; r1 = cos*x3
- uadd16 r8, r1, r8 ; x3 + cos*x3
- pkhbt r11, r9, r11, lsl #16 ; r11 = sin*x3
- usub16 r1, r12, r8 ; c = sin*x1 - (x3 + cos*x3)
- uadd16 r8, r11, r6 ; d = sin*x3 + (x1 + cos*x1)
- ldr r9, c0x00040004 ; 4 in each halfword: rounding bias for the >> 3 below
- ldr r12, [sp, #40] ; r12 = pitch
- uadd16 r6, r10, r8 ; out0 = a + d
- usub16 r7, r10, r8 ; out3 = a - d
- uadd16 r7, r7, r9 ; out3 + 4
- uadd16 r6, r6, r9 ; out0 + 4
- uadd16 r10, r14, r1 ; out1 = b + c (r14 is lr)
- usub16 r1, r14, r1 ; out2 = b - c
- uadd16 r10, r10, r9 ; out1 + 4
- uadd16 r1, r1, r9 ; out2 + 4
- ldr r11, [r2], r12 ; 4 pred bytes for row A; pred += pitch. NOTE(review): word load through a byte pointer - presumably aligned or unaligned access is enabled; confirm
- mov r8, r7, asr #3 ; top halfword = out3(A) >> 3; bottom halfword is replaced by the next pkhtb
- pkhtb r9, r8, r10, asr #19 ; r9 = out3(A)>>3 : out1(A)>>3
- mov r8, r1, asr #3 ; top halfword = out2(A) >> 3
- pkhtb r8, r8, r6, asr #19 ; r8 = out2(A)>>3 : out0(A)>>3
- uxtb16 lr, r11, ror #8 ; lr = pred byte 3 : pred byte 1, zero-extended to halfwords
- qadd16 r9, r9, lr ; residual + prediction for pixels 3 and 1
- uxtb16 lr, r11 ; lr = pred byte 2 : pred byte 0
- qadd16 r8, r8, lr ; residual + prediction for pixels 2 and 0
- usat16 r9, #8, r9 ; clamp both halfwords to 0..255
- usat16 r8, #8, r8 ; clamp both halfwords to 0..255
- orr r9, r8, r9, lsl #8 ; r9 = pixels 3,2,1,0 of row A in bytes 3..0
- ldr r11, [r2], r12 ; 4 pred bytes for row B; pred += pitch
- ldr lr, [sp] ; lr = running dest pointer
- ldr r12, [sp, #44] ; r12 = stride
- mov r7, r7, lsl #16 ; move the row B lane (bottom halfword) of out3 to the top
- mov r1, r1, lsl #16 ; same for out2
- mov r10, r10, lsl #16 ; same for out1
- mov r6, r6, lsl #16 ; same for out0
- mov r7, r7, asr #3 ; top halfword = out3(B) >> 3
- pkhtb r7, r7, r10, asr #19 ; r7 = out3(B)>>3 : out1(B)>>3
- mov r1, r1, asr #3 ; top halfword = out2(B) >> 3
- pkhtb r1, r1, r6, asr #19 ; r1 = out2(B)>>3 : out0(B)>>3
- uxtb16 r8, r11, ror #8 ; r8 = pred byte 3 : pred byte 1
- qadd16 r7, r7, r8 ; residual + prediction for pixels 3 and 1
- uxtb16 r8, r11 ; r8 = pred byte 2 : pred byte 0
- qadd16 r1, r1, r8 ; residual + prediction for pixels 2 and 0
- usat16 r7, #8, r7 ; clamp both halfwords to 0..255
- usat16 r1, #8, r1 ; clamp both halfwords to 0..255
- orr r1, r1, r7, lsl #8 ; r1 = pixels 3,2,1,0 of row B in bytes 3..0
- str r9, [lr], r12 ; write row A; dest += stride. NOTE(review): word store through a byte pointer - same alignment assumption as the pred loads
- str r1, [lr], r12 ; write row B; dest += stride
- str lr, [sp] ; save the dest pointer for the next iteration
- bne vp8_dequant_idct_loop2_v6
- ; vpx_memset: inline clear of the 32-byte coefficient buffer
- sub r0, r0, #32 ; pass 2 advanced r0 by 32 bytes; point it back at input
- add sp, sp, #4 ; release the dest-pointer slot
- mov r12, #0 ; zero word used by the eight stores below
- str r12, [r0] ; clear coefficients 0-1
- str r12, [r0, #4] ; clear coefficients 2-3
- str r12, [r0, #8] ; clear coefficients 4-5
- str r12, [r0, #12] ; clear coefficients 6-7
- str r12, [r0, #16] ; clear coefficients 8-9
- str r12, [r0, #20] ; clear coefficients 10-11
- str r12, [r0, #24] ; clear coefficients 12-13
- str r12, [r0, #28] ; clear coefficients 14-15
- ldmia sp!, {r4 - r11, pc} ; restore r4-r11 and return by loading the saved lr into pc
- ENDP ; |vp8_dequant_idct_add_v6|
- ; Constant Pool: word literals fetched by the label-form ldr instructions in the procedure above
- cospi8sqrt2minus1 DCD 0x00004E7B ; 20091; used with smulw* (product >> 16), so it scales by 20091/65536 ~= 0.30656 = sqrt(2)*cos(pi/8) - 1
- sinpi8sqrt2 DCD 0x00008A8C ; 35468; used with smulw* (product >> 16), so it scales by 35468/65536 ~= 0.54120 = sqrt(2)*sin(pi/8)
- c0x00040004 DCD 0x00040004 ; 4 in each halfword; added with uadd16 before the arithmetic shift right by 3
- END