/media/libvpx/vp8/decoder/arm/armv6/dequant_idct_v6.asm

http://github.com/zpao/v8monkey · Assembly · 196 lines · 154 code · 23 blank · 19 comment · 0 complexity · d2ff014a10c7d05e6433e72b32fbc91d MD5 · raw file

  1. ;
  2. ; Copyright (c) 2010 The WebM project authors. All Rights Reserved.
  3. ;
  4. ; Use of this source code is governed by a BSD-style license and patent
  5. ; grant that can be found in the LICENSE file in the root of the source
  6. ; tree. All contributing project authors may be found in the AUTHORS
  7. ; file in the root of the source tree.
  8. ;
  9. EXPORT |vp8_dequant_idct_add_v6|
  10. AREA |.text|, CODE, READONLY
  11. ;void vp8_dequant_idct_add_v6(short *input, short *dq, unsigned char *pred,
  12. ; unsigned char *dest, int pitch, int stride)
  13. ; r0 = input
  14. ; r1 = dq
  15. ; r2 = pred
  16. ; r3 = dest
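  17. ; pitch = [sp, #36] after the 9-register push; [sp, #40] once the 4-byte dest slot is reserved
  18. ; stride = [sp, #40] after the 9-register push; [sp, #44] once the 4-byte dest slot is reserved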
  19. |vp8_dequant_idct_add_v6| PROC ; multiplies 16 input coeffs by dq in place, runs a two-pass 4x4 inverse transform, adds pred bytes, stores clamped bytes to dest, then zeroes the 32-byte input block
  20. stmdb sp!, {r4-r11, lr} ; save 9 registers (36 bytes)
  21. ldr r4, [r0] ; r4 = first two input coefficients, one per halfword
  22. ldr r5, [r1], #4 ; r5 = first two dq factors; dq pointer advances 4 bytes
  23. sub sp, sp, #4 ; reserve one stack word
  24. str r3, [sp] ; spill dest into it (r3 is reused for a constant below)
  25. mov r12, #4 ; loop count: 4 iterations x 4 coefficients = 16
  26. vp8_dequant_add_loop ; dequantization loop: input[i] = input[i] * dq[i], written back over input
  27. smulbb r6, r4, r5 ; low halfword of input * low halfword of dq
  28. smultt r7, r4, r5 ; high halfword of input * high halfword of dq
  29. ldr r4, [r0, #4] ; fetch next coefficient pair before r0 is advanced
  30. ldr r5, [r1], #4 ; fetch next dq pair
  31. strh r6, [r0], #2 ; store low 16 bits of the product in place, advance 2 bytes
  32. strh r7, [r0], #2
  33. smulbb r6, r4, r5 ; same two multiplies for the second pair
  34. smultt r7, r4, r5
  35. subs r12, r12, #1 ; sets Z for the ldrne/bne below (strh does not change flags)
  36. ldrne r4, [r0, #4] ; preload the next iteration's first pair only if another iteration follows
  37. ldrne r5, [r1], #4
  38. strh r6, [r0], #2 ; store second pair; r0 has now advanced 8 bytes this iteration
  39. strh r7, [r0], #2
  40. bne vp8_dequant_add_loop
  41. sub r0, r0, #32 ; rewind r0 to the start of the block (4 iterations x 8 bytes)
  42. mov r1, r0 ; r1 = write pointer for pass 1 (results overwrite the input block)
  43. ; Pass 1 of the inverse transform (original note: short_idct4x4llm_v6_dual): combines the words at byte offsets 0/8/16/24, two 16-bit lanes per iteration
  44. ldr r3, cospi8sqrt2minus1 ; r3 = C = 0x4E7B, used as (x * C) >> 16 by smulw*
  45. ldr r4, sinpi8sqrt2 ; r4 = S = 0x8A8C, used as (x * S) >> 16 by smulw*
  46. ldr r6, [r0, #8] ; r6 = ip1 = word at byte offset 8 for the first iteration
  47. mov r5, #2 ; two iterations, each handling two 16-bit lanes
  48. vp8_dequant_idct_loop1_v6 ; per iteration: ip0=[r0], ip1=[r0,#8] (already in r6), ip2=[r0,#16], ip3=[r0,#24]
  49. ldr r12, [r0, #24] ; r12 = ip3
  50. ldr r14, [r0, #16] ; r14 = ip2
  51. smulwt r9, r3, r6 ; (ip1.hi * C) >> 16
  52. smulwb r7, r3, r6 ; (ip1.lo * C) >> 16
  53. smulwt r10, r4, r6 ; (ip1.hi * S) >> 16
  54. smulwb r8, r4, r6 ; (ip1.lo * S) >> 16
  55. pkhbt r7, r7, r9, lsl #16 ; r7 = (ip1 * C) >> 16 in both lanes
  56. smulwt r11, r3, r12 ; (ip3.hi * C) >> 16
  57. pkhbt r8, r8, r10, lsl #16 ; r8 = (ip1 * S) >> 16 in both lanes
  58. uadd16 r6, r6, r7 ; r6 = ip1 + (ip1 * C >> 16)
  59. smulwt r7, r4, r12 ; (ip3.hi * S) >> 16
  60. smulwb r9, r3, r12 ; (ip3.lo * C) >> 16
  61. smulwb r10, r4, r12 ; (ip3.lo * S) >> 16
  62. subs r5, r5, #1 ; sets Z for the ldrne/bne below; none of the instructions in between write N/Z/C/V
  63. pkhbt r9, r9, r11, lsl #16 ; r9 = (ip3 * C) >> 16 in both lanes
  64. ldr r11, [r0], #4 ; r11 = ip0; r0 advances to the next lane pair
  65. pkhbt r10, r10, r7, lsl #16 ; r10 = (ip3 * S) >> 16 in both lanes
  66. uadd16 r7, r12, r9 ; r7 = ip3 + (ip3 * C >> 16)
  67. usub16 r7, r8, r7 ; c1 = (ip1 * S >> 16) - (ip3 + (ip3 * C >> 16))
  68. uadd16 r6, r6, r10 ; d1 = (ip1 + (ip1 * C >> 16)) + (ip3 * S >> 16)
  69. uadd16 r10, r11, r14 ; a1 = ip0 + ip2
  70. usub16 r8, r11, r14 ; b1 = ip0 - ip2
  71. uadd16 r9, r10, r6 ; a1 + d1 -> stored at offset 0
  72. usub16 r10, r10, r6 ; a1 - d1 -> stored at offset 24
  73. uadd16 r6, r8, r7 ; b1 + c1 -> stored at offset 8
  74. usub16 r7, r8, r7 ; b1 - c1 -> stored at offset 16
  75. str r6, [r1, #8]
  76. ldrne r6, [r0, #8] ; preload ip1 for the next iteration (r0 was already advanced by 4)
  77. str r7, [r1, #16]
  78. str r10, [r1, #24]
  79. str r9, [r1], #4 ; store offset-0 result and advance the write pointer
  80. bne vp8_dequant_idct_loop1_v6
  81. mov r5, #2 ; pass 2 also runs two iterations
  82. sub r0, r1, #8 ; r1 advanced 8 bytes during pass 1, so r0 = start of the block again
  83. vp8_dequant_idct_loop2_v6 ; pass 2: each iteration consumes 16 consecutive bytes (two groups of four halfwords) and emits two 4-byte output rows
  84. ldr r6, [r0], #4 ; first group, elements 0 (lo) and 1 (hi)
  85. ldr r7, [r0], #4 ; first group, elements 2 and 3
  86. ldr r8, [r0], #4 ; second group, elements 0 and 1
  87. ldr r9, [r0], #4 ; second group, elements 2 and 3
  88. smulwt r1, r3, r6 ; (first-group el1 * C) >> 16 (r1 is free to use as scratch here)
  89. smulwt r12, r4, r6 ; (first-group el1 * S) >> 16
  90. smulwt lr, r3, r8 ; (second-group el1 * C) >> 16
  91. smulwt r10, r4, r8 ; (second-group el1 * S) >> 16
  92. pkhbt r11, r8, r6, lsl #16 ; r11 = el0 of both groups (hi lane = first group, lo lane = second group)
  93. pkhbt r1, lr, r1, lsl #16 ; r1 = (el1 * C) >> 16, both groups
  94. pkhbt r12, r10, r12, lsl #16 ; r12 = (el1 * S) >> 16, both groups
  95. pkhtb r6, r6, r8, asr #16 ; r6 = el1 of both groups
  96. uadd16 r6, r1, r6 ; r6 = el1 + (el1 * C >> 16)
  97. pkhbt lr, r9, r7, lsl #16 ; lr = el2 of both groups
  98. uadd16 r10, r11, lr ; a1 = el0 + el2
  99. usub16 lr, r11, lr ; b1 = el0 - el2 (kept in lr/r14 until line 118)
  100. pkhtb r8, r7, r9, asr #16 ; r8 = el3 of both groups
  101. subs r5, r5, #1 ; sets Z for the bne at the loop end; nothing in between writes N/Z/C/V
  102. smulwt r1, r3, r8 ; (el3.hi * C) >> 16
  103. smulwb r7, r3, r8 ; (el3.lo * C) >> 16
  104. smulwt r11, r4, r8 ; (el3.hi * S) >> 16
  105. smulwb r9, r4, r8 ; (el3.lo * S) >> 16
  106. pkhbt r1, r7, r1, lsl #16 ; r1 = (el3 * C) >> 16, both groups
  107. uadd16 r8, r1, r8 ; r8 = el3 + (el3 * C >> 16)
  108. pkhbt r11, r9, r11, lsl #16 ; r11 = (el3 * S) >> 16, both groups
  109. usub16 r1, r12, r8 ; c1 = (el1 * S >> 16) - (el3 + (el3 * C >> 16))
  110. uadd16 r8, r11, r6 ; d1 = (el3 * S >> 16) + (el1 + (el1 * C >> 16))
  111. ldr r9, c0x00040004 ; r9 = 4 in each lane: rounding bias for the >>3 below
  112. ldr r12, [sp, #40] ; r12 = pitch (5th argument), used as the pred row step
  113. uadd16 r6, r10, r8 ; out0 = a1 + d1
  114. usub16 r7, r10, r8 ; out3 = a1 - d1
  115. uadd16 r7, r7, r9 ; out3 += 4
  116. uadd16 r6, r6, r9 ; out0 += 4
  117. uadd16 r10, r14, r1 ; out1 = b1 + c1
  118. usub16 r1, r14, r1 ; out2 = b1 - c1
  119. uadd16 r10, r10, r9 ; out1 += 4
  120. uadd16 r1, r1, r9 ; out2 += 4
  121. ldr r11, [r2], r12 ; r11 = 4 pred bytes for the first output row; pred += pitch
  122. mov r8, r7, asr #3 ; top halfword of r8 = out3.hi >> 3
  123. pkhtb r9, r8, r10, asr #19 ; r9 = {hi: out3 >> 3, lo: out1 >> 3} for the first group (pixels 3 and 1)
  124. mov r8, r1, asr #3 ; top halfword of r8 = out2.hi >> 3
  125. pkhtb r8, r8, r6, asr #19 ; r8 = {hi: out2 >> 3, lo: out0 >> 3} for the first group (pixels 2 and 0)
  126. uxtb16 lr, r11, ror #8 ; lr = pred bytes 1 and 3, zero-extended to halfwords
  127. qadd16 r9, r9, lr ; residual + pred for pixels 1 and 3 (signed saturating add)
  128. uxtb16 lr, r11 ; lr = pred bytes 0 and 2
  129. qadd16 r8, r8, lr ; residual + pred for pixels 0 and 2
  130. usat16 r9, #8, r9 ; clamp each lane to 0..255
  131. usat16 r8, #8, r8
  132. orr r9, r8, r9, lsl #8 ; interleave into 4 bytes: pixels 0,2 from r8 and 1,3 from r9
  133. ldr r11, [r2], r12 ; r11 = 4 pred bytes for the second output row; pred += pitch
  134. ldr lr, [sp] ; lr = current dest pointer (spilled on the stack)
  135. ldr r12, [sp, #44] ; r12 = stride (6th argument), used as the dest row step
  136. mov r7, r7, lsl #16 ; move the lo lanes (second group) into the top halfword
  137. mov r1, r1, lsl #16
  138. mov r10, r10, lsl #16
  139. mov r6, r6, lsl #16
  140. mov r7, r7, asr #3 ; top halfword = out3 >> 3 (second group)
  141. pkhtb r7, r7, r10, asr #19 ; r7 = {hi: out3 >> 3, lo: out1 >> 3} for the second group
  142. mov r1, r1, asr #3 ; top halfword = out2 >> 3 (second group)
  143. pkhtb r1, r1, r6, asr #19 ; r1 = {hi: out2 >> 3, lo: out0 >> 3} for the second group
  144. uxtb16 r8, r11, ror #8 ; pred bytes 1 and 3
  145. qadd16 r7, r7, r8
  146. uxtb16 r8, r11 ; pred bytes 0 and 2
  147. qadd16 r1, r1, r8
  148. usat16 r7, #8, r7 ; clamp each lane to 0..255
  149. usat16 r1, #8, r1
  150. orr r1, r1, r7, lsl #8 ; interleave into 4 output bytes for the second row
  151. str r9, [lr], r12 ; write first output row; dest += stride
  152. str r1, [lr], r12 ; write second output row; dest += stride
  153. str lr, [sp] ; save the advanced dest pointer for the next iteration
  154. bne vp8_dequant_idct_loop2_v6
  155. ; vpx_memset: inline clear of the 32-byte input block with eight word stores
  156. sub r0, r0, #32 ; pass 2 advanced r0 by 32 bytes; rewind to the start of the block
  157. add sp, sp, #4 ; release the dest spill slot
  158. mov r12, #0
  159. str r12, [r0]
  160. str r12, [r0, #4]
  161. str r12, [r0, #8]
  162. str r12, [r0, #12]
  163. str r12, [r0, #16]
  164. str r12, [r0, #20]
  165. str r12, [r0, #24]
  166. str r12, [r0, #28]
  167. ldmia sp!, {r4 - r11, pc} ; restore saved registers and return (saved lr is loaded into pc)
  168. ENDP ; |vp8_dequant_idct_add_v6|
  169. ; Constant Pool: word literals that vp8_dequant_idct_add_v6 loads by label with ldr
  170. cospi8sqrt2minus1 DCD 0x00004E7B ; 20091; multiplied via smulw* as (x * 20091) >> 16, i.e. x * ~0.3066 (matches sqrt(2)*cos(pi/8) - 1)
  171. sinpi8sqrt2 DCD 0x00008A8C ; 35468; multiplied via smulw* as (x * 35468) >> 16, i.e. x * ~0.5412 (matches sqrt(2)*sin(pi/8))
  172. c0x00040004 DCD 0x00040004 ; 4 in each halfword; added with uadd16 as the rounding bias before the final >> 3
  173. END