/media/libvpx/vp8/decoder/arm/armv6/dequant_dc_idct_v6.asm

http://github.com/zpao/v8monkey · Assembly · 218 lines · 166 code · 32 blank · 20 comment · 0 complexity · 84b49dee3520b11f506f653f109be25f MD5 · raw file

  1. ;
  2. ; Copyright (c) 2010 The WebM project authors. All Rights Reserved.
  3. ;
  4. ; Use of this source code is governed by a BSD-style license and patent
  5. ; grant that can be found in the LICENSE file in the root of the source
  6. ; tree. All contributing project authors may be found in the AUTHORS
  7. ; file in the root of the source tree.
  8. ;
  9. EXPORT |vp8_dequant_dc_idct_add_v6| ; make the routine's symbol visible to other object files
  10. AREA |.text|, CODE, READONLY ; everything below goes into a read-only code section
  11. ;void vp8_dequant_dc_idct_add_v6(short *input, short *dq, unsigned char *pred,
  12. ; unsigned char *dest, int pitch, int stride, int Dc)
  ;
  ; What the code below does, in order:
  ;   1. Dequantise in place: input[i] *= dq[i] for i = 1..15, while input[0]
  ;      is overwritten with the low 16 bits of Dc (input[0]*dq[0] is never
  ;      computed).
  ;   2. First inverse-transform pass over the 4x4 block of halfwords, in
  ;      place, two columns at a time (one column per 16-bit lane).
  ;   3. Second pass over two rows at a time; each result gets +4, >>3, has
  ;      the matching pred byte added, is clamped to 0..255 and is written
  ;      to dest. pred advances by pitch per row, dest by stride per row.
  ;   4. The 32 bytes at input are cleared to zero.
  ;
  ; r4-r11 and lr are saved on entry and restored on exit; r0-r3 and r12 are
  ; used as scratch. One extra stack word holds the running dest pointer.
  ;
  ; Stack offsets below are "after the stmdb ; after the extra sub sp, #4".
  13. ; r0 = input
  14. ; r1 = dq
  15. ; r2 = pred
  16. ; r3 = dest
  17. ; sp + 36 = pitch ; +4 = 40
  18. ; sp + 40 = stride ; +4 = 44
  19. ; sp + 44 = Dc ; +4 = 48
  20. |vp8_dequant_dc_idct_add_v6| PROC
  21. stmdb sp!, {r4-r11, lr} ; push 9 registers (36 bytes); stack args now start at sp+36
  22. ldr r6, [sp, #44] ; r6 = Dc
  23. ldr r4, [r0] ;input[0..1] as two packed halfwords
  24. ldr r5, [r1], #4 ;dq[0..1], r1 += 4
  25. sub sp, sp, #4 ; reserve one scratch word; stack args move up by 4
  26. str r3, [sp] ; [sp] = running dest pointer (reloaded and updated in loop 2)
  27. smultt r7, r4, r5 ; r7 = input[1] * dq[1] (top halfwords)
  28. ldr r4, [r0, #4] ;input[2..3]
  29. ldr r5, [r1], #4 ;dq[2..3]
  30. strh r6, [r0], #2 ; input[0] = low halfword of Dc
  31. strh r7, [r0], #2 ; input[1] = dequantised value
  32. smulbb r6, r4, r5 ; input[2] * dq[2]
  33. smultt r7, r4, r5 ; input[3] * dq[3]
  34. ldr r4, [r0, #4] ;input[4..5] (r0 is 4 bytes in, so this reads byte offset 8)
  35. ldr r5, [r1], #4 ;dq[4..5]
  36. strh r6, [r0], #2 ; store input[2]
  37. strh r7, [r0], #2 ; store input[3]
  38. mov r12, #3 ; 3 iterations x 4 coefficients = input[4..15]
  39. vp8_dequant_dc_add_loop
  40. smulbb r6, r4, r5 ; product of the low-halfword pair loaded earlier
  41. smultt r7, r4, r5 ; product of the high-halfword pair
  42. ldr r4, [r0, #4] ;next input pair; reads stay ahead of the in-place stores
  43. ldr r5, [r1], #4 ;next dq pair
  44. strh r6, [r0], #2 ; write back the two products just computed
  45. strh r7, [r0], #2
  46. smulbb r6, r4, r5 ; second pair of this iteration
  47. smultt r7, r4, r5
  48. subs r12, r12, #1 ; sets Z for the ldrne/bne below (strh leaves the flags alone)
  49. ldrne r4, [r0, #4] ; read ahead only if another iteration follows
  50. ldrne r5, [r1], #4
  51. strh r6, [r0], #2
  52. strh r7, [r0], #2
  53. bne vp8_dequant_dc_add_loop
  54. sub r0, r0, #32 ; r0 advanced 32 bytes while storing; rewind to input[0]
  55. mov r1, r0 ; r1 = write pointer for the first pass (same buffer)
  56. ; short_idct4x4llm_v6_dual: first pass, two columns per iteration (one per 16-bit lane)
  ; Notation: c = cospi8sqrt2minus1, s = sinpi8sqrt2, rowK = packed pair taken
  ; from byte offset 8*K of the block; ">>16" is the shift built into smulw*.
  57. ldr r3, cospi8sqrt2minus1 ; r3 = c
  58. ldr r4, sinpi8sqrt2 ; r4 = s
  59. ldr r6, [r0, #8] ; r6 = row1 pair for columns 0-1
  60. mov r5, #2 ; two iterations: columns 0-1, then columns 2-3
  61. vp8_dequant_dc_idct_loop1_v6
  62. ldr r12, [r0, #24] ; r12 = row3 pair
  63. ldr r14, [r0, #16] ; r14 = row2 pair
  64. smulwt r9, r3, r6 ; (c * row1.hi) >> 16
  65. smulwb r7, r3, r6 ; (c * row1.lo) >> 16
  66. smulwt r10, r4, r6 ; (s * row1.hi) >> 16
  67. smulwb r8, r4, r6 ; (s * row1.lo) >> 16
  68. pkhbt r7, r7, r9, lsl #16 ; r7 = both lanes of (c*row1)>>16
  69. smulwt r11, r3, r12 ; (c * row3.hi) >> 16
  70. pkhbt r8, r8, r10, lsl #16 ; r8 = both lanes of (s*row1)>>16
  71. uadd16 r6, r6, r7 ; r6 = row1 + (c*row1)>>16
  72. smulwt r7, r4, r12 ; (s * row3.hi) >> 16
  73. smulwb r9, r3, r12 ; (c * row3.lo) >> 16
  74. smulwb r10, r4, r12 ; (s * row3.lo) >> 16
  75. subs r5, r5, #1 ; loop counter; Z is consumed by the ldrne/bne further down
  76. pkhbt r9, r9, r11, lsl #16 ; r9 = both lanes of (c*row3)>>16
  77. ldr r11, [r0], #4 ; r11 = row0 pair; read pointer moves to the next column pair
  78. pkhbt r10, r10, r7, lsl #16 ; r10 = both lanes of (s*row3)>>16
  79. uadd16 r7, r12, r9 ; row3 + (c*row3)>>16
  80. usub16 r7, r8, r7 ; c1 = (s*row1)>>16 - (row3 + (c*row3)>>16)
  81. uadd16 r6, r6, r10 ; d1 = (row1 + (c*row1)>>16) + (s*row3)>>16
  82. uadd16 r10, r11, r14 ; a1 = row0 + row2
  83. usub16 r8, r11, r14 ; b1 = row0 - row2
  84. uadd16 r9, r10, r6 ; a1 + d1
  85. usub16 r10, r10, r6 ; a1 - d1
  86. uadd16 r6, r8, r7 ; b1 + c1
  87. usub16 r7, r8, r7 ; b1 - c1
  88. str r6, [r1, #8] ; row1 <- b1 + c1
  89. ldrne r6, [r0, #8] ; preload row1 of the next column pair (skipped on the last pass)
  90. str r7, [r1, #16] ; row2 <- b1 - c1
  91. str r10, [r1, #24] ; row3 <- a1 - d1
  92. str r9, [r1], #4 ; row0 <- a1 + d1; write pointer moves to the next column pair
  93. bne vp8_dequant_dc_idct_loop1_v6
  ; Second pass: two rows per iteration. In the repacked registers the high
  ; lane holds the first row of the pair and the low lane the second row.
  ; x0..x3 below are the four elements of a row.
  94. mov r5, #2 ; two iterations x two rows
  95. sub r0, r1, #8 ; r1 moved 8 bytes during pass 1, so this is the block start again
  96. vp8_dequant_dc_idct_loop2_v6
  97. ldr r6, [r0], #4 ; first row: x0 (lo), x1 (hi)
  98. ldr r7, [r0], #4 ; first row: x2 (lo), x3 (hi)
  99. ldr r8, [r0], #4 ; second row: x0 (lo), x1 (hi)
  100. ldr r9, [r0], #4 ; second row: x2 (lo), x3 (hi)
  101. smulwt r1, r3, r6 ; (c * x1) >> 16, first row
  102. smulwt r12, r4, r6 ; (s * x1) >> 16, first row
  103. smulwt lr, r3, r8 ; (c * x1) >> 16, second row
  104. smulwt r10, r4, r8 ; (s * x1) >> 16, second row
  105. pkhbt r11, r8, r6, lsl #16 ; r11 = x0 of both rows (hi: first row, lo: second row)
  106. pkhbt r1, lr, r1, lsl #16 ; r1 = (c*x1)>>16 of both rows
  107. pkhbt r12, r10, r12, lsl #16 ; r12 = (s*x1)>>16 of both rows
  108. pkhtb r6, r6, r8, asr #16 ; r6 = x1 of both rows
  109. uadd16 r6, r1, r6 ; x1 + (c*x1)>>16
  110. pkhbt lr, r9, r7, lsl #16 ; lr = x2 of both rows
  111. uadd16 r10, r11, lr ; a1 = x0 + x2
  112. usub16 lr, r11, lr ; b1 = x0 - x2 (kept in lr/r14)
  113. pkhtb r8, r7, r9, asr #16 ; r8 = x3 of both rows
  114. subs r5, r5, #1 ; loop counter; nothing before the final bne changes the Z flag
  115. smulwt r1, r3, r8 ; (c * x3) >> 16, first row
  116. smulwb r7, r3, r8 ; (c * x3) >> 16, second row
  117. smulwt r11, r4, r8 ; (s * x3) >> 16, first row
  118. smulwb r9, r4, r8 ; (s * x3) >> 16, second row
  119. pkhbt r1, r7, r1, lsl #16 ; r1 = (c*x3)>>16 of both rows
  120. uadd16 r8, r1, r8 ; x3 + (c*x3)>>16
  121. pkhbt r11, r9, r11, lsl #16 ; r11 = (s*x3)>>16 of both rows
  122. usub16 r1, r12, r8 ; c1 = (s*x1)>>16 - (x3 + (c*x3)>>16)
  123. uadd16 r8, r11, r6 ; d1 = (s*x3)>>16 + (x1 + (c*x1)>>16)
  124. ldr r9, c0x00040004 ; r9 = 4 in each lane (rounding term for the >>3)
  125. ldr r12, [sp, #40] ; r12 = pitch
  126. uadd16 r6, r10, r8 ; output column 0 = a1 + d1
  127. usub16 r7, r10, r8 ; output column 3 = a1 - d1
  128. uadd16 r7, r7, r9 ; + 4
  129. uadd16 r6, r6, r9 ; + 4
  130. uadd16 r10, r14, r1 ; output column 1 = b1 + c1
  131. usub16 r1, r14, r1 ; output column 2 = b1 - c1
  132. uadd16 r10, r10, r9 ; + 4
  133. uadd16 r1, r1, r9 ; + 4
  134. ldr r11, [r2], r12 ; four pred bytes for the first row; pred += pitch
  135. mov r8, r7, asr #3 ; high lane of r8 = first-row column 3 >> 3
  136. pkhtb r9, r8, r10, asr #19 ; r9 = {hi: col3 >> 3, lo: col1 >> 3} of the first row
  137. mov r8, r1, asr #3 ; high lane of r8 = first-row column 2 >> 3
  138. pkhtb r8, r8, r6, asr #19 ; r8 = {hi: col2 >> 3, lo: col0 >> 3} of the first row
  139. uxtb16 lr, r11, ror #8 ; pred bytes 1 and 3, zero-extended to halfwords (b1 in lr is dead now)
  140. qadd16 r9, r9, lr ; add prediction to columns 1 and 3
  141. uxtb16 lr, r11 ; pred bytes 0 and 2
  142. qadd16 r8, r8, lr ; add prediction to columns 0 and 2
  143. usat16 r9, #8, r9 ; clamp each lane to 0..255
  144. usat16 r8, #8, r8
  145. orr r9, r8, r9, lsl #8 ; interleave into bytes col0,col1,col2,col3 of the first row
  146. ldr r11, [r2], r12 ; four pred bytes for the second row; pred += pitch
  147. ldr lr, [sp] ; lr = current dest pointer
  148. ldr r12, [sp, #44] ; r12 = stride
  149. mov r7, r7, lsl #16 ; move the second row's values (low lanes) into the high halfword
  150. mov r1, r1, lsl #16
  151. mov r10, r10, lsl #16
  152. mov r6, r6, lsl #16
  153. mov r7, r7, asr #3 ; from here on: same >>3 / pack / add / clamp sequence as for the first row
  154. pkhtb r7, r7, r10, asr #19 ; r7 = {hi: col3 >> 3, lo: col1 >> 3} of the second row
  155. mov r1, r1, asr #3
  156. pkhtb r1, r1, r6, asr #19 ; r1 = {hi: col2 >> 3, lo: col0 >> 3} of the second row
  157. uxtb16 r8, r11, ror #8 ; pred bytes 1 and 3
  158. qadd16 r7, r7, r8
  159. uxtb16 r8, r11 ; pred bytes 0 and 2
  160. qadd16 r1, r1, r8
  161. usat16 r7, #8, r7 ; clamp each lane to 0..255
  162. usat16 r1, #8, r1
  163. orr r1, r1, r7, lsl #8 ; four output bytes of the second row
  164. str r9, [lr], r12 ; write the first row; dest += stride
  165. str r1, [lr], r12 ; write the second row; dest += stride
  166. str lr, [sp] ; save the advanced dest pointer for the next iteration
  167. bne vp8_dequant_dc_idct_loop2_v6
  168. ; vpx_memset: clear the 32-byte coefficient block with eight word stores
  169. sub r0, r0, #32 ; loop 2 advanced r0 by 32 bytes; rewind to the block start
  170. add sp, sp, #4 ; release the scratch word that held the dest pointer
  171. mov r12, #0
  172. str r12, [r0]
  173. str r12, [r0, #4]
  174. str r12, [r0, #8]
  175. str r12, [r0, #12]
  176. str r12, [r0, #16]
  177. str r12, [r0, #20]
  178. str r12, [r0, #24]
  179. str r12, [r0, #28]
  180. ldmia sp!, {r4 - r11, pc} ; restore saved registers; loading pc returns to the caller
  181. ENDP ; |vp8_dequant_dc_idct_add_v6|
  182. ; Constant Pool
  ; The first two values are 16.16 fixed-point factors: the routine multiplies
  ; them with smulwb/smulwt, which keep the product shifted right by 16.
  183. cospi8sqrt2minus1 DCD 0x00004E7B ; 20091; 20091/65536 ~= 0.30656 = sqrt(2)*cos(pi/8) - 1
  184. sinpi8sqrt2 DCD 0x00008A8C ; 35468; 35468/65536 ~= 0.54120 = sqrt(2)*sin(pi/8)
  185. c0x00040004 DCD 0x00040004 ; 4 in each 16-bit lane: rounding term added before the >>3
  186. END