/media/libvpx/vp8/encoder/arm/armv5te/vp8_packtokens_armv5.asm

http://github.com/zpao/v8monkey · Assembly · 291 lines · 209 code · 48 blank · 34 comment · 0 complexity · 7d1323b815f4155fd6d7fc0ef60bbfe3 MD5 · raw file

  1. ;
  2. ; Copyright (c) 2010 The WebM project authors. All Rights Reserved.
  3. ;
  4. ; Use of this source code is governed by a BSD-style license
  5. ; that can be found in the LICENSE file in the root of the source
  6. ; tree. An additional intellectual property rights grant can be found
  7. ; in the file PATENTS. All contributing project authors may
  8. ; be found in the AUTHORS file in the root of the source tree.
  9. ;
  10. EXPORT |vp8cx_pack_tokens_armv5|
  11. INCLUDE asm_enc_offsets.asm
  12. ARM
  13. REQUIRE8
  14. PRESERVE8
  15. AREA |.text|, CODE, READONLY
  ;-----------------------------------------------------------------------
  ; vp8cx_pack_tokens_armv5
  ;
  ; Packs an array of TOKENEXTRA records into the byte buffer of *w using
  ; the arithmetic-coder state stored in *w (lowvalue, range, count, pos,
  ; buffer).
  ;
  ; Arguments:
  ;   r0        vp8_writer *w
  ;   r1        const TOKENEXTRA *p
  ;   r2        int xcount
  ;   r3        vp8_coef_encodings
  ;   stack[0]  vp8_extra_bits
  ;   stack[1]  vp8_coef_tree
  ;
  ; Register roles inside the main loop:
  ;   r0  w (never written)           r1  p (current token)
  ;   r2  lowvalue                    r3  count
  ;   r5  range                       r8  n (bits left to code)
  ;   r9  pp (probability table)      r10 tree being walked
  ;   r12 v, left-justified so the next bit is bit 31
  ;   lr  i (tree index)              r4, r6, r7, r11  scratch
  ;
  ; Stack frame after the prologue (9 pushed registers = 36 bytes, plus
  ; 12 bytes of locals):
  ;   [sp, #0]   stop  (one past the last TOKENEXTRA)
  ;   [sp, #4]   b->tree for the extra-bits loop
  ;   [sp, #8]   vp8_coef_encodings
  ;   [sp, #48]  vp8_extra_bits   (first stack argument)
  ;   [sp, #52]  vp8_coef_tree    (second stack argument)
  ;
  ; Layout assumptions hard-coded in shifts below: sizeof(TOKENEXTRA) == 8,
  ; sizeof(one vp8_coef_encodings entry) == 8 and
  ; sizeof(vp8_extra_bit_struct) == 16.
  ;-----------------------------------------------------------------------
|vp8cx_pack_tokens_armv5| PROC
    push    {r4-r11, lr}

    ; Add size of xcount * sizeof (TOKENEXTRA) to get stop
    ;  sizeof (TOKENEXTRA) is 8
    sub     sp, sp, #12
    add     r2, r1, r2, lsl #3          ; stop = p + xcount*sizeof(TOKENEXTRA)
    str     r2, [sp, #0]
    str     r3, [sp, #8]                ; save vp8_coef_encodings
    ldr     r2, [r0, #vp8_writer_lowvalue]
    ldr     r5, [r0, #vp8_writer_range]
    ldr     r3, [r0, #vp8_writer_count]
    b       check_p_lt_stop

while_p_lt_stop
    ldrb    r6, [r1, #tokenextra_token] ; t
    ldr     r4, [sp, #8]                ; vp8_coef_encodings
    mov     lr, #0                      ; i = 0
    add     r4, r4, r6, lsl #3          ; a = vp8_coef_encodings + t
    ldr     r9, [r1, #tokenextra_context_tree]   ; pp

    ldrb    r7, [r1, #tokenextra_skip_eob_node]

    ldr     r6, [r4, #vp8_token_value]  ; v
    ldr     r8, [r4, #vp8_token_len]    ; n

    ; vp8 specific skip_eob_node: start one level down the tree and
    ; code one bit fewer.
    cmp     r7, #0
    movne   lr, #2                      ; i = 2
    subne   r8, r8, #1                  ; --n

    rsb     r4, r8, #32                 ; 32-n
    ldr     r10, [sp, #52]              ; vp8_coef_tree

    ; v is kept in r12 during the token pack loop, left-justified so
    ; that each "lsls #1" moves the next bit to code into the carry flag.
    lsl     r12, r6, r4                 ; r12 = v << (32 - n)

; loop start
token_loop
    ldrb    r4, [r9, lr, asr #1]        ; pp[i>>1]
    sub     r7, r5, #1                  ; range-1

    ; Decisions are made based on the bit value shifted
    ; off of v, so set a flag here based on this.
    ; This value is referred to as "bb" and lives in the carry flag
    ; until the adds below.
    lsls    r12, r12, #1                ; carry = bb (next bit of v)
    mul     r4, r4, r7                  ; (range-1) * pp[i>>1]

    ; bb can only be 0 or 1.  So only execute this statement
    ; if bb == 1, otherwise it will act like i + 0
    addcs   lr, lr, #1                  ; i + bb

    mov     r7, #1
    ldrsb   lr, [r10, lr]               ; i = vp8_coef_tree[i+bb]
    add     r4, r7, r4, lsr #8          ; split = 1 + (((range-1) * pp[i>>1]) >> 8)

    addcs   r2, r2, r4                  ; if (bb) lowvalue += split
    subcs   r4, r5, r4                  ; if (bb) range = range-split
                                        ; else r4 (split) is the new range

    ; Counting the leading zeros is used to normalize range:
    ; shift = clz(range) - 24 moves the highest set bit to bit 7.
    clz     r6, r4
    sub     r6, r6, #24                 ; shift

    ; Flag is set on the sum of count.  This flag is used later
    ; to determine if count >= 0
    adds    r3, r3, r6                  ; count += shift
    lsl     r5, r4, r6                  ; range <<= shift
    bmi     token_count_lt_zero         ; skip byte output unless count >= 0

    sub     r6, r6, r3                  ; offset = shift - count
    sub     r4, r6, #1                  ; offset-1
    lsls    r4, r2, r4                  ; if((lowvalue<<(offset-1)) & 0x80000000 )
    bpl     token_high_bit_not_set

    ; Carry out of lowvalue: propagate it backwards through the bytes
    ; already written, turning trailing 0xff bytes into 0.
    ldr     r4, [r0, #vp8_writer_pos]   ; x
    sub     r4, r4, #1                  ; x = w->pos-1
    b       token_zero_while_start
token_zero_while_loop
    mov     r10, #0
    strb    r10, [r7, r4]               ; w->buffer[x] =(unsigned char)0
    sub     r4, r4, #1                  ; x--
token_zero_while_start
    ; while (x >= 0 && w->buffer[x] == 0xff)
    ; The buffer is only read once x >= 0 is known, so no load is ever
    ; issued through a stale base register.
    cmp     r4, #0
    blt     token_zero_while_done
    ldr     r7, [r0, #vp8_writer_buffer]
    ldrb    r11, [r7, r4]
    cmp     r11, #0xff
    beq     token_zero_while_loop
token_zero_while_done

    ; The increment below is unconditional, including when x went negative.
    ldr     r7, [r0, #vp8_writer_buffer]
    ldrb    r10, [r7, r4]               ; w->buffer[x]
    add     r10, r10, #1
    strb    r10, [r7, r4]               ; w->buffer[x] + 1
token_high_bit_not_set
    rsb     r4, r6, #24                 ; 24-offset
    ldr     r10, [r0, #vp8_writer_buffer]
    lsr     r7, r2, r4                  ; lowvalue >> (24-offset)
    ldr     r4, [r0, #vp8_writer_pos]   ; w->pos
    lsl     r2, r2, r6                  ; lowvalue <<= offset
    mov     r6, r3                      ; shift = count
    add     r11, r4, #1                 ; w->pos++
    bic     r2, r2, #0xff000000         ; lowvalue &= 0xffffff
    str     r11, [r0, #vp8_writer_pos]
    sub     r3, r3, #8                  ; count -= 8
    strb    r7, [r10, r4]               ; w->buffer[w->pos++]

    ; r10 held the tree pointer earlier in the loop but was used as a
    ; temporary above, so reload vp8_coef_tree into r10.
    ldr     r10, [sp, #52]              ; vp8_coef_tree

token_count_lt_zero
    lsl     r2, r2, r6                  ; lowvalue <<= shift

    subs    r8, r8, #1                  ; --n
    bne     token_loop

    ldrb    r6, [r1, #tokenextra_token] ; t
    ldr     r7, [sp, #48]               ; vp8_extra_bits
    ; Add t * sizeof (vp8_extra_bit_struct) to get the desired
    ;  element.  Here sizeof (vp8_extra_bit_struct) == 16
    add     r12, r7, r6, lsl #4         ; b = vp8_extra_bits + t

    ldr     r4, [r12, #vp8_extra_bit_struct_base_val]
    cmp     r4, #0
    beq     skip_extra_bits

    ; if( b->base_val)
    ldr     r8, [r12, #vp8_extra_bit_struct_len] ; L
    ldrsh   lr, [r1, #tokenextra_extra] ; e = p->Extra
    cmp     r8, #0                      ; if( L)
    beq     no_extra_bits

    ldr     r9, [r12, #vp8_extra_bit_struct_prob]
    asr     r7, lr, #1                  ; v=e>>1

    ldr     r10, [r12, #vp8_extra_bit_struct_tree]
    str     r10, [sp, #4]               ; b->tree

    rsb     r4, r8, #32                 ; 32-L
    lsl     r12, r7, r4                 ; r12 = v << (32 - L)

    mov     lr, #0                      ; i = 0

    ; Same coding step as token_loop, walking b->tree with b->prob.
extra_bits_loop
    ldrb    r4, [r9, lr, asr #1]        ; pp[i>>1]
    sub     r7, r5, #1                  ; range-1
    lsls    r12, r12, #1                ; carry = bb (next bit of v)
    mul     r4, r4, r7                  ; (range-1) * pp[i>>1]
    addcs   lr, lr, #1                  ; i + bb

    mov     r7, #1
    ldrsb   lr, [r10, lr]               ; i = b->tree[i+bb]
    add     r4, r7, r4, lsr #8          ; split = 1 + (((range-1) * pp[i>>1]) >> 8)

    addcs   r2, r2, r4                  ; if (bb) lowvalue += split
    subcs   r4, r5, r4                  ; if (bb) range = range-split

    clz     r6, r4
    sub     r6, r6, #24                 ; shift

    adds    r3, r3, r6                  ; count += shift
    lsl     r5, r4, r6                  ; range <<= shift
    bmi     extra_count_lt_zero         ; skip byte output unless count >= 0

    sub     r6, r6, r3                  ; offset= shift - count
    sub     r4, r6, #1                  ; offset-1
    lsls    r4, r2, r4                  ; if((lowvalue<<(offset-1)) & 0x80000000 )
    bpl     extra_high_bit_not_set

    ldr     r4, [r0, #vp8_writer_pos]   ; x
    sub     r4, r4, #1                  ; x = w->pos - 1
    b       extra_zero_while_start
extra_zero_while_loop
    mov     r10, #0
    strb    r10, [r7, r4]               ; w->buffer[x] =(unsigned char)0
    sub     r4, r4, #1                  ; x--
extra_zero_while_start
    ; while (x >= 0 && w->buffer[x] == 0xff)
    cmp     r4, #0
    blt     extra_zero_while_done
    ldr     r7, [r0, #vp8_writer_buffer]
    ldrb    r11, [r7, r4]
    cmp     r11, #0xff
    beq     extra_zero_while_loop
extra_zero_while_done

    ldr     r7, [r0, #vp8_writer_buffer]
    ldrb    r10, [r7, r4]               ; w->buffer[x]
    add     r10, r10, #1
    strb    r10, [r7, r4]               ; w->buffer[x] + 1
extra_high_bit_not_set
    rsb     r4, r6, #24                 ; 24-offset
    ldr     r10, [r0, #vp8_writer_buffer]
    lsr     r7, r2, r4                  ; lowvalue >> (24-offset)
    ldr     r4, [r0, #vp8_writer_pos]   ; w->pos
    lsl     r2, r2, r6                  ; lowvalue <<= offset
    mov     r6, r3                      ; shift = count
    add     r11, r4, #1                 ; w->pos++
    bic     r2, r2, #0xff000000         ; lowvalue &= 0xffffff
    str     r11, [r0, #vp8_writer_pos]
    sub     r3, r3, #8                  ; count -= 8
    strb    r7, [r10, r4]               ; w->buffer[w->pos++]=(lowvalue >> (24-offset))
    ldr     r10, [sp, #4]               ; r10 was a temporary; reload b->tree
extra_count_lt_zero
    lsl     r2, r2, r6                  ; lowvalue <<= shift
    subs    r8, r8, #1                  ; --n
    bne     extra_bits_loop             ; while (n)

no_extra_bits
    ; Code the low bit of e with split = (range + 1) >> 1.
    ; lr was reused as the tree index above, so e is reloaded here with
    ; the same halfword load and named offset used for the first read.
    ldrsh   lr, [r1, #tokenextra_extra] ; e = p->Extra
    add     r4, r5, #1                  ; range + 1
    tst     lr, #1                      ; e & 1
    lsr     r4, r4, #1                  ; split = (range + 1) >> 1
    addne   r2, r2, r4                  ; lowvalue += split
    subne   r4, r5, r4                  ; range = range-split
    tst     r2, #0x80000000             ; lowvalue & 0x80000000
    lsl     r5, r4, #1                  ; range <<= 1 (flags unchanged)
    beq     end_high_bit_not_set

    ldr     r4, [r0, #vp8_writer_pos]
    mov     r7, #0
    sub     r4, r4, #1                  ; x = w->pos - 1
    b       end_zero_while_start
end_zero_while_loop
    strb    r7, [r6, r4]                ; w->buffer[x] = 0
    sub     r4, r4, #1                  ; x--
end_zero_while_start
    ; while (x >= 0 && w->buffer[x] == 0xff)
    cmp     r4, #0
    blt     end_zero_while_done
    ldr     r6, [r0, #vp8_writer_buffer]
    ldrb    r12, [r6, r4]
    cmp     r12, #0xff
    beq     end_zero_while_loop
end_zero_while_done

    ldr     r6, [r0, #vp8_writer_buffer]
    ldrb    r7, [r6, r4]                ; w->buffer[x]
    add     r7, r7, #1
    strb    r7, [r6, r4]                ; w->buffer[x] + 1
end_high_bit_not_set
    adds    r3, r3, #1                  ; ++count
    lsl     r2, r2, #1                  ; lowvalue <<= 1 (flags unchanged)
    bne     end_count_zero              ; only emit a byte when count == 0

    ldr     r4, [r0, #vp8_writer_pos]
    mvn     r3, #7                      ; count = -8
    ldr     r7, [r0, #vp8_writer_buffer]
    lsr     r6, r2, #24                 ; lowvalue >> 24
    add     r12, r4, #1                 ; w->pos++
    bic     r2, r2, #0xff000000         ; lowvalue &= 0xffffff
    str     r12, [r0, #vp8_writer_pos]
    strb    r6, [r7, r4]                ; w->buffer[w->pos++] = lowvalue >> 24
end_count_zero
skip_extra_bits
    add     r1, r1, #TOKENEXTRA_SZ      ; ++p
check_p_lt_stop
    ldr     r4, [sp, #0]                ; stop
    cmp     r1, r4                      ; while( p < stop)
    bcc     while_p_lt_stop

    ; Write the coder state kept in registers back to *w.
    str     r2, [r0, #vp8_writer_lowvalue]
    str     r5, [r0, #vp8_writer_range]
    str     r3, [r0, #vp8_writer_count]
    add     sp, sp, #12
    pop     {r4-r11, pc}
    ENDP
  243. END