/media/libvpx/vp8/encoder/arm/neon/vp8_memcpy_neon.asm

http://github.com/zpao/v8monkey · Assembly · 68 lines · 37 code · 12 blank · 19 comment · 0 complexity · 9ee0c1055164de3bfe818a8d116747cd MD5 · raw file

  1. ;
  2. ; Copyright (c) 2010 The WebM project authors. All Rights Reserved.
  3. ;
  4. ; Use of this source code is governed by a BSD-style license
  5. ; that can be found in the LICENSE file in the root of the source
  6. ; tree. An additional intellectual property rights grant can be found
  7. ; in the file PATENTS. All contributing project authors may
  8. ; be found in the AUTHORS file in the root of the source tree.
  9. ;
  EXPORT  |vp8_memcpy_neon|

  ARM
  REQUIRE8
  PRESERVE8                               ; the only stack use below is a 64-byte
                                          ; vpush/vpop pair, so sp stays 8-byte aligned

  AREA    ||.text||, CODE, READONLY, ALIGN=2

;-----------------------------------------------------------------------
; void vp8_memcpy_neon(unsigned char *dst_ptr, unsigned char *src_ptr, int sz);
;
; Copies sz bytes from src_ptr to dst_ptr, front to back. Overlapping
; buffers are not supported.
;
; In:     r0 = dst_ptr
;         r1 = src_ptr
;         r2 = sz, byte count; sz <= 0 copies nothing
; Out:    none (r0/r1 are left pointing one past the last byte copied)
; Clobb:  r0-r3, r12, q0-q3, q8-q15, flags
;         q4-q7 (d8-d15) are used by the block loop but saved/restored,
;         because the AAPCS makes d8-d15 callee-saved.
; Stack:  64 bytes, only while the 256-byte block loop is running.
;
; Strategy:
;   1. sz / 256 blocks of 256 bytes, using all sixteen q registers
;   2. (sz % 256) / 16 chunks of 16 bytes
;   3. sz % 16 single bytes
;-----------------------------------------------------------------------
|vp8_memcpy_neon| PROC
  cmp     r2, #0
  ble     done_copy_neon_loop             ; nothing to do for sz <= 0

  ;pld    [r1]                            ;preload pred data
  ;pld    [r1, #128]
  ;pld    [r1, #256]
  ;pld    [r1, #384]

  movs    r12, r2, lsr #8                 ; r12 = number of whole 256-byte blocks
  beq     tail_copy_neon                  ; sz < 256: the block loop must not run,
                                          ; a zero count would wrap around in subs

  vpush   {d8-d15}                        ; q4-q7 are callee-saved

  ; Invariant: r12 = blocks remaining, always > 0 at the top of the loop.
  ; Loads run one register pair ahead of the stores; NEON loads/stores do
  ; not touch the flags, so the result of subs survives until the bne.
memcpy_neon_loop
  vld1.8  {q0, q1}, [r1]!                 ; load src data
  subs    r12, r12, #1
  vld1.8  {q2, q3}, [r1]!
  vst1.8  {q0, q1}, [r0]!                 ; copy to dst_ptr
  vld1.8  {q4, q5}, [r1]!
  vst1.8  {q2, q3}, [r0]!
  vld1.8  {q6, q7}, [r1]!
  vst1.8  {q4, q5}, [r0]!
  vld1.8  {q8, q9}, [r1]!
  vst1.8  {q6, q7}, [r0]!
  vld1.8  {q10, q11}, [r1]!
  vst1.8  {q8, q9}, [r0]!
  vld1.8  {q12, q13}, [r1]!
  vst1.8  {q10, q11}, [r0]!
  vld1.8  {q14, q15}, [r1]!
  vst1.8  {q12, q13}, [r0]!
  vst1.8  {q14, q15}, [r0]!

  ;pld    [r1]                            ;preload pred data -- need to adjust for real device
  ;pld    [r1, #128]
  ;pld    [r1, #256]
  ;pld    [r1, #384]

  bne     memcpy_neon_loop

  vpop    {d8-d15}                        ; restore the caller's q4-q7

tail_copy_neon
  ands    r3, r2, #0xff                   ; r3 = bytes left after the 256-byte blocks
  beq     done_copy_neon_loop

  subs    r3, r3, #16                     ; pre-bias so the loop can test the sign
  blt     byte_copy_neon                  ; fewer than 16 bytes left

  ; Invariant: r3 + 16 = bytes remaining, and at least 16 remain here.
extra_copy_neon_loop
  vld1.8  {q0}, [r1]!                     ; load src data
  subs    r3, r3, #16
  vst1.8  {q0}, [r0]!
  bge     extra_copy_neon_loop            ; signed test: stops cleanly even when the
                                          ; remainder is not a multiple of 16

byte_copy_neon
  adds    r3, r3, #16                     ; undo the bias: r3 = 0..15 bytes remaining
  beq     done_copy_neon_loop

byte_copy_neon_loop
  ldrb    r12, [r1], #1
  subs    r3, r3, #1
  strb    r12, [r0], #1
  bne     byte_copy_neon_loop

done_copy_neon_loop
  bx      lr

  ENDP

  END