sad_media.asm 2.9 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596
  1. ;
  2. ; Copyright (c) 2011 The WebM project authors. All Rights Reserved.
  3. ;
  4. ; Use of this source code is governed by a BSD-style license
  5. ; that can be found in the LICENSE file in the root of the source
  6. ; tree. An additional intellectual property rights grant can be found
  7. ; in the file PATENTS. All contributing project authors may
  8. ; be found in the AUTHORS file in the root of the source tree.
  9. ;
  10. EXPORT |vpx_sad16x16_media| ; make the routine's symbol visible to the linker
  11. ARM ; assemble the following code as ARM (32-bit) instructions, not Thumb
  12. REQUIRE8 ; declares that this code requires 8-byte stack alignment
  13. PRESERVE8 ; declares that this code preserves 8-byte stack alignment
  14. AREA ||.text||, CODE, READONLY, ALIGN=2 ; read-only code section, aligned to 2^2 = 4 bytes
  15. ; r0 const unsigned char *src_ptr - in: first row of the source block; advanced by src_stride per row
  16. ; r1 int src_stride - in: byte distance between consecutive source rows
  17. ; r2 const unsigned char *ref_ptr - in: first row of the reference block; advanced by ref_stride per row
  18. ; r3 int ref_stride - in: byte distance between consecutive reference rows
  19. |vpx_sad16x16_media| PROC ; returns in r0 the sum of absolute byte differences over 16 rows x 16 bytes of src vs ref
  20. stmfd sp!, {r4-r12, lr} ; save scratch registers and return address (10 words, so sp stays 8-byte aligned)
  21. pld [r0, r1, lsl #0] ; prefetch hint: src row 1 (r0 + r1)
  22. pld [r2, r3, lsl #0] ; prefetch hint: ref row 1 (r2 + r3)
  23. pld [r0, r1, lsl #1] ; prefetch hint: src row 2 (r0 + 2*r1)
  24. pld [r2, r3, lsl #1] ; prefetch hint: ref row 2 (r2 + 2*r3)
  25. mov r4, #0 ; sad = 0 (r4 holds the running total)
  26. mov r5, #8 ; loop count: 8 passes x 2 rows per pass = 16 rows
  27. loop ; each pass processes two 16-byte rows
  28. ; 1st row of the pair ("A" = bytes 0-7, "B" = bytes 8-15 in the load comments below)
  29. ldr r6, [r0, #0x0] ; src bytes 0-3 (1A)
  30. ldr r8, [r2, #0x0] ; ref bytes 0-3 (1A)
  31. ldr r7, [r0, #0x4] ; src bytes 4-7 (1A)
  32. ldr r9, [r2, #0x4] ; ref bytes 4-7 (1A)
  33. ldr r10, [r0, #0x8] ; src bytes 8-11 (1B)
  34. ldr r11, [r0, #0xC] ; src bytes 12-15 (1B)
  35. usada8 r4, r8, r6, r4 ; r4 += sum of |ref - src| over bytes 0-3
  36. usad8 r8, r7, r9 ; r8 = sum of |src - ref| over bytes 4-7; r8 now becomes a second partial sum
  37. ldr r12, [r2, #0x8] ; ref bytes 8-11 (1B)
  38. ldr lr, [r2, #0xC] ; ref bytes 12-15 (1B); lr is free as scratch because it was saved on entry
  39. add r0, r0, r1 ; advance src pointer to next row
  40. add r2, r2, r3 ; advance ref pointer to next row
  41. pld [r0, r1, lsl #1] ; prefetch hint: src two rows past the new position
  42. pld [r2, r3, lsl #1] ; prefetch hint: ref two rows past the new position
  43. usada8 r4, r10, r12, r4 ; r4 += sum of |src - ref| over bytes 8-11
  44. usada8 r8, r11, lr, r8 ; r8 += sum of |src - ref| over bytes 12-15
  45. ldr r6, [r0, #0x0] ; src bytes 0-3 of the next row (2A), loaded before row 1's sums are merged
  46. ldr r7, [r0, #0x4] ; src bytes 4-7 of the next row (2A)
  47. add r4, r4, r8 ; fold row 1's second partial sum into the total; r8 is free again
  48. ; 2nd row of the pair (same pattern as the 1st row)
  49. ldr r8, [r2, #0x0] ; ref bytes 0-3 (2A)
  50. ldr r9, [r2, #0x4] ; ref bytes 4-7 (2A)
  51. ldr r10, [r0, #0x8] ; src bytes 8-11 (2B)
  52. ldr r11, [r0, #0xC] ; src bytes 12-15 (2B)
  53. usada8 r4, r6, r8, r4 ; r4 += sum of |src - ref| over bytes 0-3
  54. usad8 r8, r7, r9 ; r8 = sum of |src - ref| over bytes 4-7 (restarts the second partial sum)
  55. ldr r12, [r2, #0x8] ; ref bytes 8-11 (2B)
  56. ldr lr, [r2, #0xC] ; ref bytes 12-15 (2B)
  57. add r0, r0, r1 ; advance src pointer to next row
  58. add r2, r2, r3 ; advance ref pointer to next row
  59. usada8 r4, r10, r12, r4 ; r4 += sum of |src - ref| over bytes 8-11
  60. usada8 r8, r11, lr, r8 ; r8 += sum of |src - ref| over bytes 12-15
  61. pld [r0, r1, lsl #1] ; prefetch hint: src two rows past the new position
  62. pld [r2, r3, lsl #1] ; prefetch hint: ref two rows past the new position
  63. subs r5, r5, #1 ; decrement loop counter and set flags for the bne below
  64. add r4, r4, r8 ; fold row 2's second partial sum into the total (plain add, so the flags from subs are kept)
  65. bne loop ; repeat until the counter reaches zero
  66. mov r0, r4 ; return sad in r0
  67. ldmfd sp!, {r4-r12, pc} ; restore saved registers and return by loading the saved lr into pc
  68. ENDP
  69. END