xor.h 5.2 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215
  1. /*
  2. * arch/arm/include/asm/xor.h
  3. *
  4. * Copyright (C) 2001 Russell King
  5. *
  6. * This program is free software; you can redistribute it and/or modify
  7. * it under the terms of the GNU General Public License version 2 as
  8. * published by the Free Software Foundation.
  9. */
  10. #include <linux/hardirq.h>
  11. #include <asm-generic/xor.h>
  12. #include <asm/hwcap.h>
  13. #include <asm/neon.h>
  14. #define __XOR(a1, a2) a1 ^= a2
  15. #define GET_BLOCK_2(dst) \
  16. __asm__("ldmia %0, {%1, %2}" \
  17. : "=r" (dst), "=r" (a1), "=r" (a2) \
  18. : "0" (dst))
  19. #define GET_BLOCK_4(dst) \
  20. __asm__("ldmia %0, {%1, %2, %3, %4}" \
  21. : "=r" (dst), "=r" (a1), "=r" (a2), "=r" (a3), "=r" (a4) \
  22. : "0" (dst))
  23. #define XOR_BLOCK_2(src) \
  24. __asm__("ldmia %0!, {%1, %2}" \
  25. : "=r" (src), "=r" (b1), "=r" (b2) \
  26. : "0" (src)); \
  27. __XOR(a1, b1); __XOR(a2, b2);
  28. #define XOR_BLOCK_4(src) \
  29. __asm__("ldmia %0!, {%1, %2, %3, %4}" \
  30. : "=r" (src), "=r" (b1), "=r" (b2), "=r" (b3), "=r" (b4) \
  31. : "0" (src)); \
  32. __XOR(a1, b1); __XOR(a2, b2); __XOR(a3, b3); __XOR(a4, b4)
  33. #define PUT_BLOCK_2(dst) \
  34. __asm__ __volatile__("stmia %0!, {%2, %3}" \
  35. : "=r" (dst) \
  36. : "0" (dst), "r" (a1), "r" (a2))
  37. #define PUT_BLOCK_4(dst) \
  38. __asm__ __volatile__("stmia %0!, {%2, %3, %4, %5}" \
  39. : "=r" (dst) \
  40. : "0" (dst), "r" (a1), "r" (a2), "r" (a3), "r" (a4))
  41. static void
  42. xor_arm4regs_2(unsigned long bytes, unsigned long *p1, unsigned long *p2)
  43. {
  44. unsigned int lines = bytes / sizeof(unsigned long) / 4;
  45. register unsigned int a1 __asm__("r4");
  46. register unsigned int a2 __asm__("r5");
  47. register unsigned int a3 __asm__("r6");
  48. register unsigned int a4 __asm__("r7");
  49. register unsigned int b1 __asm__("r8");
  50. register unsigned int b2 __asm__("r9");
  51. register unsigned int b3 __asm__("ip");
  52. register unsigned int b4 __asm__("lr");
  53. do {
  54. GET_BLOCK_4(p1);
  55. XOR_BLOCK_4(p2);
  56. PUT_BLOCK_4(p1);
  57. } while (--lines);
  58. }
  59. static void
  60. xor_arm4regs_3(unsigned long bytes, unsigned long *p1, unsigned long *p2,
  61. unsigned long *p3)
  62. {
  63. unsigned int lines = bytes / sizeof(unsigned long) / 4;
  64. register unsigned int a1 __asm__("r4");
  65. register unsigned int a2 __asm__("r5");
  66. register unsigned int a3 __asm__("r6");
  67. register unsigned int a4 __asm__("r7");
  68. register unsigned int b1 __asm__("r8");
  69. register unsigned int b2 __asm__("r9");
  70. register unsigned int b3 __asm__("ip");
  71. register unsigned int b4 __asm__("lr");
  72. do {
  73. GET_BLOCK_4(p1);
  74. XOR_BLOCK_4(p2);
  75. XOR_BLOCK_4(p3);
  76. PUT_BLOCK_4(p1);
  77. } while (--lines);
  78. }
  79. static void
  80. xor_arm4regs_4(unsigned long bytes, unsigned long *p1, unsigned long *p2,
  81. unsigned long *p3, unsigned long *p4)
  82. {
  83. unsigned int lines = bytes / sizeof(unsigned long) / 2;
  84. register unsigned int a1 __asm__("r8");
  85. register unsigned int a2 __asm__("r9");
  86. register unsigned int b1 __asm__("ip");
  87. register unsigned int b2 __asm__("lr");
  88. do {
  89. GET_BLOCK_2(p1);
  90. XOR_BLOCK_2(p2);
  91. XOR_BLOCK_2(p3);
  92. XOR_BLOCK_2(p4);
  93. PUT_BLOCK_2(p1);
  94. } while (--lines);
  95. }
  96. static void
  97. xor_arm4regs_5(unsigned long bytes, unsigned long *p1, unsigned long *p2,
  98. unsigned long *p3, unsigned long *p4, unsigned long *p5)
  99. {
  100. unsigned int lines = bytes / sizeof(unsigned long) / 2;
  101. register unsigned int a1 __asm__("r8");
  102. register unsigned int a2 __asm__("r9");
  103. register unsigned int b1 __asm__("ip");
  104. register unsigned int b2 __asm__("lr");
  105. do {
  106. GET_BLOCK_2(p1);
  107. XOR_BLOCK_2(p2);
  108. XOR_BLOCK_2(p3);
  109. XOR_BLOCK_2(p4);
  110. XOR_BLOCK_2(p5);
  111. PUT_BLOCK_2(p1);
  112. } while (--lines);
  113. }
  114. static struct xor_block_template xor_block_arm4regs = {
  115. .name = "arm4regs",
  116. .do_2 = xor_arm4regs_2,
  117. .do_3 = xor_arm4regs_3,
  118. .do_4 = xor_arm4regs_4,
  119. .do_5 = xor_arm4regs_5,
  120. };
  121. #undef XOR_TRY_TEMPLATES
  122. #define XOR_TRY_TEMPLATES \
  123. do { \
  124. xor_speed(&xor_block_arm4regs); \
  125. xor_speed(&xor_block_8regs); \
  126. xor_speed(&xor_block_32regs); \
  127. NEON_TEMPLATES; \
  128. } while (0)
  129. #ifdef CONFIG_KERNEL_MODE_NEON
  130. extern struct xor_block_template const xor_block_neon_inner;
  131. static void
  132. xor_neon_2(unsigned long bytes, unsigned long *p1, unsigned long *p2)
  133. {
  134. if (in_interrupt()) {
  135. xor_arm4regs_2(bytes, p1, p2);
  136. } else {
  137. kernel_neon_begin();
  138. xor_block_neon_inner.do_2(bytes, p1, p2);
  139. kernel_neon_end();
  140. }
  141. }
  142. static void
  143. xor_neon_3(unsigned long bytes, unsigned long *p1, unsigned long *p2,
  144. unsigned long *p3)
  145. {
  146. if (in_interrupt()) {
  147. xor_arm4regs_3(bytes, p1, p2, p3);
  148. } else {
  149. kernel_neon_begin();
  150. xor_block_neon_inner.do_3(bytes, p1, p2, p3);
  151. kernel_neon_end();
  152. }
  153. }
  154. static void
  155. xor_neon_4(unsigned long bytes, unsigned long *p1, unsigned long *p2,
  156. unsigned long *p3, unsigned long *p4)
  157. {
  158. if (in_interrupt()) {
  159. xor_arm4regs_4(bytes, p1, p2, p3, p4);
  160. } else {
  161. kernel_neon_begin();
  162. xor_block_neon_inner.do_4(bytes, p1, p2, p3, p4);
  163. kernel_neon_end();
  164. }
  165. }
  166. static void
  167. xor_neon_5(unsigned long bytes, unsigned long *p1, unsigned long *p2,
  168. unsigned long *p3, unsigned long *p4, unsigned long *p5)
  169. {
  170. if (in_interrupt()) {
  171. xor_arm4regs_5(bytes, p1, p2, p3, p4, p5);
  172. } else {
  173. kernel_neon_begin();
  174. xor_block_neon_inner.do_5(bytes, p1, p2, p3, p4, p5);
  175. kernel_neon_end();
  176. }
  177. }
  178. static struct xor_block_template xor_block_neon = {
  179. .name = "neon",
  180. .do_2 = xor_neon_2,
  181. .do_3 = xor_neon_3,
  182. .do_4 = xor_neon_4,
  183. .do_5 = xor_neon_5
  184. };
  185. #define NEON_TEMPLATES \
  186. do { if (cpu_has_neon()) xor_speed(&xor_block_neon); } while (0)
  187. #else
  188. #define NEON_TEMPLATES
  189. #endif