string_64.S 3.5 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205
  1. /*
  2. * This program is free software; you can redistribute it and/or modify
  3. * it under the terms of the GNU General Public License as published by
  4. * the Free Software Foundation; either version 2 of the License, or
  5. * (at your option) any later version.
  6. *
  7. * This program is distributed in the hope that it will be useful,
  8. * but WITHOUT ANY WARRANTY; without even the implied warranty of
  9. * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
  10. * GNU General Public License for more details.
  11. *
  12. * You should have received a copy of the GNU General Public License
  13. * along with this program; if not, write to the Free Software
  14. * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.
  15. *
  16. * Copyright (C) IBM Corporation, 2012
  17. *
  18. * Author: Anton Blanchard <anton@au.ibm.com>
  19. */
  20. #include <asm/ppc_asm.h>
  21. #include <asm/asm-offsets.h>
  22. #include <asm/export.h>
  23. .section ".toc","aw"
  24. PPC64_CACHES:
  25. .tc ppc64_caches[TC],ppc64_caches
  26. .section ".text"
  27. /**
  28. * __clear_user: - Zero a block of memory in user space, with less checking.
  29. * @to: Destination address, in user space.
  30. * @n: Number of bytes to zero.
  31. *
  32. * Zero a block of memory in user space. Caller must check
  33. * the specified block with access_ok() before calling this function.
  34. *
  35. * Returns number of bytes that could not be cleared.
  36. * On success, this will be zero.
  37. */
  38. .macro err1
  39. 100:
  40. .section __ex_table,"a"
  41. .align 3
  42. .llong 100b,.Ldo_err1
  43. .previous
  44. .endm
  45. .macro err2
  46. 200:
  47. .section __ex_table,"a"
  48. .align 3
  49. .llong 200b,.Ldo_err2
  50. .previous
  51. .endm
  52. .macro err3
  53. 300:
  54. .section __ex_table,"a"
  55. .align 3
  56. .llong 300b,.Ldo_err3
  57. .previous
  58. .endm
  59. .Ldo_err1:
  60. mr r3,r8
  61. .Ldo_err2:
  62. mtctr r4
  63. 1:
  64. err3; stb r0,0(r3)
  65. addi r3,r3,1
  66. addi r4,r4,-1
  67. bdnz 1b
  68. .Ldo_err3:
  69. mr r3,r4
  70. blr
  71. _GLOBAL_TOC(__clear_user)
  72. cmpdi r4,32
  73. neg r6,r3
  74. li r0,0
  75. blt .Lshort_clear
  76. mr r8,r3
  77. mtocrf 0x01,r6
  78. clrldi r6,r6,(64-3)
  79. /* Get the destination 8 byte aligned */
  80. bf cr7*4+3,1f
  81. err1; stb r0,0(r3)
  82. addi r3,r3,1
  83. 1: bf cr7*4+2,2f
  84. err1; sth r0,0(r3)
  85. addi r3,r3,2
  86. 2: bf cr7*4+1,3f
  87. err1; stw r0,0(r3)
  88. addi r3,r3,4
  89. 3: sub r4,r4,r6
  90. cmpdi r4,32
  91. cmpdi cr1,r4,512
  92. blt .Lshort_clear
  93. bgt cr1,.Llong_clear
  94. .Lmedium_clear:
  95. srdi r6,r4,5
  96. mtctr r6
  97. /* Do 32 byte chunks */
  98. 4:
  99. err2; std r0,0(r3)
  100. err2; std r0,8(r3)
  101. err2; std r0,16(r3)
  102. err2; std r0,24(r3)
  103. addi r3,r3,32
  104. addi r4,r4,-32
  105. bdnz 4b
  106. .Lshort_clear:
  107. /* up to 31 bytes to go */
  108. cmpdi r4,16
  109. blt 6f
  110. err2; std r0,0(r3)
  111. err2; std r0,8(r3)
  112. addi r3,r3,16
  113. addi r4,r4,-16
  114. /* Up to 15 bytes to go */
  115. 6: mr r8,r3
  116. clrldi r4,r4,(64-4)
  117. mtocrf 0x01,r4
  118. bf cr7*4+0,7f
  119. err1; std r0,0(r3)
  120. addi r3,r3,8
  121. 7: bf cr7*4+1,8f
  122. err1; stw r0,0(r3)
  123. addi r3,r3,4
  124. 8: bf cr7*4+2,9f
  125. err1; sth r0,0(r3)
  126. addi r3,r3,2
  127. 9: bf cr7*4+3,10f
  128. err1; stb r0,0(r3)
  129. 10: li r3,0
  130. blr
  131. .Llong_clear:
  132. ld r5,PPC64_CACHES@toc(r2)
  133. bf cr7*4+0,11f
  134. err2; std r0,0(r3)
  135. addi r3,r3,8
  136. addi r4,r4,-8
  137. /* Destination is 16 byte aligned, need to get it cacheline aligned */
  138. 11: lwz r7,DCACHEL1LOGLINESIZE(r5)
  139. lwz r9,DCACHEL1LINESIZE(r5)
  140. /*
  141. * With worst case alignment the long clear loop takes a minimum
  142. * of 1 byte less than 2 cachelines.
  143. */
  144. sldi r10,r9,2
  145. cmpd r4,r10
  146. blt .Lmedium_clear
  147. neg r6,r3
  148. addi r10,r9,-1
  149. and. r5,r6,r10
  150. beq 13f
  151. srdi r6,r5,4
  152. mtctr r6
  153. mr r8,r3
  154. 12:
  155. err1; std r0,0(r3)
  156. err1; std r0,8(r3)
  157. addi r3,r3,16
  158. bdnz 12b
  159. sub r4,r4,r5
  160. 13: srd r6,r4,r7
  161. mtctr r6
  162. mr r8,r3
  163. 14:
  164. err1; dcbz r0,r3
  165. add r3,r3,r9
  166. bdnz 14b
  167. and r4,r4,r10
  168. cmpdi r4,32
  169. blt .Lshort_clear
  170. b .Lmedium_clear
  171. EXPORT_SYMBOL(__clear_user)