/*
 * copy_user_64.S
 *
 * Copyright 2008 Vitaly Mayatskikh <vmayatsk@redhat.com>
 * Copyright 2002 Andi Kleen, SuSE Labs.
 * Subject to the GNU Public License v2.
 *
 * Functions to copy from and to user space.
 */

#include <linux/linkage.h>
#include <asm/dwarf2.h>

#define FIX_ALIGNMENT 1

#include <asm/current.h>
#include <asm/asm-offsets.h>
#include <asm/thread_info.h>
#include <asm/cpufeature.h>
#include <asm/alternative-asm.h>

/*
 * By placing feature2 after feature1 in the altinstructions section,
 * we logically implement:
 * if the CPU has feature2, the jmp to alt2 is used;
 * else if the CPU has feature1, the jmp to alt1 is used;
 * else the jmp to orig is used.
 */
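/*
 * At build time the macro emits a single 5-byte near jmp to \orig,
 * places two 5-byte replacement jmps (to \alt1 and \alt2) in
 * .altinstr_replacement and registers them in .altinstructions; the
 * boot-time alternatives code patches the live jmp according to the
 * CPU's feature bits, so exactly one of the three targets is reached.
 */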
        .macro ALTERNATIVE_JUMP feature1,feature2,orig,alt1,alt2
0:
        .byte 0xe9                      /* 32bit jump */
        .long \orig-1f                  /* by default jump to orig */
1:
        .section .altinstr_replacement,"ax"
2:      .byte 0xe9                      /* near jump with 32bit immediate */
        .long \alt1-1b /* offset */     /* or alternatively to alt1 */
3:      .byte 0xe9                      /* near jump with 32bit immediate */
        .long \alt2-1b /* offset */     /* or alternatively to alt2 */
        .previous

        .section .altinstructions,"a"
        altinstruction_entry 0b,2b,\feature1,5,5
        altinstruction_entry 0b,3b,\feature2,5,5
        .previous
        .endm

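/*
 * ALIGN_DESTINATION: copy single bytes until %rdi is 8-byte aligned.
 * The number of alignment bytes is subtracted from the remaining count
 * in %edx up front; %ecx and %al are clobbered.  If one of the byte
 * moves faults, the fixup at 103 adds the not-yet-copied alignment
 * bytes back into %edx and jumps to copy_user_handle_tail.
 */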
        .macro ALIGN_DESTINATION
#ifdef FIX_ALIGNMENT
        /* check for bad alignment of destination */
        movl %edi,%ecx
        andl $7,%ecx
        jz 102f                         /* already aligned */
        subl $8,%ecx
        negl %ecx
        subl %ecx,%edx
100:    movb (%rsi),%al
101:    movb %al,(%rdi)
        incq %rsi
        incq %rdi
        decl %ecx
        jnz 100b
102:
        .section .fixup,"ax"
103:    addl %ecx,%edx                  /* ecx is zerorest also */
        jmp copy_user_handle_tail
        .previous

        .section __ex_table,"a"
        .align 8
        .quad 100b,103b
        .quad 101b,103b
        .previous
#endif
        .endm

/* Standard copy_to_user with segment limit checking */
ENTRY(_copy_to_user)
        CFI_STARTPROC
        GET_THREAD_INFO(%rax)
        movq %rdi,%rcx
        addq %rdx,%rcx
        jc bad_to_user
        cmpq TI_addr_limit(%rax),%rcx
        ja bad_to_user
        ALTERNATIVE_JUMP X86_FEATURE_REP_GOOD,X86_FEATURE_ERMS,        \
                copy_user_generic_unrolled,copy_user_generic_string,   \
                copy_user_enhanced_fast_string
        CFI_ENDPROC
ENDPROC(_copy_to_user)

/* Standard copy_from_user with segment limit checking */
ENTRY(_copy_from_user)
        CFI_STARTPROC
        GET_THREAD_INFO(%rax)
        movq %rsi,%rcx
        addq %rdx,%rcx
        jc bad_from_user
        cmpq TI_addr_limit(%rax),%rcx
        ja bad_from_user
        ALTERNATIVE_JUMP X86_FEATURE_REP_GOOD,X86_FEATURE_ERMS,        \
                copy_user_generic_unrolled,copy_user_generic_string,   \
                copy_user_enhanced_fast_string
        CFI_ENDPROC
ENDPROC(_copy_from_user)

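/*
 * Out-of-range user pointers land here.  bad_from_user zeroes the
 * whole kernel destination buffer with rep stosb (so a caller that
 * ignores the error never sees uninitialized data) and then falls
 * through to bad_to_user, which reports the full count in %eax as
 * uncopied.
 */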
        .section .fixup,"ax"
        /* must zero dest */
ENTRY(bad_from_user)
bad_from_user:
        CFI_STARTPROC
        movl %edx,%ecx
        xorl %eax,%eax
        rep
        stosb
bad_to_user:
        movl %edx,%eax
        ret
        CFI_ENDPROC
ENDPROC(bad_from_user)
        .previous

/*
 * copy_user_generic_unrolled - memory copy with exception handling.
 * This version is for CPUs like the P4 that don't have efficient
 * microcode for rep movsq.
 *
 * Input:
 * rdi destination
 * rsi source
 * rdx count
 *
 * Output:
 * eax uncopied bytes or 0 if successful.
 */
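/*
 * Layout: the main loop copies 64 bytes per iteration through
 * %r8-%r11 (two groups of four quadword loads followed by four
 * stores); a quadword loop then moves any remaining whole 8-byte
 * words, and a final byte loop copies the last 0-7 bytes.
 */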
ENTRY(copy_user_generic_unrolled)
        CFI_STARTPROC
        cmpl $8,%edx
        jb 20f                          /* less than 8 bytes, go to byte copy loop */
        ALIGN_DESTINATION
        movl %edx,%ecx
        andl $63,%edx
        shrl $6,%ecx
        jz 17f
1:      movq (%rsi),%r8
2:      movq 1*8(%rsi),%r9
3:      movq 2*8(%rsi),%r10
4:      movq 3*8(%rsi),%r11
5:      movq %r8,(%rdi)
6:      movq %r9,1*8(%rdi)
7:      movq %r10,2*8(%rdi)
8:      movq %r11,3*8(%rdi)
9:      movq 4*8(%rsi),%r8
10:     movq 5*8(%rsi),%r9
11:     movq 6*8(%rsi),%r10
12:     movq 7*8(%rsi),%r11
13:     movq %r8,4*8(%rdi)
14:     movq %r9,5*8(%rdi)
15:     movq %r10,6*8(%rdi)
16:     movq %r11,7*8(%rdi)
        leaq 64(%rsi),%rsi
        leaq 64(%rdi),%rdi
        decl %ecx
        jnz 1b
17:     movl %edx,%ecx
        andl $7,%edx
        shrl $3,%ecx
        jz 20f
18:     movq (%rsi),%r8
19:     movq %r8,(%rdi)
        leaq 8(%rsi),%rsi
        leaq 8(%rdi),%rdi
        decl %ecx
        jnz 18b
20:     andl %edx,%edx
        jz 23f
        movl %edx,%ecx
21:     movb (%rsi),%al
22:     movb %al,(%rdi)
        incq %rsi
        incq %rdi
        decl %ecx
        jnz 21b
23:     xor %eax,%eax
        ret
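/*
 * Fixups: reconstruct the number of bytes that were NOT copied and
 * hand it to copy_user_handle_tail.
 *   30: fault in the 64-byte loop  -> %ecx counts remaining 64-byte
 *       blocks, so scale it by 64 and add the tail count in %edx.
 *   40: fault in the quadword loop -> %rcx counts remaining quadwords,
 *       so the remainder is %rdx + 8*%rcx.
 *   50: fault in the byte loop     -> %ecx already is the byte count.
 */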
        .section .fixup,"ax"
30:     shll $6,%ecx
        addl %ecx,%edx
        jmp 60f
40:     lea (%rdx,%rcx,8),%rdx
        jmp 60f
50:     movl %ecx,%edx
60:     jmp copy_user_handle_tail       /* ecx is zerorest also */
        .previous

        .section __ex_table,"a"
        .align 8
        .quad 1b,30b
        .quad 2b,30b
        .quad 3b,30b
        .quad 4b,30b
        .quad 5b,30b
        .quad 6b,30b
        .quad 7b,30b
        .quad 8b,30b
        .quad 9b,30b
        .quad 10b,30b
        .quad 11b,30b
        .quad 12b,30b
        .quad 13b,30b
        .quad 14b,30b
        .quad 15b,30b
        .quad 16b,30b
        .quad 18b,40b
        .quad 19b,40b
        .quad 21b,50b
        .quad 22b,50b
        .previous
        CFI_ENDPROC
ENDPROC(copy_user_generic_unrolled)

/*
 * Some CPUs run faster using the string copy instructions.
 * This is also a lot simpler.  Use them when possible.
 *
 * Only 4GB of copy is supported.  This shouldn't be a problem
 * because the kernel normally only writes from/to page sized chunks
 * even if user space passed a longer buffer.
 * And more would be dangerous because both Intel and AMD have
 * errata with rep movsq > 4GB.  Anyone who wants to lift this limit
 * needs to keep those errata in mind.
 *
 * Input:
 * rdi destination
 * rsi source
 * rdx count
 *
 * Output:
 * eax uncopied bytes or 0 if successful.
 */
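/*
 * After the destination is aligned to 8 bytes, the count in %edx is
 * split into whole quadwords (copied with rep movsq) and a 0-7 byte
 * remainder (copied with rep movsb).
 */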
ENTRY(copy_user_generic_string)
        CFI_STARTPROC
        andl %edx,%edx
        jz 4f
        cmpl $8,%edx
        jb 2f                           /* less than 8 bytes, go to byte copy loop */
        ALIGN_DESTINATION
        movl %edx,%ecx
        shrl $3,%ecx
        andl $7,%edx
1:      rep
        movsq
2:      movl %edx,%ecx
3:      rep
        movsb
4:      xorl %eax,%eax
        ret
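/*
 * Fixups: 11 is reached on a fault in rep movsq, where %rcx holds the
 * quadwords still to copy and %rdx the byte remainder, so the bytes
 * left are %rdx + 8*%rcx.  12 is reached from rep movsb, where %ecx is
 * already a byte count.  Both pass the count to copy_user_handle_tail.
 */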
        .section .fixup,"ax"
11:     lea (%rdx,%rcx,8),%rcx
12:     movl %ecx,%edx                  /* ecx is zerorest also */
        jmp copy_user_handle_tail
        .previous

        .section __ex_table,"a"
        .align 8
        .quad 1b,11b
        .quad 3b,12b
        .previous
        CFI_ENDPROC
ENDPROC(copy_user_generic_string)

/*
 * Some CPUs support enhanced REP MOVSB/STOSB instructions.
 * It is recommended to use enhanced REP MOVSB/STOSB when it is
 * available.
 *
 * Input:
 * rdi destination
 * rsi source
 * rdx count
 *
 * Output:
 * eax uncopied bytes or 0 if successful.
 */
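/*
 * With ERMS the microcode handles alignment and short lengths itself,
 * so the whole count can be handed to a single rep movsb.
 */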
ENTRY(copy_user_enhanced_fast_string)
        CFI_STARTPROC
        andl %edx,%edx
        jz 2f
        movl %edx,%ecx
1:      rep
        movsb
2:      xorl %eax,%eax
        ret
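/*
 * Fixup: on a fault in rep movsb, %ecx holds the bytes not yet copied;
 * move it into %edx and let copy_user_handle_tail finish up.
 */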
        .section .fixup,"ax"
12:     movl %ecx,%edx                  /* ecx is zerorest also */
        jmp copy_user_handle_tail
        .previous

        .section __ex_table,"a"
        .align 8
        .quad 1b,12b
        .previous
        CFI_ENDPROC
ENDPROC(copy_user_enhanced_fast_string)