copy_user_64.S

/*
 * Copyright 2008 Vitaly Mayatskikh <vmayatsk@redhat.com>
 * Copyright 2002 Andi Kleen, SuSE Labs.
 * Subject to the GNU Public License v2.
 *
 * Functions to copy from and to user space.
 */

#include <linux/linkage.h>
#include <asm/current.h>
#include <asm/asm-offsets.h>
#include <asm/thread_info.h>
#include <asm/cpufeatures.h>
#include <asm/alternative-asm.h>
#include <asm/asm.h>
#include <asm/smap.h>
#include <asm/export.h>

/* Standard copy_to_user with segment limit checking */
ENTRY(_copy_to_user)
	mov PER_CPU_VAR(current_task), %rax
	movq %rdi,%rcx
	addq %rdx,%rcx
	jc bad_to_user
	cmpq TASK_addr_limit(%rax),%rcx
	ja bad_to_user
	ALTERNATIVE_2 "jmp copy_user_generic_unrolled",		\
		      "jmp copy_user_generic_string",		\
		      X86_FEATURE_REP_GOOD,			\
		      "jmp copy_user_enhanced_fast_string",	\
		      X86_FEATURE_ERMS
ENDPROC(_copy_to_user)
EXPORT_SYMBOL(_copy_to_user)
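
/*
 * For illustration only: ALTERNATIVE_2 above patches in exactly one of the
 * three jumps at boot, based on CPU feature bits.  A minimal C sketch of
 * the equivalent dispatch follows; the feature flags are passed in as
 * booleans and the function-pointer names are assumptions made for this
 * sketch, not kernel API.
 *
 *	typedef unsigned long (*user_copy_fn)(void *dst, const void *src,
 *					      unsigned long len);
 *
 *	static user_copy_fn select_copy_variant(int has_erms, int has_rep_good,
 *						user_copy_fn erms,
 *						user_copy_fn string,
 *						user_copy_fn unrolled)
 *	{
 *		if (has_erms)		// X86_FEATURE_ERMS: enhanced rep movsb
 *			return erms;
 *		if (has_rep_good)	// X86_FEATURE_REP_GOOD: rep movsq is fast
 *			return string;
 *		return unrolled;	// fallback: open-coded mov loop
 *	}
 */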

/* Standard copy_from_user with segment limit checking */
ENTRY(_copy_from_user)
	mov PER_CPU_VAR(current_task), %rax
	movq %rsi,%rcx
	addq %rdx,%rcx
	jc bad_from_user
	cmpq TASK_addr_limit(%rax),%rcx
	ja bad_from_user
	ALTERNATIVE_2 "jmp copy_user_generic_unrolled",		\
		      "jmp copy_user_generic_string",		\
		      X86_FEATURE_REP_GOOD,			\
		      "jmp copy_user_enhanced_fast_string",	\
		      X86_FEATURE_ERMS
ENDPROC(_copy_from_user)
EXPORT_SYMBOL(_copy_from_user)
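
/*
 * The instructions after each ENTRY implement an overflow-safe range check:
 * fail if "user pointer + count" wraps (carry set) or ends above the task's
 * address limit.  A user-space C sketch of the same test, assuming a
 * caller-supplied limit in place of TASK_addr_limit:
 *
 *	#include <stdbool.h>
 *	#include <stdint.h>
 *
 *	static bool user_range_ok(uint64_t addr, uint64_t len, uint64_t limit)
 *	{
 *		uint64_t end;
 *
 *		if (__builtin_add_overflow(addr, len, &end))
 *			return false;		// mirrors "jc bad_*_user"
 *		return end <= limit;		// mirrors "ja bad_*_user"
 *	}
 */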

	.section .fixup,"ax"
	/* must zero dest */
ENTRY(bad_from_user)
bad_from_user:
	movl %edx,%ecx
	xorl %eax,%eax
	rep
	stosb
bad_to_user:
	movl %edx,%eax
	ret
ENDPROC(bad_from_user)
	.previous
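
/*
 * The bad_from_user path above encodes the copy_from_user() contract: if
 * the user range is rejected outright, the kernel destination buffer must
 * not be left holding stale data, so it is cleared with "rep stosb" and the
 * full length is returned as uncopied.  A hypothetical C rendering of that
 * guarantee, for illustration only:
 *
 *	#include <string.h>
 *
 *	static unsigned long copy_from_user_rejected(void *dst,
 *						     unsigned long len)
 *	{
 *		memset(dst, 0, len);	// the "rep stosb" with %al == 0
 *		return len;		// every byte is reported as uncopied
 *	}
 */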

/*
 * copy_user_generic_unrolled - memory copy with exception handling.
 * This version is for CPUs like P4 that don't have efficient microcode
 * for rep movsq.
 *
 * Input:
 * rdi destination
 * rsi source
 * rdx count
 *
 * Output:
 * eax uncopied bytes or 0 if successful.
 */
ENTRY(copy_user_generic_unrolled)
	ASM_STAC
	cmpl $8,%edx
	jb 20f		/* less than 8 bytes, go to byte copy loop */
	ALIGN_DESTINATION
	movl %edx,%ecx
	andl $63,%edx
	shrl $6,%ecx
	jz .L_copy_short_string
1:	movq (%rsi),%r8
2:	movq 1*8(%rsi),%r9
3:	movq 2*8(%rsi),%r10
4:	movq 3*8(%rsi),%r11
5:	movq %r8,(%rdi)
6:	movq %r9,1*8(%rdi)
7:	movq %r10,2*8(%rdi)
8:	movq %r11,3*8(%rdi)
9:	movq 4*8(%rsi),%r8
10:	movq 5*8(%rsi),%r9
11:	movq 6*8(%rsi),%r10
12:	movq 7*8(%rsi),%r11
13:	movq %r8,4*8(%rdi)
14:	movq %r9,5*8(%rdi)
15:	movq %r10,6*8(%rdi)
16:	movq %r11,7*8(%rdi)
	leaq 64(%rsi),%rsi
	leaq 64(%rdi),%rdi
	decl %ecx
	jnz 1b
.L_copy_short_string:
	movl %edx,%ecx
	andl $7,%edx
	shrl $3,%ecx
	jz 20f
18:	movq (%rsi),%r8
19:	movq %r8,(%rdi)
	leaq 8(%rsi),%rsi
	leaq 8(%rdi),%rdi
	decl %ecx
	jnz 18b
20:	andl %edx,%edx
	jz 23f
	movl %edx,%ecx
21:	movb (%rsi),%al
22:	movb %al,(%rdi)
	incq %rsi
	incq %rdi
	decl %ecx
	jnz 21b
23:	xor %eax,%eax
	ASM_CLAC
	ret

	.section .fixup,"ax"
30:	shll $6,%ecx
	addl %ecx,%edx
	jmp 60f
40:	leal (%rdx,%rcx,8),%edx
	jmp 60f
50:	movl %ecx,%edx
60:	jmp copy_user_handle_tail /* ecx is zerorest also */
	.previous

	_ASM_EXTABLE(1b,30b)
	_ASM_EXTABLE(2b,30b)
	_ASM_EXTABLE(3b,30b)
	_ASM_EXTABLE(4b,30b)
	_ASM_EXTABLE(5b,30b)
	_ASM_EXTABLE(6b,30b)
	_ASM_EXTABLE(7b,30b)
	_ASM_EXTABLE(8b,30b)
	_ASM_EXTABLE(9b,30b)
	_ASM_EXTABLE(10b,30b)
	_ASM_EXTABLE(11b,30b)
	_ASM_EXTABLE(12b,30b)
	_ASM_EXTABLE(13b,30b)
	_ASM_EXTABLE(14b,30b)
	_ASM_EXTABLE(15b,30b)
	_ASM_EXTABLE(16b,30b)
	_ASM_EXTABLE(18b,40b)
	_ASM_EXTABLE(19b,40b)
	_ASM_EXTABLE(21b,50b)
	_ASM_EXTABLE(22b,50b)
ENDPROC(copy_user_generic_unrolled)
EXPORT_SYMBOL(copy_user_generic_unrolled)
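
/*
 * For illustration: the copy above stages the length three ways -- 64-byte
 * blocks through eight registers, then 8-byte words, then single bytes.
 * The fixup labels 30/40/50 recompute how many bytes were still pending
 * when a fault hit (64*%ecx + %edx, 8*%ecx + %edx, or %ecx) before handing
 * off to copy_user_handle_tail.  A plain, non-faulting C sketch of the
 * staging (hypothetical helper, not the kernel routine):
 *
 *	#include <string.h>
 *
 *	static unsigned long copy_unrolled_sketch(void *dst, const void *src,
 *						  unsigned long len)
 *	{
 *		unsigned char *d = dst;
 *		const unsigned char *s = src;
 *
 *		for (; len >= 64; len -= 64, s += 64, d += 64)
 *			memcpy(d, s, 64);	// the 16 movq pairs (1..16)
 *		for (; len >= 8; len -= 8, s += 8, d += 8)
 *			memcpy(d, s, 8);	// the .L_copy_short_string loop
 *		for (; len; len--, s++, d++)
 *			*d = *s;		// the trailing byte loop (21/22)
 *		return 0;			// 0 == everything copied
 *	}
 */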

/*
 * Some CPUs run faster using the string copy instructions.
 * This is also a lot simpler. Use them when possible.
 *
 * Only 4GB of copy is supported. This shouldn't be a problem
 * because the kernel normally only writes from/to page sized chunks
 * even if user space passed a longer buffer.
 * And more would be dangerous because both Intel and AMD have
 * errata with rep movsq > 4GB. If someone feels the need to fix
 * this, please keep those errata in mind.
 *
 * Input:
 * rdi destination
 * rsi source
 * rdx count
 *
 * Output:
 * eax uncopied bytes or 0 if successful.
 */
ENTRY(copy_user_generic_string)
	ASM_STAC
	cmpl $8,%edx
	jb 2f		/* less than 8 bytes, go to byte copy loop */
	ALIGN_DESTINATION
	movl %edx,%ecx
	shrl $3,%ecx
	andl $7,%edx
1:	rep
	movsq
2:	movl %edx,%ecx
3:	rep
	movsb
	xorl %eax,%eax
	ASM_CLAC
	ret

	.section .fixup,"ax"
11:	leal (%rdx,%rcx,8),%ecx
12:	movl %ecx,%edx		/* ecx is zerorest also */
	jmp copy_user_handle_tail
	.previous

	_ASM_EXTABLE(1b,11b)
	_ASM_EXTABLE(3b,12b)
ENDPROC(copy_user_generic_string)
EXPORT_SYMBOL(copy_user_generic_string)
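
/*
 * For illustration: the string variant splits the length so that the
 * quadword count goes to "rep movsq" and only the low three bits are left
 * for "rep movsb".  A user-space C sketch of that split, with memcpy()
 * standing in for both rep-string instructions (hypothetical helper):
 *
 *	#include <string.h>
 *
 *	static unsigned long copy_string_sketch(void *dst, const void *src,
 *						unsigned long len)
 *	{
 *		unsigned long qwords = len >> 3;	// shrl $3,%ecx
 *		unsigned long tail   = len & 7;		// andl $7,%edx
 *
 *		memcpy(dst, src, qwords * 8);		// rep movsq
 *		memcpy((char *)dst + qwords * 8,
 *		       (const char *)src + qwords * 8,
 *		       tail);				// rep movsb
 *		return 0;
 *	}
 */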

/*
 * Some CPUs are adding enhanced REP MOVSB/STOSB instructions.
 * It's recommended to use enhanced REP MOVSB/STOSB if it's enabled.
 *
 * Input:
 * rdi destination
 * rsi source
 * rdx count
 *
 * Output:
 * eax uncopied bytes or 0 if successful.
 */
ENTRY(copy_user_enhanced_fast_string)
	ASM_STAC
	cmpl $64,%edx
	jb .L_copy_short_string	/* less than 64 bytes, avoid the costly 'rep' */
	movl %edx,%ecx
1:	rep
	movsb
	xorl %eax,%eax
	ASM_CLAC
	ret

	.section .fixup,"ax"
12:	movl %ecx,%edx		/* ecx is zerorest also */
	jmp copy_user_handle_tail
	.previous

	_ASM_EXTABLE(1b,12b)
ENDPROC(copy_user_enhanced_fast_string)
EXPORT_SYMBOL(copy_user_enhanced_fast_string)
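
/*
 * For illustration: with ERMS a single "rep movsb" handles everything but
 * very short buffers, where the setup cost dominates; that is why the code
 * above falls back to .L_copy_short_string below 64 bytes.  A minimal C
 * sketch of the same decision, reusing the hypothetical
 * copy_unrolled_sketch() helper shown earlier (assumes <string.h> and that
 * helper are in scope; neither is a kernel function):
 *
 *	static unsigned long copy_erms_sketch(void *dst, const void *src,
 *					      unsigned long len)
 *	{
 *		if (len < 64)
 *			return copy_unrolled_sketch(dst, src, len);
 *		memcpy(dst, src, len);		// one "rep movsb"
 *		return 0;
 *	}
 */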

/*
 * copy_user_nocache - Uncached memory copy with exception handling
 * This will force destination out of cache for more performance.
 *
 * Note: Cached memory copy is used when destination or size is not
 * naturally aligned. That is:
 *  - Require 8-byte alignment when size is 8 bytes or larger.
 *  - Require 4-byte alignment when size is 4 bytes.
 */
ENTRY(__copy_user_nocache)
	ASM_STAC

	/* If size is less than 8 bytes, go to 4-byte copy */
	cmpl $8,%edx
	jb .L_4b_nocache_copy_entry

	/* If destination is not 8-byte aligned, "cache" copy to align it */
	ALIGN_DESTINATION

	/* Set 4x8-byte copy count and remainder */
	movl %edx,%ecx
	andl $63,%edx
	shrl $6,%ecx
	jz .L_8b_nocache_copy_entry	/* jump if count is 0 */

	/* Perform 4x8-byte nocache loop-copy */
.L_4x8b_nocache_copy_loop:
1:	movq (%rsi),%r8
2:	movq 1*8(%rsi),%r9
3:	movq 2*8(%rsi),%r10
4:	movq 3*8(%rsi),%r11
5:	movnti %r8,(%rdi)
6:	movnti %r9,1*8(%rdi)
7:	movnti %r10,2*8(%rdi)
8:	movnti %r11,3*8(%rdi)
9:	movq 4*8(%rsi),%r8
10:	movq 5*8(%rsi),%r9
11:	movq 6*8(%rsi),%r10
12:	movq 7*8(%rsi),%r11
13:	movnti %r8,4*8(%rdi)
14:	movnti %r9,5*8(%rdi)
15:	movnti %r10,6*8(%rdi)
16:	movnti %r11,7*8(%rdi)
	leaq 64(%rsi),%rsi
	leaq 64(%rdi),%rdi
	decl %ecx
	jnz .L_4x8b_nocache_copy_loop

	/* Set 8-byte copy count and remainder */
.L_8b_nocache_copy_entry:
	movl %edx,%ecx
	andl $7,%edx
	shrl $3,%ecx
	jz .L_4b_nocache_copy_entry	/* jump if count is 0 */

	/* Perform 8-byte nocache loop-copy */
.L_8b_nocache_copy_loop:
20:	movq (%rsi),%r8
21:	movnti %r8,(%rdi)
	leaq 8(%rsi),%rsi
	leaq 8(%rdi),%rdi
	decl %ecx
	jnz .L_8b_nocache_copy_loop

	/* If no bytes are left, we're done */
.L_4b_nocache_copy_entry:
	andl %edx,%edx
	jz .L_finish_copy

	/* If destination is not 4-byte aligned, go to byte copy: */
	movl %edi,%ecx
	andl $3,%ecx
	jnz .L_1b_cache_copy_entry

	/* Set 4-byte copy count (1 or 0) and remainder */
	movl %edx,%ecx
	andl $3,%edx
	shrl $2,%ecx
	jz .L_1b_cache_copy_entry	/* jump if count is 0 */

	/* Perform 4-byte nocache copy: */
30:	movl (%rsi),%r8d
31:	movnti %r8d,(%rdi)
	leaq 4(%rsi),%rsi
	leaq 4(%rdi),%rdi

	/* If no bytes are left, we're done: */
	andl %edx,%edx
	jz .L_finish_copy

	/* Perform byte "cache" loop-copy for the remainder */
.L_1b_cache_copy_entry:
	movl %edx,%ecx
.L_1b_cache_copy_loop:
40:	movb (%rsi),%al
41:	movb %al,(%rdi)
	incq %rsi
	incq %rdi
	decl %ecx
	jnz .L_1b_cache_copy_loop

	/* Finished copying; fence the prior stores */
.L_finish_copy:
	xorl %eax,%eax
	ASM_CLAC
	sfence
	ret

	.section .fixup,"ax"
.L_fixup_4x8b_copy:
	shll $6,%ecx
	addl %ecx,%edx
	jmp .L_fixup_handle_tail
.L_fixup_8b_copy:
	lea (%rdx,%rcx,8),%rdx
	jmp .L_fixup_handle_tail
.L_fixup_4b_copy:
	lea (%rdx,%rcx,4),%rdx
	jmp .L_fixup_handle_tail
.L_fixup_1b_copy:
	movl %ecx,%edx
.L_fixup_handle_tail:
	sfence
	jmp copy_user_handle_tail
	.previous

	_ASM_EXTABLE(1b,.L_fixup_4x8b_copy)
	_ASM_EXTABLE(2b,.L_fixup_4x8b_copy)
	_ASM_EXTABLE(3b,.L_fixup_4x8b_copy)
	_ASM_EXTABLE(4b,.L_fixup_4x8b_copy)
	_ASM_EXTABLE(5b,.L_fixup_4x8b_copy)
	_ASM_EXTABLE(6b,.L_fixup_4x8b_copy)
	_ASM_EXTABLE(7b,.L_fixup_4x8b_copy)
	_ASM_EXTABLE(8b,.L_fixup_4x8b_copy)
	_ASM_EXTABLE(9b,.L_fixup_4x8b_copy)
	_ASM_EXTABLE(10b,.L_fixup_4x8b_copy)
	_ASM_EXTABLE(11b,.L_fixup_4x8b_copy)
	_ASM_EXTABLE(12b,.L_fixup_4x8b_copy)
	_ASM_EXTABLE(13b,.L_fixup_4x8b_copy)
	_ASM_EXTABLE(14b,.L_fixup_4x8b_copy)
	_ASM_EXTABLE(15b,.L_fixup_4x8b_copy)
	_ASM_EXTABLE(16b,.L_fixup_4x8b_copy)
	_ASM_EXTABLE(20b,.L_fixup_8b_copy)
	_ASM_EXTABLE(21b,.L_fixup_8b_copy)
	_ASM_EXTABLE(30b,.L_fixup_4b_copy)
	_ASM_EXTABLE(31b,.L_fixup_4b_copy)
	_ASM_EXTABLE(40b,.L_fixup_1b_copy)
	_ASM_EXTABLE(41b,.L_fixup_1b_copy)
ENDPROC(__copy_user_nocache)
EXPORT_SYMBOL(__copy_user_nocache)
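
/*
 * For illustration: the non-temporal stores (movnti) above bypass the
 * cache, and the sfence before returning (and in the fixup path) orders
 * them before the copy is reported complete.  A user-space approximation
 * using the SSE2 intrinsics, assuming dst and len are already 8-byte
 * aligned/multiples and ignoring fault handling (hypothetical helper):
 *
 *	#include <emmintrin.h>	// _mm_stream_si64 (movnti), _mm_sfence
 *
 *	static void nocache_copy_sketch(void *dst, const void *src,
 *					unsigned long len)
 *	{
 *		long long *d = dst;
 *		const long long *s = src;
 *
 *		for (; len >= 8; len -= 8)
 *			_mm_stream_si64(d++, *s++);	// movnti %r8,(%rdi)
 *		_mm_sfence();				// the final sfence
 *	}
 */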