process_vm_access.c 13 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484
  1. /*
  2. * linux/mm/process_vm_access.c
  3. *
  4. * Copyright (C) 2010-2011 Christopher Yeoh <cyeoh@au1.ibm.com>, IBM Corp.
  5. *
  6. * This program is free software; you can redistribute it and/or
  7. * modify it under the terms of the GNU General Public License
  8. * as published by the Free Software Foundation; either version
  9. * 2 of the License, or (at your option) any later version.
  10. */
  11. #include <linux/mm.h>
  12. #include <linux/uio.h>
  13. #include <linux/sched.h>
  14. #include <linux/highmem.h>
  15. #include <linux/ptrace.h>
  16. #include <linux/slab.h>
  17. #include <linux/syscalls.h>
  18. #ifdef CONFIG_COMPAT
  19. #include <linux/compat.h>
  20. #endif
  21. /**
  22. * process_vm_rw_pages - read/write pages from task specified
  23. * @task: task to read/write from
  24. * @mm: mm for task
  25. * @process_pages: struct pages area that can store at least
  26. * nr_pages_to_copy struct page pointers
  27. * @pa: address of page in task to start copying from/to
  28. * @start_offset: offset in page to start copying from/to
  29. * @len: number of bytes to copy
  30. * @lvec: iovec array specifying where to copy to/from
  31. * @lvec_cnt: number of elements in iovec array
  32. * @lvec_current: index in iovec array we are up to
  33. * @lvec_offset: offset in bytes from current iovec iov_base we are up to
  34. * @vm_write: 0 means copy from, 1 means copy to
  35. * @nr_pages_to_copy: number of pages to copy
  36. * @bytes_copied: returns number of bytes successfully copied
  37. * Returns 0 on success, error code otherwise
  38. */
  39. static int process_vm_rw_pages(struct task_struct *task,
  40. struct mm_struct *mm,
  41. struct page **process_pages,
  42. unsigned long pa,
  43. unsigned long start_offset,
  44. unsigned long len,
  45. const struct iovec *lvec,
  46. unsigned long lvec_cnt,
  47. unsigned long *lvec_current,
  48. size_t *lvec_offset,
  49. int vm_write,
  50. unsigned int nr_pages_to_copy,
  51. ssize_t *bytes_copied)
  52. {
  53. int pages_pinned;
  54. void *target_kaddr;
  55. int pgs_copied = 0;
  56. int j;
  57. int ret;
  58. ssize_t bytes_to_copy;
  59. ssize_t rc = 0;
  60. *bytes_copied = 0;
  61. /* Get the pages we're interested in */
  62. down_read(&mm->mmap_sem);
  63. pages_pinned = get_user_pages(task, mm, pa,
  64. nr_pages_to_copy,
  65. vm_write, 0, process_pages, NULL);
  66. up_read(&mm->mmap_sem);
  67. if (pages_pinned != nr_pages_to_copy) {
  68. rc = -EFAULT;
  69. goto end;
  70. }
  71. /* Do the copy for each page */
  72. for (pgs_copied = 0;
  73. (pgs_copied < nr_pages_to_copy) && (*lvec_current < lvec_cnt);
  74. pgs_copied++) {
  75. /* Make sure we have a non zero length iovec */
  76. while (*lvec_current < lvec_cnt
  77. && lvec[*lvec_current].iov_len == 0)
  78. (*lvec_current)++;
  79. if (*lvec_current == lvec_cnt)
  80. break;
  81. /*
  82. * Will copy smallest of:
  83. * - bytes remaining in page
  84. * - bytes remaining in destination iovec
  85. */
  86. bytes_to_copy = min_t(ssize_t, PAGE_SIZE - start_offset,
  87. len - *bytes_copied);
  88. bytes_to_copy = min_t(ssize_t, bytes_to_copy,
  89. lvec[*lvec_current].iov_len
  90. - *lvec_offset);
  91. target_kaddr = kmap(process_pages[pgs_copied]) + start_offset;
  92. if (vm_write)
  93. ret = copy_from_user(target_kaddr,
  94. lvec[*lvec_current].iov_base
  95. + *lvec_offset,
  96. bytes_to_copy);
  97. else
  98. ret = copy_to_user(lvec[*lvec_current].iov_base
  99. + *lvec_offset,
  100. target_kaddr, bytes_to_copy);
  101. kunmap(process_pages[pgs_copied]);
  102. if (ret) {
  103. *bytes_copied += bytes_to_copy - ret;
  104. pgs_copied++;
  105. rc = -EFAULT;
  106. goto end;
  107. }
  108. *bytes_copied += bytes_to_copy;
  109. *lvec_offset += bytes_to_copy;
  110. if (*lvec_offset == lvec[*lvec_current].iov_len) {
  111. /*
  112. * Need to copy remaining part of page into the
  113. * next iovec if there are any bytes left in page
  114. */
  115. (*lvec_current)++;
  116. *lvec_offset = 0;
  117. start_offset = (start_offset + bytes_to_copy)
  118. % PAGE_SIZE;
  119. if (start_offset)
  120. pgs_copied--;
  121. } else {
  122. start_offset = 0;
  123. }
  124. }
  125. end:
  126. if (vm_write) {
  127. for (j = 0; j < pages_pinned; j++) {
  128. if (j < pgs_copied)
  129. set_page_dirty_lock(process_pages[j]);
  130. put_page(process_pages[j]);
  131. }
  132. } else {
  133. for (j = 0; j < pages_pinned; j++)
  134. put_page(process_pages[j]);
  135. }
  136. return rc;
  137. }
  138. /* Maximum number of pages kmalloc'd to hold struct page's during copy */
  139. #define PVM_MAX_KMALLOC_PAGES (PAGE_SIZE * 2)
  140. /**
  141. * process_vm_rw_single_vec - read/write pages from task specified
  142. * @addr: start memory address of target process
  143. * @len: size of area to copy to/from
  144. * @lvec: iovec array specifying where to copy to/from locally
  145. * @lvec_cnt: number of elements in iovec array
  146. * @lvec_current: index in iovec array we are up to
  147. * @lvec_offset: offset in bytes from current iovec iov_base we are up to
  148. * @process_pages: struct pages area that can store at least
  149. * nr_pages_to_copy struct page pointers
  150. * @mm: mm for task
  151. * @task: task to read/write from
  152. * @vm_write: 0 means copy from, 1 means copy to
  153. * @bytes_copied: returns number of bytes successfully copied
  154. * Returns 0 on success or on failure error code
  155. */
  156. static int process_vm_rw_single_vec(unsigned long addr,
  157. unsigned long len,
  158. const struct iovec *lvec,
  159. unsigned long lvec_cnt,
  160. unsigned long *lvec_current,
  161. size_t *lvec_offset,
  162. struct page **process_pages,
  163. struct mm_struct *mm,
  164. struct task_struct *task,
  165. int vm_write,
  166. ssize_t *bytes_copied)
  167. {
  168. unsigned long pa = addr & PAGE_MASK;
  169. unsigned long start_offset = addr - pa;
  170. unsigned long nr_pages;
  171. ssize_t bytes_copied_loop;
  172. ssize_t rc = 0;
  173. unsigned long nr_pages_copied = 0;
  174. unsigned long nr_pages_to_copy;
  175. unsigned long max_pages_per_loop = PVM_MAX_KMALLOC_PAGES
  176. / sizeof(struct pages *);
  177. *bytes_copied = 0;
  178. /* Work out address and page range required */
  179. if (len == 0)
  180. return 0;
  181. nr_pages = (addr + len - 1) / PAGE_SIZE - addr / PAGE_SIZE + 1;
  182. while ((nr_pages_copied < nr_pages) && (*lvec_current < lvec_cnt)) {
  183. nr_pages_to_copy = min(nr_pages - nr_pages_copied,
  184. max_pages_per_loop);
  185. rc = process_vm_rw_pages(task, mm, process_pages, pa,
  186. start_offset, len,
  187. lvec, lvec_cnt,
  188. lvec_current, lvec_offset,
  189. vm_write, nr_pages_to_copy,
  190. &bytes_copied_loop);
  191. start_offset = 0;
  192. *bytes_copied += bytes_copied_loop;
  193. if (rc < 0) {
  194. return rc;
  195. } else {
  196. len -= bytes_copied_loop;
  197. nr_pages_copied += nr_pages_to_copy;
  198. pa += nr_pages_to_copy * PAGE_SIZE;
  199. }
  200. }
  201. return rc;
  202. }
  203. /* Maximum number of entries for process pages array
  204. which lives on stack */
  205. #define PVM_MAX_PP_ARRAY_COUNT 16
  206. /**
  207. * process_vm_rw_core - core of reading/writing pages from task specified
  208. * @pid: PID of process to read/write from/to
  209. * @lvec: iovec array specifying where to copy to/from locally
  210. * @liovcnt: size of lvec array
  211. * @rvec: iovec array specifying where to copy to/from in the other process
  212. * @riovcnt: size of rvec array
  213. * @flags: currently unused
  214. * @vm_write: 0 if reading from other process, 1 if writing to other process
  215. * Returns the number of bytes read/written or error code. May
  216. * return less bytes than expected if an error occurs during the copying
  217. * process.
  218. */
  219. static ssize_t process_vm_rw_core(pid_t pid, const struct iovec *lvec,
  220. unsigned long liovcnt,
  221. const struct iovec *rvec,
  222. unsigned long riovcnt,
  223. unsigned long flags, int vm_write)
  224. {
  225. struct task_struct *task;
  226. struct page *pp_stack[PVM_MAX_PP_ARRAY_COUNT];
  227. struct page **process_pages = pp_stack;
  228. struct mm_struct *mm;
  229. unsigned long i;
  230. ssize_t rc = 0;
  231. ssize_t bytes_copied_loop;
  232. ssize_t bytes_copied = 0;
  233. unsigned long nr_pages = 0;
  234. unsigned long nr_pages_iov;
  235. unsigned long iov_l_curr_idx = 0;
  236. size_t iov_l_curr_offset = 0;
  237. ssize_t iov_len;
  238. /*
  239. * Work out how many pages of struct pages we're going to need
  240. * when eventually calling get_user_pages
  241. */
  242. for (i = 0; i < riovcnt; i++) {
  243. iov_len = rvec[i].iov_len;
  244. if (iov_len > 0) {
  245. nr_pages_iov = ((unsigned long)rvec[i].iov_base
  246. + iov_len)
  247. / PAGE_SIZE - (unsigned long)rvec[i].iov_base
  248. / PAGE_SIZE + 1;
  249. nr_pages = max(nr_pages, nr_pages_iov);
  250. }
  251. }
  252. if (nr_pages == 0)
  253. return 0;
  254. if (nr_pages > PVM_MAX_PP_ARRAY_COUNT) {
  255. /* For reliability don't try to kmalloc more than
  256. 2 pages worth */
  257. process_pages = kmalloc(min_t(size_t, PVM_MAX_KMALLOC_PAGES,
  258. sizeof(struct pages *)*nr_pages),
  259. GFP_KERNEL);
  260. if (!process_pages)
  261. return -ENOMEM;
  262. }
  263. /* Get process information */
  264. rcu_read_lock();
  265. task = find_task_by_vpid(pid);
  266. if (task)
  267. get_task_struct(task);
  268. rcu_read_unlock();
  269. if (!task) {
  270. rc = -ESRCH;
  271. goto free_proc_pages;
  272. }
  273. mm = mm_access(task, PTRACE_MODE_ATTACH);
  274. if (!mm || IS_ERR(mm)) {
  275. rc = IS_ERR(mm) ? PTR_ERR(mm) : -ESRCH;
  276. /*
  277. * Explicitly map EACCES to EPERM as EPERM is a more a
  278. * appropriate error code for process_vw_readv/writev
  279. */
  280. if (rc == -EACCES)
  281. rc = -EPERM;
  282. goto put_task_struct;
  283. }
  284. for (i = 0; i < riovcnt && iov_l_curr_idx < liovcnt; i++) {
  285. rc = process_vm_rw_single_vec(
  286. (unsigned long)rvec[i].iov_base, rvec[i].iov_len,
  287. lvec, liovcnt, &iov_l_curr_idx, &iov_l_curr_offset,
  288. process_pages, mm, task, vm_write, &bytes_copied_loop);
  289. bytes_copied += bytes_copied_loop;
  290. if (rc != 0) {
  291. /* If we have managed to copy any data at all then
  292. we return the number of bytes copied. Otherwise
  293. we return the error code */
  294. if (bytes_copied)
  295. rc = bytes_copied;
  296. goto put_mm;
  297. }
  298. }
  299. rc = bytes_copied;
  300. put_mm:
  301. mmput(mm);
  302. put_task_struct:
  303. put_task_struct(task);
  304. free_proc_pages:
  305. if (process_pages != pp_stack)
  306. kfree(process_pages);
  307. return rc;
  308. }
  309. /**
  310. * process_vm_rw - check iovecs before calling core routine
  311. * @pid: PID of process to read/write from/to
  312. * @lvec: iovec array specifying where to copy to/from locally
  313. * @liovcnt: size of lvec array
  314. * @rvec: iovec array specifying where to copy to/from in the other process
  315. * @riovcnt: size of rvec array
  316. * @flags: currently unused
  317. * @vm_write: 0 if reading from other process, 1 if writing to other process
  318. * Returns the number of bytes read/written or error code. May
  319. * return less bytes than expected if an error occurs during the copying
  320. * process.
  321. */
  322. static ssize_t process_vm_rw(pid_t pid,
  323. const struct iovec __user *lvec,
  324. unsigned long liovcnt,
  325. const struct iovec __user *rvec,
  326. unsigned long riovcnt,
  327. unsigned long flags, int vm_write)
  328. {
  329. struct iovec iovstack_l[UIO_FASTIOV];
  330. struct iovec iovstack_r[UIO_FASTIOV];
  331. struct iovec *iov_l = iovstack_l;
  332. struct iovec *iov_r = iovstack_r;
  333. ssize_t rc;
  334. if (flags != 0)
  335. return -EINVAL;
  336. /* Check iovecs */
  337. if (vm_write)
  338. rc = rw_copy_check_uvector(WRITE, lvec, liovcnt, UIO_FASTIOV,
  339. iovstack_l, &iov_l);
  340. else
  341. rc = rw_copy_check_uvector(READ, lvec, liovcnt, UIO_FASTIOV,
  342. iovstack_l, &iov_l);
  343. if (rc <= 0)
  344. goto free_iovecs;
  345. rc = rw_copy_check_uvector(CHECK_IOVEC_ONLY, rvec, riovcnt, UIO_FASTIOV,
  346. iovstack_r, &iov_r);
  347. if (rc <= 0)
  348. goto free_iovecs;
  349. rc = process_vm_rw_core(pid, iov_l, liovcnt, iov_r, riovcnt, flags,
  350. vm_write);
  351. free_iovecs:
  352. if (iov_r != iovstack_r)
  353. kfree(iov_r);
  354. if (iov_l != iovstack_l)
  355. kfree(iov_l);
  356. return rc;
  357. }
  358. SYSCALL_DEFINE6(process_vm_readv, pid_t, pid, const struct iovec __user *, lvec,
  359. unsigned long, liovcnt, const struct iovec __user *, rvec,
  360. unsigned long, riovcnt, unsigned long, flags)
  361. {
  362. return process_vm_rw(pid, lvec, liovcnt, rvec, riovcnt, flags, 0);
  363. }
  364. SYSCALL_DEFINE6(process_vm_writev, pid_t, pid,
  365. const struct iovec __user *, lvec,
  366. unsigned long, liovcnt, const struct iovec __user *, rvec,
  367. unsigned long, riovcnt, unsigned long, flags)
  368. {
  369. return process_vm_rw(pid, lvec, liovcnt, rvec, riovcnt, flags, 1);
  370. }
  371. #ifdef CONFIG_COMPAT
  372. asmlinkage ssize_t
  373. compat_process_vm_rw(compat_pid_t pid,
  374. const struct compat_iovec __user *lvec,
  375. unsigned long liovcnt,
  376. const struct compat_iovec __user *rvec,
  377. unsigned long riovcnt,
  378. unsigned long flags, int vm_write)
  379. {
  380. struct iovec iovstack_l[UIO_FASTIOV];
  381. struct iovec iovstack_r[UIO_FASTIOV];
  382. struct iovec *iov_l = iovstack_l;
  383. struct iovec *iov_r = iovstack_r;
  384. ssize_t rc = -EFAULT;
  385. if (flags != 0)
  386. return -EINVAL;
  387. if (vm_write)
  388. rc = compat_rw_copy_check_uvector(WRITE, lvec, liovcnt,
  389. UIO_FASTIOV, iovstack_l,
  390. &iov_l);
  391. else
  392. rc = compat_rw_copy_check_uvector(READ, lvec, liovcnt,
  393. UIO_FASTIOV, iovstack_l,
  394. &iov_l);
  395. if (rc <= 0)
  396. goto free_iovecs;
  397. rc = compat_rw_copy_check_uvector(CHECK_IOVEC_ONLY, rvec, riovcnt,
  398. UIO_FASTIOV, iovstack_r,
  399. &iov_r);
  400. if (rc <= 0)
  401. goto free_iovecs;
  402. rc = process_vm_rw_core(pid, iov_l, liovcnt, iov_r, riovcnt, flags,
  403. vm_write);
  404. free_iovecs:
  405. if (iov_r != iovstack_r)
  406. kfree(iov_r);
  407. if (iov_l != iovstack_l)
  408. kfree(iov_l);
  409. return rc;
  410. }
  411. asmlinkage ssize_t
  412. compat_sys_process_vm_readv(compat_pid_t pid,
  413. const struct compat_iovec __user *lvec,
  414. unsigned long liovcnt,
  415. const struct compat_iovec __user *rvec,
  416. unsigned long riovcnt,
  417. unsigned long flags)
  418. {
  419. return compat_process_vm_rw(pid, lvec, liovcnt, rvec,
  420. riovcnt, flags, 0);
  421. }
  422. asmlinkage ssize_t
  423. compat_sys_process_vm_writev(compat_pid_t pid,
  424. const struct compat_iovec __user *lvec,
  425. unsigned long liovcnt,
  426. const struct compat_iovec __user *rvec,
  427. unsigned long riovcnt,
  428. unsigned long flags)
  429. {
  430. return compat_process_vm_rw(pid, lvec, liovcnt, rvec,
  431. riovcnt, flags, 1);
  432. }
  433. #endif