aio.c 9.1 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394
  1. /*
  2. * Copyright (C) 2004 - 2007 Jeff Dike (jdike@{addtoit,linux.intel}.com)
  3. * Licensed under the GPL
  4. */
  5. #include <unistd.h>
  6. #include <sched.h>
  7. #include <signal.h>
  8. #include <errno.h>
  9. #include <sys/time.h>
  10. #include <asm/unistd.h>
  11. #include "aio.h"
  12. #include "init.h"
  13. #include "kern_constants.h"
  14. #include "kern_util.h"
  15. #include "os.h"
  16. #include "user.h"
  17. struct aio_thread_req {
  18. enum aio_type type;
  19. int io_fd;
  20. unsigned long long offset;
  21. char *buf;
  22. int len;
  23. struct aio_context *aio;
  24. };
  25. #if defined(HAVE_AIO_ABI)
  26. #include <linux/aio_abi.h>
  27. /*
  28. * If we have the headers, we are going to build with AIO enabled.
  29. * If we don't have aio in libc, we define the necessary stubs here.
  30. */
  31. #if !defined(HAVE_AIO_LIBC)
  /*
   * Direct io_setup system call, used when libc has no AIO entry points.
   * Passes n and ctxp through unchanged and returns whatever syscall()
   * returns.
   */
  static long io_setup(int n, aio_context_t *ctxp)
  {
      long ret;

      ret = syscall(__NR_io_setup, n, ctxp);
      return ret;
  }
  /*
   * Direct io_submit system call, used when libc has no AIO entry points.
   * Passes ctx, nr and iocbpp through unchanged and returns whatever
   * syscall() returns.
   */
  static long io_submit(aio_context_t ctx, long nr, struct iocb **iocbpp)
  {
      long ret;

      ret = syscall(__NR_io_submit, ctx, nr, iocbpp);
      return ret;
  }
  /*
   * Direct io_getevents system call, used when libc has no AIO entry
   * points.  All arguments are passed through unchanged and the result of
   * syscall() is returned.
   */
  static long io_getevents(aio_context_t ctx_id, long min_nr, long nr,
                           struct io_event *events, struct timespec *timeout)
  {
      long ret;

      ret = syscall(__NR_io_getevents, ctx_id, min_nr, nr, events, timeout);
      return ret;
  }
  45. #endif
  46. /*
  47. * The AIO_MMAP cases force the mmapped page into memory here
  48. * rather than in whatever place first touches the data. I used
  49. * to do this by touching the page, but that's delicate because
  50. * gcc is prone to optimizing that away. So, what's done here
  51. * is we read from the descriptor from which the page was
  52. * mapped. The caller is required to pass an offset which is
  53. * inside the page that was mapped. Thus, when the read
  54. * returns, we know that the page is in the page cache, and
  55. * that it now backs the mmapped area.
  56. */
  57. static int do_aio(aio_context_t ctx, enum aio_type type, int fd, char *buf,
  58. int len, unsigned long long offset, struct aio_context *aio)
  59. {
  60. struct iocb *iocbp = & ((struct iocb) {
  61. .aio_data = (unsigned long) aio,
  62. .aio_fildes = fd,
  63. .aio_buf = (unsigned long) buf,
  64. .aio_nbytes = len,
  65. .aio_offset = offset
  66. });
  67. char c;
  68. switch (type) {
  69. case AIO_READ:
  70. iocbp->aio_lio_opcode = IOCB_CMD_PREAD;
  71. break;
  72. case AIO_WRITE:
  73. iocbp->aio_lio_opcode = IOCB_CMD_PWRITE;
  74. break;
  75. case AIO_MMAP:
  76. iocbp->aio_lio_opcode = IOCB_CMD_PREAD;
  77. iocbp->aio_buf = (unsigned long) &c;
  78. iocbp->aio_nbytes = sizeof(c);
  79. break;
  80. default:
  81. printk(UM_KERN_ERR "Bogus op in do_aio - %d\n", type);
  82. return -EINVAL;
  83. }
  84. return (io_submit(ctx, 1, &iocbp) > 0) ? 0 : -errno;
  85. }
  /*
   * Host AIO context.  Filled in by io_setup() in init_aio_26() and only
   * read afterwards; static storage starts out as zero.
   */
  static aio_context_t ctx;
  88. static int aio_thread(void *arg)
  89. {
  90. struct aio_thread_reply reply;
  91. struct io_event event;
  92. int err, n, reply_fd;
  93. signal(SIGWINCH, SIG_IGN);
  94. while (1) {
  95. n = io_getevents(ctx, 1, 1, &event, NULL);
  96. if (n < 0) {
  97. if (errno == EINTR)
  98. continue;
  99. printk(UM_KERN_ERR "aio_thread - io_getevents failed, "
  100. "errno = %d\n", errno);
  101. }
  102. else {
  103. reply = ((struct aio_thread_reply)
  104. { .data = (void *) (long) event.data,
  105. .err = event.res });
  106. reply_fd = ((struct aio_context *) reply.data)->reply_fd;
  107. err = write(reply_fd, &reply, sizeof(reply));
  108. if (err != sizeof(reply))
  109. printk(UM_KERN_ERR "aio_thread - write failed, "
  110. "fd = %d, err = %d\n", reply_fd, errno);
  111. }
  112. }
  113. return 0;
  114. }
  115. #endif
  116. static int do_not_aio(struct aio_thread_req *req)
  117. {
  118. char c;
  119. unsigned long long actual;
  120. int n;
  121. actual = lseek64(req->io_fd, req->offset, SEEK_SET);
  122. if (actual != req->offset)
  123. return -errno;
  124. switch (req->type) {
  125. case AIO_READ:
  126. n = read(req->io_fd, req->buf, req->len);
  127. break;
  128. case AIO_WRITE:
  129. n = write(req->io_fd, req->buf, req->len);
  130. break;
  131. case AIO_MMAP:
  132. n = read(req->io_fd, &c, sizeof(c));
  133. break;
  134. default:
  135. printk(UM_KERN_ERR "do_not_aio - bad request type : %d\n",
  136. req->type);
  137. return -EINVAL;
  138. }
  139. if (n < 0)
  140. return -errno;
  141. return 0;
  142. }
  /*
   * State shared by the init, submit and exit paths.  All of it is set up
   * from the initcalls and left alone afterwards.
   */

  /* Descriptor not_aio_thread() reads requests from */
  static int aio_req_fd_r = -1;

  /* Descriptor submit_aio_24() writes requests to */
  static int aio_req_fd_w = -1;

  /* Value returned by run_helper_thread(), or -1 when no thread was started */
  static int aio_pid = -1;

  /* Stack filled in by run_helper_thread() and released by exit_aio() */
  static unsigned long aio_stack;
  148. static int not_aio_thread(void *arg)
  149. {
  150. struct aio_thread_req req;
  151. struct aio_thread_reply reply;
  152. int err;
  153. signal(SIGWINCH, SIG_IGN);
  154. while (1) {
  155. err = read(aio_req_fd_r, &req, sizeof(req));
  156. if (err != sizeof(req)) {
  157. if (err < 0)
  158. printk(UM_KERN_ERR "not_aio_thread - "
  159. "read failed, fd = %d, err = %d\n",
  160. aio_req_fd_r,
  161. errno);
  162. else {
  163. printk(UM_KERN_ERR "not_aio_thread - short "
  164. "read, fd = %d, length = %d\n",
  165. aio_req_fd_r, err);
  166. }
  167. continue;
  168. }
  169. err = do_not_aio(&req);
  170. reply = ((struct aio_thread_reply) { .data = req.aio,
  171. .err = err });
  172. err = write(req.aio->reply_fd, &reply, sizeof(reply));
  173. if (err != sizeof(reply))
  174. printk(UM_KERN_ERR "not_aio_thread - write failed, "
  175. "fd = %d, err = %d\n", req.aio->reply_fd, errno);
  176. }
  177. return 0;
  178. }
  179. static int init_aio_24(void)
  180. {
  181. int fds[2], err;
  182. err = os_pipe(fds, 1, 1);
  183. if (err)
  184. goto out;
  185. aio_req_fd_w = fds[0];
  186. aio_req_fd_r = fds[1];
  187. err = os_set_fd_block(aio_req_fd_w, 0);
  188. if (err)
  189. goto out_close_pipe;
  190. err = run_helper_thread(not_aio_thread, NULL,
  191. CLONE_FILES | CLONE_VM, &aio_stack);
  192. if (err < 0)
  193. goto out_close_pipe;
  194. aio_pid = err;
  195. goto out;
  196. out_close_pipe:
  197. close(fds[0]);
  198. close(fds[1]);
  199. aio_req_fd_w = -1;
  200. aio_req_fd_r = -1;
  201. out:
  202. #ifndef HAVE_AIO_ABI
  203. printk(UM_KERN_INFO "/usr/include/linux/aio_abi.h not present during "
  204. "build\n");
  205. #endif
  206. printk(UM_KERN_INFO "2.6 host AIO support not used - falling back to "
  207. "I/O thread\n");
  208. return 0;
  209. }
  210. #ifdef HAVE_AIO_ABI
  211. #define DEFAULT_24_AIO 0
  212. static int init_aio_26(void)
  213. {
  214. int err;
  215. if (io_setup(256, &ctx)) {
  216. err = -errno;
  217. printk(UM_KERN_ERR "aio_thread failed to initialize context, "
  218. "err = %d\n", errno);
  219. return err;
  220. }
  221. err = run_helper_thread(aio_thread, NULL,
  222. CLONE_FILES | CLONE_VM, &aio_stack);
  223. if (err < 0)
  224. return err;
  225. aio_pid = err;
  226. printk(UM_KERN_INFO "Using 2.6 host AIO\n");
  227. return 0;
  228. }
  229. static int submit_aio_26(enum aio_type type, int io_fd, char *buf, int len,
  230. unsigned long long offset, struct aio_context *aio)
  231. {
  232. struct aio_thread_reply reply;
  233. int err;
  234. err = do_aio(ctx, type, io_fd, buf, len, offset, aio);
  235. if (err) {
  236. reply = ((struct aio_thread_reply) { .data = aio,
  237. .err = err });
  238. err = write(aio->reply_fd, &reply, sizeof(reply));
  239. if (err != sizeof(reply)) {
  240. err = -errno;
  241. printk(UM_KERN_ERR "submit_aio_26 - write failed, "
  242. "fd = %d, err = %d\n", aio->reply_fd, -err);
  243. }
  244. else err = 0;
  245. }
  246. return err;
  247. }
  248. #else
  /* Initial value of aio_24: without aio_abi.h only the I/O thread exists */
  #define DEFAULT_24_AIO 1

  /* Stand-in used when host AIO was not compiled in; always -ENOSYS. */
  static int init_aio_26(void)
  {
      const int unsupported = -ENOSYS;

      return unsupported;
  }
  254. static int submit_aio_26(enum aio_type type, int io_fd, char *buf, int len,
  255. unsigned long long offset, struct aio_context *aio)
  256. {
  257. return -ENOSYS;
  258. }
  259. #endif
  260. /* Initialized in an initcall and unchanged thereafter */
  261. static int aio_24 = DEFAULT_24_AIO;
  262. static int __init set_aio_24(char *name, int *add)
  263. {
  264. aio_24 = 1;
  265. return 0;
  266. }
  267. __uml_setup("aio=2.4", set_aio_24,
  268. "aio=2.4\n"
  269. " This is used to force UML to use 2.4-style AIO even when 2.6 AIO is\n"
  270. " available. 2.4 AIO is a single thread that handles one request at a\n"
  271. " time, synchronously. 2.6 AIO is a thread which uses the 2.6 AIO \n"
  272. " interface to handle an arbitrary number of pending requests. 2.6 AIO \n"
  273. " is not available in tt mode, on 2.4 hosts, or when UML is built with\n"
  274. " /usr/include/linux/aio_abi.h not available. Many distributions don't\n"
  275. " include aio_abi.h, so you will need to copy it from a kernel tree to\n"
  276. " your /usr/include/linux in order to build an AIO-capable UML\n\n"
  277. );
  278. static int init_aio(void)
  279. {
  280. int err;
  281. if (!aio_24) {
  282. err = init_aio_26();
  283. if (err && (errno == ENOSYS)) {
  284. printk(UM_KERN_INFO "2.6 AIO not supported on the "
  285. "host - reverting to 2.4 AIO\n");
  286. aio_24 = 1;
  287. }
  288. else return err;
  289. }
  290. if (aio_24)
  291. return init_aio_24();
  292. return 0;
  293. }
  294. /*
  295. * The reason for the __initcall/__uml_exitcall asymmetry is that init_aio
  296. * needs to be called when the kernel is running because it calls run_helper,
  297. * which needs get_free_page. exit_aio is a __uml_exitcall because the generic
  298. * kernel does not run __exitcalls on shutdown, and can't because many of them
  299. * break when called outside of module unloading.
  300. */
  301. __initcall(init_aio);
  302. static void exit_aio(void)
  303. {
  304. if (aio_pid != -1) {
  305. os_kill_process(aio_pid, 1);
  306. free_stack(aio_stack, 0);
  307. }
  308. }
  309. __uml_exitcall(exit_aio);
  310. static int submit_aio_24(enum aio_type type, int io_fd, char *buf, int len,
  311. unsigned long long offset, struct aio_context *aio)
  312. {
  313. struct aio_thread_req req = { .type = type,
  314. .io_fd = io_fd,
  315. .offset = offset,
  316. .buf = buf,
  317. .len = len,
  318. .aio = aio,
  319. };
  320. int err;
  321. err = write(aio_req_fd_w, &req, sizeof(req));
  322. if (err == sizeof(req))
  323. err = 0;
  324. else err = -errno;
  325. return err;
  326. }
  327. int submit_aio(enum aio_type type, int io_fd, char *buf, int len,
  328. unsigned long long offset, int reply_fd,
  329. struct aio_context *aio)
  330. {
  331. aio->reply_fd = reply_fd;
  332. if (aio_24)
  333. return submit_aio_24(type, io_fd, buf, len, offset, aio);
  334. else
  335. return submit_aio_26(type, io_fd, buf, len, offset, aio);
  336. }