blob: b126df4ea168e6f9211b289ee4f033835fa7ec66 [file] [log] [blame]
/*
 * Copyright (C) 2004 Jeff Dike (jdike@addtoit.com)
 * Licensed under the GPL
 */
5
6#include <stdlib.h>
7#include <unistd.h>
8#include <signal.h>
9#include <errno.h>
10#include <sched.h>
11#include <sys/syscall.h>
12#include "os.h"
Jeff Dike75e55842005-09-03 15:57:45 -070013#include "aio.h"
14#include "init.h"
15#include "user.h"
16#include "mode.h"
17
Jeff Dike91acb212005-10-10 23:10:32 -040018struct aio_thread_req {
Jeff Diked50084a2006-01-06 00:18:50 -080019 enum aio_type type;
20 int io_fd;
21 unsigned long long offset;
22 char *buf;
23 int len;
24 struct aio_context *aio;
Jeff Dike91acb212005-10-10 23:10:32 -040025};
26
Jeff Dike75e55842005-09-03 15:57:45 -070027#if defined(HAVE_AIO_ABI)
28#include <linux/aio_abi.h>
29
/* If we have the headers, we are going to build with AIO enabled.
 * If we don't have aio in libc, we define the necessary stubs here.
 */
33
34#if !defined(HAVE_AIO_LIBC)
35
36static long io_setup(int n, aio_context_t *ctxp)
37{
Jeff Diked50084a2006-01-06 00:18:50 -080038 return syscall(__NR_io_setup, n, ctxp);
Jeff Dike75e55842005-09-03 15:57:45 -070039}
40
41static long io_submit(aio_context_t ctx, long nr, struct iocb **iocbpp)
42{
Jeff Diked50084a2006-01-06 00:18:50 -080043 return syscall(__NR_io_submit, ctx, nr, iocbpp);
Jeff Dike75e55842005-09-03 15:57:45 -070044}
45
46static long io_getevents(aio_context_t ctx_id, long min_nr, long nr,
Jeff Diked50084a2006-01-06 00:18:50 -080047 struct io_event *events, struct timespec *timeout)
Jeff Dike75e55842005-09-03 15:57:45 -070048{
Jeff Diked50084a2006-01-06 00:18:50 -080049 return syscall(__NR_io_getevents, ctx_id, min_nr, nr, events, timeout);
Jeff Dike75e55842005-09-03 15:57:45 -070050}
51
52#endif
53
/* The AIO_MMAP cases force the mmapped page into memory here
 * rather than in whatever place first touches the data.  I used
 * to do this by touching the page, but that's delicate because
 * gcc is prone to optimizing that away.  So, what's done here
 * is we read from the descriptor from which the page was
 * mapped.  The caller is required to pass an offset which is
 * inside the page that was mapped.  Thus, when the read
 * returns, we know that the page is in the page cache, and
 * that it now backs the mmapped area.
 */
64
Jeff Dike91acb212005-10-10 23:10:32 -040065static int do_aio(aio_context_t ctx, enum aio_type type, int fd, char *buf,
Jeff Diked50084a2006-01-06 00:18:50 -080066 int len, unsigned long long offset, struct aio_context *aio)
Jeff Dike75e55842005-09-03 15:57:45 -070067{
Jeff Diked50084a2006-01-06 00:18:50 -080068 struct iocb iocb, *iocbp = &iocb;
69 char c;
70 int err;
Jeff Dike75e55842005-09-03 15:57:45 -070071
Jeff Diked50084a2006-01-06 00:18:50 -080072 iocb = ((struct iocb) { .aio_data = (unsigned long) aio,
73 .aio_reqprio = 0,
74 .aio_fildes = fd,
75 .aio_buf = (unsigned long) buf,
76 .aio_nbytes = len,
77 .aio_offset = offset,
78 .aio_reserved1 = 0,
79 .aio_reserved2 = 0,
80 .aio_reserved3 = 0 });
Jeff Dike75e55842005-09-03 15:57:45 -070081
Jeff Diked50084a2006-01-06 00:18:50 -080082 switch(type){
83 case AIO_READ:
84 iocb.aio_lio_opcode = IOCB_CMD_PREAD;
85 err = io_submit(ctx, 1, &iocbp);
86 break;
87 case AIO_WRITE:
88 iocb.aio_lio_opcode = IOCB_CMD_PWRITE;
89 err = io_submit(ctx, 1, &iocbp);
90 break;
91 case AIO_MMAP:
92 iocb.aio_lio_opcode = IOCB_CMD_PREAD;
93 iocb.aio_buf = (unsigned long) &c;
94 iocb.aio_nbytes = sizeof(c);
95 err = io_submit(ctx, 1, &iocbp);
96 break;
97 default:
98 printk("Bogus op in do_aio - %d\n", type);
99 err = -EINVAL;
100 break;
101 }
Jeff Dike09ace812005-09-03 15:57:46 -0700102
Jeff Diked50084a2006-01-06 00:18:50 -0800103 if(err > 0)
104 err = 0;
Jeff Dike2867ace2005-09-16 19:27:51 -0700105 else
106 err = -errno;
Jeff Dike75e55842005-09-03 15:57:45 -0700107
Jeff Diked50084a2006-01-06 00:18:50 -0800108 return err;
Jeff Dike75e55842005-09-03 15:57:45 -0700109}
110
Jeff Dike9683da92007-02-10 01:44:27 -0800111/* Initialized in an initcall and unchanged thereafter */
Jeff Dike75e55842005-09-03 15:57:45 -0700112static aio_context_t ctx = 0;
113
114static int aio_thread(void *arg)
115{
Jeff Diked50084a2006-01-06 00:18:50 -0800116 struct aio_thread_reply reply;
117 struct io_event event;
118 int err, n, reply_fd;
Jeff Dike75e55842005-09-03 15:57:45 -0700119
Jeff Diked50084a2006-01-06 00:18:50 -0800120 signal(SIGWINCH, SIG_IGN);
Jeff Dike75e55842005-09-03 15:57:45 -0700121
Jeff Diked50084a2006-01-06 00:18:50 -0800122 while(1){
123 n = io_getevents(ctx, 1, 1, &event, NULL);
124 if(n < 0){
125 if(errno == EINTR)
126 continue;
127 printk("aio_thread - io_getevents failed, "
128 "errno = %d\n", errno);
129 }
130 else {
131 reply = ((struct aio_thread_reply)
132 { .data = (void *) (long) event.data,
133 .err = event.res });
Jeff Dike91acb212005-10-10 23:10:32 -0400134 reply_fd = ((struct aio_context *) reply.data)->reply_fd;
Jeff Dikea61f3342007-05-06 14:51:35 -0700135 err = write(reply_fd, &reply, sizeof(reply));
Jeff Diked50084a2006-01-06 00:18:50 -0800136 if(err != sizeof(reply))
Jeff Dike91acb212005-10-10 23:10:32 -0400137 printk("aio_thread - write failed, fd = %d, "
Jeff Dikea61f3342007-05-06 14:51:35 -0700138 "err = %d\n", reply_fd, errno);
Jeff Diked50084a2006-01-06 00:18:50 -0800139 }
140 }
141 return 0;
Jeff Dike75e55842005-09-03 15:57:45 -0700142}
143
144#endif
145
Jeff Dike91acb212005-10-10 23:10:32 -0400146static int do_not_aio(struct aio_thread_req *req)
Jeff Dike75e55842005-09-03 15:57:45 -0700147{
Jeff Diked50084a2006-01-06 00:18:50 -0800148 char c;
Jeff Dikeef0470c2007-05-06 14:51:33 -0700149 unsigned long long actual;
Jeff Dikea61f3342007-05-06 14:51:35 -0700150 int n;
Jeff Dike75e55842005-09-03 15:57:45 -0700151
Jeff Dikeef0470c2007-05-06 14:51:33 -0700152 actual = lseek64(req->io_fd, req->offset, SEEK_SET);
153 if(actual != req->offset)
154 return -errno;
155
Jeff Diked50084a2006-01-06 00:18:50 -0800156 switch(req->type){
157 case AIO_READ:
Jeff Dikea61f3342007-05-06 14:51:35 -0700158 n = read(req->io_fd, req->buf, req->len);
Jeff Diked50084a2006-01-06 00:18:50 -0800159 break;
160 case AIO_WRITE:
Jeff Dikea61f3342007-05-06 14:51:35 -0700161 n = write(req->io_fd, req->buf, req->len);
Jeff Diked50084a2006-01-06 00:18:50 -0800162 break;
163 case AIO_MMAP:
Jeff Dikea61f3342007-05-06 14:51:35 -0700164 n = read(req->io_fd, &c, sizeof(c));
Jeff Diked50084a2006-01-06 00:18:50 -0800165 break;
166 default:
167 printk("do_not_aio - bad request type : %d\n", req->type);
Jeff Dikea61f3342007-05-06 14:51:35 -0700168 return -EINVAL;
Jeff Diked50084a2006-01-06 00:18:50 -0800169 }
Jeff Dike75e55842005-09-03 15:57:45 -0700170
Jeff Dikea61f3342007-05-06 14:51:35 -0700171 if(n < 0)
172 return -errno;
173 return 0;
Jeff Dike75e55842005-09-03 15:57:45 -0700174}
175
/*
 * State of the helper thread; set up by the init functions and left alone
 * afterwards.
 *
 * aio_req_fd_r - descriptor not_aio_thread reads requests from
 * aio_req_fd_w - descriptor submit_aio_24 writes requests to
 * aio_pid      - value returned by run_helper_thread, -1 if no helper runs
 * aio_stack    - helper's stack as reported by run_helper_thread; released
 *                in exit_aio
 */
static int aio_req_fd_r = -1, aio_req_fd_w = -1;
static int aio_pid = -1;
static unsigned long aio_stack;
Jeff Dike9683da92007-02-10 01:44:27 -0800181
Jeff Dike75e55842005-09-03 15:57:45 -0700182static int not_aio_thread(void *arg)
183{
Jeff Diked50084a2006-01-06 00:18:50 -0800184 struct aio_thread_req req;
185 struct aio_thread_reply reply;
186 int err;
Jeff Dike75e55842005-09-03 15:57:45 -0700187
Jeff Diked50084a2006-01-06 00:18:50 -0800188 signal(SIGWINCH, SIG_IGN);
189 while(1){
Jeff Dikea61f3342007-05-06 14:51:35 -0700190 err = read(aio_req_fd_r, &req, sizeof(req));
Jeff Diked50084a2006-01-06 00:18:50 -0800191 if(err != sizeof(req)){
192 if(err < 0)
193 printk("not_aio_thread - read failed, "
194 "fd = %d, err = %d\n", aio_req_fd_r,
Jeff Dikea61f3342007-05-06 14:51:35 -0700195 errno);
Jeff Diked50084a2006-01-06 00:18:50 -0800196 else {
197 printk("not_aio_thread - short read, fd = %d, "
198 "length = %d\n", aio_req_fd_r, err);
199 }
200 continue;
201 }
202 err = do_not_aio(&req);
203 reply = ((struct aio_thread_reply) { .data = req.aio,
Jeff Dikeef0470c2007-05-06 14:51:33 -0700204 .err = err });
Jeff Dikea61f3342007-05-06 14:51:35 -0700205 err = write(req.aio->reply_fd, &reply, sizeof(reply));
Jeff Diked50084a2006-01-06 00:18:50 -0800206 if(err != sizeof(reply))
207 printk("not_aio_thread - write failed, fd = %d, "
Jeff Dikea61f3342007-05-06 14:51:35 -0700208 "err = %d\n", req.aio->reply_fd, errno);
Jeff Diked50084a2006-01-06 00:18:50 -0800209 }
Jeff Dike1b57e9c2006-01-06 00:18:49 -0800210
211 return 0;
Jeff Dike75e55842005-09-03 15:57:45 -0700212}
213
Jeff Dike75e55842005-09-03 15:57:45 -0700214static int init_aio_24(void)
215{
Jeff Diked50084a2006-01-06 00:18:50 -0800216 int fds[2], err;
Jeff Dike75e55842005-09-03 15:57:45 -0700217
Jeff Diked50084a2006-01-06 00:18:50 -0800218 err = os_pipe(fds, 1, 1);
219 if(err)
220 goto out;
Jeff Dike75e55842005-09-03 15:57:45 -0700221
Jeff Diked50084a2006-01-06 00:18:50 -0800222 aio_req_fd_w = fds[0];
223 aio_req_fd_r = fds[1];
Jeff Dike8603ec82007-05-06 14:51:44 -0700224
225 err = os_set_fd_block(aio_req_fd_w, 0);
226 if(err)
227 goto out_close_pipe;
228
Jeff Diked50084a2006-01-06 00:18:50 -0800229 err = run_helper_thread(not_aio_thread, NULL,
Jeff Dikec4399012007-07-15 23:38:56 -0700230 CLONE_FILES | CLONE_VM | SIGCHLD, &aio_stack);
Jeff Diked50084a2006-01-06 00:18:50 -0800231 if(err < 0)
232 goto out_close_pipe;
Jeff Dike75e55842005-09-03 15:57:45 -0700233
Jeff Diked50084a2006-01-06 00:18:50 -0800234 aio_pid = err;
235 goto out;
Jeff Dike75e55842005-09-03 15:57:45 -0700236
Jeff Diked50084a2006-01-06 00:18:50 -0800237out_close_pipe:
238 os_close_file(fds[0]);
239 os_close_file(fds[1]);
240 aio_req_fd_w = -1;
241 aio_req_fd_r = -1;
242out:
Jeff Dike75e55842005-09-03 15:57:45 -0700243#ifndef HAVE_AIO_ABI
244 printk("/usr/include/linux/aio_abi.h not present during build\n");
245#endif
246 printk("2.6 host AIO support not used - falling back to I/O "
247 "thread\n");
Jeff Diked50084a2006-01-06 00:18:50 -0800248 return 0;
Jeff Dike75e55842005-09-03 15:57:45 -0700249}
250
251#ifdef HAVE_AIO_ABI
252#define DEFAULT_24_AIO 0
253static int init_aio_26(void)
254{
Jeff Diked50084a2006-01-06 00:18:50 -0800255 int err;
Jeff Dike75e55842005-09-03 15:57:45 -0700256
Jeff Diked50084a2006-01-06 00:18:50 -0800257 if(io_setup(256, &ctx)){
Jeff Dikeb4fd3102005-09-16 19:27:49 -0700258 err = -errno;
Jeff Diked50084a2006-01-06 00:18:50 -0800259 printk("aio_thread failed to initialize context, err = %d\n",
260 errno);
261 return err;
262 }
Jeff Dike75e55842005-09-03 15:57:45 -0700263
Jeff Diked50084a2006-01-06 00:18:50 -0800264 err = run_helper_thread(aio_thread, NULL,
Jeff Dikec4399012007-07-15 23:38:56 -0700265 CLONE_FILES | CLONE_VM | SIGCHLD, &aio_stack);
Jeff Diked50084a2006-01-06 00:18:50 -0800266 if(err < 0)
267 return err;
Jeff Dike75e55842005-09-03 15:57:45 -0700268
Jeff Diked50084a2006-01-06 00:18:50 -0800269 aio_pid = err;
Jeff Dike75e55842005-09-03 15:57:45 -0700270
271 printk("Using 2.6 host AIO\n");
Jeff Diked50084a2006-01-06 00:18:50 -0800272 return 0;
Jeff Dike75e55842005-09-03 15:57:45 -0700273}
274
Jeff Dike91acb212005-10-10 23:10:32 -0400275static int submit_aio_26(enum aio_type type, int io_fd, char *buf, int len,
276 unsigned long long offset, struct aio_context *aio)
277{
Jeff Diked50084a2006-01-06 00:18:50 -0800278 struct aio_thread_reply reply;
279 int err;
Jeff Dike91acb212005-10-10 23:10:32 -0400280
Jeff Diked50084a2006-01-06 00:18:50 -0800281 err = do_aio(ctx, type, io_fd, buf, len, offset, aio);
282 if(err){
283 reply = ((struct aio_thread_reply) { .data = aio,
284 .err = err });
Jeff Dikea61f3342007-05-06 14:51:35 -0700285 err = write(aio->reply_fd, &reply, sizeof(reply));
286 if(err != sizeof(reply)){
287 err = -errno;
Jeff Diked50084a2006-01-06 00:18:50 -0800288 printk("submit_aio_26 - write failed, "
289 "fd = %d, err = %d\n", aio->reply_fd, -err);
Jeff Dikea61f3342007-05-06 14:51:35 -0700290 }
Jeff Diked50084a2006-01-06 00:18:50 -0800291 else err = 0;
292 }
Jeff Dike91acb212005-10-10 23:10:32 -0400293
Jeff Diked50084a2006-01-06 00:18:50 -0800294 return err;
Jeff Dike91acb212005-10-10 23:10:32 -0400295}
296
Jeff Dike75e55842005-09-03 15:57:45 -0700297#else
298#define DEFAULT_24_AIO 1
/*
 * Built without aio_abi.h: 2.6 host AIO cannot be initialized, so always
 * report -ENOSYS.
 */
static int init_aio_26(void)
{
	const int unsupported = -ENOSYS;

	return unsupported;
}
303
Jeff Dike91acb212005-10-10 23:10:32 -0400304static int submit_aio_26(enum aio_type type, int io_fd, char *buf, int len,
305 unsigned long long offset, struct aio_context *aio)
Jeff Dike75e55842005-09-03 15:57:45 -0700306{
Jeff Diked50084a2006-01-06 00:18:50 -0800307 return -ENOSYS;
Jeff Dike75e55842005-09-03 15:57:45 -0700308}
309#endif
310
Jeff Dike9683da92007-02-10 01:44:27 -0800311/* Initialized in an initcall and unchanged thereafter */
Jeff Dike75e55842005-09-03 15:57:45 -0700312static int aio_24 = DEFAULT_24_AIO;
313
314static int __init set_aio_24(char *name, int *add)
315{
Jeff Diked50084a2006-01-06 00:18:50 -0800316 aio_24 = 1;
317 return 0;
Jeff Dike75e55842005-09-03 15:57:45 -0700318}
319
320__uml_setup("aio=2.4", set_aio_24,
321"aio=2.4\n"
322" This is used to force UML to use 2.4-style AIO even when 2.6 AIO is\n"
323" available. 2.4 AIO is a single thread that handles one request at a\n"
324" time, synchronously. 2.6 AIO is a thread which uses the 2.6 AIO \n"
325" interface to handle an arbitrary number of pending requests. 2.6 AIO \n"
326" is not available in tt mode, on 2.4 hosts, or when UML is built with\n"
327" /usr/include/linux/aio_abi.h not available. Many distributions don't\n"
328" include aio_abi.h, so you will need to copy it from a kernel tree to\n"
329" your /usr/include/linux in order to build an AIO-capable UML\n\n"
330);
331
332static int init_aio(void)
333{
Jeff Diked50084a2006-01-06 00:18:50 -0800334 int err;
Jeff Dike75e55842005-09-03 15:57:45 -0700335
Jeff Diked50084a2006-01-06 00:18:50 -0800336 CHOOSE_MODE(({ if(!aio_24){
337 printk("Disabling 2.6 AIO in tt mode\n");
338 aio_24 = 1;
339 } }), (void) 0);
Jeff Dike75e55842005-09-03 15:57:45 -0700340
Jeff Diked50084a2006-01-06 00:18:50 -0800341 if(!aio_24){
342 err = init_aio_26();
343 if(err && (errno == ENOSYS)){
344 printk("2.6 AIO not supported on the host - "
345 "reverting to 2.4 AIO\n");
346 aio_24 = 1;
347 }
348 else return err;
349 }
Jeff Dike75e55842005-09-03 15:57:45 -0700350
Jeff Diked50084a2006-01-06 00:18:50 -0800351 if(aio_24)
352 return init_aio_24();
Jeff Dike75e55842005-09-03 15:57:45 -0700353
Jeff Diked50084a2006-01-06 00:18:50 -0800354 return 0;
Jeff Dike75e55842005-09-03 15:57:45 -0700355}
356
357/* The reason for the __initcall/__uml_exitcall asymmetry is that init_aio
358 * needs to be called when the kernel is running because it calls run_helper,
359 * which needs get_free_page. exit_aio is a __uml_exitcall because the generic
360 * kernel does not run __exitcalls on shutdown, and can't because many of them
361 * break when called outside of module unloading.
362 */
363__initcall(init_aio);
364
365static void exit_aio(void)
366{
Jeff Dikec4399012007-07-15 23:38:56 -0700367 if (aio_pid != -1) {
Jeff Diked50084a2006-01-06 00:18:50 -0800368 os_kill_process(aio_pid, 1);
Jeff Dikec4399012007-07-15 23:38:56 -0700369 free_stack(aio_stack, 0);
370 }
Jeff Dike75e55842005-09-03 15:57:45 -0700371}
372
373__uml_exitcall(exit_aio);
374
Jeff Dike91acb212005-10-10 23:10:32 -0400375static int submit_aio_24(enum aio_type type, int io_fd, char *buf, int len,
376 unsigned long long offset, struct aio_context *aio)
Jeff Dike75e55842005-09-03 15:57:45 -0700377{
Jeff Diked50084a2006-01-06 00:18:50 -0800378 struct aio_thread_req req = { .type = type,
379 .io_fd = io_fd,
380 .offset = offset,
381 .buf = buf,
382 .len = len,
383 .aio = aio,
384 };
385 int err;
Jeff Dike91acb212005-10-10 23:10:32 -0400386
Jeff Dikea61f3342007-05-06 14:51:35 -0700387 err = write(aio_req_fd_w, &req, sizeof(req));
Jeff Diked50084a2006-01-06 00:18:50 -0800388 if(err == sizeof(req))
389 err = 0;
Jeff Dikea61f3342007-05-06 14:51:35 -0700390 else err = -errno;
Jeff Dike91acb212005-10-10 23:10:32 -0400391
Jeff Diked50084a2006-01-06 00:18:50 -0800392 return err;
Jeff Dike91acb212005-10-10 23:10:32 -0400393}
394
395int submit_aio(enum aio_type type, int io_fd, char *buf, int len,
Jeff Diked50084a2006-01-06 00:18:50 -0800396 unsigned long long offset, int reply_fd,
397 struct aio_context *aio)
Jeff Dike91acb212005-10-10 23:10:32 -0400398{
Jeff Diked50084a2006-01-06 00:18:50 -0800399 aio->reply_fd = reply_fd;
400 if(aio_24)
401 return submit_aio_24(type, io_fd, buf, len, offset, aio);
402 else {
403 return submit_aio_26(type, io_fd, buf, len, offset, aio);
404 }
Jeff Dike75e55842005-09-03 15:57:45 -0700405}