blob: 6ff12743a0bdbe3e396e78b19ac667b52265c74b [file] [log] [blame]
Jeff Dike75e55842005-09-03 15:57:45 -07001/*
2 * Copyright (C) 2004 Jeff Dike (jdike@addtoit.com)
3 * Licensed under the GPL
4 */
5
6#include <stdlib.h>
7#include <unistd.h>
8#include <signal.h>
9#include <errno.h>
10#include <sched.h>
11#include <sys/syscall.h>
12#include "os.h"
Jeff Dike75e55842005-09-03 15:57:45 -070013#include "aio.h"
14#include "init.h"
15#include "user.h"
16#include "mode.h"
17
Jeff Dike91acb212005-10-10 23:10:32 -040018struct aio_thread_req {
Jeff Diked50084a2006-01-06 00:18:50 -080019 enum aio_type type;
20 int io_fd;
21 unsigned long long offset;
22 char *buf;
23 int len;
24 struct aio_context *aio;
Jeff Dike91acb212005-10-10 23:10:32 -040025};
26
Jeff Dike75e55842005-09-03 15:57:45 -070027#if defined(HAVE_AIO_ABI)
28#include <linux/aio_abi.h>
29
30/* If we have the headers, we are going to build with AIO enabled.
31 * If we don't have aio in libc, we define the necessary stubs here.
32 */
33
34#if !defined(HAVE_AIO_LIBC)
35
/* Stand-in for libc's io_setup: invoke the raw system call directly. */
static long io_setup(int n, aio_context_t *ctxp)
{
	long ret;

	ret = syscall(__NR_io_setup, n, ctxp);
	return ret;
}
40
/* Stand-in for libc's io_submit: invoke the raw system call directly. */
static long io_submit(aio_context_t ctx, long nr, struct iocb **iocbpp)
{
	long ret;

	ret = syscall(__NR_io_submit, ctx, nr, iocbpp);
	return ret;
}
45
/* Stand-in for libc's io_getevents: invoke the raw system call directly. */
static long io_getevents(aio_context_t ctx_id, long min_nr, long nr,
			 struct io_event *events, struct timespec *timeout)
{
	long ret;

	ret = syscall(__NR_io_getevents, ctx_id, min_nr, nr, events, timeout);
	return ret;
}
51
52#endif
53
54/* The AIO_MMAP cases force the mmapped page into memory here
55 * rather than in whatever place first touches the data. I used
56 * to do this by touching the page, but that's delicate because
57 * gcc is prone to optimizing that away. So, what's done here
58 * is we read from the descriptor from which the page was
59 * mapped. The caller is required to pass an offset which is
60 * inside the page that was mapped. Thus, when the read
61 * returns, we know that the page is in the page cache, and
62 * that it now backs the mmapped area.
63 */
64
Jeff Dike91acb212005-10-10 23:10:32 -040065static int do_aio(aio_context_t ctx, enum aio_type type, int fd, char *buf,
Jeff Diked50084a2006-01-06 00:18:50 -080066 int len, unsigned long long offset, struct aio_context *aio)
Jeff Dike75e55842005-09-03 15:57:45 -070067{
Jeff Diked50084a2006-01-06 00:18:50 -080068 struct iocb iocb, *iocbp = &iocb;
69 char c;
70 int err;
Jeff Dike75e55842005-09-03 15:57:45 -070071
Jeff Diked50084a2006-01-06 00:18:50 -080072 iocb = ((struct iocb) { .aio_data = (unsigned long) aio,
73 .aio_reqprio = 0,
74 .aio_fildes = fd,
75 .aio_buf = (unsigned long) buf,
76 .aio_nbytes = len,
77 .aio_offset = offset,
78 .aio_reserved1 = 0,
79 .aio_reserved2 = 0,
80 .aio_reserved3 = 0 });
Jeff Dike75e55842005-09-03 15:57:45 -070081
Jeff Diked50084a2006-01-06 00:18:50 -080082 switch(type){
83 case AIO_READ:
84 iocb.aio_lio_opcode = IOCB_CMD_PREAD;
85 err = io_submit(ctx, 1, &iocbp);
86 break;
87 case AIO_WRITE:
88 iocb.aio_lio_opcode = IOCB_CMD_PWRITE;
89 err = io_submit(ctx, 1, &iocbp);
90 break;
91 case AIO_MMAP:
92 iocb.aio_lio_opcode = IOCB_CMD_PREAD;
93 iocb.aio_buf = (unsigned long) &c;
94 iocb.aio_nbytes = sizeof(c);
95 err = io_submit(ctx, 1, &iocbp);
96 break;
97 default:
98 printk("Bogus op in do_aio - %d\n", type);
99 err = -EINVAL;
100 break;
101 }
Jeff Dike09ace812005-09-03 15:57:46 -0700102
Jeff Diked50084a2006-01-06 00:18:50 -0800103 if(err > 0)
104 err = 0;
Jeff Dike2867ace2005-09-16 19:27:51 -0700105 else
106 err = -errno;
Jeff Dike75e55842005-09-03 15:57:45 -0700107
Jeff Diked50084a2006-01-06 00:18:50 -0800108 return err;
Jeff Dike75e55842005-09-03 15:57:45 -0700109}
110
/* Host AIO context: filled in by io_setup() in init_aio_26(), then only read */
static aio_context_t ctx;
113
114static int aio_thread(void *arg)
115{
Jeff Diked50084a2006-01-06 00:18:50 -0800116 struct aio_thread_reply reply;
117 struct io_event event;
118 int err, n, reply_fd;
Jeff Dike75e55842005-09-03 15:57:45 -0700119
Jeff Diked50084a2006-01-06 00:18:50 -0800120 signal(SIGWINCH, SIG_IGN);
Jeff Dike75e55842005-09-03 15:57:45 -0700121
Jeff Diked50084a2006-01-06 00:18:50 -0800122 while(1){
123 n = io_getevents(ctx, 1, 1, &event, NULL);
124 if(n < 0){
125 if(errno == EINTR)
126 continue;
127 printk("aio_thread - io_getevents failed, "
128 "errno = %d\n", errno);
129 }
130 else {
131 reply = ((struct aio_thread_reply)
132 { .data = (void *) (long) event.data,
133 .err = event.res });
Jeff Dike91acb212005-10-10 23:10:32 -0400134 reply_fd = ((struct aio_context *) reply.data)->reply_fd;
135 err = os_write_file(reply_fd, &reply, sizeof(reply));
Jeff Diked50084a2006-01-06 00:18:50 -0800136 if(err != sizeof(reply))
Jeff Dike91acb212005-10-10 23:10:32 -0400137 printk("aio_thread - write failed, fd = %d, "
Jeff Dike9683da92007-02-10 01:44:27 -0800138 "err = %d\n", reply_fd, -err);
Jeff Diked50084a2006-01-06 00:18:50 -0800139 }
140 }
141 return 0;
Jeff Dike75e55842005-09-03 15:57:45 -0700142}
143
144#endif
145
Jeff Dike91acb212005-10-10 23:10:32 -0400146static int do_not_aio(struct aio_thread_req *req)
Jeff Dike75e55842005-09-03 15:57:45 -0700147{
Jeff Diked50084a2006-01-06 00:18:50 -0800148 char c;
149 int err;
Jeff Dike75e55842005-09-03 15:57:45 -0700150
Jeff Diked50084a2006-01-06 00:18:50 -0800151 switch(req->type){
152 case AIO_READ:
153 err = os_seek_file(req->io_fd, req->offset);
154 if(err)
155 goto out;
Jeff Dike75e55842005-09-03 15:57:45 -0700156
Jeff Diked50084a2006-01-06 00:18:50 -0800157 err = os_read_file(req->io_fd, req->buf, req->len);
158 break;
159 case AIO_WRITE:
160 err = os_seek_file(req->io_fd, req->offset);
161 if(err)
162 goto out;
Jeff Dike75e55842005-09-03 15:57:45 -0700163
Jeff Diked50084a2006-01-06 00:18:50 -0800164 err = os_write_file(req->io_fd, req->buf, req->len);
165 break;
166 case AIO_MMAP:
167 err = os_seek_file(req->io_fd, req->offset);
168 if(err)
169 goto out;
Jeff Dike75e55842005-09-03 15:57:45 -0700170
Jeff Diked50084a2006-01-06 00:18:50 -0800171 err = os_read_file(req->io_fd, &c, sizeof(c));
172 break;
173 default:
174 printk("do_not_aio - bad request type : %d\n", req->type);
175 err = -EINVAL;
176 break;
177 }
Jeff Dike75e55842005-09-03 15:57:45 -0700178
Jeff Diked50084a2006-01-06 00:18:50 -0800179out:
180 return err;
Jeff Dike75e55842005-09-03 15:57:45 -0700181}
182
/*
 * Set once during initialization and left alone afterwards; -1 means
 * "not set up".  The two descriptors are the ends of the request channel
 * used by submit_aio_24() and not_aio_thread(); aio_pid is the helper
 * thread that exit_aio() kills.
 */
static int aio_req_fd_r = -1, aio_req_fd_w = -1;
static int aio_pid = -1;
187
Jeff Dike75e55842005-09-03 15:57:45 -0700188static int not_aio_thread(void *arg)
189{
Jeff Diked50084a2006-01-06 00:18:50 -0800190 struct aio_thread_req req;
191 struct aio_thread_reply reply;
192 int err;
Jeff Dike75e55842005-09-03 15:57:45 -0700193
Jeff Diked50084a2006-01-06 00:18:50 -0800194 signal(SIGWINCH, SIG_IGN);
195 while(1){
196 err = os_read_file(aio_req_fd_r, &req, sizeof(req));
197 if(err != sizeof(req)){
198 if(err < 0)
199 printk("not_aio_thread - read failed, "
200 "fd = %d, err = %d\n", aio_req_fd_r,
201 -err);
202 else {
203 printk("not_aio_thread - short read, fd = %d, "
204 "length = %d\n", aio_req_fd_r, err);
205 }
206 continue;
207 }
208 err = do_not_aio(&req);
209 reply = ((struct aio_thread_reply) { .data = req.aio,
210 .err = err });
211 err = os_write_file(req.aio->reply_fd, &reply, sizeof(reply));
212 if(err != sizeof(reply))
213 printk("not_aio_thread - write failed, fd = %d, "
Jeff Dike9683da92007-02-10 01:44:27 -0800214 "err = %d\n", req.aio->reply_fd, -err);
Jeff Diked50084a2006-01-06 00:18:50 -0800215 }
Jeff Dike1b57e9c2006-01-06 00:18:49 -0800216
217 return 0;
Jeff Dike75e55842005-09-03 15:57:45 -0700218}
219
Jeff Dike75e55842005-09-03 15:57:45 -0700220static int init_aio_24(void)
221{
Jeff Diked50084a2006-01-06 00:18:50 -0800222 unsigned long stack;
223 int fds[2], err;
Jeff Dike75e55842005-09-03 15:57:45 -0700224
Jeff Diked50084a2006-01-06 00:18:50 -0800225 err = os_pipe(fds, 1, 1);
226 if(err)
227 goto out;
Jeff Dike75e55842005-09-03 15:57:45 -0700228
Jeff Diked50084a2006-01-06 00:18:50 -0800229 aio_req_fd_w = fds[0];
230 aio_req_fd_r = fds[1];
231 err = run_helper_thread(not_aio_thread, NULL,
232 CLONE_FILES | CLONE_VM | SIGCHLD, &stack, 0);
233 if(err < 0)
234 goto out_close_pipe;
Jeff Dike75e55842005-09-03 15:57:45 -0700235
Jeff Diked50084a2006-01-06 00:18:50 -0800236 aio_pid = err;
237 goto out;
Jeff Dike75e55842005-09-03 15:57:45 -0700238
Jeff Diked50084a2006-01-06 00:18:50 -0800239out_close_pipe:
240 os_close_file(fds[0]);
241 os_close_file(fds[1]);
242 aio_req_fd_w = -1;
243 aio_req_fd_r = -1;
244out:
Jeff Dike75e55842005-09-03 15:57:45 -0700245#ifndef HAVE_AIO_ABI
246 printk("/usr/include/linux/aio_abi.h not present during build\n");
247#endif
248 printk("2.6 host AIO support not used - falling back to I/O "
249 "thread\n");
Jeff Diked50084a2006-01-06 00:18:50 -0800250 return 0;
Jeff Dike75e55842005-09-03 15:57:45 -0700251}
252
253#ifdef HAVE_AIO_ABI
254#define DEFAULT_24_AIO 0
255static int init_aio_26(void)
256{
Jeff Diked50084a2006-01-06 00:18:50 -0800257 unsigned long stack;
258 int err;
Jeff Dike75e55842005-09-03 15:57:45 -0700259
Jeff Diked50084a2006-01-06 00:18:50 -0800260 if(io_setup(256, &ctx)){
Jeff Dikeb4fd3102005-09-16 19:27:49 -0700261 err = -errno;
Jeff Diked50084a2006-01-06 00:18:50 -0800262 printk("aio_thread failed to initialize context, err = %d\n",
263 errno);
264 return err;
265 }
Jeff Dike75e55842005-09-03 15:57:45 -0700266
Jeff Diked50084a2006-01-06 00:18:50 -0800267 err = run_helper_thread(aio_thread, NULL,
268 CLONE_FILES | CLONE_VM | SIGCHLD, &stack, 0);
269 if(err < 0)
270 return err;
Jeff Dike75e55842005-09-03 15:57:45 -0700271
Jeff Diked50084a2006-01-06 00:18:50 -0800272 aio_pid = err;
Jeff Dike75e55842005-09-03 15:57:45 -0700273
274 printk("Using 2.6 host AIO\n");
Jeff Diked50084a2006-01-06 00:18:50 -0800275 return 0;
Jeff Dike75e55842005-09-03 15:57:45 -0700276}
277
Jeff Dike91acb212005-10-10 23:10:32 -0400278static int submit_aio_26(enum aio_type type, int io_fd, char *buf, int len,
279 unsigned long long offset, struct aio_context *aio)
280{
Jeff Diked50084a2006-01-06 00:18:50 -0800281 struct aio_thread_reply reply;
282 int err;
Jeff Dike91acb212005-10-10 23:10:32 -0400283
Jeff Diked50084a2006-01-06 00:18:50 -0800284 err = do_aio(ctx, type, io_fd, buf, len, offset, aio);
285 if(err){
286 reply = ((struct aio_thread_reply) { .data = aio,
287 .err = err });
288 err = os_write_file(aio->reply_fd, &reply, sizeof(reply));
289 if(err != sizeof(reply))
290 printk("submit_aio_26 - write failed, "
291 "fd = %d, err = %d\n", aio->reply_fd, -err);
292 else err = 0;
293 }
Jeff Dike91acb212005-10-10 23:10:32 -0400294
Jeff Diked50084a2006-01-06 00:18:50 -0800295 return err;
Jeff Dike91acb212005-10-10 23:10:32 -0400296}
297
Jeff Dike75e55842005-09-03 15:57:45 -0700298#else
299#define DEFAULT_24_AIO 1
/* Built without aio_abi.h: 2.6 host AIO cannot be set up, always -ENOSYS */
static int init_aio_26(void)
{
	const int unsupported = -ENOSYS;

	return unsupported;
}
304
Jeff Dike91acb212005-10-10 23:10:32 -0400305static int submit_aio_26(enum aio_type type, int io_fd, char *buf, int len,
306 unsigned long long offset, struct aio_context *aio)
Jeff Dike75e55842005-09-03 15:57:45 -0700307{
Jeff Diked50084a2006-01-06 00:18:50 -0800308 return -ENOSYS;
Jeff Dike75e55842005-09-03 15:57:45 -0700309}
310#endif
311
Jeff Dike9683da92007-02-10 01:44:27 -0800312/* Initialized in an initcall and unchanged thereafter */
Jeff Dike75e55842005-09-03 15:57:45 -0700313static int aio_24 = DEFAULT_24_AIO;
314
315static int __init set_aio_24(char *name, int *add)
316{
Jeff Diked50084a2006-01-06 00:18:50 -0800317 aio_24 = 1;
318 return 0;
Jeff Dike75e55842005-09-03 15:57:45 -0700319}
320
321__uml_setup("aio=2.4", set_aio_24,
322"aio=2.4\n"
323" This is used to force UML to use 2.4-style AIO even when 2.6 AIO is\n"
324" available. 2.4 AIO is a single thread that handles one request at a\n"
325" time, synchronously. 2.6 AIO is a thread which uses the 2.6 AIO \n"
326" interface to handle an arbitrary number of pending requests. 2.6 AIO \n"
327" is not available in tt mode, on 2.4 hosts, or when UML is built with\n"
328" /usr/include/linux/aio_abi.h not available. Many distributions don't\n"
329" include aio_abi.h, so you will need to copy it from a kernel tree to\n"
330" your /usr/include/linux in order to build an AIO-capable UML\n\n"
331);
332
333static int init_aio(void)
334{
Jeff Diked50084a2006-01-06 00:18:50 -0800335 int err;
Jeff Dike75e55842005-09-03 15:57:45 -0700336
Jeff Diked50084a2006-01-06 00:18:50 -0800337 CHOOSE_MODE(({ if(!aio_24){
338 printk("Disabling 2.6 AIO in tt mode\n");
339 aio_24 = 1;
340 } }), (void) 0);
Jeff Dike75e55842005-09-03 15:57:45 -0700341
Jeff Diked50084a2006-01-06 00:18:50 -0800342 if(!aio_24){
343 err = init_aio_26();
344 if(err && (errno == ENOSYS)){
345 printk("2.6 AIO not supported on the host - "
346 "reverting to 2.4 AIO\n");
347 aio_24 = 1;
348 }
349 else return err;
350 }
Jeff Dike75e55842005-09-03 15:57:45 -0700351
Jeff Diked50084a2006-01-06 00:18:50 -0800352 if(aio_24)
353 return init_aio_24();
Jeff Dike75e55842005-09-03 15:57:45 -0700354
Jeff Diked50084a2006-01-06 00:18:50 -0800355 return 0;
Jeff Dike75e55842005-09-03 15:57:45 -0700356}
357
/* The reason for the __initcall/__uml_exitcall asymmetry is that init_aio
 * needs to be called when the kernel is running because it calls
 * run_helper_thread, which needs get_free_page.  exit_aio is a
 * __uml_exitcall because the generic kernel does not run __exitcalls on
 * shutdown, and can't because many of them break when called outside of
 * module unloading.
 */
364__initcall(init_aio);
365
366static void exit_aio(void)
367{
Jeff Diked50084a2006-01-06 00:18:50 -0800368 if(aio_pid != -1)
369 os_kill_process(aio_pid, 1);
Jeff Dike75e55842005-09-03 15:57:45 -0700370}
371
372__uml_exitcall(exit_aio);
373
Jeff Dike91acb212005-10-10 23:10:32 -0400374static int submit_aio_24(enum aio_type type, int io_fd, char *buf, int len,
375 unsigned long long offset, struct aio_context *aio)
Jeff Dike75e55842005-09-03 15:57:45 -0700376{
Jeff Diked50084a2006-01-06 00:18:50 -0800377 struct aio_thread_req req = { .type = type,
378 .io_fd = io_fd,
379 .offset = offset,
380 .buf = buf,
381 .len = len,
382 .aio = aio,
383 };
384 int err;
Jeff Dike91acb212005-10-10 23:10:32 -0400385
Jeff Diked50084a2006-01-06 00:18:50 -0800386 err = os_write_file(aio_req_fd_w, &req, sizeof(req));
387 if(err == sizeof(req))
388 err = 0;
Jeff Dike91acb212005-10-10 23:10:32 -0400389
Jeff Diked50084a2006-01-06 00:18:50 -0800390 return err;
Jeff Dike91acb212005-10-10 23:10:32 -0400391}
392
393int submit_aio(enum aio_type type, int io_fd, char *buf, int len,
Jeff Diked50084a2006-01-06 00:18:50 -0800394 unsigned long long offset, int reply_fd,
395 struct aio_context *aio)
Jeff Dike91acb212005-10-10 23:10:32 -0400396{
Jeff Diked50084a2006-01-06 00:18:50 -0800397 aio->reply_fd = reply_fd;
398 if(aio_24)
399 return submit_aio_24(type, io_fd, buf, len, offset, aio);
400 else {
401 return submit_aio_26(type, io_fd, buf, len, offset, aio);
402 }
Jeff Dike75e55842005-09-03 15:57:45 -0700403}