blob: f897140cc4ae2ec6e76bb3610a510017f4679a7c [file] [log] [blame]
Jeff Dike75e55842005-09-03 15:57:45 -07001/*
2 * Copyright (C) 2004 Jeff Dike (jdike@addtoit.com)
3 * Licensed under the GPL
4 */
5
6#include <stdlib.h>
7#include <unistd.h>
8#include <signal.h>
9#include <errno.h>
10#include <sched.h>
11#include <sys/syscall.h>
12#include "os.h"
Jeff Dike75e55842005-09-03 15:57:45 -070013#include "aio.h"
14#include "init.h"
15#include "user.h"
16#include "mode.h"
17
Jeff Dike91acb212005-10-10 23:10:32 -040018struct aio_thread_req {
Jeff Diked50084a2006-01-06 00:18:50 -080019 enum aio_type type;
20 int io_fd;
21 unsigned long long offset;
22 char *buf;
23 int len;
24 struct aio_context *aio;
Jeff Dike91acb212005-10-10 23:10:32 -040025};
26
/*
 * Request channel of the 2.4-style I/O thread.  Both stay at -1 until
 * init_aio_24 assigns them from os_pipe; submit_aio_24 writes requests to
 * aio_req_fd_w and not_aio_thread reads them from aio_req_fd_r.
 */
Jeff Dike75e55842005-09-03 15:57:45 -070027static int aio_req_fd_r = -1;
28static int aio_req_fd_w = -1;
29
30#if defined(HAVE_AIO_ABI)
31#include <linux/aio_abi.h>
32
33/* If we have the headers, we are going to build with AIO enabled.
34 * If we don't have aio in libc, we define the necessary stubs here.
35 */
36
37#if !defined(HAVE_AIO_LIBC)
38
/*
 * Stand-in for the libc io_setup wrapper: hands n and ctxp straight to the
 * __NR_io_setup system call and returns whatever syscall() returns.
 */
static long io_setup(int n, aio_context_t *ctxp)
{
	long ret;

	ret = syscall(__NR_io_setup, n, ctxp);
	return ret;
}
43
/*
 * Stand-in for the libc io_submit wrapper: hands ctx, nr and iocbpp straight
 * to the __NR_io_submit system call and returns whatever syscall() returns.
 */
static long io_submit(aio_context_t ctx, long nr, struct iocb **iocbpp)
{
	long ret;

	ret = syscall(__NR_io_submit, ctx, nr, iocbpp);
	return ret;
}
48
/*
 * Stand-in for the libc io_getevents wrapper: forwards all five arguments
 * unchanged to the __NR_io_getevents system call and returns whatever
 * syscall() returns.
 */
static long io_getevents(aio_context_t ctx_id, long min_nr, long nr,
			 struct io_event *events, struct timespec *timeout)
{
	long ret;

	ret = syscall(__NR_io_getevents, ctx_id, min_nr, nr, events, timeout);
	return ret;
}
54
55#endif
56
57/* The AIO_MMAP cases force the mmapped page into memory here
58 * rather than in whatever place first touches the data. I used
59 * to do this by touching the page, but that's delicate because
60 * gcc is prone to optimizing that away. So, what's done here
61 * is we read from the descriptor from which the page was
62 * mapped. The caller is required to pass an offset which is
63 * inside the page that was mapped. Thus, when the read
64 * returns, we know that the page is in the page cache, and
65 * that it now backs the mmapped area.
66 */
67
Jeff Dike91acb212005-10-10 23:10:32 -040068static int do_aio(aio_context_t ctx, enum aio_type type, int fd, char *buf,
Jeff Diked50084a2006-01-06 00:18:50 -080069 int len, unsigned long long offset, struct aio_context *aio)
Jeff Dike75e55842005-09-03 15:57:45 -070070{
Jeff Diked50084a2006-01-06 00:18:50 -080071 struct iocb iocb, *iocbp = &iocb;
72 char c;
73 int err;
Jeff Dike75e55842005-09-03 15:57:45 -070074
Jeff Diked50084a2006-01-06 00:18:50 -080075 iocb = ((struct iocb) { .aio_data = (unsigned long) aio,
76 .aio_reqprio = 0,
77 .aio_fildes = fd,
78 .aio_buf = (unsigned long) buf,
79 .aio_nbytes = len,
80 .aio_offset = offset,
81 .aio_reserved1 = 0,
82 .aio_reserved2 = 0,
83 .aio_reserved3 = 0 });
Jeff Dike75e55842005-09-03 15:57:45 -070084
Jeff Diked50084a2006-01-06 00:18:50 -080085 switch(type){
86 case AIO_READ:
87 iocb.aio_lio_opcode = IOCB_CMD_PREAD;
88 err = io_submit(ctx, 1, &iocbp);
89 break;
90 case AIO_WRITE:
91 iocb.aio_lio_opcode = IOCB_CMD_PWRITE;
92 err = io_submit(ctx, 1, &iocbp);
93 break;
94 case AIO_MMAP:
95 iocb.aio_lio_opcode = IOCB_CMD_PREAD;
96 iocb.aio_buf = (unsigned long) &c;
97 iocb.aio_nbytes = sizeof(c);
98 err = io_submit(ctx, 1, &iocbp);
99 break;
100 default:
101 printk("Bogus op in do_aio - %d\n", type);
102 err = -EINVAL;
103 break;
104 }
Jeff Dike09ace812005-09-03 15:57:46 -0700105
Jeff Diked50084a2006-01-06 00:18:50 -0800106 if(err > 0)
107 err = 0;
Jeff Dike2867ace2005-09-16 19:27:51 -0700108 else
109 err = -errno;
Jeff Dike75e55842005-09-03 15:57:45 -0700110
Jeff Diked50084a2006-01-06 00:18:50 -0800111 return err;
Jeff Dike75e55842005-09-03 15:57:45 -0700112}
113
/*
 * Host AIO context.  init_aio_26 has io_setup fill it in; submit_aio_26
 * passes it to do_aio and aio_thread passes it to io_getevents.
 */
114static aio_context_t ctx = 0;
115
116static int aio_thread(void *arg)
117{
Jeff Diked50084a2006-01-06 00:18:50 -0800118 struct aio_thread_reply reply;
119 struct io_event event;
120 int err, n, reply_fd;
Jeff Dike75e55842005-09-03 15:57:45 -0700121
Jeff Diked50084a2006-01-06 00:18:50 -0800122 signal(SIGWINCH, SIG_IGN);
Jeff Dike75e55842005-09-03 15:57:45 -0700123
Jeff Diked50084a2006-01-06 00:18:50 -0800124 while(1){
125 n = io_getevents(ctx, 1, 1, &event, NULL);
126 if(n < 0){
127 if(errno == EINTR)
128 continue;
129 printk("aio_thread - io_getevents failed, "
130 "errno = %d\n", errno);
131 }
132 else {
133 reply = ((struct aio_thread_reply)
134 { .data = (void *) (long) event.data,
135 .err = event.res });
Jeff Dike91acb212005-10-10 23:10:32 -0400136 reply_fd = ((struct aio_context *) reply.data)->reply_fd;
137 err = os_write_file(reply_fd, &reply, sizeof(reply));
Jeff Diked50084a2006-01-06 00:18:50 -0800138 if(err != sizeof(reply))
Jeff Dike91acb212005-10-10 23:10:32 -0400139 printk("aio_thread - write failed, fd = %d, "
Jeff Diked50084a2006-01-06 00:18:50 -0800140 "err = %d\n", aio_req_fd_r, -err);
141 }
142 }
143 return 0;
Jeff Dike75e55842005-09-03 15:57:45 -0700144}
145
146#endif
147
Jeff Dike91acb212005-10-10 23:10:32 -0400148static int do_not_aio(struct aio_thread_req *req)
Jeff Dike75e55842005-09-03 15:57:45 -0700149{
Jeff Diked50084a2006-01-06 00:18:50 -0800150 char c;
151 int err;
Jeff Dike75e55842005-09-03 15:57:45 -0700152
Jeff Diked50084a2006-01-06 00:18:50 -0800153 switch(req->type){
154 case AIO_READ:
155 err = os_seek_file(req->io_fd, req->offset);
156 if(err)
157 goto out;
Jeff Dike75e55842005-09-03 15:57:45 -0700158
Jeff Diked50084a2006-01-06 00:18:50 -0800159 err = os_read_file(req->io_fd, req->buf, req->len);
160 break;
161 case AIO_WRITE:
162 err = os_seek_file(req->io_fd, req->offset);
163 if(err)
164 goto out;
Jeff Dike75e55842005-09-03 15:57:45 -0700165
Jeff Diked50084a2006-01-06 00:18:50 -0800166 err = os_write_file(req->io_fd, req->buf, req->len);
167 break;
168 case AIO_MMAP:
169 err = os_seek_file(req->io_fd, req->offset);
170 if(err)
171 goto out;
Jeff Dike75e55842005-09-03 15:57:45 -0700172
Jeff Diked50084a2006-01-06 00:18:50 -0800173 err = os_read_file(req->io_fd, &c, sizeof(c));
174 break;
175 default:
176 printk("do_not_aio - bad request type : %d\n", req->type);
177 err = -EINVAL;
178 break;
179 }
Jeff Dike75e55842005-09-03 15:57:45 -0700180
Jeff Diked50084a2006-01-06 00:18:50 -0800181out:
182 return err;
Jeff Dike75e55842005-09-03 15:57:45 -0700183}
184
185static int not_aio_thread(void *arg)
186{
Jeff Diked50084a2006-01-06 00:18:50 -0800187 struct aio_thread_req req;
188 struct aio_thread_reply reply;
189 int err;
Jeff Dike75e55842005-09-03 15:57:45 -0700190
Jeff Diked50084a2006-01-06 00:18:50 -0800191 signal(SIGWINCH, SIG_IGN);
192 while(1){
193 err = os_read_file(aio_req_fd_r, &req, sizeof(req));
194 if(err != sizeof(req)){
195 if(err < 0)
196 printk("not_aio_thread - read failed, "
197 "fd = %d, err = %d\n", aio_req_fd_r,
198 -err);
199 else {
200 printk("not_aio_thread - short read, fd = %d, "
201 "length = %d\n", aio_req_fd_r, err);
202 }
203 continue;
204 }
205 err = do_not_aio(&req);
206 reply = ((struct aio_thread_reply) { .data = req.aio,
207 .err = err });
208 err = os_write_file(req.aio->reply_fd, &reply, sizeof(reply));
209 if(err != sizeof(reply))
210 printk("not_aio_thread - write failed, fd = %d, "
211 "err = %d\n", aio_req_fd_r, -err);
212 }
Jeff Dike1b57e9c2006-01-06 00:18:49 -0800213
214 return 0;
Jeff Dike75e55842005-09-03 15:57:45 -0700215}
216
/*
 * Value returned by run_helper_thread for the helper started by init_aio_24
 * or init_aio_26; stays -1 while no helper has been started.  exit_aio
 * passes it to os_kill_process.
 */
217static int aio_pid = -1;
218
219static int init_aio_24(void)
220{
Jeff Diked50084a2006-01-06 00:18:50 -0800221 unsigned long stack;
222 int fds[2], err;
Jeff Dike75e55842005-09-03 15:57:45 -0700223
Jeff Diked50084a2006-01-06 00:18:50 -0800224 err = os_pipe(fds, 1, 1);
225 if(err)
226 goto out;
Jeff Dike75e55842005-09-03 15:57:45 -0700227
Jeff Diked50084a2006-01-06 00:18:50 -0800228 aio_req_fd_w = fds[0];
229 aio_req_fd_r = fds[1];
230 err = run_helper_thread(not_aio_thread, NULL,
231 CLONE_FILES | CLONE_VM | SIGCHLD, &stack, 0);
232 if(err < 0)
233 goto out_close_pipe;
Jeff Dike75e55842005-09-03 15:57:45 -0700234
Jeff Diked50084a2006-01-06 00:18:50 -0800235 aio_pid = err;
236 goto out;
Jeff Dike75e55842005-09-03 15:57:45 -0700237
Jeff Diked50084a2006-01-06 00:18:50 -0800238out_close_pipe:
239 os_close_file(fds[0]);
240 os_close_file(fds[1]);
241 aio_req_fd_w = -1;
242 aio_req_fd_r = -1;
243out:
Jeff Dike75e55842005-09-03 15:57:45 -0700244#ifndef HAVE_AIO_ABI
245 printk("/usr/include/linux/aio_abi.h not present during build\n");
246#endif
247 printk("2.6 host AIO support not used - falling back to I/O "
248 "thread\n");
Jeff Diked50084a2006-01-06 00:18:50 -0800249 return 0;
Jeff Dike75e55842005-09-03 15:57:45 -0700250}
251
252#ifdef HAVE_AIO_ABI
253#define DEFAULT_24_AIO 0
254static int init_aio_26(void)
255{
Jeff Diked50084a2006-01-06 00:18:50 -0800256 unsigned long stack;
257 int err;
Jeff Dike75e55842005-09-03 15:57:45 -0700258
Jeff Diked50084a2006-01-06 00:18:50 -0800259 if(io_setup(256, &ctx)){
Jeff Dikeb4fd3102005-09-16 19:27:49 -0700260 err = -errno;
Jeff Diked50084a2006-01-06 00:18:50 -0800261 printk("aio_thread failed to initialize context, err = %d\n",
262 errno);
263 return err;
264 }
Jeff Dike75e55842005-09-03 15:57:45 -0700265
Jeff Diked50084a2006-01-06 00:18:50 -0800266 err = run_helper_thread(aio_thread, NULL,
267 CLONE_FILES | CLONE_VM | SIGCHLD, &stack, 0);
268 if(err < 0)
269 return err;
Jeff Dike75e55842005-09-03 15:57:45 -0700270
Jeff Diked50084a2006-01-06 00:18:50 -0800271 aio_pid = err;
Jeff Dike75e55842005-09-03 15:57:45 -0700272
273 printk("Using 2.6 host AIO\n");
Jeff Diked50084a2006-01-06 00:18:50 -0800274 return 0;
Jeff Dike75e55842005-09-03 15:57:45 -0700275}
276
Jeff Dike91acb212005-10-10 23:10:32 -0400277static int submit_aio_26(enum aio_type type, int io_fd, char *buf, int len,
278 unsigned long long offset, struct aio_context *aio)
279{
Jeff Diked50084a2006-01-06 00:18:50 -0800280 struct aio_thread_reply reply;
281 int err;
Jeff Dike91acb212005-10-10 23:10:32 -0400282
Jeff Diked50084a2006-01-06 00:18:50 -0800283 err = do_aio(ctx, type, io_fd, buf, len, offset, aio);
284 if(err){
285 reply = ((struct aio_thread_reply) { .data = aio,
286 .err = err });
287 err = os_write_file(aio->reply_fd, &reply, sizeof(reply));
288 if(err != sizeof(reply))
289 printk("submit_aio_26 - write failed, "
290 "fd = %d, err = %d\n", aio->reply_fd, -err);
291 else err = 0;
292 }
Jeff Dike91acb212005-10-10 23:10:32 -0400293
Jeff Diked50084a2006-01-06 00:18:50 -0800294 return err;
Jeff Dike91acb212005-10-10 23:10:32 -0400295}
296
Jeff Dike75e55842005-09-03 15:57:45 -0700297#else
298#define DEFAULT_24_AIO 1
/*
 * Stub used when HAVE_AIO_ABI is not defined: 2.6 AIO was not compiled in,
 * so initialization always reports -ENOSYS.
 */
static int init_aio_26(void)
{
	const int unsupported = -ENOSYS;

	return unsupported;
}
303
Jeff Dike91acb212005-10-10 23:10:32 -0400304static int submit_aio_26(enum aio_type type, int io_fd, char *buf, int len,
305 unsigned long long offset, struct aio_context *aio)
Jeff Dike75e55842005-09-03 15:57:45 -0700306{
Jeff Diked50084a2006-01-06 00:18:50 -0800307 return -ENOSYS;
Jeff Dike75e55842005-09-03 15:57:45 -0700308}
309#endif
310
/*
 * Non-zero selects the 2.4-style I/O thread in init_aio and submit_aio; zero
 * selects 2.6 host AIO.  Starts as DEFAULT_24_AIO and is set to 1 by
 * set_aio_24 and by init_aio when it disables or falls back from 2.6 AIO.
 */
311static int aio_24 = DEFAULT_24_AIO;
312
313static int __init set_aio_24(char *name, int *add)
314{
Jeff Diked50084a2006-01-06 00:18:50 -0800315 aio_24 = 1;
316 return 0;
Jeff Dike75e55842005-09-03 15:57:45 -0700317}
318
/*
 * Registers set_aio_24 under the name "aio=2.4" together with the help text
 * below.  NOTE(review): presumably this makes "aio=2.4" a UML command-line
 * switch - confirm against the __uml_setup definition.
 */
319__uml_setup("aio=2.4", set_aio_24,
320"aio=2.4\n"
321" This is used to force UML to use 2.4-style AIO even when 2.6 AIO is\n"
322" available. 2.4 AIO is a single thread that handles one request at a\n"
323" time, synchronously. 2.6 AIO is a thread which uses the 2.6 AIO \n"
324" interface to handle an arbitrary number of pending requests. 2.6 AIO \n"
325" is not available in tt mode, on 2.4 hosts, or when UML is built with\n"
326" /usr/include/linux/aio_abi.h not available. Many distributions don't\n"
327" include aio_abi.h, so you will need to copy it from a kernel tree to\n"
328" your /usr/include/linux in order to build an AIO-capable UML\n\n"
329);
330
331static int init_aio(void)
332{
Jeff Diked50084a2006-01-06 00:18:50 -0800333 int err;
Jeff Dike75e55842005-09-03 15:57:45 -0700334
Jeff Diked50084a2006-01-06 00:18:50 -0800335 CHOOSE_MODE(({ if(!aio_24){
336 printk("Disabling 2.6 AIO in tt mode\n");
337 aio_24 = 1;
338 } }), (void) 0);
Jeff Dike75e55842005-09-03 15:57:45 -0700339
Jeff Diked50084a2006-01-06 00:18:50 -0800340 if(!aio_24){
341 err = init_aio_26();
342 if(err && (errno == ENOSYS)){
343 printk("2.6 AIO not supported on the host - "
344 "reverting to 2.4 AIO\n");
345 aio_24 = 1;
346 }
347 else return err;
348 }
Jeff Dike75e55842005-09-03 15:57:45 -0700349
Jeff Diked50084a2006-01-06 00:18:50 -0800350 if(aio_24)
351 return init_aio_24();
Jeff Dike75e55842005-09-03 15:57:45 -0700352
Jeff Diked50084a2006-01-06 00:18:50 -0800353 return 0;
Jeff Dike75e55842005-09-03 15:57:45 -0700354}
355
356/* The reason for the __initcall/__uml_exitcall asymmetry is that init_aio
357 * needs to be called when the kernel is running because it calls run_helper,
358 * which needs get_free_page. exit_aio is a __uml_exitcall because the generic
359 * kernel does not run __exitcalls on shutdown, and can't because many of them
360 * break when called outside of module unloading.
361 */
362__initcall(init_aio);
363
364static void exit_aio(void)
365{
Jeff Diked50084a2006-01-06 00:18:50 -0800366 if(aio_pid != -1)
367 os_kill_process(aio_pid, 1);
Jeff Dike75e55842005-09-03 15:57:45 -0700368}
369
370__uml_exitcall(exit_aio);
371
Jeff Dike91acb212005-10-10 23:10:32 -0400372static int submit_aio_24(enum aio_type type, int io_fd, char *buf, int len,
373 unsigned long long offset, struct aio_context *aio)
Jeff Dike75e55842005-09-03 15:57:45 -0700374{
Jeff Diked50084a2006-01-06 00:18:50 -0800375 struct aio_thread_req req = { .type = type,
376 .io_fd = io_fd,
377 .offset = offset,
378 .buf = buf,
379 .len = len,
380 .aio = aio,
381 };
382 int err;
Jeff Dike91acb212005-10-10 23:10:32 -0400383
Jeff Diked50084a2006-01-06 00:18:50 -0800384 err = os_write_file(aio_req_fd_w, &req, sizeof(req));
385 if(err == sizeof(req))
386 err = 0;
Jeff Dike91acb212005-10-10 23:10:32 -0400387
Jeff Diked50084a2006-01-06 00:18:50 -0800388 return err;
Jeff Dike91acb212005-10-10 23:10:32 -0400389}
390
391int submit_aio(enum aio_type type, int io_fd, char *buf, int len,
Jeff Diked50084a2006-01-06 00:18:50 -0800392 unsigned long long offset, int reply_fd,
393 struct aio_context *aio)
Jeff Dike91acb212005-10-10 23:10:32 -0400394{
Jeff Diked50084a2006-01-06 00:18:50 -0800395 aio->reply_fd = reply_fd;
396 if(aio_24)
397 return submit_aio_24(type, io_fd, buf, len, offset, aio);
398 else {
399 return submit_aio_26(type, io_fd, buf, len, offset, aio);
400 }
Jeff Dike75e55842005-09-03 15:57:45 -0700401}