blob: b04897cd995d02d021ec82864f24ad82d6de6b25 [file] [log] [blame]
Jeff Dike75e55842005-09-03 15:57:45 -07001/*
2 * Copyright (C) 2004 Jeff Dike (jdike@addtoit.com)
3 * Licensed under the GPL
4 */
5
6#include <stdlib.h>
7#include <unistd.h>
8#include <signal.h>
Jeff Dike09ace812005-09-03 15:57:46 -07009#include <string.h>
Jeff Dike75e55842005-09-03 15:57:45 -070010#include <errno.h>
11#include <sched.h>
12#include <sys/syscall.h>
13#include "os.h"
14#include "helper.h"
15#include "aio.h"
16#include "init.h"
17#include "user.h"
18#include "mode.h"
19
/* Descriptors of the request channel used by the 2.4-style I/O thread:
 * submit_aio_24() writes request pointers to aio_req_fd_w and
 * not_aio_thread() reads them from aio_req_fd_r.  Both are -1 until
 * init_aio_24() has created the channel.
 */
static int aio_req_fd_r = -1, aio_req_fd_w = -1;
22
Jeff Dike09ace812005-09-03 15:57:46 -070023static int update_aio(struct aio_context *aio, int res)
24{
25 if(res < 0)
26 aio->len = res;
27 else if((res == 0) && (aio->type == AIO_READ)){
28 /* This is the EOF case - we have hit the end of the file
29 * and it ends in a partial block, so we fill the end of
30 * the block with zeros and claim success.
31 */
32 memset(aio->data, 0, aio->len);
33 aio->len = 0;
34 }
35 else if(res > 0){
36 aio->len -= res;
37 aio->data += res;
38 aio->offset += res;
39 return aio->len;
40 }
41
42 return 0;
43}
44
Jeff Dike75e55842005-09-03 15:57:45 -070045#if defined(HAVE_AIO_ABI)
46#include <linux/aio_abi.h>
47
48/* If we have the headers, we are going to build with AIO enabled.
49 * If we don't have aio in libc, we define the necessary stubs here.
50 */
51
52#if !defined(HAVE_AIO_LIBC)
53
54static long io_setup(int n, aio_context_t *ctxp)
55{
56 return syscall(__NR_io_setup, n, ctxp);
57}
58
59static long io_submit(aio_context_t ctx, long nr, struct iocb **iocbpp)
60{
61 return syscall(__NR_io_submit, ctx, nr, iocbpp);
62}
63
64static long io_getevents(aio_context_t ctx_id, long min_nr, long nr,
65 struct io_event *events, struct timespec *timeout)
66{
67 return syscall(__NR_io_getevents, ctx_id, min_nr, nr, events, timeout);
68}
69
70#endif
71
72/* The AIO_MMAP cases force the mmapped page into memory here
73 * rather than in whatever place first touches the data. I used
74 * to do this by touching the page, but that's delicate because
75 * gcc is prone to optimizing that away. So, what's done here
76 * is we read from the descriptor from which the page was
77 * mapped. The caller is required to pass an offset which is
78 * inside the page that was mapped. Thus, when the read
79 * returns, we know that the page is in the page cache, and
80 * that it now backs the mmapped area.
81 */
82
Jeff Dike09ace812005-09-03 15:57:46 -070083static int do_aio(aio_context_t ctx, struct aio_context *aio)
Jeff Dike75e55842005-09-03 15:57:45 -070084{
85 struct iocb iocb, *iocbp = &iocb;
86 char c;
87 int err;
88
89 iocb = ((struct iocb) { .aio_data = (unsigned long) aio,
90 .aio_reqprio = 0,
Jeff Dike09ace812005-09-03 15:57:46 -070091 .aio_fildes = aio->fd,
92 .aio_buf = (unsigned long) aio->data,
93 .aio_nbytes = aio->len,
94 .aio_offset = aio->offset,
Jeff Dike75e55842005-09-03 15:57:45 -070095 .aio_reserved1 = 0,
96 .aio_reserved2 = 0,
97 .aio_reserved3 = 0 });
98
Jeff Dike09ace812005-09-03 15:57:46 -070099 switch(aio->type){
Jeff Dike75e55842005-09-03 15:57:45 -0700100 case AIO_READ:
101 iocb.aio_lio_opcode = IOCB_CMD_PREAD;
Jeff Dike75e55842005-09-03 15:57:45 -0700102 break;
103 case AIO_WRITE:
104 iocb.aio_lio_opcode = IOCB_CMD_PWRITE;
Jeff Dike75e55842005-09-03 15:57:45 -0700105 break;
106 case AIO_MMAP:
107 iocb.aio_lio_opcode = IOCB_CMD_PREAD;
108 iocb.aio_buf = (unsigned long) &c;
109 iocb.aio_nbytes = sizeof(c);
Jeff Dike75e55842005-09-03 15:57:45 -0700110 break;
111 default:
Jeff Dike09ace812005-09-03 15:57:46 -0700112 printk("Bogus op in do_aio - %d\n", aio->type);
Jeff Dike75e55842005-09-03 15:57:45 -0700113 err = -EINVAL;
Jeff Dike09ace812005-09-03 15:57:46 -0700114 goto out;
Jeff Dike75e55842005-09-03 15:57:45 -0700115 }
Jeff Dike09ace812005-09-03 15:57:46 -0700116
117 err = io_submit(ctx, 1, &iocbp);
Jeff Dike75e55842005-09-03 15:57:45 -0700118 if(err > 0)
119 err = 0;
120
Jeff Dike09ace812005-09-03 15:57:46 -0700121 out:
Jeff Dike75e55842005-09-03 15:57:45 -0700122 return err;
123}
124
125static aio_context_t ctx = 0;
126
127static int aio_thread(void *arg)
128{
129 struct aio_thread_reply reply;
Jeff Dike09ace812005-09-03 15:57:46 -0700130 struct aio_context *aio;
Jeff Dike75e55842005-09-03 15:57:45 -0700131 struct io_event event;
Jeff Dike09ace812005-09-03 15:57:46 -0700132 int err, n;
Jeff Dike75e55842005-09-03 15:57:45 -0700133
134 signal(SIGWINCH, SIG_IGN);
135
136 while(1){
137 n = io_getevents(ctx, 1, 1, &event, NULL);
138 if(n < 0){
139 if(errno == EINTR)
140 continue;
141 printk("aio_thread - io_getevents failed, "
142 "errno = %d\n", errno);
143 }
144 else {
Jeff Dike09ace812005-09-03 15:57:46 -0700145 aio = (struct aio_context *) event.data;
146 if(update_aio(aio, event.res)){
147 do_aio(ctx, aio);
148 continue;
149 }
150
Jeff Dike75e55842005-09-03 15:57:45 -0700151 reply = ((struct aio_thread_reply)
Jeff Dike09ace812005-09-03 15:57:46 -0700152 { .data = aio,
153 .err = aio->len });
154 err = os_write_file(aio->reply_fd, &reply,
155 sizeof(reply));
Jeff Dike75e55842005-09-03 15:57:45 -0700156 if(err != sizeof(reply))
Jeff Dike09ace812005-09-03 15:57:46 -0700157 printk("aio_thread - write failed, "
158 "fd = %d, err = %d\n", aio->reply_fd,
159 -err);
Jeff Dike75e55842005-09-03 15:57:45 -0700160 }
161 }
162 return 0;
163}
164
165#endif
166
Jeff Dike09ace812005-09-03 15:57:46 -0700167static int do_not_aio(struct aio_context *aio)
Jeff Dike75e55842005-09-03 15:57:45 -0700168{
169 char c;
170 int err;
171
Jeff Dike09ace812005-09-03 15:57:46 -0700172 switch(aio->type){
Jeff Dike75e55842005-09-03 15:57:45 -0700173 case AIO_READ:
Jeff Dike09ace812005-09-03 15:57:46 -0700174 err = os_seek_file(aio->fd, aio->offset);
Jeff Dike75e55842005-09-03 15:57:45 -0700175 if(err)
176 goto out;
177
Jeff Dike09ace812005-09-03 15:57:46 -0700178 err = os_read_file(aio->fd, aio->data, aio->len);
Jeff Dike75e55842005-09-03 15:57:45 -0700179 break;
180 case AIO_WRITE:
Jeff Dike09ace812005-09-03 15:57:46 -0700181 err = os_seek_file(aio->fd, aio->offset);
Jeff Dike75e55842005-09-03 15:57:45 -0700182 if(err)
183 goto out;
184
Jeff Dike09ace812005-09-03 15:57:46 -0700185 err = os_write_file(aio->fd, aio->data, aio->len);
Jeff Dike75e55842005-09-03 15:57:45 -0700186 break;
187 case AIO_MMAP:
Jeff Dike09ace812005-09-03 15:57:46 -0700188 err = os_seek_file(aio->fd, aio->offset);
Jeff Dike75e55842005-09-03 15:57:45 -0700189 if(err)
190 goto out;
191
Jeff Dike09ace812005-09-03 15:57:46 -0700192 err = os_read_file(aio->fd, &c, sizeof(c));
Jeff Dike75e55842005-09-03 15:57:45 -0700193 break;
194 default:
Jeff Dike09ace812005-09-03 15:57:46 -0700195 printk("do_not_aio - bad request type : %d\n", aio->type);
Jeff Dike75e55842005-09-03 15:57:45 -0700196 err = -EINVAL;
197 break;
198 }
199
200 out:
201 return err;
202}
203
204static int not_aio_thread(void *arg)
205{
Jeff Dike09ace812005-09-03 15:57:46 -0700206 struct aio_context *aio;
Jeff Dike75e55842005-09-03 15:57:45 -0700207 struct aio_thread_reply reply;
208 int err;
209
210 signal(SIGWINCH, SIG_IGN);
211 while(1){
Jeff Dike09ace812005-09-03 15:57:46 -0700212 err = os_read_file(aio_req_fd_r, &aio, sizeof(aio));
213 if(err != sizeof(aio)){
Jeff Dike75e55842005-09-03 15:57:45 -0700214 if(err < 0)
215 printk("not_aio_thread - read failed, "
216 "fd = %d, err = %d\n", aio_req_fd_r,
217 -err);
218 else {
219 printk("not_aio_thread - short read, fd = %d, "
220 "length = %d\n", aio_req_fd_r, err);
221 }
222 continue;
223 }
Jeff Dike09ace812005-09-03 15:57:46 -0700224 again:
225 err = do_not_aio(aio);
226
227 if(update_aio(aio, err))
228 goto again;
229
230 reply = ((struct aio_thread_reply) { .data = aio,
231 .err = aio->len });
232 err = os_write_file(aio->reply_fd, &reply, sizeof(reply));
Jeff Dike75e55842005-09-03 15:57:45 -0700233 if(err != sizeof(reply))
234 printk("not_aio_thread - write failed, fd = %d, "
235 "err = %d\n", aio_req_fd_r, -err);
236 }
237}
238
Jeff Dike09ace812005-09-03 15:57:46 -0700239static int submit_aio_24(struct aio_context *aio)
240{
241 int err;
242
243 err = os_write_file(aio_req_fd_w, &aio, sizeof(aio));
244 if(err == sizeof(aio))
245 err = 0;
246
247 return err;
248}
249
/* Submission routine selected at init time; submit_aio() calls through it. */
static int (*submit_proc)(struct aio_context *aio);

/* Value returned by run_helper_thread() for the I/O helper thread, or -1
 * while there is none.  exit_aio() uses it to kill the thread.
 */
static int aio_pid = -1;
Jeff Dike75e55842005-09-03 15:57:45 -0700252
253static int init_aio_24(void)
254{
255 unsigned long stack;
256 int fds[2], err;
257
258 err = os_pipe(fds, 1, 1);
259 if(err)
260 goto out;
261
262 aio_req_fd_w = fds[0];
263 aio_req_fd_r = fds[1];
264 err = run_helper_thread(not_aio_thread, NULL,
265 CLONE_FILES | CLONE_VM | SIGCHLD, &stack, 0);
266 if(err < 0)
267 goto out_close_pipe;
268
269 aio_pid = err;
270 goto out;
271
272 out_close_pipe:
273 os_close_file(fds[0]);
274 os_close_file(fds[1]);
275 aio_req_fd_w = -1;
276 aio_req_fd_r = -1;
277 out:
278#ifndef HAVE_AIO_ABI
279 printk("/usr/include/linux/aio_abi.h not present during build\n");
280#endif
281 printk("2.6 host AIO support not used - falling back to I/O "
282 "thread\n");
Jeff Dike09ace812005-09-03 15:57:46 -0700283
284 submit_proc = submit_aio_24;
285
Jeff Dike75e55842005-09-03 15:57:45 -0700286 return 0;
287}
288
289#ifdef HAVE_AIO_ABI
290#define DEFAULT_24_AIO 0
Jeff Dike09ace812005-09-03 15:57:46 -0700291static int submit_aio_26(struct aio_context *aio)
292{
293 struct aio_thread_reply reply;
294 int err;
295
296 err = do_aio(ctx, aio);
297 if(err){
298 reply = ((struct aio_thread_reply) { .data = aio,
299 .err = err });
300 err = os_write_file(aio->reply_fd, &reply, sizeof(reply));
301 if(err != sizeof(reply))
302 printk("submit_aio_26 - write failed, "
303 "fd = %d, err = %d\n", aio->reply_fd, -err);
304 else err = 0;
305 }
306
307 return err;
308}
309
Jeff Dike75e55842005-09-03 15:57:45 -0700310static int init_aio_26(void)
311{
312 unsigned long stack;
313 int err;
314
315 if(io_setup(256, &ctx)){
316 printk("aio_thread failed to initialize context, err = %d\n",
317 errno);
318 return -errno;
319 }
320
321 err = run_helper_thread(aio_thread, NULL,
322 CLONE_FILES | CLONE_VM | SIGCHLD, &stack, 0);
323 if(err < 0)
324 return -errno;
325
326 aio_pid = err;
327
328 printk("Using 2.6 host AIO\n");
Jeff Dike09ace812005-09-03 15:57:46 -0700329
330 submit_proc = submit_aio_26;
331
Jeff Dike75e55842005-09-03 15:57:45 -0700332 return 0;
333}
334
Jeff Dike75e55842005-09-03 15:57:45 -0700335#else
336#define DEFAULT_24_AIO 1
/* Placeholder for builds without aio_abi.h: there is no 2.6 AIO to submit
 * to, so every request is refused with -ENOSYS.
 */
static int submit_aio_26(struct aio_context *aio)
{
	int err = -ENOSYS;

	return err;
}
341
Jeff Dike09ace812005-09-03 15:57:46 -0700342static int init_aio_26(void)
Jeff Dike75e55842005-09-03 15:57:45 -0700343{
Jeff Dike09ace812005-09-03 15:57:46 -0700344 submit_proc = submit_aio_26;
Jeff Dike75e55842005-09-03 15:57:45 -0700345 return -ENOSYS;
346}
347#endif
348
349static int aio_24 = DEFAULT_24_AIO;
350
351static int __init set_aio_24(char *name, int *add)
352{
353 aio_24 = 1;
354 return 0;
355}
356
357__uml_setup("aio=2.4", set_aio_24,
358"aio=2.4\n"
359" This is used to force UML to use 2.4-style AIO even when 2.6 AIO is\n"
360" available. 2.4 AIO is a single thread that handles one request at a\n"
361" time, synchronously. 2.6 AIO is a thread which uses the 2.6 AIO \n"
362" interface to handle an arbitrary number of pending requests. 2.6 AIO \n"
363" is not available in tt mode, on 2.4 hosts, or when UML is built with\n"
364" /usr/include/linux/aio_abi.h not available. Many distributions don't\n"
365" include aio_abi.h, so you will need to copy it from a kernel tree to\n"
366" your /usr/include/linux in order to build an AIO-capable UML\n\n"
367);
368
369static int init_aio(void)
370{
371 int err;
372
373 CHOOSE_MODE(({
374 if(!aio_24){
375 printk("Disabling 2.6 AIO in tt mode\n");
376 aio_24 = 1;
377 } }), (void) 0);
378
379 if(!aio_24){
380 err = init_aio_26();
381 if(err && (errno == ENOSYS)){
382 printk("2.6 AIO not supported on the host - "
383 "reverting to 2.4 AIO\n");
384 aio_24 = 1;
385 }
386 else return err;
387 }
388
389 if(aio_24)
390 return init_aio_24();
391
392 return 0;
393}
394
/* init_aio is an __initcall while exit_aio is a __uml_exitcall.  The
 * asymmetry is deliberate: init_aio has to run once the kernel is up,
 * since it starts a helper thread with run_helper_thread, which needs
 * get_free_page.  exit_aio cannot be an __exitcall because the generic
 * kernel does not run __exitcalls on shutdown, and can't because many of
 * them break when called outside of module unloading.
 */
__initcall(init_aio);
402
403static void exit_aio(void)
404{
405 if(aio_pid != -1)
406 os_kill_process(aio_pid, 1);
407}
408
409__uml_exitcall(exit_aio);
410
Jeff Dike09ace812005-09-03 15:57:46 -0700411int submit_aio(struct aio_context *aio)
Jeff Dike75e55842005-09-03 15:57:45 -0700412{
Jeff Dike09ace812005-09-03 15:57:46 -0700413 return (*submit_proc)(aio);
Jeff Dike75e55842005-09-03 15:57:45 -0700414}