blob: f2ca2992bbd6abbbc5dc719ada9201b0ca79967a [file] [log] [blame]
Jeff Dike75e55842005-09-03 15:57:45 -07001/*
2 * Copyright (C) 2004 Jeff Dike (jdike@addtoit.com)
3 * Licensed under the GPL
4 */
5
6#include <stdlib.h>
7#include <unistd.h>
8#include <signal.h>
9#include <errno.h>
10#include <sched.h>
11#include <sys/syscall.h>
12#include "os.h"
13#include "helper.h"
14#include "aio.h"
15#include "init.h"
16#include "user.h"
17#include "mode.h"
18
/* One request for the 2.4-style I/O thread.  submit_aio_24 fills one of
 * these in and writes it to aio_req_fd_w; not_aio_thread reads it from
 * aio_req_fd_r and hands it to do_not_aio.
 */
struct aio_thread_req {
	enum aio_type type;		/* AIO_READ, AIO_WRITE or AIO_MMAP */
	int io_fd;			/* descriptor the I/O is done on */
	unsigned long long offset;	/* file offset seeked to before the I/O */
	char *buf;			/* data buffer for reads and writes */
	int len;			/* byte count for reads and writes */
	struct aio_context *aio;	/* caller's context - supplies reply_fd
					 * and is passed back in the reply */
};
27
/* Descriptors carrying requests to the 2.4-style I/O thread.
 * not_aio_thread reads requests from aio_req_fd_r and submit_aio_24 writes
 * them to aio_req_fd_w.  Both stay at -1 until init_aio_24 sets them up.
 */
static int aio_req_fd_r = -1;
static int aio_req_fd_w = -1;
30
31#if defined(HAVE_AIO_ABI)
32#include <linux/aio_abi.h>
33
34/* If we have the headers, we are going to build with AIO enabled.
35 * If we don't have aio in libc, we define the necessary stubs here.
36 */
37
38#if !defined(HAVE_AIO_LIBC)
39
/* Stand-in for a libc io_setup - issues the host system call directly and
 * passes back whatever syscall() returned.
 */
static long io_setup(int n, aio_context_t *ctxp)
{
	long ret;

	ret = syscall(__NR_io_setup, n, ctxp);

	return ret;
}
44
/* Stand-in for a libc io_submit - issues the host system call directly and
 * passes back whatever syscall() returned.
 */
static long io_submit(aio_context_t ctx, long nr, struct iocb **iocbpp)
{
	long ret;

	ret = syscall(__NR_io_submit, ctx, nr, iocbpp);

	return ret;
}
49
/* Stand-in for a libc io_getevents - issues the host system call directly
 * and passes back whatever syscall() returned.
 */
static long io_getevents(aio_context_t ctx_id, long min_nr, long nr,
			 struct io_event *events, struct timespec *timeout)
{
	long ret;

	ret = syscall(__NR_io_getevents, ctx_id, min_nr, nr, events, timeout);

	return ret;
}
55
56#endif
57
58/* The AIO_MMAP cases force the mmapped page into memory here
59 * rather than in whatever place first touches the data. I used
60 * to do this by touching the page, but that's delicate because
61 * gcc is prone to optimizing that away. So, what's done here
62 * is we read from the descriptor from which the page was
63 * mapped. The caller is required to pass an offset which is
64 * inside the page that was mapped. Thus, when the read
65 * returns, we know that the page is in the page cache, and
66 * that it now backs the mmapped area.
67 */
68
69static int do_aio(aio_context_t ctx, enum aio_type type, int fd, char *buf,
70 int len, unsigned long long offset, struct aio_context *aio)
71{
72 struct iocb iocb, *iocbp = &iocb;
73 char c;
74 int err;
75
76 iocb = ((struct iocb) { .aio_data = (unsigned long) aio,
77 .aio_reqprio = 0,
78 .aio_fildes = fd,
79 .aio_buf = (unsigned long) buf,
80 .aio_nbytes = len,
81 .aio_offset = offset,
82 .aio_reserved1 = 0,
83 .aio_reserved2 = 0,
84 .aio_reserved3 = 0 });
85
86 switch(type){
87 case AIO_READ:
88 iocb.aio_lio_opcode = IOCB_CMD_PREAD;
89 err = io_submit(ctx, 1, &iocbp);
90 break;
91 case AIO_WRITE:
92 iocb.aio_lio_opcode = IOCB_CMD_PWRITE;
93 err = io_submit(ctx, 1, &iocbp);
94 break;
95 case AIO_MMAP:
96 iocb.aio_lio_opcode = IOCB_CMD_PREAD;
97 iocb.aio_buf = (unsigned long) &c;
98 iocb.aio_nbytes = sizeof(c);
99 err = io_submit(ctx, 1, &iocbp);
100 break;
101 default:
102 printk("Bogus op in do_aio - %d\n", type);
103 err = -EINVAL;
104 break;
105 }
106 if(err > 0)
107 err = 0;
108
109 return err;
110}
111
/* Host AIO context - initialized by io_setup in init_aio_26, then used by
 * submit_aio_26 to queue requests and by aio_thread to collect completions.
 */
static aio_context_t ctx = 0;
113
114static int aio_thread(void *arg)
115{
116 struct aio_thread_reply reply;
117 struct io_event event;
118 int err, n, reply_fd;
119
120 signal(SIGWINCH, SIG_IGN);
121
122 while(1){
123 n = io_getevents(ctx, 1, 1, &event, NULL);
124 if(n < 0){
125 if(errno == EINTR)
126 continue;
127 printk("aio_thread - io_getevents failed, "
128 "errno = %d\n", errno);
129 }
130 else {
131 reply = ((struct aio_thread_reply)
132 { .data = (void *) (long) event.data,
133 .err = event.res });
134 reply_fd = ((struct aio_context *) reply.data)->reply_fd;
135 err = os_write_file(reply_fd, &reply, sizeof(reply));
136 if(err != sizeof(reply))
137 printk("not_aio_thread - write failed, "
138 "fd = %d, err = %d\n",
139 aio_req_fd_r, -err);
140 }
141 }
142 return 0;
143}
144
145#endif
146
147static int do_not_aio(struct aio_thread_req *req)
148{
149 char c;
150 int err;
151
152 switch(req->type){
153 case AIO_READ:
154 err = os_seek_file(req->io_fd, req->offset);
155 if(err)
156 goto out;
157
158 err = os_read_file(req->io_fd, req->buf, req->len);
159 break;
160 case AIO_WRITE:
161 err = os_seek_file(req->io_fd, req->offset);
162 if(err)
163 goto out;
164
165 err = os_write_file(req->io_fd, req->buf, req->len);
166 break;
167 case AIO_MMAP:
168 err = os_seek_file(req->io_fd, req->offset);
169 if(err)
170 goto out;
171
172 err = os_read_file(req->io_fd, &c, sizeof(c));
173 break;
174 default:
175 printk("do_not_aio - bad request type : %d\n", req->type);
176 err = -EINVAL;
177 break;
178 }
179
180 out:
181 return err;
182}
183
184static int not_aio_thread(void *arg)
185{
186 struct aio_thread_req req;
187 struct aio_thread_reply reply;
188 int err;
189
190 signal(SIGWINCH, SIG_IGN);
191 while(1){
192 err = os_read_file(aio_req_fd_r, &req, sizeof(req));
193 if(err != sizeof(req)){
194 if(err < 0)
195 printk("not_aio_thread - read failed, "
196 "fd = %d, err = %d\n", aio_req_fd_r,
197 -err);
198 else {
199 printk("not_aio_thread - short read, fd = %d, "
200 "length = %d\n", aio_req_fd_r, err);
201 }
202 continue;
203 }
204 err = do_not_aio(&req);
205 reply = ((struct aio_thread_reply) { .data = req.aio,
206 .err = err });
207 err = os_write_file(req.aio->reply_fd, &reply, sizeof(reply));
208 if(err != sizeof(reply))
209 printk("not_aio_thread - write failed, fd = %d, "
210 "err = %d\n", aio_req_fd_r, -err);
211 }
212}
213
/* Pid of the helper thread started by init_aio_24 or init_aio_26, or -1
 * when none has been started.  exit_aio uses it to kill the thread.
 */
static int aio_pid = -1;
215
216static int init_aio_24(void)
217{
218 unsigned long stack;
219 int fds[2], err;
220
221 err = os_pipe(fds, 1, 1);
222 if(err)
223 goto out;
224
225 aio_req_fd_w = fds[0];
226 aio_req_fd_r = fds[1];
227 err = run_helper_thread(not_aio_thread, NULL,
228 CLONE_FILES | CLONE_VM | SIGCHLD, &stack, 0);
229 if(err < 0)
230 goto out_close_pipe;
231
232 aio_pid = err;
233 goto out;
234
235 out_close_pipe:
236 os_close_file(fds[0]);
237 os_close_file(fds[1]);
238 aio_req_fd_w = -1;
239 aio_req_fd_r = -1;
240 out:
241#ifndef HAVE_AIO_ABI
242 printk("/usr/include/linux/aio_abi.h not present during build\n");
243#endif
244 printk("2.6 host AIO support not used - falling back to I/O "
245 "thread\n");
246 return 0;
247}
248
249#ifdef HAVE_AIO_ABI
250#define DEFAULT_24_AIO 0
251static int init_aio_26(void)
252{
253 unsigned long stack;
254 int err;
255
256 if(io_setup(256, &ctx)){
257 printk("aio_thread failed to initialize context, err = %d\n",
258 errno);
259 return -errno;
260 }
261
262 err = run_helper_thread(aio_thread, NULL,
263 CLONE_FILES | CLONE_VM | SIGCHLD, &stack, 0);
264 if(err < 0)
265 return -errno;
266
267 aio_pid = err;
268
269 printk("Using 2.6 host AIO\n");
270 return 0;
271}
272
273static int submit_aio_26(enum aio_type type, int io_fd, char *buf, int len,
274 unsigned long long offset, struct aio_context *aio)
275{
276 struct aio_thread_reply reply;
277 int err;
278
279 err = do_aio(ctx, type, io_fd, buf, len, offset, aio);
280 if(err){
281 reply = ((struct aio_thread_reply) { .data = aio,
282 .err = err });
283 err = os_write_file(aio->reply_fd, &reply, sizeof(reply));
284 if(err != sizeof(reply))
285 printk("submit_aio_26 - write failed, "
286 "fd = %d, err = %d\n", aio->reply_fd, -err);
287 else err = 0;
288 }
289
290 return err;
291}
292
293#else
/* Built without the AIO headers, so 2.4-style AIO is the default */
#define DEFAULT_24_AIO 1

/* Placeholder for builds without host AIO support - always fails with
 * -ENOSYS.
 */
static int init_aio_26(void)
{
	const int unsupported = -ENOSYS;

	return unsupported;
}
299
300static int submit_aio_26(enum aio_type type, int io_fd, char *buf, int len,
301 unsigned long long offset, struct aio_context *aio)
302{
303 return -ENOSYS;
304}
305#endif
306
/* Non-zero when requests go through the 2.4-style I/O thread rather than
 * host AIO.  Starts at the build-time default; set_aio_24 and init_aio may
 * set it to 1.
 */
static int aio_24 = DEFAULT_24_AIO;
308
309static int __init set_aio_24(char *name, int *add)
310{
311 aio_24 = 1;
312 return 0;
313}
314
/* Hook set_aio_24 up to the "aio=2.4" switch; the string is its help text */
__uml_setup("aio=2.4", set_aio_24,
"aio=2.4\n"
"    This is used to force UML to use 2.4-style AIO even when 2.6 AIO is\n"
"    available.  2.4 AIO is a single thread that handles one request at a\n"
"    time, synchronously.  2.6 AIO is a thread which uses the 2.6 AIO \n"
"    interface to handle an arbitrary number of pending requests.  2.6 AIO \n"
"    is not available in tt mode, on 2.4 hosts, or when UML is built with\n"
"    /usr/include/linux/aio_abi.h not available.  Many distributions don't\n"
"    include aio_abi.h, so you will need to copy it from a kernel tree to\n"
"    your /usr/include/linux in order to build an AIO-capable UML\n\n"
);
326
327static int init_aio(void)
328{
329 int err;
330
331 CHOOSE_MODE(({
332 if(!aio_24){
333 printk("Disabling 2.6 AIO in tt mode\n");
334 aio_24 = 1;
335 } }), (void) 0);
336
337 if(!aio_24){
338 err = init_aio_26();
339 if(err && (errno == ENOSYS)){
340 printk("2.6 AIO not supported on the host - "
341 "reverting to 2.4 AIO\n");
342 aio_24 = 1;
343 }
344 else return err;
345 }
346
347 if(aio_24)
348 return init_aio_24();
349
350 return 0;
351}
352
353/* The reason for the __initcall/__uml_exitcall asymmetry is that init_aio
354 * needs to be called when the kernel is running because it calls run_helper,
355 * which needs get_free_page. exit_aio is a __uml_exitcall because the generic
356 * kernel does not run __exitcalls on shutdown, and can't because many of them
357 * break when called outside of module unloading.
358 */
359__initcall(init_aio);
360
361static void exit_aio(void)
362{
363 if(aio_pid != -1)
364 os_kill_process(aio_pid, 1);
365}
366
367__uml_exitcall(exit_aio);
368
369static int submit_aio_24(enum aio_type type, int io_fd, char *buf, int len,
370 unsigned long long offset, struct aio_context *aio)
371{
372 struct aio_thread_req req = { .type = type,
373 .io_fd = io_fd,
374 .offset = offset,
375 .buf = buf,
376 .len = len,
377 .aio = aio,
378 };
379 int err;
380
381 err = os_write_file(aio_req_fd_w, &req, sizeof(req));
382 if(err == sizeof(req))
383 err = 0;
384
385 return err;
386}
387
388int submit_aio(enum aio_type type, int io_fd, char *buf, int len,
389 unsigned long long offset, int reply_fd,
390 struct aio_context *aio)
391{
392 aio->reply_fd = reply_fd;
393 if(aio_24)
394 return submit_aio_24(type, io_fd, buf, len, offset, aio);
395 else {
396 return submit_aio_26(type, io_fd, buf, len, offset, aio);
397 }
398}