blob: 85a705fc8bd4d8ae6347f2378a0058e8566c0d2f [file] [log] [blame]
/*
 * rbd engine
 *
 * IO engine using Ceph's librbd to test RADOS Block Devices.
 *
 */
7
8#include <rbd/librbd.h>
9
10#include "../fio.h"
11
/*
 * Per-io_u engine state, hung off io_u->engine_data by fio_rbd_io_u_init().
 */
struct fio_rbd_iou {
	/* the io_u this state belongs to */
	struct io_u *io_u;
	/* set to 1 by the aio callbacks, reset to 0 when fio_rbd_getevents() reaps it */
	int io_complete;
};
16
17struct rbd_data {
18 rados_t cluster;
19 rados_ioctx_t io_ctx;
20 rbd_image_t image;
21 struct io_u **aio_events;
22};
23
/*
 * Engine-specific job options; the string members are filled in through
 * the offsets registered in options[].
 */
struct rbd_options {
	/*
	 * Not referenced in this file.
	 * NOTE(review): presumably required as the leading member by fio's
	 * option handling - confirm before moving or removing it.
	 */
	struct thread_data *td;
	/* "rbdname": passed to rbd_open() */
	char *rbd_name;
	/* "pool": passed to rados_ioctx_create() */
	char *pool_name;
	/* "clientname": passed to rados_create() */
	char *client_name;
};
30
31static struct fio_option options[] = {
32 {
33 .name = "rbdname",
34 .lname = "rbd engine rbdname",
35 .type = FIO_OPT_STR_STORE,
36 .help = "RBD name for RBD engine",
37 .off1 = offsetof(struct rbd_options, rbd_name),
38 .category = FIO_OPT_C_ENGINE,
39 .group = FIO_OPT_G_RBD,
40 },
41 {
42 .name = "pool",
43 .lname = "rbd engine pool",
44 .type = FIO_OPT_STR_STORE,
45 .help = "Name of the pool hosting the RBD for the RBD engine",
46 .off1 = offsetof(struct rbd_options, pool_name),
47 .category = FIO_OPT_C_ENGINE,
48 .group = FIO_OPT_G_RBD,
49 },
50 {
51 .name = "clientname",
52 .lname = "rbd engine clientname",
53 .type = FIO_OPT_STR_STORE,
54 .help = "Name of the ceph client to access the RBD for the RBD engine",
55 .off1 = offsetof(struct rbd_options, client_name),
56 .category = FIO_OPT_C_ENGINE,
57 .group = FIO_OPT_G_RBD,
58 },
59 {
60 .name = NULL,
61 },
62};
63
64static int _fio_setup_rbd_data(struct thread_data *td,
65 struct rbd_data **rbd_data_ptr)
66{
67 struct rbd_data *rbd_data;
68
69 if (td->io_ops->data)
70 return 0;
71
72 rbd_data = malloc(sizeof(struct rbd_data));
73 if (!rbd_data)
74 goto failed;
75
76 memset(rbd_data, 0, sizeof(struct rbd_data));
77
78 rbd_data->aio_events = malloc(td->o.iodepth * sizeof(struct io_u *));
79 if (!rbd_data->aio_events)
80 goto failed;
81
82 memset(rbd_data->aio_events, 0, td->o.iodepth * sizeof(struct io_u *));
83
84 *rbd_data_ptr = rbd_data;
85
86 return 0;
87
88failed:
Jens Axboe0d65bfb2014-07-03 15:31:32 -060089 if (rbd_data)
90 free(rbd_data);
Daniel Gollubfc5c0342014-02-17 14:35:28 +010091 return 1;
92
93}
94
95static int _fio_rbd_connect(struct thread_data *td)
96{
97 struct rbd_data *rbd_data = td->io_ops->data;
98 struct rbd_options *o = td->eo;
99 int r;
100
101 r = rados_create(&(rbd_data->cluster), o->client_name);
102 if (r < 0) {
103 log_err("rados_create failed.\n");
104 goto failed_early;
105 }
106
107 r = rados_conf_read_file(rbd_data->cluster, NULL);
108 if (r < 0) {
109 log_err("rados_conf_read_file failed.\n");
110 goto failed_early;
111 }
112
113 r = rados_connect(rbd_data->cluster);
114 if (r < 0) {
115 log_err("rados_connect failed.\n");
116 goto failed_shutdown;
117 }
118
119 r = rados_ioctx_create(rbd_data->cluster, o->pool_name,
120 &(rbd_data->io_ctx));
121 if (r < 0) {
122 log_err("rados_ioctx_create failed.\n");
123 goto failed_shutdown;
124 }
125
126 r = rbd_open(rbd_data->io_ctx, o->rbd_name, &(rbd_data->image),
127 NULL /*snap */ );
128 if (r < 0) {
129 log_err("rbd_open failed.\n");
130 goto failed_open;
131 }
132 return 0;
133
134failed_open:
135 rados_ioctx_destroy(rbd_data->io_ctx);
Xan Peng94950b92014-05-19 09:39:00 -0600136 rbd_data->io_ctx = NULL;
Daniel Gollubfc5c0342014-02-17 14:35:28 +0100137failed_shutdown:
138 rados_shutdown(rbd_data->cluster);
Xan Peng94950b92014-05-19 09:39:00 -0600139 rbd_data->cluster = NULL;
Daniel Gollubfc5c0342014-02-17 14:35:28 +0100140failed_early:
141 return 1;
142}
143
144static void _fio_rbd_disconnect(struct rbd_data *rbd_data)
145{
146 if (!rbd_data)
147 return;
148
149 /* shutdown everything */
150 if (rbd_data->image) {
151 rbd_close(rbd_data->image);
152 rbd_data->image = NULL;
153 }
154
155 if (rbd_data->io_ctx) {
156 rados_ioctx_destroy(rbd_data->io_ctx);
157 rbd_data->io_ctx = NULL;
158 }
159
160 if (rbd_data->cluster) {
161 rados_shutdown(rbd_data->cluster);
162 rbd_data->cluster = NULL;
163 }
164}
165
166static void _fio_rbd_finish_write_aiocb(rbd_completion_t comp, void *data)
167{
168 struct io_u *io_u = (struct io_u *)data;
169 struct fio_rbd_iou *fio_rbd_iou =
170 (struct fio_rbd_iou *)io_u->engine_data;
171
172 fio_rbd_iou->io_complete = 1;
173
174 /* if write needs to be verified - we should not release comp here
175 without fetching the result */
176
177 rbd_aio_release(comp);
178 /* TODO handle error */
179
180 return;
181}
182
183static void _fio_rbd_finish_read_aiocb(rbd_completion_t comp, void *data)
184{
185 struct io_u *io_u = (struct io_u *)data;
186 struct fio_rbd_iou *fio_rbd_iou =
187 (struct fio_rbd_iou *)io_u->engine_data;
188
189 fio_rbd_iou->io_complete = 1;
190
191 /* if read needs to be verified - we should not release comp here
192 without fetching the result */
193 rbd_aio_release(comp);
194
195 /* TODO handle error */
196
197 return;
198}
199
Haomai Wang3f0151b2014-05-21 16:30:55 +0800200static void _fio_rbd_finish_sync_aiocb(rbd_completion_t comp, void *data)
201{
202 struct io_u *io_u = (struct io_u *)data;
203 struct fio_rbd_iou *fio_rbd_iou =
204 (struct fio_rbd_iou *)io_u->engine_data;
205
206 fio_rbd_iou->io_complete = 1;
207
208 /* if sync needs to be verified - we should not release comp here
209 without fetching the result */
210 rbd_aio_release(comp);
211
212 /* TODO handle error */
213
214 return;
215}
216
Daniel Gollubfc5c0342014-02-17 14:35:28 +0100217static struct io_u *fio_rbd_event(struct thread_data *td, int event)
218{
219 struct rbd_data *rbd_data = td->io_ops->data;
220
221 return rbd_data->aio_events[event];
222}
223
224static int fio_rbd_getevents(struct thread_data *td, unsigned int min,
225 unsigned int max, struct timespec *t)
226{
227 struct rbd_data *rbd_data = td->io_ops->data;
228 unsigned int events = 0;
229 struct io_u *io_u;
230 int i;
231 struct fio_rbd_iou *fov;
232
233 do {
234 io_u_qiter(&td->io_u_all, io_u, i) {
235 if (!(io_u->flags & IO_U_F_FLIGHT))
236 continue;
237
238 fov = (struct fio_rbd_iou *)io_u->engine_data;
239
240 if (fov->io_complete) {
241 fov->io_complete = 0;
242 rbd_data->aio_events[events] = io_u;
243 events++;
244 }
245
246 }
247 if (events < min)
248 usleep(100);
249 else
250 break;
251
252 } while (1);
253
254 return events;
255}
256
257static int fio_rbd_queue(struct thread_data *td, struct io_u *io_u)
258{
259 int r = -1;
260 struct rbd_data *rbd_data = td->io_ops->data;
261 rbd_completion_t comp;
262
263 fio_ro_check(td, io_u);
264
265 if (io_u->ddir == DDIR_WRITE) {
266 r = rbd_aio_create_completion(io_u,
267 (rbd_callback_t)
268 _fio_rbd_finish_write_aiocb,
269 &comp);
270 if (r < 0) {
271 log_err
272 ("rbd_aio_create_completion for DDIR_WRITE failed.\n");
273 goto failed;
274 }
275
276 r = rbd_aio_write(rbd_data->image, io_u->offset,
277 io_u->xfer_buflen, io_u->xfer_buf, comp);
278 if (r < 0) {
279 log_err("rbd_aio_write failed.\n");
280 goto failed;
281 }
282
283 } else if (io_u->ddir == DDIR_READ) {
284 r = rbd_aio_create_completion(io_u,
285 (rbd_callback_t)
286 _fio_rbd_finish_read_aiocb,
287 &comp);
288 if (r < 0) {
289 log_err
290 ("rbd_aio_create_completion for DDIR_READ failed.\n");
291 goto failed;
292 }
293
294 r = rbd_aio_read(rbd_data->image, io_u->offset,
295 io_u->xfer_buflen, io_u->xfer_buf, comp);
296
297 if (r < 0) {
298 log_err("rbd_aio_read failed.\n");
299 goto failed;
300 }
301
302 } else if (io_u->ddir == DDIR_SYNC) {
Haomai Wang3f0151b2014-05-21 16:30:55 +0800303 r = rbd_aio_create_completion(io_u,
304 (rbd_callback_t)
305 _fio_rbd_finish_sync_aiocb,
306 &comp);
307 if (r < 0) {
308 log_err
309 ("rbd_aio_create_completion for DDIR_SYNC failed.\n");
310 goto failed;
311 }
312
313 r = rbd_aio_flush(rbd_data->image, comp);
Daniel Gollubfc5c0342014-02-17 14:35:28 +0100314 if (r < 0) {
315 log_err("rbd_flush failed.\n");
316 goto failed;
317 }
318
Daniel Gollubfc5c0342014-02-17 14:35:28 +0100319 } else {
320 dprint(FD_IO, "%s: Warning: unhandled ddir: %d\n", __func__,
321 io_u->ddir);
322 return FIO_Q_COMPLETED;
323 }
324
325 return FIO_Q_QUEUED;
326
327failed:
328 io_u->error = r;
329 td_verror(td, io_u->error, "xfer");
330 return FIO_Q_COMPLETED;
331}
332
/*
 * Engine init hook: establish the cluster/pool/image connection for this
 * thread via _fio_rbd_connect().
 *
 * Returns 0 on success; on failure the connect return code is logged and
 * 1 is returned.
 */
static int fio_rbd_init(struct thread_data *td)
{
	int rc = _fio_rbd_connect(td);

	if (!rc)
		return 0;

	log_err("fio_rbd_connect failed, return code: %d .\n", rc);
	return 1;
}
349
350static void fio_rbd_cleanup(struct thread_data *td)
351{
352 struct rbd_data *rbd_data = td->io_ops->data;
353
354 if (rbd_data) {
355 _fio_rbd_disconnect(rbd_data);
356 free(rbd_data->aio_events);
357 free(rbd_data);
358 }
359
360}
361
362static int fio_rbd_setup(struct thread_data *td)
363{
364 int r = 0;
365 rbd_image_info_t info;
366 struct fio_file *f;
367 struct rbd_data *rbd_data = NULL;
368 int major, minor, extra;
369
370 /* log version of librbd. No cluster connection required. */
371 rbd_version(&major, &minor, &extra);
372 log_info("rbd engine: RBD version: %d.%d.%d\n", major, minor, extra);
373
374 /* allocate engine specific structure to deal with librbd. */
375 r = _fio_setup_rbd_data(td, &rbd_data);
376 if (r) {
377 log_err("fio_setup_rbd_data failed.\n");
378 goto cleanup;
379 }
380 td->io_ops->data = rbd_data;
381
382 /* librbd does not allow us to run first in the main thread and later in a
383 * fork child. It needs to be the same process context all the time.
384 */
385 td->o.use_thread = 1;
386
387 /* connect in the main thread to determine to determine
388 * the size of the given RADOS block device. And disconnect
389 * later on.
390 */
391 r = _fio_rbd_connect(td);
392 if (r) {
393 log_err("fio_rbd_connect failed.\n");
394 goto cleanup;
395 }
396
397 /* get size of the RADOS block device */
398 r = rbd_stat(rbd_data->image, &info, sizeof(info));
399 if (r < 0) {
400 log_err("rbd_status failed.\n");
401 goto disconnect;
402 }
403 dprint(FD_IO, "rbd-engine: image size: %lu\n", info.size);
404
405 /* taken from "net" engine. Pretend we deal with files,
406 * even if we do not have any ideas about files.
407 * The size of the RBD is set instead of a artificial file.
408 */
409 if (!td->files_index) {
Jens Axboe5903e7b2014-02-26 13:42:13 -0800410 add_file(td, td->o.filename ? : "rbd", 0, 0);
Daniel Gollubfc5c0342014-02-17 14:35:28 +0100411 td->o.nr_files = td->o.nr_files ? : 1;
Jens Axboeb53f2c52014-04-08 21:07:12 -0600412 td->o.open_files++;
Daniel Gollubfc5c0342014-02-17 14:35:28 +0100413 }
414 f = td->files[0];
415 f->real_file_size = info.size;
416
417 /* disconnect, then we were only connected to determine
418 * the size of the RBD.
419 */
420 _fio_rbd_disconnect(rbd_data);
421 return 0;
422
423disconnect:
424 _fio_rbd_disconnect(rbd_data);
425cleanup:
426 fio_rbd_cleanup(td);
427 return r;
428}
429
/*
 * open_file hook. Does nothing and always reports success; the image is
 * opened in _fio_rbd_connect() instead.
 */
static int fio_rbd_open(struct thread_data *td, struct fio_file *f)
{
	return 0;
}
434
/*
 * invalidate hook. Does nothing and always reports success.
 */
static int fio_rbd_invalidate(struct thread_data *td, struct fio_file *f)
{
	return 0;
}
439
Daniel Gollubfc5c0342014-02-17 14:35:28 +0100440static void fio_rbd_io_u_free(struct thread_data *td, struct io_u *io_u)
441{
442 struct fio_rbd_iou *o = io_u->engine_data;
443
444 if (o) {
445 io_u->engine_data = NULL;
446 free(o);
447 }
448}
449
450static int fio_rbd_io_u_init(struct thread_data *td, struct io_u *io_u)
451{
452 struct fio_rbd_iou *o;
453
454 o = malloc(sizeof(*o));
455 o->io_complete = 0;
456 o->io_u = io_u;
457 io_u->engine_data = o;
458 return 0;
459}
460
Jens Axboe10aa1362014-04-01 21:10:36 -0600461static struct ioengine_ops ioengine = {
Jens Axboe1be9f212014-05-19 19:57:05 -0600462 .name = "rbd",
463 .version = FIO_IOOPS_VERSION,
464 .setup = fio_rbd_setup,
465 .init = fio_rbd_init,
466 .queue = fio_rbd_queue,
467 .getevents = fio_rbd_getevents,
468 .event = fio_rbd_event,
469 .cleanup = fio_rbd_cleanup,
470 .open_file = fio_rbd_open,
471 .invalidate = fio_rbd_invalidate,
472 .options = options,
473 .io_u_init = fio_rbd_io_u_init,
474 .io_u_free = fio_rbd_io_u_free,
475 .option_struct_size = sizeof(struct rbd_options),
Daniel Gollubfc5c0342014-02-17 14:35:28 +0100476};
477
478static void fio_init fio_rbd_register(void)
479{
480 register_ioengine(&ioengine);
481}
482
483static void fio_exit fio_rbd_unregister(void)
484{
485 unregister_ioengine(&ioengine);
486}