blob: 8438830352145084a60e4269cbdf5457f14ba132 [file] [log] [blame]
/*
 * (C) 2001 Clemson University and The University of Chicago
 *
 * See COPYING in top-level directory.
 */
6#include "protocol.h"
7#include "pvfs2-kernel.h"
8#include "pvfs2-bufmap.h"
9
/*
 * Wait queue woken by pvfs_bufmap_initialize() once a new bufmap has been
 * installed.
 */
DECLARE_WAIT_QUEUE_HEAD(pvfs2_bufmap_init_waitq);
11
Mike Marshall84d02152015-07-28 13:27:51 -040012static struct pvfs2_bufmap {
Mike Marshall274dcf52015-07-17 10:38:13 -040013 atomic_t refcnt;
14
15 int desc_size;
16 int desc_shift;
17 int desc_count;
18 int total_size;
19 int page_count;
20
21 struct page **page_array;
22 struct pvfs_bufmap_desc *desc_array;
23
24 /* array to track usage of buffer descriptors */
25 int *buffer_index_array;
26 spinlock_t buffer_index_lock;
27
28 /* array to track usage of buffer descriptors for readdir */
29 int readdir_index_array[PVFS2_READDIR_DEFAULT_DESC_COUNT];
30 spinlock_t readdir_index_lock;
31} *__pvfs2_bufmap;
32
33static DEFINE_SPINLOCK(pvfs2_bufmap_lock);
34
35static void
36pvfs2_bufmap_unmap(struct pvfs2_bufmap *bufmap)
37{
38 int i;
39
40 for (i = 0; i < bufmap->page_count; i++)
41 page_cache_release(bufmap->page_array[i]);
42}
43
44static void
45pvfs2_bufmap_free(struct pvfs2_bufmap *bufmap)
46{
47 kfree(bufmap->page_array);
48 kfree(bufmap->desc_array);
49 kfree(bufmap->buffer_index_array);
50 kfree(bufmap);
51}
52
53struct pvfs2_bufmap *pvfs2_bufmap_ref(void)
54{
55 struct pvfs2_bufmap *bufmap = NULL;
56
57 spin_lock(&pvfs2_bufmap_lock);
58 if (__pvfs2_bufmap) {
59 bufmap = __pvfs2_bufmap;
60 atomic_inc(&bufmap->refcnt);
61 }
62 spin_unlock(&pvfs2_bufmap_lock);
63 return bufmap;
64}
65
66void pvfs2_bufmap_unref(struct pvfs2_bufmap *bufmap)
67{
68 if (atomic_dec_and_lock(&bufmap->refcnt, &pvfs2_bufmap_lock)) {
69 __pvfs2_bufmap = NULL;
70 spin_unlock(&pvfs2_bufmap_lock);
71
72 pvfs2_bufmap_unmap(bufmap);
73 pvfs2_bufmap_free(bufmap);
74 }
75}
76
77inline int pvfs_bufmap_size_query(void)
78{
79 struct pvfs2_bufmap *bufmap = pvfs2_bufmap_ref();
80 int size = bufmap ? bufmap->desc_size : 0;
81
82 pvfs2_bufmap_unref(bufmap);
83 return size;
84}
85
86inline int pvfs_bufmap_shift_query(void)
87{
88 struct pvfs2_bufmap *bufmap = pvfs2_bufmap_ref();
89 int shift = bufmap ? bufmap->desc_shift : 0;
90
91 pvfs2_bufmap_unref(bufmap);
92 return shift;
93}
94
/* tasks waiting for a free entry in buffer_index_array sleep here */
static DECLARE_WAIT_QUEUE_HEAD(bufmap_waitq);
/* tasks waiting for a free entry in readdir_index_array sleep here */
static DECLARE_WAIT_QUEUE_HEAD(readdir_waitq);
97
98/*
99 * get_bufmap_init
100 *
101 * If bufmap_init is 1, then the shared memory system, including the
102 * buffer_index_array, is available. Otherwise, it is not.
103 *
104 * returns the value of bufmap_init
105 */
106int get_bufmap_init(void)
107{
108 return __pvfs2_bufmap ? 1 : 0;
109}
110
111
112static struct pvfs2_bufmap *
113pvfs2_bufmap_alloc(struct PVFS_dev_map_desc *user_desc)
114{
115 struct pvfs2_bufmap *bufmap;
116
117 bufmap = kzalloc(sizeof(*bufmap), GFP_KERNEL);
118 if (!bufmap)
119 goto out;
120
121 atomic_set(&bufmap->refcnt, 1);
122 bufmap->total_size = user_desc->total_size;
123 bufmap->desc_count = user_desc->count;
124 bufmap->desc_size = user_desc->size;
125 bufmap->desc_shift = ilog2(bufmap->desc_size);
126
127 spin_lock_init(&bufmap->buffer_index_lock);
128 bufmap->buffer_index_array =
129 kcalloc(bufmap->desc_count, sizeof(int), GFP_KERNEL);
130 if (!bufmap->buffer_index_array) {
131 gossip_err("pvfs2: could not allocate %d buffer indices\n",
132 bufmap->desc_count);
133 goto out_free_bufmap;
134 }
135 spin_lock_init(&bufmap->readdir_index_lock);
136
137 bufmap->desc_array =
138 kcalloc(bufmap->desc_count, sizeof(struct pvfs_bufmap_desc),
139 GFP_KERNEL);
140 if (!bufmap->desc_array) {
141 gossip_err("pvfs2: could not allocate %d descriptors\n",
142 bufmap->desc_count);
143 goto out_free_index_array;
144 }
145
146 bufmap->page_count = bufmap->total_size / PAGE_SIZE;
147
148 /* allocate storage to track our page mappings */
149 bufmap->page_array =
150 kcalloc(bufmap->page_count, sizeof(struct page *), GFP_KERNEL);
151 if (!bufmap->page_array)
152 goto out_free_desc_array;
153
154 return bufmap;
155
156out_free_desc_array:
157 kfree(bufmap->desc_array);
158out_free_index_array:
159 kfree(bufmap->buffer_index_array);
160out_free_bufmap:
161 kfree(bufmap);
162out:
163 return NULL;
164}
165
166static int
167pvfs2_bufmap_map(struct pvfs2_bufmap *bufmap,
168 struct PVFS_dev_map_desc *user_desc)
169{
170 int pages_per_desc = bufmap->desc_size / PAGE_SIZE;
171 int offset = 0, ret, i;
172
173 /* map the pages */
174 down_write(&current->mm->mmap_sem);
175 ret = get_user_pages(current,
176 current->mm,
177 (unsigned long)user_desc->ptr,
178 bufmap->page_count,
179 1,
180 0,
181 bufmap->page_array,
182 NULL);
183 up_write(&current->mm->mmap_sem);
184
185 if (ret < 0)
186 return ret;
187
188 if (ret != bufmap->page_count) {
189 gossip_err("pvfs2 error: asked for %d pages, only got %d.\n",
190 bufmap->page_count, ret);
191
192 for (i = 0; i < ret; i++) {
193 SetPageError(bufmap->page_array[i]);
194 page_cache_release(bufmap->page_array[i]);
195 }
196 return -ENOMEM;
197 }
198
199 /*
200 * ideally we want to get kernel space pointers for each page, but
201 * we can't kmap that many pages at once if highmem is being used.
202 * so instead, we just kmap/kunmap the page address each time the
203 * kaddr is needed.
204 */
205 for (i = 0; i < bufmap->page_count; i++)
206 flush_dcache_page(bufmap->page_array[i]);
207
208 /* build a list of available descriptors */
209 for (offset = 0, i = 0; i < bufmap->desc_count; i++) {
210 bufmap->desc_array[i].page_array = &bufmap->page_array[offset];
211 bufmap->desc_array[i].array_count = pages_per_desc;
212 bufmap->desc_array[i].uaddr =
213 (user_desc->ptr + (i * pages_per_desc * PAGE_SIZE));
214 offset += pages_per_desc;
215 }
216
217 return 0;
218}
219
220/*
221 * pvfs_bufmap_initialize()
222 *
223 * initializes the mapped buffer interface
224 *
225 * returns 0 on success, -errno on failure
226 */
227int pvfs_bufmap_initialize(struct PVFS_dev_map_desc *user_desc)
228{
229 struct pvfs2_bufmap *bufmap;
230 int ret = -EINVAL;
231
232 gossip_debug(GOSSIP_BUFMAP_DEBUG,
233 "pvfs_bufmap_initialize: called (ptr ("
234 "%p) sz (%d) cnt(%d).\n",
235 user_desc->ptr,
236 user_desc->size,
237 user_desc->count);
238
239 /*
240 * sanity check alignment and size of buffer that caller wants to
241 * work with
242 */
243 if (PAGE_ALIGN((unsigned long)user_desc->ptr) !=
244 (unsigned long)user_desc->ptr) {
245 gossip_err("pvfs2 error: memory alignment (front). %p\n",
246 user_desc->ptr);
247 goto out;
248 }
249
250 if (PAGE_ALIGN(((unsigned long)user_desc->ptr + user_desc->total_size))
251 != (unsigned long)(user_desc->ptr + user_desc->total_size)) {
252 gossip_err("pvfs2 error: memory alignment (back).(%p + %d)\n",
253 user_desc->ptr,
254 user_desc->total_size);
255 goto out;
256 }
257
258 if (user_desc->total_size != (user_desc->size * user_desc->count)) {
259 gossip_err("pvfs2 error: user provided an oddly sized buffer: (%d, %d, %d)\n",
260 user_desc->total_size,
261 user_desc->size,
262 user_desc->count);
263 goto out;
264 }
265
266 if ((user_desc->size % PAGE_SIZE) != 0) {
267 gossip_err("pvfs2 error: bufmap size not page size divisible (%d).\n",
268 user_desc->size);
269 goto out;
270 }
271
272 ret = -ENOMEM;
273 bufmap = pvfs2_bufmap_alloc(user_desc);
274 if (!bufmap)
275 goto out;
276
277 ret = pvfs2_bufmap_map(bufmap, user_desc);
278 if (ret)
279 goto out_free_bufmap;
280
281
282 spin_lock(&pvfs2_bufmap_lock);
283 if (__pvfs2_bufmap) {
284 spin_unlock(&pvfs2_bufmap_lock);
285 gossip_err("pvfs2: error: bufmap already initialized.\n");
286 ret = -EALREADY;
287 goto out_unmap_bufmap;
288 }
289 __pvfs2_bufmap = bufmap;
290 spin_unlock(&pvfs2_bufmap_lock);
291
292 /*
293 * If there are operations in pvfs2_bufmap_init_waitq, wake them up.
294 * This scenario occurs when the client-core is restarted and I/O
295 * requests in the in-progress or waiting tables are restarted. I/O
296 * requests cannot be restarted until the shared memory system is
297 * completely re-initialized, so we put the I/O requests in this
298 * waitq until initialization has completed. NOTE: the I/O requests
299 * are also on a timer, so they don't wait forever just in case the
300 * client-core doesn't come back up.
301 */
302 wake_up_interruptible(&pvfs2_bufmap_init_waitq);
303
304 gossip_debug(GOSSIP_BUFMAP_DEBUG,
305 "pvfs_bufmap_initialize: exiting normally\n");
306 return 0;
307
308out_unmap_bufmap:
309 pvfs2_bufmap_unmap(bufmap);
310out_free_bufmap:
311 pvfs2_bufmap_free(bufmap);
312out:
313 return ret;
314}
315
316/*
317 * pvfs_bufmap_finalize()
318 *
319 * shuts down the mapped buffer interface and releases any resources
320 * associated with it
321 *
322 * no return value
323 */
324void pvfs_bufmap_finalize(void)
325{
326 gossip_debug(GOSSIP_BUFMAP_DEBUG, "pvfs2_bufmap_finalize: called\n");
327 BUG_ON(!__pvfs2_bufmap);
328 pvfs2_bufmap_unref(__pvfs2_bufmap);
329 gossip_debug(GOSSIP_BUFMAP_DEBUG,
330 "pvfs2_bufmap_finalize: exiting normally\n");
331}
332
333struct slot_args {
334 int slot_count;
335 int *slot_array;
336 spinlock_t *slot_lock;
337 wait_queue_head_t *slot_wq;
338};
339
340static int wait_for_a_slot(struct slot_args *slargs, int *buffer_index)
341{
342 int ret = -1;
343 int i = 0;
344 DECLARE_WAITQUEUE(my_wait, current);
345
346
347 add_wait_queue_exclusive(slargs->slot_wq, &my_wait);
348
349 while (1) {
350 set_current_state(TASK_INTERRUPTIBLE);
351
352 /*
353 * check for available desc, slot_lock is the appropriate
354 * index_lock
355 */
356 spin_lock(slargs->slot_lock);
357 for (i = 0; i < slargs->slot_count; i++)
358 if (slargs->slot_array[i] == 0) {
359 slargs->slot_array[i] = 1;
360 *buffer_index = i;
361 ret = 0;
362 break;
363 }
364 spin_unlock(slargs->slot_lock);
365
366 /* if we acquired a buffer, then break out of while */
367 if (ret == 0)
368 break;
369
370 if (!signal_pending(current)) {
371 int timeout =
372 MSECS_TO_JIFFIES(1000 * slot_timeout_secs);
373 gossip_debug(GOSSIP_BUFMAP_DEBUG,
374 "[BUFMAP]: waiting %d "
375 "seconds for a slot\n",
376 slot_timeout_secs);
377 if (!schedule_timeout(timeout)) {
378 gossip_debug(GOSSIP_BUFMAP_DEBUG,
379 "*** wait_for_a_slot timed out\n");
380 ret = -ETIMEDOUT;
381 break;
382 }
383 gossip_debug(GOSSIP_BUFMAP_DEBUG,
384 "[BUFMAP]: woken up by a slot becoming available.\n");
385 continue;
386 }
387
388 gossip_debug(GOSSIP_BUFMAP_DEBUG, "pvfs2: %s interrupted.\n",
389 __func__);
390 ret = -EINTR;
391 break;
392 }
393
394 set_current_state(TASK_RUNNING);
395 remove_wait_queue(slargs->slot_wq, &my_wait);
396 return ret;
397}
398
399static void put_back_slot(struct slot_args *slargs, int buffer_index)
400{
401 /* slot_lock is the appropriate index_lock */
402 spin_lock(slargs->slot_lock);
403 if (buffer_index < 0 || buffer_index >= slargs->slot_count) {
404 spin_unlock(slargs->slot_lock);
405 return;
406 }
407
408 /* put the desc back on the queue */
409 slargs->slot_array[buffer_index] = 0;
410 spin_unlock(slargs->slot_lock);
411
412 /* wake up anyone who may be sleeping on the queue */
413 wake_up_interruptible(slargs->slot_wq);
414}
415
416/*
417 * pvfs_bufmap_get()
418 *
419 * gets a free mapped buffer descriptor, will sleep until one becomes
420 * available if necessary
421 *
422 * returns 0 on success, -errno on failure
423 */
424int pvfs_bufmap_get(struct pvfs2_bufmap **mapp, int *buffer_index)
425{
426 struct pvfs2_bufmap *bufmap = pvfs2_bufmap_ref();
427 struct slot_args slargs;
428 int ret;
429
430 if (!bufmap) {
431 gossip_err("pvfs2: please confirm that pvfs2-client daemon is running.\n");
432 return -EIO;
433 }
434
435 slargs.slot_count = bufmap->desc_count;
436 slargs.slot_array = bufmap->buffer_index_array;
437 slargs.slot_lock = &bufmap->buffer_index_lock;
438 slargs.slot_wq = &bufmap_waitq;
439 ret = wait_for_a_slot(&slargs, buffer_index);
440 if (ret)
441 pvfs2_bufmap_unref(bufmap);
442 *mapp = bufmap;
443 return ret;
444}
445
446/*
447 * pvfs_bufmap_put()
448 *
449 * returns a mapped buffer descriptor to the collection
450 *
451 * no return value
452 */
453void pvfs_bufmap_put(struct pvfs2_bufmap *bufmap, int buffer_index)
454{
455 struct slot_args slargs;
456
457 slargs.slot_count = bufmap->desc_count;
458 slargs.slot_array = bufmap->buffer_index_array;
459 slargs.slot_lock = &bufmap->buffer_index_lock;
460 slargs.slot_wq = &bufmap_waitq;
461 put_back_slot(&slargs, buffer_index);
462 pvfs2_bufmap_unref(bufmap);
463}
464
465/*
466 * readdir_index_get()
467 *
468 * gets a free descriptor, will sleep until one becomes
469 * available if necessary.
470 * Although the readdir buffers are not mapped into kernel space
471 * we could do that at a later point of time. Regardless, these
472 * indices are used by the client-core.
473 *
474 * returns 0 on success, -errno on failure
475 */
476int readdir_index_get(struct pvfs2_bufmap **mapp, int *buffer_index)
477{
478 struct pvfs2_bufmap *bufmap = pvfs2_bufmap_ref();
479 struct slot_args slargs;
480 int ret;
481
482 if (!bufmap) {
483 gossip_err("pvfs2: please confirm that pvfs2-client daemon is running.\n");
484 return -EIO;
485 }
486
487 slargs.slot_count = PVFS2_READDIR_DEFAULT_DESC_COUNT;
488 slargs.slot_array = bufmap->readdir_index_array;
489 slargs.slot_lock = &bufmap->readdir_index_lock;
490 slargs.slot_wq = &readdir_waitq;
491 ret = wait_for_a_slot(&slargs, buffer_index);
492 if (ret)
493 pvfs2_bufmap_unref(bufmap);
494 *mapp = bufmap;
495 return ret;
496}
497
498void readdir_index_put(struct pvfs2_bufmap *bufmap, int buffer_index)
499{
500 struct slot_args slargs;
501
502 slargs.slot_count = PVFS2_READDIR_DEFAULT_DESC_COUNT;
503 slargs.slot_array = bufmap->readdir_index_array;
504 slargs.slot_lock = &bufmap->readdir_index_lock;
505 slargs.slot_wq = &readdir_waitq;
506 put_back_slot(&slargs, buffer_index);
507 pvfs2_bufmap_unref(bufmap);
508}
509
Mike Marshall4d1c4402015-09-04 10:31:16 -0400510int pvfs_bufmap_copy_from_iovec(struct pvfs2_bufmap *bufmap,
Mike Marshall54804942015-10-05 13:44:24 -0400511 struct iov_iter *iter,
512 int buffer_index,
513 size_t size)
Mike Marshall274dcf52015-07-17 10:38:13 -0400514{
Mike Marshall274dcf52015-07-17 10:38:13 -0400515 struct pvfs_bufmap_desc *to;
Mike Marshall4d1c4402015-09-04 10:31:16 -0400516 struct page *page;
517 size_t copied;
518 int i;
Mike Marshall274dcf52015-07-17 10:38:13 -0400519
520 gossip_debug(GOSSIP_BUFMAP_DEBUG,
Mike Marshall4d1c4402015-09-04 10:31:16 -0400521 "%s: buffer_index:%d: size:%lu:\n",
522 __func__, buffer_index, size);
Mike Marshall274dcf52015-07-17 10:38:13 -0400523
524 to = &bufmap->desc_array[buffer_index];
525
Mike Marshall4d1c4402015-09-04 10:31:16 -0400526 for (i = 0; size; i++) {
527 page = to->page_array[i];
528 copied = copy_page_from_iter(page, 0, PAGE_SIZE, iter);
529 size -= copied;
530 if ((copied == 0) && (size))
531 break;
Mike Marshall274dcf52015-07-17 10:38:13 -0400532 }
533
Mike Marshall4d1c4402015-09-04 10:31:16 -0400534 return size ? -EFAULT : 0;
Mike Marshall274dcf52015-07-17 10:38:13 -0400535
Mike Marshall274dcf52015-07-17 10:38:13 -0400536}
537
538/*
Mike Marshall4d1c4402015-09-04 10:31:16 -0400539 * Iterate through the array of pages containing the bytes from
540 * a file being read.
Mike Marshall274dcf52015-07-17 10:38:13 -0400541 *
Mike Marshall274dcf52015-07-17 10:38:13 -0400542 */
Mike Marshall4d1c4402015-09-04 10:31:16 -0400543int pvfs_bufmap_copy_to_iovec(struct pvfs2_bufmap *bufmap,
544 struct iov_iter *iter,
Al Viro5c278222015-10-08 17:43:58 -0400545 int buffer_index,
546 size_t size)
Mike Marshall274dcf52015-07-17 10:38:13 -0400547{
Al Viro5c278222015-10-08 17:43:58 -0400548 struct pvfs_bufmap_desc *from = &bufmap->desc_array[buffer_index];
Mike Marshall4d1c4402015-09-04 10:31:16 -0400549 int i;
Mike Marshall274dcf52015-07-17 10:38:13 -0400550
551 gossip_debug(GOSSIP_BUFMAP_DEBUG,
Al Viro5c278222015-10-08 17:43:58 -0400552 "%s: buffer_index:%d: size:%zu:\n",
553 __func__, buffer_index, size);
Mike Marshall274dcf52015-07-17 10:38:13 -0400554
Mike Marshall274dcf52015-07-17 10:38:13 -0400555
Al Viro5c278222015-10-08 17:43:58 -0400556 for (i = 0; size; i++) {
557 struct page *page = from->page_array[i];
558 size_t n = size;
559 if (n > PAGE_SIZE)
560 n = PAGE_SIZE;
561 n = copy_page_to_iter(page, 0, n, iter);
562 if (!n)
563 return -EFAULT;
564 size -= n;
Mike Marshall274dcf52015-07-17 10:38:13 -0400565 }
Al Viro5c278222015-10-08 17:43:58 -0400566 return 0;
Mike Marshall274dcf52015-07-17 10:38:13 -0400567}