/*
 * Copyright(c) 2016 Intel Corporation.
 *
 * This file is provided under a dual BSD/GPLv2 license. When using or
 * redistributing this file, you may do so under either license.
 *
 * GPL LICENSE SUMMARY
 *
 * This program is free software; you can redistribute it and/or modify
 * it under the terms of version 2 of the GNU General Public License as
 * published by the Free Software Foundation.
 *
 * This program is distributed in the hope that it will be useful, but
 * WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
 * General Public License for more details.
 *
 * BSD LICENSE
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 *
 *  - Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 *  - Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in
 *    the documentation and/or other materials provided with the
 *    distribution.
 *  - Neither the name of Intel Corporation nor the names of its
 *    contributors may be used to endorse or promote products derived
 *    from this software without specific prior written permission.
 *
 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
 * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
 * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
 * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
 * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
 * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
 * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
 * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
 * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
 * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 *
 */

#include <linux/slab.h>
#include <linux/vmalloc.h>
#include <rdma/ib_umem.h>
#include <rdma/rdma_vt.h>
#include "vt.h"
#include "mr.h"

/*
 * Do any initialization needed when a driver registers with rdmavt.
 */
int rvt_driver_mr_init(struct rvt_dev_info *rdi)
{
	unsigned int lkey_table_size = rdi->dparms.lkey_table_size;
	unsigned lk_tab_size;
	int i;

	if (rdi->flags & RVT_FLAG_MR_INIT_DRIVER) {
		rvt_pr_info(rdi, "Driver is doing MR init.\n");
		return 0;
	}

	/*
	 * The top lkey_table_size bits are used to index the
	 * table. The lower 8 bits can be owned by the user (copied from
	 * the LKEY). The remaining bits act as a generation number or tag.
	 */
	if (!lkey_table_size)
		return -EINVAL;

	spin_lock_init(&rdi->lkey_table.lock);

	rdi->lkey_table.max = 1 << lkey_table_size;

	/* ensure generation is at least 4 bits */
	if (lkey_table_size > RVT_MAX_LKEY_TABLE_BITS) {
		rvt_pr_warn(rdi, "lkey bits %u too large, reduced to %u\n",
			    lkey_table_size, RVT_MAX_LKEY_TABLE_BITS);
		rdi->dparms.lkey_table_size = RVT_MAX_LKEY_TABLE_BITS;
		lkey_table_size = rdi->dparms.lkey_table_size;
	}
	lk_tab_size = rdi->lkey_table.max * sizeof(*rdi->lkey_table.table);
	rdi->lkey_table.table = (struct rvt_mregion __rcu **)
				vmalloc_node(lk_tab_size, rdi->dparms.node);
	if (!rdi->lkey_table.table)
		return -ENOMEM;

	RCU_INIT_POINTER(rdi->dma_mr, NULL);
	for (i = 0; i < rdi->lkey_table.max; i++)
		RCU_INIT_POINTER(rdi->lkey_table.table[i], NULL);

	return 0;
}

/*
 * called when drivers have unregistered or perhaps failed to register with us
 */
void rvt_mr_exit(struct rvt_dev_info *rdi)
{
	if (rdi->dma_mr)
		rvt_pr_err(rdi, "DMA MR not null!\n");

	vfree(rdi->lkey_table.table);
}

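/*
 * rvt_deinit_mregion - free the per-region segment map arrays
 * @mr: memory region to tear down
 *
 * Walks mr->map[] backwards and frees each chunk allocated by
 * rvt_init_mregion(), leaving mapsz at zero.
 */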
static void rvt_deinit_mregion(struct rvt_mregion *mr)
{
	int i = mr->mapsz;

	mr->mapsz = 0;
	while (i)
		kfree(mr->map[--i]);
}

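/*
 * rvt_init_mregion - initialize the common parts of an rvt_mregion
 * @mr: memory region to initialize
 * @pd: protection domain that owns the region
 * @count: maximum number of segments the region may map
 *
 * Allocates one map chunk per RVT_SEGSZ segments and takes an initial
 * reference on the region for the pointer returned to the caller.
 */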
static int rvt_init_mregion(struct rvt_mregion *mr, struct ib_pd *pd,
			    int count)
{
	int m, i = 0;

	mr->mapsz = 0;
	m = (count + RVT_SEGSZ - 1) / RVT_SEGSZ;
	for (; i < m; i++) {
		mr->map[i] = kzalloc(sizeof(*mr->map[0]), GFP_KERNEL);
		if (!mr->map[i]) {
			rvt_deinit_mregion(mr);
			return -ENOMEM;
		}
		mr->mapsz++;
	}
	init_completion(&mr->comp);
	/* count returning the ptr to user */
	atomic_set(&mr->refcount, 1);
	mr->pd = pd;
	mr->max_segs = count;
	return 0;
}

/**
 * rvt_alloc_lkey - allocate an lkey
 * @mr: memory region that this lkey protects
 * @dma_region: 0->normal key, 1->restricted DMA key
 *
 * Returns 0 if successful, otherwise returns -errno.
 *
 * Increments mr reference count as required.
 *
 * Sets the lkey field of mr for non-dma regions.
 *
 */
static int rvt_alloc_lkey(struct rvt_mregion *mr, int dma_region)
{
	unsigned long flags;
	u32 r;
	u32 n;
	int ret = 0;
	struct rvt_dev_info *dev = ib_to_rvt(mr->pd->device);
	struct rvt_lkey_table *rkt = &dev->lkey_table;

	rvt_get_mr(mr);
	spin_lock_irqsave(&rkt->lock, flags);

	/* special case for dma_mr lkey == 0 */
	if (dma_region) {
		struct rvt_mregion *tmr;

		tmr = rcu_access_pointer(dev->dma_mr);
		if (!tmr) {
			rcu_assign_pointer(dev->dma_mr, mr);
			mr->lkey_published = 1;
		} else {
			rvt_put_mr(mr);
		}
		goto success;
	}

	/* Find the next available LKEY */
	r = rkt->next;
	n = r;
	for (;;) {
		if (!rcu_access_pointer(rkt->table[r]))
			break;
		r = (r + 1) & (rkt->max - 1);
		if (r == n)
			goto bail;
	}
	rkt->next = (r + 1) & (rkt->max - 1);
	/*
	 * Make sure lkey is never zero which is reserved to indicate an
	 * unrestricted LKEY.
	 */
	rkt->gen++;
	/*
	 * The table index occupies the top lkey_table_size bits and the
	 * low 8 bits are left to the user; the generation number fills
	 * the bits in between (lkey_table_size is capped in
	 * rvt_driver_mr_init() so generation bits remain).
	 */
	mr->lkey = (r << (32 - dev->dparms.lkey_table_size)) |
		((((1 << (24 - dev->dparms.lkey_table_size)) - 1) & rkt->gen)
		 << 8);
	if (mr->lkey == 0) {
		mr->lkey |= 1 << 8;
		rkt->gen++;
	}
	rcu_assign_pointer(rkt->table[r], mr);
	mr->lkey_published = 1;
success:
	spin_unlock_irqrestore(&rkt->lock, flags);
out:
	return ret;
bail:
	rvt_put_mr(mr);
	spin_unlock_irqrestore(&rkt->lock, flags);
	ret = -ENOMEM;
	goto out;
}

/**
 * rvt_free_lkey - free an lkey
 * @mr: mr to free from tables
 */
static void rvt_free_lkey(struct rvt_mregion *mr)
{
	unsigned long flags;
	u32 lkey = mr->lkey;
	u32 r;
	struct rvt_dev_info *dev = ib_to_rvt(mr->pd->device);
	struct rvt_lkey_table *rkt = &dev->lkey_table;
	int freed = 0;

	spin_lock_irqsave(&rkt->lock, flags);
	if (!mr->lkey_published)
		goto out;
	if (lkey == 0) {
		RCU_INIT_POINTER(dev->dma_mr, NULL);
	} else {
		r = lkey >> (32 - dev->dparms.lkey_table_size);
		RCU_INIT_POINTER(rkt->table[r], NULL);
	}
	mr->lkey_published = 0;
	freed++;
out:
	spin_unlock_irqrestore(&rkt->lock, flags);
	if (freed) {
		synchronize_rcu();
		rvt_put_mr(mr);
	}
}

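/*
 * __rvt_alloc_mr - common allocation path for user and kernel MRs
 * @count: number of segments the region must be able to map
 * @pd: protection domain for the region
 *
 * Allocates the rvt_mr together with its first-level map pointers,
 * initializes the mregion and assigns it an lkey/rkey.
 */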
static struct rvt_mr *__rvt_alloc_mr(int count, struct ib_pd *pd)
{
	struct rvt_mr *mr;
	int rval = -ENOMEM;
	int m;

	/* Allocate struct plus pointers to first level page tables. */
	m = (count + RVT_SEGSZ - 1) / RVT_SEGSZ;
	mr = kzalloc(sizeof(*mr) + m * sizeof(mr->mr.map[0]), GFP_KERNEL);
	if (!mr)
		goto bail;

	rval = rvt_init_mregion(&mr->mr, pd, count);
	if (rval)
		goto bail;
	/*
	 * ib_reg_phys_mr() will initialize mr->ibmr except for
	 * lkey and rkey.
	 */
	rval = rvt_alloc_lkey(&mr->mr, 0);
	if (rval)
		goto bail_mregion;
	mr->ibmr.lkey = mr->mr.lkey;
	mr->ibmr.rkey = mr->mr.lkey;
done:
	return mr;

bail_mregion:
	rvt_deinit_mregion(&mr->mr);
bail:
	kfree(mr);
	mr = ERR_PTR(rval);
	goto done;
}

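/*
 * __rvt_free_mr - release a region allocated with __rvt_alloc_mr()
 * @mr: memory region to free
 */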
static void __rvt_free_mr(struct rvt_mr *mr)
{
	rvt_deinit_mregion(&mr->mr);
	rvt_free_lkey(&mr->mr);
	kfree(mr);	/* allocated with kzalloc() in __rvt_alloc_mr() */
}

/**
 * rvt_get_dma_mr - get a DMA memory region
 * @pd: protection domain for this memory region
 * @acc: access flags
 *
 * Returns the memory region on success, otherwise returns an errno.
 * Note that all DMA addresses should be created via the
 * struct ib_dma_mapping_ops functions (see dma.c).
 */
struct ib_mr *rvt_get_dma_mr(struct ib_pd *pd, int acc)
{
	struct rvt_mr *mr;
	struct ib_mr *ret;
	int rval;

	if (ibpd_to_rvtpd(pd)->user)
		return ERR_PTR(-EPERM);

	mr = kzalloc(sizeof(*mr), GFP_KERNEL);
	if (!mr) {
		ret = ERR_PTR(-ENOMEM);
		goto bail;
	}

	rval = rvt_init_mregion(&mr->mr, pd, 0);
	if (rval) {
		ret = ERR_PTR(rval);
		goto bail;
	}

	rval = rvt_alloc_lkey(&mr->mr, 1);
	if (rval) {
		ret = ERR_PTR(rval);
		goto bail_mregion;
	}

	mr->mr.access_flags = acc;
	ret = &mr->ibmr;
done:
	return ret;

bail_mregion:
	rvt_deinit_mregion(&mr->mr);
bail:
	kfree(mr);
	goto done;
}

/**
 * rvt_reg_user_mr - register a userspace memory region
 * @pd: protection domain for this memory region
 * @start: starting userspace address
 * @length: length of region to register
 * @virt_addr: virtual address to use as the iova for the region
 * @mr_access_flags: access flags for this memory region
 * @udata: unused by the driver
 *
 * Returns the memory region on success, otherwise returns an errno.
 */
struct ib_mr *rvt_reg_user_mr(struct ib_pd *pd, u64 start, u64 length,
			      u64 virt_addr, int mr_access_flags,
			      struct ib_udata *udata)
{
	struct rvt_mr *mr;
	struct ib_umem *umem;
	struct scatterlist *sg;
	int n, m, entry;
	struct ib_mr *ret;

	if (length == 0)
		return ERR_PTR(-EINVAL);

	umem = ib_umem_get(pd->uobject->context, start, length,
			   mr_access_flags, 0);
	if (IS_ERR(umem))
		return (void *)umem;

	n = umem->nmap;

	mr = __rvt_alloc_mr(n, pd);
	if (IS_ERR(mr)) {
		ret = (struct ib_mr *)mr;
		goto bail_umem;
	}

	mr->mr.user_base = start;
	mr->mr.iova = virt_addr;
	mr->mr.length = length;
	mr->mr.offset = ib_umem_offset(umem);
	mr->mr.access_flags = mr_access_flags;
	mr->umem = umem;

	if (is_power_of_2(umem->page_size))
		mr->mr.page_shift = ilog2(umem->page_size);
	m = 0;
	n = 0;
	for_each_sg(umem->sg_head.sgl, sg, umem->nmap, entry) {
		void *vaddr;

		vaddr = page_address(sg_page(sg));
		if (!vaddr) {
			ret = ERR_PTR(-EINVAL);
			goto bail_inval;
		}
		mr->mr.map[m]->segs[n].vaddr = vaddr;
		mr->mr.map[m]->segs[n].length = umem->page_size;
		n++;
		if (n == RVT_SEGSZ) {
			m++;
			n = 0;
		}
	}
	return &mr->ibmr;

bail_inval:
	__rvt_free_mr(mr);

bail_umem:
	ib_umem_release(umem);

	return ret;
}

/**
 * rvt_dereg_mr - unregister and free a memory region
 * @ibmr: the memory region to free
 *
 * Returns 0 on success.
 *
 * Note that this is called to free MRs created by rvt_get_dma_mr()
 * or rvt_reg_user_mr().
 */
int rvt_dereg_mr(struct ib_mr *ibmr)
{
	struct rvt_mr *mr = to_imr(ibmr);
	struct rvt_dev_info *rdi = ib_to_rvt(ibmr->pd->device);
	int ret = 0;
	unsigned long timeout;

	rvt_free_lkey(&mr->mr);

	rvt_put_mr(&mr->mr); /* will set completion if last */
	timeout = wait_for_completion_timeout(&mr->mr.comp, 5 * HZ);
	if (!timeout) {
		rvt_pr_err(rdi,
			   "rvt_dereg_mr timeout mr %p pd %p refcount %u\n",
			   mr, mr->mr.pd, atomic_read(&mr->mr.refcount));
		rvt_get_mr(&mr->mr);
		ret = -EBUSY;
		goto out;
	}
	rvt_deinit_mregion(&mr->mr);
	if (mr->umem)
		ib_umem_release(mr->umem);
	kfree(mr);
out:
	return ret;
}

/**
 * rvt_alloc_mr - Allocate a memory region usable with the IB_WR_REG_MR send work request
 * @pd: protection domain for this memory region
 * @mr_type: mem region type
 * @max_num_sg: Max number of segments allowed
 *
 * Return the memory region on success, otherwise return an errno.
 */
struct ib_mr *rvt_alloc_mr(struct ib_pd *pd,
			   enum ib_mr_type mr_type,
			   u32 max_num_sg)
{
	struct rvt_mr *mr;

	if (mr_type != IB_MR_TYPE_MEM_REG)
		return ERR_PTR(-EINVAL);

	mr = __rvt_alloc_mr(max_num_sg, pd);
	if (IS_ERR(mr))
		return (struct ib_mr *)mr;

	return &mr->ibmr;
}

/**
 * rvt_alloc_fmr - allocate a fast memory region
 * @pd: the protection domain for this memory region
 * @mr_access_flags: access flags for this memory region
 * @fmr_attr: fast memory region attributes
 *
 * Returns the memory region on success, otherwise returns an errno.
 */
struct ib_fmr *rvt_alloc_fmr(struct ib_pd *pd, int mr_access_flags,
			     struct ib_fmr_attr *fmr_attr)
{
	struct rvt_fmr *fmr;
	int m;
	struct ib_fmr *ret;
	int rval = -ENOMEM;

	/* Allocate struct plus pointers to first level page tables. */
	m = (fmr_attr->max_pages + RVT_SEGSZ - 1) / RVT_SEGSZ;
	fmr = kzalloc(sizeof(*fmr) + m * sizeof(fmr->mr.map[0]), GFP_KERNEL);
	if (!fmr)
		goto bail;

	rval = rvt_init_mregion(&fmr->mr, pd, fmr_attr->max_pages);
	if (rval)
		goto bail;

	/*
	 * ib_alloc_fmr() will initialize fmr->ibfmr except for lkey &
	 * rkey.
	 */
	rval = rvt_alloc_lkey(&fmr->mr, 0);
	if (rval)
		goto bail_mregion;
	fmr->ibfmr.rkey = fmr->mr.lkey;
	fmr->ibfmr.lkey = fmr->mr.lkey;
	/*
	 * Resources are allocated but no valid mapping (RKEY can't be
	 * used).
	 */
	fmr->mr.access_flags = mr_access_flags;
	fmr->mr.max_segs = fmr_attr->max_pages;
	fmr->mr.page_shift = fmr_attr->page_shift;

	ret = &fmr->ibfmr;
done:
	return ret;

bail_mregion:
	rvt_deinit_mregion(&fmr->mr);
bail:
	kfree(fmr);
	ret = ERR_PTR(rval);
	goto done;
}

/**
 * rvt_map_phys_fmr - set up a fast memory region
 * @ibfmr: the fast memory region to set up
 * @page_list: the list of pages to associate with the fast memory region
 * @list_len: the number of pages to associate with the fast memory region
 * @iova: the virtual address of the start of the fast memory region
 *
 * This may be called from interrupt context.
 */
int rvt_map_phys_fmr(struct ib_fmr *ibfmr, u64 *page_list,
		     int list_len, u64 iova)
{
	struct rvt_fmr *fmr = to_ifmr(ibfmr);
	struct rvt_lkey_table *rkt;
	unsigned long flags;
	int m, n, i;
	u32 ps;
	struct rvt_dev_info *rdi = ib_to_rvt(ibfmr->device);

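	/*
	 * An idle FMR holds two references: the one taken when the region
	 * was created and the one taken when its lkey was published.  A
	 * higher count means an SGE still points at it, so it cannot be
	 * remapped safely.
	 */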
	i = atomic_read(&fmr->mr.refcount);
	if (i > 2)
		return -EBUSY;

	if (list_len > fmr->mr.max_segs)
		return -EINVAL;

	rkt = &rdi->lkey_table;
	spin_lock_irqsave(&rkt->lock, flags);
	fmr->mr.user_base = iova;
	fmr->mr.iova = iova;
	ps = 1 << fmr->mr.page_shift;
	fmr->mr.length = list_len * ps;
	m = 0;
	n = 0;
	for (i = 0; i < list_len; i++) {
		fmr->mr.map[m]->segs[n].vaddr = (void *)page_list[i];
		fmr->mr.map[m]->segs[n].length = ps;
		if (++n == RVT_SEGSZ) {
			m++;
			n = 0;
		}
	}
	spin_unlock_irqrestore(&rkt->lock, flags);
	return 0;
}

/**
 * rvt_unmap_fmr - unmap fast memory regions
 * @fmr_list: the list of fast memory regions to unmap
 *
 * Returns 0 on success.
 */
int rvt_unmap_fmr(struct list_head *fmr_list)
{
	struct rvt_fmr *fmr;
	struct rvt_lkey_table *rkt;
	unsigned long flags;
	struct rvt_dev_info *rdi;

	list_for_each_entry(fmr, fmr_list, ibfmr.list) {
		rdi = ib_to_rvt(fmr->ibfmr.device);
		rkt = &rdi->lkey_table;
		spin_lock_irqsave(&rkt->lock, flags);
		fmr->mr.user_base = 0;
		fmr->mr.iova = 0;
		fmr->mr.length = 0;
		spin_unlock_irqrestore(&rkt->lock, flags);
	}
	return 0;
}

/**
 * rvt_dealloc_fmr - deallocate a fast memory region
 * @ibfmr: the fast memory region to deallocate
 *
 * Returns 0 on success.
 */
int rvt_dealloc_fmr(struct ib_fmr *ibfmr)
{
	struct rvt_fmr *fmr = to_ifmr(ibfmr);
	int ret = 0;
	unsigned long timeout;

	rvt_free_lkey(&fmr->mr);
	rvt_put_mr(&fmr->mr); /* will set completion if last */
	timeout = wait_for_completion_timeout(&fmr->mr.comp, 5 * HZ);
	if (!timeout) {
		rvt_get_mr(&fmr->mr);
		ret = -EBUSY;
		goto out;
	}
	rvt_deinit_mregion(&fmr->mr);
	kfree(fmr);
out:
	return ret;
}

/**
 * rvt_lkey_ok - check IB SGE for validity and initialize
 * @rkt: table containing lkey to check SGE against
 * @pd: protection domain
 * @isge: outgoing internal SGE
 * @sge: SGE to check
 * @acc: access flags
 *
 * Return 1 if valid and successful, otherwise returns 0.
 *
 * increments the reference count upon success
 *
 * Check the IB SGE for validity and initialize our internal version
 * of it.
 */
int rvt_lkey_ok(struct rvt_lkey_table *rkt, struct rvt_pd *pd,
		struct rvt_sge *isge, struct ib_sge *sge, int acc)
{
	struct rvt_mregion *mr;
	unsigned n, m;
	size_t off;
	struct rvt_dev_info *dev = ib_to_rvt(pd->ibpd.device);

	/*
	 * We use LKEY == zero for kernel virtual addresses
	 * (see rvt_get_dma_mr and dma.c).
	 */
	rcu_read_lock();
	if (sge->lkey == 0) {
		if (pd->user)
			goto bail;
		mr = rcu_dereference(dev->dma_mr);
		if (!mr)
			goto bail;
		atomic_inc(&mr->refcount);
		rcu_read_unlock();

		isge->mr = mr;
		isge->vaddr = (void *)sge->addr;
		isge->length = sge->length;
		isge->sge_length = sge->length;
		isge->m = 0;
		isge->n = 0;
		goto ok;
	}
	mr = rcu_dereference(
		rkt->table[(sge->lkey >> (32 - dev->dparms.lkey_table_size))]);
	if (unlikely(!mr || mr->lkey != sge->lkey || mr->pd != &pd->ibpd))
		goto bail;

	off = sge->addr - mr->user_base;
	if (unlikely(sge->addr < mr->user_base ||
		     off + sge->length > mr->length ||
		     (mr->access_flags & acc) != acc))
		goto bail;
	atomic_inc(&mr->refcount);
	rcu_read_unlock();

	off += mr->offset;
	if (mr->page_shift) {
		/*
		 * page sizes are uniform power of 2 so no loop is necessary
		 * entries_spanned_by_off is the number of times the loop below
		 * would have executed.
		 */
		size_t entries_spanned_by_off;

		entries_spanned_by_off = off >> mr->page_shift;
		off -= (entries_spanned_by_off << mr->page_shift);
		m = entries_spanned_by_off / RVT_SEGSZ;
		n = entries_spanned_by_off % RVT_SEGSZ;
	} else {
		m = 0;
		n = 0;
		while (off >= mr->map[m]->segs[n].length) {
			off -= mr->map[m]->segs[n].length;
			n++;
			if (n >= RVT_SEGSZ) {
				m++;
				n = 0;
			}
		}
	}
	isge->mr = mr;
	isge->vaddr = mr->map[m]->segs[n].vaddr + off;
	isge->length = mr->map[m]->segs[n].length - off;
	isge->sge_length = sge->length;
	isge->m = m;
	isge->n = n;
ok:
	return 1;
bail:
	rcu_read_unlock();
	return 0;
}
EXPORT_SYMBOL(rvt_lkey_ok);

/**
 * rvt_rkey_ok - check the IB virtual address, length, and RKEY
 * @qp: qp for validation
 * @sge: SGE state
 * @len: length of data
 * @vaddr: virtual address to place data
 * @rkey: rkey to check
 * @acc: access flags
 *
 * Return 1 if successful, otherwise 0.
 *
 * increments the reference count upon success
 */
int rvt_rkey_ok(struct rvt_qp *qp, struct rvt_sge *sge,
		u32 len, u64 vaddr, u32 rkey, int acc)
{
	struct rvt_dev_info *dev = ib_to_rvt(qp->ibqp.device);
	struct rvt_lkey_table *rkt = &dev->lkey_table;
	struct rvt_mregion *mr;
	unsigned n, m;
	size_t off;

	/*
	 * We use RKEY == zero for kernel virtual addresses
	 * (see rvt_get_dma_mr and dma.c).
	 */
	rcu_read_lock();
	if (rkey == 0) {
		struct rvt_pd *pd = ibpd_to_rvtpd(qp->ibqp.pd);
		struct rvt_dev_info *rdi = ib_to_rvt(pd->ibpd.device);

		if (pd->user)
			goto bail;
		mr = rcu_dereference(rdi->dma_mr);
		if (!mr)
			goto bail;
		atomic_inc(&mr->refcount);
		rcu_read_unlock();

		sge->mr = mr;
		sge->vaddr = (void *)vaddr;
		sge->length = len;
		sge->sge_length = len;
		sge->m = 0;
		sge->n = 0;
		goto ok;
	}

	mr = rcu_dereference(
		rkt->table[(rkey >> (32 - dev->dparms.lkey_table_size))]);
	if (unlikely(!mr || mr->lkey != rkey || qp->ibqp.pd != mr->pd))
		goto bail;

	off = vaddr - mr->iova;
	if (unlikely(vaddr < mr->iova || off + len > mr->length ||
		     (mr->access_flags & acc) == 0))
		goto bail;
	atomic_inc(&mr->refcount);
	rcu_read_unlock();

	off += mr->offset;
	if (mr->page_shift) {
		/*
		 * page sizes are uniform power of 2 so no loop is necessary
		 * entries_spanned_by_off is the number of times the loop below
		 * would have executed.
		 */
		size_t entries_spanned_by_off;

		entries_spanned_by_off = off >> mr->page_shift;
		off -= (entries_spanned_by_off << mr->page_shift);
		m = entries_spanned_by_off / RVT_SEGSZ;
		n = entries_spanned_by_off % RVT_SEGSZ;
	} else {
		m = 0;
		n = 0;
		while (off >= mr->map[m]->segs[n].length) {
			off -= mr->map[m]->segs[n].length;
			n++;
			if (n >= RVT_SEGSZ) {
				m++;
				n = 0;
			}
		}
	}
	sge->mr = mr;
	sge->vaddr = mr->map[m]->segs[n].vaddr + off;
	sge->length = mr->map[m]->segs[n].length - off;
	sge->sge_length = len;
	sge->m = m;
	sge->n = n;
ok:
	return 1;
bail:
	rcu_read_unlock();
	return 0;
}
EXPORT_SYMBOL(rvt_rkey_ok);