/*
 * Copyright (c) 2013, Mellanox Technologies inc. All rights reserved.
 *
 * This software is available to you under a choice of one of two
 * licenses. You may choose to be licensed under the terms of the GNU
 * General Public License (GPL) Version 2, available from the file
 * COPYING in the main directory of this source tree, or the
 * OpenIB.org BSD license below:
 *
 *     Redistribution and use in source and binary forms, with or
 *     without modification, are permitted provided that the following
 *     conditions are met:
 *
 *      - Redistributions of source code must retain the above
 *        copyright notice, this list of conditions and the following
 *        disclaimer.
 *
 *      - Redistributions in binary form must reproduce the above
 *        copyright notice, this list of conditions and the following
 *        disclaimer in the documentation and/or other materials
 *        provided with the distribution.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
 * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
 * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
 * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
 * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
 * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
 * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
 * SOFTWARE.
 */


#include <linux/kref.h>
#include <linux/random.h>
#include <linux/debugfs.h>
#include <linux/export.h>
#include <rdma/ib_umem.h>
#include "mlx5_ib.h"

enum {
        DEF_CACHE_SIZE  = 10,
};

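/* Round ptr up to the next align-byte boundary; align must be a power of two. */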
static __be64 *mr_align(__be64 *ptr, int align)
{
        unsigned long mask = align - 1;

        return (__be64 *)(((unsigned long)ptr + mask) & ~mask);
}

static int order2idx(struct mlx5_ib_dev *dev, int order)
{
        struct mlx5_mr_cache *cache = &dev->cache;

        if (order < cache->ent[0].order)
                return 0;
        else
                return order - cache->ent[0].order;
}

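/*
 * Pre-populate cache entry 'c' with 'num' mkeys.  Each mkey is created in the
 * free state (status bit 6 set) with UMR access enabled, and gets a
 * 64-byte-aligned, DMA-mapped buffer large enough for the entry's page list
 * (2^order pages).
 */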
static int add_keys(struct mlx5_ib_dev *dev, int c, int num)
{
        struct device *ddev = dev->ib_dev.dma_device;
        struct mlx5_mr_cache *cache = &dev->cache;
        struct mlx5_cache_ent *ent = &cache->ent[c];
        struct mlx5_create_mkey_mbox_in *in;
        struct mlx5_ib_mr *mr;
        int npages = 1 << ent->order;
        int size = sizeof(u64) * npages;
        int err = 0;
        int i;

        in = kzalloc(sizeof(*in), GFP_KERNEL);
        if (!in)
                return -ENOMEM;

        for (i = 0; i < num; i++) {
                mr = kzalloc(sizeof(*mr), GFP_KERNEL);
                if (!mr) {
                        err = -ENOMEM;
                        goto out;
                }
                mr->order = ent->order;
                mr->umred = 1;
                mr->pas = kmalloc(size + 0x3f, GFP_KERNEL);
                if (!mr->pas) {
                        kfree(mr);
                        err = -ENOMEM;
                        goto out;
                }
                mr->dma = dma_map_single(ddev, mr_align(mr->pas, 0x40), size,
                                         DMA_TO_DEVICE);
                if (dma_mapping_error(ddev, mr->dma)) {
                        kfree(mr->pas);
                        kfree(mr);
                        err = -ENOMEM;
                        goto out;
                }

                in->seg.status = 1 << 6;
                in->seg.xlt_oct_size = cpu_to_be32((npages + 1) / 2);
                in->seg.qpn_mkey7_0 = cpu_to_be32(0xffffff << 8);
                in->seg.flags = MLX5_ACCESS_MODE_MTT | MLX5_PERM_UMR_EN;
                in->seg.log2_page_size = 12;

                err = mlx5_core_create_mkey(&dev->mdev, &mr->mmr, in,
                                            sizeof(*in));
                if (err) {
                        mlx5_ib_warn(dev, "create mkey failed %d\n", err);
                        dma_unmap_single(ddev, mr->dma, size, DMA_TO_DEVICE);
                        kfree(mr->pas);
                        kfree(mr);
                        goto out;
                }
                cache->last_add = jiffies;

                spin_lock(&ent->lock);
                list_add_tail(&mr->list, &ent->head);
                ent->cur++;
                ent->size++;
                spin_unlock(&ent->lock);
        }

out:
        kfree(in);
        return err;
}

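/*
 * Destroy up to 'num' mkeys taken from the head of cache entry 'c', unmapping
 * and freeing their page-list buffers.
 */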
static void remove_keys(struct mlx5_ib_dev *dev, int c, int num)
{
        struct device *ddev = dev->ib_dev.dma_device;
        struct mlx5_mr_cache *cache = &dev->cache;
        struct mlx5_cache_ent *ent = &cache->ent[c];
        struct mlx5_ib_mr *mr;
        int size;
        int err;
        int i;

        for (i = 0; i < num; i++) {
                spin_lock(&ent->lock);
                if (list_empty(&ent->head)) {
                        spin_unlock(&ent->lock);
                        return;
                }
                mr = list_first_entry(&ent->head, struct mlx5_ib_mr, list);
                list_del(&mr->list);
                ent->cur--;
                ent->size--;
                spin_unlock(&ent->lock);
                err = mlx5_core_destroy_mkey(&dev->mdev, &mr->mmr);
                if (err) {
                        mlx5_ib_warn(dev, "failed destroy mkey\n");
                } else {
                        size = ALIGN(sizeof(u64) * (1 << mr->order), 0x40);
                        dma_unmap_single(ddev, mr->dma, size, DMA_TO_DEVICE);
                        kfree(mr->pas);
                        kfree(mr);
                }
        }
}

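/*
 * debugfs "size" attribute: writing a number resizes the cache entry to that
 * many mkeys (never below its limit); reading returns the current size.
 */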
static ssize_t size_write(struct file *filp, const char __user *buf,
                          size_t count, loff_t *pos)
{
        struct mlx5_cache_ent *ent = filp->private_data;
        struct mlx5_ib_dev *dev = ent->dev;
        char lbuf[20];
        u32 var;
        int err;
        int c;

        if (copy_from_user(lbuf, buf, sizeof(lbuf)))
                return -EPERM;

        c = order2idx(dev, ent->order);
        lbuf[sizeof(lbuf) - 1] = 0;

        if (sscanf(lbuf, "%u", &var) != 1)
                return -EINVAL;

        if (var < ent->limit)
                return -EINVAL;

        if (var > ent->size) {
                err = add_keys(dev, c, var - ent->size);
                if (err)
                        return err;
        } else if (var < ent->size) {
                remove_keys(dev, c, ent->size - var);
        }

        return count;
}

static ssize_t size_read(struct file *filp, char __user *buf, size_t count,
                         loff_t *pos)
{
        struct mlx5_cache_ent *ent = filp->private_data;
        char lbuf[20];
        int err;

        if (*pos)
                return 0;

        err = snprintf(lbuf, sizeof(lbuf), "%d\n", ent->size);
        if (err < 0)
                return err;

        if (copy_to_user(buf, lbuf, err))
                return -EPERM;

        *pos += err;

        return err;
}

static const struct file_operations size_fops = {
        .owner  = THIS_MODULE,
        .open   = simple_open,
        .write  = size_write,
        .read   = size_read,
};

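/*
 * debugfs "limit" attribute: the low-water mark for the entry.  Raising it
 * above the current population refills the entry to twice the new limit;
 * the limit may not exceed the entry's current size.
 */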
static ssize_t limit_write(struct file *filp, const char __user *buf,
                           size_t count, loff_t *pos)
{
        struct mlx5_cache_ent *ent = filp->private_data;
        struct mlx5_ib_dev *dev = ent->dev;
        char lbuf[20];
        u32 var;
        int err;
        int c;

        if (copy_from_user(lbuf, buf, sizeof(lbuf)))
                return -EPERM;

        c = order2idx(dev, ent->order);
        lbuf[sizeof(lbuf) - 1] = 0;

        if (sscanf(lbuf, "%u", &var) != 1)
                return -EINVAL;

        if (var > ent->size)
                return -EINVAL;

        ent->limit = var;

        if (ent->cur < ent->limit) {
                err = add_keys(dev, c, 2 * ent->limit - ent->cur);
                if (err)
                        return err;
        }

        return count;
}

static ssize_t limit_read(struct file *filp, char __user *buf, size_t count,
                          loff_t *pos)
{
        struct mlx5_cache_ent *ent = filp->private_data;
        char lbuf[20];
        int err;

        if (*pos)
                return 0;

        err = snprintf(lbuf, sizeof(lbuf), "%d\n", ent->limit);
        if (err < 0)
                return err;

        if (copy_to_user(buf, lbuf, err))
                return -EPERM;

        *pos += err;

        return err;
}

static const struct file_operations limit_fops = {
        .owner  = THIS_MODULE,
        .open   = simple_open,
        .write  = limit_write,
        .read   = limit_read,
};

static int someone_adding(struct mlx5_mr_cache *cache)
{
        int i;

        for (i = 0; i < MAX_MR_CACHE_ENTRIES; i++) {
                if (cache->ent[i].cur < cache->ent[i].limit)
                        return 1;
        }

        return 0;
}

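/*
 * Background maintenance of one cache entry: grow it one mkey at a time while
 * it holds fewer than 2 * limit, and trim it one at a time when it holds more.
 * Trimming is deferred for a minute after the last addition, or while any
 * entry is still below its limit.
 */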
static void __cache_work_func(struct mlx5_cache_ent *ent)
{
        struct mlx5_ib_dev *dev = ent->dev;
        struct mlx5_mr_cache *cache = &dev->cache;
        int i = order2idx(dev, ent->order);

        if (cache->stopped)
                return;

        ent = &dev->cache.ent[i];
        if (ent->cur < 2 * ent->limit) {
                add_keys(dev, i, 1);
                if (ent->cur < 2 * ent->limit)
                        queue_work(cache->wq, &ent->work);
        } else if (ent->cur > 2 * ent->limit) {
                if (!someone_adding(cache) &&
                    time_after(jiffies, cache->last_add + 60 * HZ)) {
                        remove_keys(dev, i, 1);
                        if (ent->cur > ent->limit)
                                queue_work(cache->wq, &ent->work);
                } else {
                        queue_delayed_work(cache->wq, &ent->dwork, 60 * HZ);
                }
        }
}

static void delayed_cache_work_func(struct work_struct *work)
{
        struct mlx5_cache_ent *ent;

        ent = container_of(work, struct mlx5_cache_ent, dwork.work);
        __cache_work_func(ent);
}

static void cache_work_func(struct work_struct *work)
{
        struct mlx5_cache_ent *ent;

        ent = container_of(work, struct mlx5_cache_ent, work);
        __cache_work_func(ent);
}

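/*
 * Take an MR from the cache: scan entries from the requested order upwards
 * and use the first one that has a free mkey.  The worker is kicked for any
 * entry found empty, and for the entry we took from if it dropped below its
 * limit.  A miss is recorded against the requested entry when nothing
 * suitable is cached.
 */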
static struct mlx5_ib_mr *alloc_cached_mr(struct mlx5_ib_dev *dev, int order)
{
        struct mlx5_mr_cache *cache = &dev->cache;
        struct mlx5_ib_mr *mr = NULL;
        struct mlx5_cache_ent *ent;
        int c;
        int i;

        c = order2idx(dev, order);
        if (c < 0 || c >= MAX_MR_CACHE_ENTRIES) {
                mlx5_ib_warn(dev, "order %d, cache index %d\n", order, c);
                return NULL;
        }

        for (i = c; i < MAX_MR_CACHE_ENTRIES; i++) {
                ent = &cache->ent[i];

                mlx5_ib_dbg(dev, "order %d, cache index %d\n", ent->order, i);

                spin_lock(&ent->lock);
                if (!list_empty(&ent->head)) {
                        mr = list_first_entry(&ent->head, struct mlx5_ib_mr,
                                              list);
                        list_del(&mr->list);
                        ent->cur--;
                        spin_unlock(&ent->lock);
                        if (ent->cur < ent->limit)
                                queue_work(cache->wq, &ent->work);
                        break;
                }
                spin_unlock(&ent->lock);

                queue_work(cache->wq, &ent->work);

                if (mr)
                        break;
        }

        if (!mr)
                cache->ent[c].miss++;

        return mr;
}

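/*
 * Return an MR to its cache entry, and let the worker shrink the entry if it
 * now holds more than 2 * limit mkeys.
 */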
static void free_cached_mr(struct mlx5_ib_dev *dev, struct mlx5_ib_mr *mr)
{
        struct mlx5_mr_cache *cache = &dev->cache;
        struct mlx5_cache_ent *ent;
        int shrink = 0;
        int c;

        c = order2idx(dev, mr->order);
        if (c < 0 || c >= MAX_MR_CACHE_ENTRIES) {
                mlx5_ib_warn(dev, "order %d, cache index %d\n", mr->order, c);
                return;
        }
        ent = &cache->ent[c];
        spin_lock(&ent->lock);
        list_add_tail(&mr->list, &ent->head);
        ent->cur++;
        if (ent->cur > 2 * ent->limit)
                shrink = 1;
        spin_unlock(&ent->lock);

        if (shrink)
                queue_work(cache->wq, &ent->work);
}

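/* Drain cache entry 'c' completely, destroying every mkey it holds. */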
static void clean_keys(struct mlx5_ib_dev *dev, int c)
{
        struct device *ddev = dev->ib_dev.dma_device;
        struct mlx5_mr_cache *cache = &dev->cache;
        struct mlx5_cache_ent *ent = &cache->ent[c];
        struct mlx5_ib_mr *mr;
        int size;
        int err;

        while (1) {
                spin_lock(&ent->lock);
                if (list_empty(&ent->head)) {
                        spin_unlock(&ent->lock);
                        return;
                }
                mr = list_first_entry(&ent->head, struct mlx5_ib_mr, list);
                list_del(&mr->list);
                ent->cur--;
                ent->size--;
                spin_unlock(&ent->lock);
                err = mlx5_core_destroy_mkey(&dev->mdev, &mr->mmr);
                if (err) {
                        mlx5_ib_warn(dev, "failed destroy mkey\n");
                } else {
                        size = ALIGN(sizeof(u64) * (1 << mr->order), 0x40);
                        dma_unmap_single(ddev, mr->dma, size, DMA_TO_DEVICE);
                        kfree(mr->pas);
                        kfree(mr);
                }
        }
}

static int mlx5_mr_cache_debugfs_init(struct mlx5_ib_dev *dev)
{
        struct mlx5_mr_cache *cache = &dev->cache;
        struct mlx5_cache_ent *ent;
        int i;

        if (!mlx5_debugfs_root)
                return 0;

        cache->root = debugfs_create_dir("mr_cache", dev->mdev.priv.dbg_root);
        if (!cache->root)
                return -ENOMEM;

        for (i = 0; i < MAX_MR_CACHE_ENTRIES; i++) {
                ent = &cache->ent[i];
                sprintf(ent->name, "%d", ent->order);
                ent->dir = debugfs_create_dir(ent->name, cache->root);
                if (!ent->dir)
                        return -ENOMEM;

                ent->fsize = debugfs_create_file("size", 0600, ent->dir, ent,
                                                 &size_fops);
                if (!ent->fsize)
                        return -ENOMEM;

                ent->flimit = debugfs_create_file("limit", 0600, ent->dir, ent,
                                                  &limit_fops);
                if (!ent->flimit)
                        return -ENOMEM;

                ent->fcur = debugfs_create_u32("cur", 0400, ent->dir,
                                               &ent->cur);
                if (!ent->fcur)
                        return -ENOMEM;

                ent->fmiss = debugfs_create_u32("miss", 0600, ent->dir,
                                                &ent->miss);
                if (!ent->fmiss)
                        return -ENOMEM;
        }

        return 0;
}

static void mlx5_mr_cache_debugfs_cleanup(struct mlx5_ib_dev *dev)
{
        if (!mlx5_debugfs_root)
                return;

        debugfs_remove_recursive(dev->cache.root);
}

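/*
 * Set up the MR cache: entry i caches mkeys that cover 2^(i + 2) pages.  Each
 * entry's limit is taken from the device profile when MLX5_PROF_MASK_MR_CACHE
 * is set, and its work item is queued immediately so the worker starts
 * filling it.
 */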
int mlx5_mr_cache_init(struct mlx5_ib_dev *dev)
{
        struct mlx5_mr_cache *cache = &dev->cache;
        struct mlx5_cache_ent *ent;
        int limit;
        int size;
        int err;
        int i;

        cache->wq = create_singlethread_workqueue("mkey_cache");
        if (!cache->wq) {
                mlx5_ib_warn(dev, "failed to create work queue\n");
                return -ENOMEM;
        }

        for (i = 0; i < MAX_MR_CACHE_ENTRIES; i++) {
                INIT_LIST_HEAD(&cache->ent[i].head);
                spin_lock_init(&cache->ent[i].lock);

                ent = &cache->ent[i];
                INIT_LIST_HEAD(&ent->head);
                spin_lock_init(&ent->lock);
                ent->order = i + 2;
                ent->dev = dev;

                if (dev->mdev.profile->mask & MLX5_PROF_MASK_MR_CACHE) {
                        size = dev->mdev.profile->mr_cache[i].size;
                        limit = dev->mdev.profile->mr_cache[i].limit;
                } else {
                        size = DEF_CACHE_SIZE;
                        limit = 0;
                }
                INIT_WORK(&ent->work, cache_work_func);
                INIT_DELAYED_WORK(&ent->dwork, delayed_cache_work_func);
                ent->limit = limit;
                queue_work(cache->wq, &ent->work);
        }

        err = mlx5_mr_cache_debugfs_init(dev);
        if (err)
                mlx5_ib_warn(dev, "cache debugfs failure\n");

        return 0;
}

int mlx5_mr_cache_cleanup(struct mlx5_ib_dev *dev)
{
        int i;

        dev->cache.stopped = 1;
        destroy_workqueue(dev->cache.wq);

        mlx5_mr_cache_debugfs_cleanup(dev);

        for (i = 0; i < MAX_MR_CACHE_ENTRIES; i++)
                clean_keys(dev, i);

        return 0;
}

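/*
 * Register a DMA MR that covers the whole address space: PA access mode with
 * MLX5_MKEY_LEN64, so no translation table is needed.
 */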
struct ib_mr *mlx5_ib_get_dma_mr(struct ib_pd *pd, int acc)
{
        struct mlx5_ib_dev *dev = to_mdev(pd->device);
        struct mlx5_core_dev *mdev = &dev->mdev;
        struct mlx5_create_mkey_mbox_in *in;
        struct mlx5_mkey_seg *seg;
        struct mlx5_ib_mr *mr;
        int err;

        mr = kzalloc(sizeof(*mr), GFP_KERNEL);
        if (!mr)
                return ERR_PTR(-ENOMEM);

        in = kzalloc(sizeof(*in), GFP_KERNEL);
        if (!in) {
                err = -ENOMEM;
                goto err_free;
        }

        seg = &in->seg;
        seg->flags = convert_access(acc) | MLX5_ACCESS_MODE_PA;
        seg->flags_pd = cpu_to_be32(to_mpd(pd)->pdn | MLX5_MKEY_LEN64);
        seg->qpn_mkey7_0 = cpu_to_be32(0xffffff << 8);
        seg->start_addr = 0;

        err = mlx5_core_create_mkey(mdev, &mr->mmr, in, sizeof(*in));
        if (err)
                goto err_in;

        kfree(in);
        mr->ibmr.lkey = mr->mmr.key;
        mr->ibmr.rkey = mr->mmr.key;
        mr->umem = NULL;

        return &mr->ibmr;

err_in:
        kfree(in);

err_free:
        kfree(mr);

        return ERR_PTR(err);
}

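/*
 * Number of 16-byte octowords needed for the translation table of a region:
 * each octoword holds two 64-bit page addresses.
 */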
static int get_octo_len(u64 addr, u64 len, int page_size)
{
        u64 offset;
        int npages;

        offset = addr & (page_size - 1);
        npages = ALIGN(len + offset, page_size) >> ilog2(page_size);
        return (npages + 1) / 2;
}

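/*
 * Regions of at most 2^17 pages are registered through the UMR QP and the MR
 * cache; anything larger falls back to a firmware create_mkey command.
 */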
static int use_umr(int order)
{
        return order <= 17;
}

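/*
 * Build an MLX5_IB_WR_UMR registration work request.  The ib_send_wr fast_reg
 * fields are reused to carry the UMR parameters; note that the PD pointer is
 * smuggled through the fast_reg.page_list field.
 */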
static void prep_umr_reg_wqe(struct ib_pd *pd, struct ib_send_wr *wr,
                             struct ib_sge *sg, u64 dma, int n, u32 key,
                             int page_shift, u64 virt_addr, u64 len,
                             int access_flags)
{
        struct mlx5_ib_dev *dev = to_mdev(pd->device);
        struct ib_mr *mr = dev->umrc.mr;

        sg->addr = dma;
        sg->length = ALIGN(sizeof(u64) * n, 64);
        sg->lkey = mr->lkey;

        wr->next = NULL;
        wr->send_flags = 0;
        wr->sg_list = sg;
        if (n)
                wr->num_sge = 1;
        else
                wr->num_sge = 0;

        wr->opcode = MLX5_IB_WR_UMR;
        wr->wr.fast_reg.page_list_len = n;
        wr->wr.fast_reg.page_shift = page_shift;
        wr->wr.fast_reg.rkey = key;
        wr->wr.fast_reg.iova_start = virt_addr;
        wr->wr.fast_reg.length = len;
        wr->wr.fast_reg.access_flags = access_flags;
        wr->wr.fast_reg.page_list = (struct ib_fast_reg_page_list *)pd;
}

static void prep_umr_unreg_wqe(struct mlx5_ib_dev *dev,
                               struct ib_send_wr *wr, u32 key)
{
        wr->send_flags = MLX5_IB_SEND_UMR_UNREG;
        wr->opcode = MLX5_IB_WR_UMR;
        wr->wr.fast_reg.rkey = key;
}

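/*
 * Completion handler for the UMR CQ: for every completed work request, record
 * the status and wake up the thread waiting in reg_umr()/unreg_umr().
 */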
void mlx5_umr_cq_handler(struct ib_cq *cq, void *cq_context)
{
        struct mlx5_ib_mr *mr;
        struct ib_wc wc;
        int err;

        while (1) {
                err = ib_poll_cq(cq, 1, &wc);
                if (err < 0) {
                        pr_warn("poll cq error %d\n", err);
                        return;
                }
                if (err == 0)
                        break;

                mr = (struct mlx5_ib_mr *)(unsigned long)wc.wr_id;
                mr->status = wc.status;
                complete(&mr->done);
        }
        ib_req_notify_cq(cq, IB_CQ_NEXT_COMP);
}

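/*
 * Register a user MR through the UMR QP: take a pre-created mkey from the
 * cache (topping the cache up and retrying a few times if it is empty),
 * populate its page list, post a UMR work request and wait for its
 * completion.  Polling is serialized by umrc->sem so completions are not
 * stolen by another registration.
 */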
static struct mlx5_ib_mr *reg_umr(struct ib_pd *pd, struct ib_umem *umem,
                                  u64 virt_addr, u64 len, int npages,
                                  int page_shift, int order, int access_flags)
{
        struct mlx5_ib_dev *dev = to_mdev(pd->device);
        struct umr_common *umrc = &dev->umrc;
        struct ib_send_wr wr, *bad;
        struct mlx5_ib_mr *mr;
        struct ib_sge sg;
        int err;
        int i;

        for (i = 0; i < 10; i++) {
                mr = alloc_cached_mr(dev, order);
                if (mr)
                        break;

                err = add_keys(dev, order2idx(dev, order), 1);
                if (err) {
                        mlx5_ib_warn(dev, "add_keys failed\n");
                        break;
                }
        }

        if (!mr)
                return ERR_PTR(-EAGAIN);

        mlx5_ib_populate_pas(dev, umem, page_shift, mr_align(mr->pas, 0x40), 1);

        memset(&wr, 0, sizeof(wr));
        wr.wr_id = (u64)(unsigned long)mr;
        prep_umr_reg_wqe(pd, &wr, &sg, mr->dma, npages, mr->mmr.key,
                         page_shift, virt_addr, len, access_flags);

        /* We serialize polls so one process does not kidnap another's
         * completion. This is not a problem since wr is completed in
         * around 1 usec
         */
        down(&umrc->sem);
        init_completion(&mr->done);
        err = ib_post_send(umrc->qp, &wr, &bad);
        if (err) {
                mlx5_ib_warn(dev, "post send failed, err %d\n", err);
                up(&umrc->sem);
                goto error;
        }
        wait_for_completion(&mr->done);
        up(&umrc->sem);

        if (mr->status != IB_WC_SUCCESS) {
                mlx5_ib_warn(dev, "reg umr failed\n");
                err = -EFAULT;
                goto error;
        }

        return mr;

error:
        free_cached_mr(dev, mr);
        return ERR_PTR(err);
}

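/*
 * Fallback registration path: create the mkey directly with a firmware
 * command, passing the full page list inline in the mailbox.  Used when the
 * region is too big for UMR or the cache could not supply an mkey.
 */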
static struct mlx5_ib_mr *reg_create(struct ib_pd *pd, u64 virt_addr,
                                     u64 length, struct ib_umem *umem,
                                     int npages, int page_shift,
                                     int access_flags)
{
        struct mlx5_ib_dev *dev = to_mdev(pd->device);
        struct mlx5_create_mkey_mbox_in *in;
        struct mlx5_ib_mr *mr;
        int inlen;
        int err;

        mr = kzalloc(sizeof(*mr), GFP_KERNEL);
        if (!mr)
                return ERR_PTR(-ENOMEM);

        inlen = sizeof(*in) + sizeof(*in->pas) * ((npages + 1) / 2) * 2;
        in = mlx5_vzalloc(inlen);
        if (!in) {
                err = -ENOMEM;
                goto err_1;
        }
        mlx5_ib_populate_pas(dev, umem, page_shift, in->pas, 0);

        in->seg.flags = convert_access(access_flags) |
                MLX5_ACCESS_MODE_MTT;
        in->seg.flags_pd = cpu_to_be32(to_mpd(pd)->pdn);
        in->seg.start_addr = cpu_to_be64(virt_addr);
        in->seg.len = cpu_to_be64(length);
        in->seg.bsfs_octo_size = 0;
        in->seg.xlt_oct_size = cpu_to_be32(get_octo_len(virt_addr, length,
                                                        1 << page_shift));
        in->seg.log2_page_size = page_shift;
        in->seg.qpn_mkey7_0 = cpu_to_be32(0xffffff << 8);
        in->xlat_oct_act_size = cpu_to_be32(get_octo_len(virt_addr, length,
                                                         1 << page_shift));
        err = mlx5_core_create_mkey(&dev->mdev, &mr->mmr, in, inlen);
        if (err) {
                mlx5_ib_warn(dev, "create mkey failed\n");
                goto err_2;
        }
        mr->umem = umem;
        mlx5_vfree(in);

        mlx5_ib_dbg(dev, "mkey = 0x%x\n", mr->mmr.key);

        return mr;

err_2:
        mlx5_vfree(in);

err_1:
        kfree(mr);

        return ERR_PTR(err);
}

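/*
 * Register a user memory region: pin the pages, then try the fast UMR/cache
 * path for regions it can handle and fall back to a firmware create_mkey
 * command otherwise (or when the cache is empty).
 */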
struct ib_mr *mlx5_ib_reg_user_mr(struct ib_pd *pd, u64 start, u64 length,
                                  u64 virt_addr, int access_flags,
                                  struct ib_udata *udata)
{
        struct mlx5_ib_dev *dev = to_mdev(pd->device);
        struct mlx5_ib_mr *mr = NULL;
        struct ib_umem *umem;
        int page_shift;
        int npages;
        int ncont;
        int order;
        int err;

        mlx5_ib_dbg(dev, "start 0x%llx, virt_addr 0x%llx, length 0x%llx\n",
                    start, virt_addr, length);
        umem = ib_umem_get(pd->uobject->context, start, length, access_flags,
                           0);
        if (IS_ERR(umem)) {
                mlx5_ib_dbg(dev, "umem get failed\n");
                return (void *)umem;
        }

        mlx5_ib_cont_pages(umem, start, &npages, &page_shift, &ncont, &order);
        if (!npages) {
                mlx5_ib_warn(dev, "avoid zero region\n");
                err = -EINVAL;
                goto error;
        }

        mlx5_ib_dbg(dev, "npages %d, ncont %d, order %d, page_shift %d\n",
                    npages, ncont, order, page_shift);

        if (use_umr(order)) {
                mr = reg_umr(pd, umem, virt_addr, length, ncont, page_shift,
                             order, access_flags);
                if (PTR_ERR(mr) == -EAGAIN) {
                        mlx5_ib_dbg(dev, "cache empty for order %d", order);
                        mr = NULL;
                }
        }

        if (!mr)
                mr = reg_create(pd, virt_addr, length, umem, ncont, page_shift,
                                access_flags);

        if (IS_ERR(mr)) {
                err = PTR_ERR(mr);
                goto error;
        }

        mlx5_ib_dbg(dev, "mkey 0x%x\n", mr->mmr.key);

        mr->umem = umem;
        mr->npages = npages;
        spin_lock(&dev->mr_lock);
        dev->mdev.priv.reg_pages += npages;
        spin_unlock(&dev->mr_lock);
        mr->ibmr.lkey = mr->mmr.key;
        mr->ibmr.rkey = mr->mmr.key;

        return &mr->ibmr;

error:
        ib_umem_release(umem);
        return ERR_PTR(err);
}

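/*
 * Invalidate a cached mkey through the UMR QP so it can be returned to the
 * cache instead of being destroyed.
 */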
static int unreg_umr(struct mlx5_ib_dev *dev, struct mlx5_ib_mr *mr)
{
        struct umr_common *umrc = &dev->umrc;
        struct ib_send_wr wr, *bad;
        int err;

        memset(&wr, 0, sizeof(wr));
        wr.wr_id = (u64)(unsigned long)mr;
        prep_umr_unreg_wqe(dev, &wr, mr->mmr.key);

        down(&umrc->sem);
        init_completion(&mr->done);
        err = ib_post_send(umrc->qp, &wr, &bad);
        if (err) {
                up(&umrc->sem);
                mlx5_ib_dbg(dev, "err %d\n", err);
                goto error;
        }
        wait_for_completion(&mr->done);
        up(&umrc->sem);
        if (mr->status != IB_WC_SUCCESS) {
                mlx5_ib_warn(dev, "unreg umr failed\n");
                err = -EFAULT;
                goto error;
        }
        return 0;

error:
        return err;
}

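/*
 * Deregister an MR.  Mkeys that came from the cache are invalidated with a
 * UMR and put back into their cache entry rather than destroyed and freed.
 */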
int mlx5_ib_dereg_mr(struct ib_mr *ibmr)
{
        struct mlx5_ib_dev *dev = to_mdev(ibmr->device);
        struct mlx5_ib_mr *mr = to_mmr(ibmr);
        struct ib_umem *umem = mr->umem;
        int npages = mr->npages;
        int umred = mr->umred;
        int err;

        if (!umred) {
                err = mlx5_core_destroy_mkey(&dev->mdev, &mr->mmr);
                if (err) {
                        mlx5_ib_warn(dev, "failed to destroy mkey 0x%x (%d)\n",
                                     mr->mmr.key, err);
                        return err;
                }
        } else {
                err = unreg_umr(dev, mr);
                if (err) {
                        mlx5_ib_warn(dev, "failed unregister\n");
                        return err;
                }
                free_cached_mr(dev, mr);
        }

        if (umem) {
                ib_umem_release(umem);
                spin_lock(&dev->mr_lock);
                dev->mdev.priv.reg_pages -= npages;
                spin_unlock(&dev->mr_lock);
        }

        if (!umred)
                kfree(mr);

        return 0;
}

struct ib_mr *mlx5_ib_alloc_fast_reg_mr(struct ib_pd *pd,
                                        int max_page_list_len)
{
        struct mlx5_ib_dev *dev = to_mdev(pd->device);
        struct mlx5_create_mkey_mbox_in *in;
        struct mlx5_ib_mr *mr;
        int err;

        mr = kzalloc(sizeof(*mr), GFP_KERNEL);
        if (!mr)
                return ERR_PTR(-ENOMEM);

        in = kzalloc(sizeof(*in), GFP_KERNEL);
        if (!in) {
                err = -ENOMEM;
                goto err_free;
        }

        in->seg.status = 1 << 6; /* free */
        in->seg.xlt_oct_size = cpu_to_be32((max_page_list_len + 1) / 2);
        in->seg.qpn_mkey7_0 = cpu_to_be32(0xffffff << 8);
        in->seg.flags = MLX5_PERM_UMR_EN | MLX5_ACCESS_MODE_MTT;
        in->seg.flags_pd = cpu_to_be32(to_mpd(pd)->pdn);
        /* TBD: not needed - issue 197292 */
        in->seg.log2_page_size = PAGE_SHIFT;

        err = mlx5_core_create_mkey(&dev->mdev, &mr->mmr, in, sizeof(*in));
        kfree(in);
        if (err)
                goto err_free;

        mr->ibmr.lkey = mr->mmr.key;
        mr->ibmr.rkey = mr->mmr.key;
        mr->umem = NULL;

        return &mr->ibmr;

err_free:
        kfree(mr);
        return ERR_PTR(err);
}

struct ib_fast_reg_page_list *mlx5_ib_alloc_fast_reg_page_list(struct ib_device *ibdev,
                                                               int page_list_len)
{
        struct mlx5_ib_fast_reg_page_list *mfrpl;
        int size = page_list_len * sizeof(u64);

        mfrpl = kmalloc(sizeof(*mfrpl), GFP_KERNEL);
        if (!mfrpl)
                return ERR_PTR(-ENOMEM);

        mfrpl->ibfrpl.page_list = kmalloc(size, GFP_KERNEL);
        if (!mfrpl->ibfrpl.page_list)
                goto err_free;

        mfrpl->mapped_page_list = dma_alloc_coherent(ibdev->dma_device,
                                                     size, &mfrpl->map,
                                                     GFP_KERNEL);
        if (!mfrpl->mapped_page_list)
                goto err_free;

        WARN_ON(mfrpl->map & 0x3f);

        return &mfrpl->ibfrpl;

err_free:
        kfree(mfrpl->ibfrpl.page_list);
        kfree(mfrpl);
        return ERR_PTR(-ENOMEM);
}

void mlx5_ib_free_fast_reg_page_list(struct ib_fast_reg_page_list *page_list)
{
        struct mlx5_ib_fast_reg_page_list *mfrpl = to_mfrpl(page_list);
        struct mlx5_ib_dev *dev = to_mdev(page_list->device);
        int size = page_list->max_page_list_len * sizeof(u64);

        dma_free_coherent(&dev->mdev.pdev->dev, size, mfrpl->mapped_page_list,
                          mfrpl->map);
        kfree(mfrpl->ibfrpl.page_list);
        kfree(mfrpl);
}