Adit Ranadive | 29c8d9e | 2016-10-02 19:10:22 -0700 | [diff] [blame^] | 1 | /* |
| 2 | * Copyright (c) 2012-2016 VMware, Inc. All rights reserved. |
| 3 | * |
| 4 | * This program is free software; you can redistribute it and/or |
| 5 | * modify it under the terms of EITHER the GNU General Public License |
| 6 | * version 2 as published by the Free Software Foundation or the BSD |
| 7 | * 2-Clause License. This program is distributed in the hope that it |
| 8 | * will be useful, but WITHOUT ANY WARRANTY; WITHOUT EVEN THE IMPLIED |
| 9 | * WARRANTY OF MERCHANTABILITY OR FITNESS FOR A PARTICULAR PURPOSE. |
| 10 | * See the GNU General Public License version 2 for more details at |
| 11 | * http://www.gnu.org/licenses/old-licenses/gpl-2.0.en.html. |
| 12 | * |
| 13 | * You should have received a copy of the GNU General Public License |
| 14 | * along with this program available in the file COPYING in the main |
| 15 | * directory of this source tree. |
| 16 | * |
| 17 | * The BSD 2-Clause License |
| 18 | * |
| 19 | * Redistribution and use in source and binary forms, with or |
| 20 | * without modification, are permitted provided that the following |
| 21 | * conditions are met: |
| 22 | * |
| 23 | * - Redistributions of source code must retain the above |
| 24 | * copyright notice, this list of conditions and the following |
| 25 | * disclaimer. |
| 26 | * |
| 27 | * - Redistributions in binary form must reproduce the above |
| 28 | * copyright notice, this list of conditions and the following |
| 29 | * disclaimer in the documentation and/or other materials |
| 30 | * provided with the distribution. |
| 31 | * |
| 32 | * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS |
| 33 | * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT |
| 34 | * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS |
| 35 | * FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE |
| 36 | * COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, |
| 37 | * INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES |
| 38 | * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR |
| 39 | * SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) |
| 40 | * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, |
| 41 | * STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) |
| 42 | * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED |
| 43 | * OF THE POSSIBILITY OF SUCH DAMAGE. |
| 44 | */ |
| 45 | |
| 46 | #include <asm/page.h> |
| 47 | #include <linux/io.h> |
| 48 | #include <linux/wait.h> |
| 49 | #include <rdma/ib_addr.h> |
| 50 | #include <rdma/ib_smi.h> |
| 51 | #include <rdma/ib_user_verbs.h> |
| 52 | |
| 53 | #include "pvrdma.h" |
| 54 | |
| 55 | /** |
| 56 | * pvrdma_req_notify_cq - request notification for a completion queue |
| 57 | * @ibcq: the completion queue |
| 58 | * @notify_flags: notification flags |
| 59 | * |
| 60 | * @return: 0 for success. |
| 61 | */ |
| 62 | int pvrdma_req_notify_cq(struct ib_cq *ibcq, |
| 63 | enum ib_cq_notify_flags notify_flags) |
| 64 | { |
| 65 | struct pvrdma_dev *dev = to_vdev(ibcq->device); |
| 66 | struct pvrdma_cq *cq = to_vcq(ibcq); |
| 67 | u32 val = cq->cq_handle; |
| 68 | |
| 69 | val |= (notify_flags & IB_CQ_SOLICITED_MASK) == IB_CQ_SOLICITED ? |
| 70 | PVRDMA_UAR_CQ_ARM_SOL : PVRDMA_UAR_CQ_ARM; |
| 71 | |
| 72 | pvrdma_write_uar_cq(dev, val); |
| 73 | |
| 74 | return 0; |
| 75 | } |
| 76 | |
| 77 | /** |
| 78 | * pvrdma_create_cq - create completion queue |
| 79 | * @ibdev: the device |
| 80 | * @attr: completion queue attributes |
| 81 | * @context: user context |
| 82 | * @udata: user data |
| 83 | * |
| 84 | * @return: ib_cq completion queue pointer on success, |
| 85 | * otherwise returns negative errno. |
| 86 | */ |
| 87 | struct ib_cq *pvrdma_create_cq(struct ib_device *ibdev, |
| 88 | const struct ib_cq_init_attr *attr, |
| 89 | struct ib_ucontext *context, |
| 90 | struct ib_udata *udata) |
| 91 | { |
| 92 | int entries = attr->cqe; |
| 93 | struct pvrdma_dev *dev = to_vdev(ibdev); |
| 94 | struct pvrdma_cq *cq; |
| 95 | int ret; |
| 96 | int npages; |
| 97 | unsigned long flags; |
| 98 | union pvrdma_cmd_req req; |
| 99 | union pvrdma_cmd_resp rsp; |
| 100 | struct pvrdma_cmd_create_cq *cmd = &req.create_cq; |
| 101 | struct pvrdma_cmd_create_cq_resp *resp = &rsp.create_cq_resp; |
| 102 | struct pvrdma_create_cq ucmd; |
| 103 | |
| 104 | BUILD_BUG_ON(sizeof(struct pvrdma_cqe) != 64); |
| 105 | |
| 106 | entries = roundup_pow_of_two(entries); |
| 107 | if (entries < 1 || entries > dev->dsr->caps.max_cqe) |
| 108 | return ERR_PTR(-EINVAL); |
| 109 | |
| 110 | if (!atomic_add_unless(&dev->num_cqs, 1, dev->dsr->caps.max_cq)) |
| 111 | return ERR_PTR(-ENOMEM); |
| 112 | |
| 113 | cq = kzalloc(sizeof(*cq), GFP_KERNEL); |
| 114 | if (!cq) { |
| 115 | atomic_dec(&dev->num_cqs); |
| 116 | return ERR_PTR(-ENOMEM); |
| 117 | } |
| 118 | |
| 119 | cq->ibcq.cqe = entries; |
| 120 | |
| 121 | if (context) { |
| 122 | if (ib_copy_from_udata(&ucmd, udata, sizeof(ucmd))) { |
| 123 | ret = -EFAULT; |
| 124 | goto err_cq; |
| 125 | } |
| 126 | |
| 127 | cq->umem = ib_umem_get(context, ucmd.buf_addr, ucmd.buf_size, |
| 128 | IB_ACCESS_LOCAL_WRITE, 1); |
| 129 | if (IS_ERR(cq->umem)) { |
| 130 | ret = PTR_ERR(cq->umem); |
| 131 | goto err_cq; |
| 132 | } |
| 133 | |
| 134 | npages = ib_umem_page_count(cq->umem); |
| 135 | } else { |
| 136 | cq->is_kernel = true; |
| 137 | |
| 138 | /* One extra page for shared ring state */ |
| 139 | npages = 1 + (entries * sizeof(struct pvrdma_cqe) + |
| 140 | PAGE_SIZE - 1) / PAGE_SIZE; |
| 141 | |
| 142 | /* Skip header page. */ |
| 143 | cq->offset = PAGE_SIZE; |
| 144 | } |
| 145 | |
| 146 | if (npages < 0 || npages > PVRDMA_PAGE_DIR_MAX_PAGES) { |
| 147 | dev_warn(&dev->pdev->dev, |
| 148 | "overflow pages in completion queue\n"); |
| 149 | ret = -EINVAL; |
| 150 | goto err_umem; |
| 151 | } |
| 152 | |
| 153 | ret = pvrdma_page_dir_init(dev, &cq->pdir, npages, cq->is_kernel); |
| 154 | if (ret) { |
| 155 | dev_warn(&dev->pdev->dev, |
| 156 | "could not allocate page directory\n"); |
| 157 | goto err_umem; |
| 158 | } |
| 159 | |
| 160 | /* Ring state is always the first page. Set in library for user cq. */ |
| 161 | if (cq->is_kernel) |
| 162 | cq->ring_state = cq->pdir.pages[0]; |
| 163 | else |
| 164 | pvrdma_page_dir_insert_umem(&cq->pdir, cq->umem, 0); |
| 165 | |
| 166 | atomic_set(&cq->refcnt, 1); |
| 167 | init_waitqueue_head(&cq->wait); |
| 168 | spin_lock_init(&cq->cq_lock); |
| 169 | |
| 170 | memset(cmd, 0, sizeof(*cmd)); |
| 171 | cmd->hdr.cmd = PVRDMA_CMD_CREATE_CQ; |
| 172 | cmd->nchunks = npages; |
| 173 | cmd->ctx_handle = (context) ? |
| 174 | (u64)to_vucontext(context)->ctx_handle : 0; |
| 175 | cmd->cqe = entries; |
| 176 | cmd->pdir_dma = cq->pdir.dir_dma; |
| 177 | ret = pvrdma_cmd_post(dev, &req, &rsp, PVRDMA_CMD_CREATE_CQ_RESP); |
| 178 | if (ret < 0) { |
| 179 | dev_warn(&dev->pdev->dev, |
| 180 | "could not create completion queue, error: %d\n", ret); |
| 181 | goto err_page_dir; |
| 182 | } |
| 183 | |
| 184 | cq->ibcq.cqe = resp->cqe; |
| 185 | cq->cq_handle = resp->cq_handle; |
| 186 | spin_lock_irqsave(&dev->cq_tbl_lock, flags); |
| 187 | dev->cq_tbl[cq->cq_handle % dev->dsr->caps.max_cq] = cq; |
| 188 | spin_unlock_irqrestore(&dev->cq_tbl_lock, flags); |
| 189 | |
| 190 | if (context) { |
| 191 | cq->uar = &(to_vucontext(context)->uar); |
| 192 | |
| 193 | /* Copy udata back. */ |
| 194 | if (ib_copy_to_udata(udata, &cq->cq_handle, sizeof(__u32))) { |
| 195 | dev_warn(&dev->pdev->dev, |
| 196 | "failed to copy back udata\n"); |
| 197 | pvrdma_destroy_cq(&cq->ibcq); |
| 198 | return ERR_PTR(-EINVAL); |
| 199 | } |
| 200 | } |
| 201 | |
| 202 | return &cq->ibcq; |
| 203 | |
| 204 | err_page_dir: |
| 205 | pvrdma_page_dir_cleanup(dev, &cq->pdir); |
| 206 | err_umem: |
| 207 | if (context) |
| 208 | ib_umem_release(cq->umem); |
| 209 | err_cq: |
| 210 | atomic_dec(&dev->num_cqs); |
| 211 | kfree(cq); |
| 212 | |
| 213 | return ERR_PTR(ret); |
| 214 | } |
| 215 | |
| 216 | static void pvrdma_free_cq(struct pvrdma_dev *dev, struct pvrdma_cq *cq) |
| 217 | { |
| 218 | atomic_dec(&cq->refcnt); |
| 219 | wait_event(cq->wait, !atomic_read(&cq->refcnt)); |
| 220 | |
| 221 | if (!cq->is_kernel) |
| 222 | ib_umem_release(cq->umem); |
| 223 | |
| 224 | pvrdma_page_dir_cleanup(dev, &cq->pdir); |
| 225 | kfree(cq); |
| 226 | } |
| 227 | |
| 228 | /** |
| 229 | * pvrdma_destroy_cq - destroy completion queue |
| 230 | * @cq: the completion queue to destroy. |
| 231 | * |
| 232 | * @return: 0 for success. |
| 233 | */ |
| 234 | int pvrdma_destroy_cq(struct ib_cq *cq) |
| 235 | { |
| 236 | struct pvrdma_cq *vcq = to_vcq(cq); |
| 237 | union pvrdma_cmd_req req; |
| 238 | struct pvrdma_cmd_destroy_cq *cmd = &req.destroy_cq; |
| 239 | struct pvrdma_dev *dev = to_vdev(cq->device); |
| 240 | unsigned long flags; |
| 241 | int ret; |
| 242 | |
| 243 | memset(cmd, 0, sizeof(*cmd)); |
| 244 | cmd->hdr.cmd = PVRDMA_CMD_DESTROY_CQ; |
| 245 | cmd->cq_handle = vcq->cq_handle; |
| 246 | |
| 247 | ret = pvrdma_cmd_post(dev, &req, NULL, 0); |
| 248 | if (ret < 0) |
| 249 | dev_warn(&dev->pdev->dev, |
| 250 | "could not destroy completion queue, error: %d\n", |
| 251 | ret); |
| 252 | |
| 253 | /* free cq's resources */ |
| 254 | spin_lock_irqsave(&dev->cq_tbl_lock, flags); |
| 255 | dev->cq_tbl[vcq->cq_handle] = NULL; |
| 256 | spin_unlock_irqrestore(&dev->cq_tbl_lock, flags); |
| 257 | |
| 258 | pvrdma_free_cq(dev, vcq); |
| 259 | atomic_dec(&dev->num_cqs); |
| 260 | |
| 261 | return ret; |
| 262 | } |
| 263 | |
| 264 | /** |
| 265 | * pvrdma_modify_cq - modify the CQ moderation parameters |
| 266 | * @ibcq: the CQ to modify |
| 267 | * @cq_count: number of CQEs that will trigger an event |
| 268 | * @cq_period: max period of time in usec before triggering an event |
| 269 | * |
| 270 | * @return: -EOPNOTSUPP as CQ resize is not supported. |
| 271 | */ |
| 272 | int pvrdma_modify_cq(struct ib_cq *cq, u16 cq_count, u16 cq_period) |
| 273 | { |
| 274 | return -EOPNOTSUPP; |
| 275 | } |
| 276 | |
| 277 | static inline struct pvrdma_cqe *get_cqe(struct pvrdma_cq *cq, int i) |
| 278 | { |
| 279 | return (struct pvrdma_cqe *)pvrdma_page_dir_get_ptr( |
| 280 | &cq->pdir, |
| 281 | cq->offset + |
| 282 | sizeof(struct pvrdma_cqe) * i); |
| 283 | } |
| 284 | |
| 285 | void _pvrdma_flush_cqe(struct pvrdma_qp *qp, struct pvrdma_cq *cq) |
| 286 | { |
| 287 | int head; |
| 288 | int has_data; |
| 289 | |
| 290 | if (!cq->is_kernel) |
| 291 | return; |
| 292 | |
| 293 | /* Lock held */ |
| 294 | has_data = pvrdma_idx_ring_has_data(&cq->ring_state->rx, |
| 295 | cq->ibcq.cqe, &head); |
| 296 | if (unlikely(has_data > 0)) { |
| 297 | int items; |
| 298 | int curr; |
| 299 | int tail = pvrdma_idx(&cq->ring_state->rx.prod_tail, |
| 300 | cq->ibcq.cqe); |
| 301 | struct pvrdma_cqe *cqe; |
| 302 | struct pvrdma_cqe *curr_cqe; |
| 303 | |
| 304 | items = (tail > head) ? (tail - head) : |
| 305 | (cq->ibcq.cqe - head + tail); |
| 306 | curr = --tail; |
| 307 | while (items-- > 0) { |
| 308 | if (curr < 0) |
| 309 | curr = cq->ibcq.cqe - 1; |
| 310 | if (tail < 0) |
| 311 | tail = cq->ibcq.cqe - 1; |
| 312 | curr_cqe = get_cqe(cq, curr); |
| 313 | if ((curr_cqe->qp & 0xFFFF) != qp->qp_handle) { |
| 314 | if (curr != tail) { |
| 315 | cqe = get_cqe(cq, tail); |
| 316 | *cqe = *curr_cqe; |
| 317 | } |
| 318 | tail--; |
| 319 | } else { |
| 320 | pvrdma_idx_ring_inc( |
| 321 | &cq->ring_state->rx.cons_head, |
| 322 | cq->ibcq.cqe); |
| 323 | } |
| 324 | curr--; |
| 325 | } |
| 326 | } |
| 327 | } |
| 328 | |
| 329 | static int pvrdma_poll_one(struct pvrdma_cq *cq, struct pvrdma_qp **cur_qp, |
| 330 | struct ib_wc *wc) |
| 331 | { |
| 332 | struct pvrdma_dev *dev = to_vdev(cq->ibcq.device); |
| 333 | int has_data; |
| 334 | unsigned int head; |
| 335 | bool tried = false; |
| 336 | struct pvrdma_cqe *cqe; |
| 337 | |
| 338 | retry: |
| 339 | has_data = pvrdma_idx_ring_has_data(&cq->ring_state->rx, |
| 340 | cq->ibcq.cqe, &head); |
| 341 | if (has_data == 0) { |
| 342 | if (tried) |
| 343 | return -EAGAIN; |
| 344 | |
| 345 | pvrdma_write_uar_cq(dev, cq->cq_handle | PVRDMA_UAR_CQ_POLL); |
| 346 | |
| 347 | tried = true; |
| 348 | goto retry; |
| 349 | } else if (has_data == PVRDMA_INVALID_IDX) { |
| 350 | dev_err(&dev->pdev->dev, "CQ ring state invalid\n"); |
| 351 | return -EAGAIN; |
| 352 | } |
| 353 | |
| 354 | cqe = get_cqe(cq, head); |
| 355 | |
| 356 | /* Ensure cqe is valid. */ |
| 357 | rmb(); |
| 358 | if (dev->qp_tbl[cqe->qp & 0xffff]) |
| 359 | *cur_qp = (struct pvrdma_qp *)dev->qp_tbl[cqe->qp & 0xffff]; |
| 360 | else |
| 361 | return -EAGAIN; |
| 362 | |
| 363 | wc->opcode = pvrdma_wc_opcode_to_ib(cqe->opcode); |
| 364 | wc->status = pvrdma_wc_status_to_ib(cqe->status); |
| 365 | wc->wr_id = cqe->wr_id; |
| 366 | wc->qp = &(*cur_qp)->ibqp; |
| 367 | wc->byte_len = cqe->byte_len; |
| 368 | wc->ex.imm_data = cqe->imm_data; |
| 369 | wc->src_qp = cqe->src_qp; |
| 370 | wc->wc_flags = pvrdma_wc_flags_to_ib(cqe->wc_flags); |
| 371 | wc->pkey_index = cqe->pkey_index; |
| 372 | wc->slid = cqe->slid; |
| 373 | wc->sl = cqe->sl; |
| 374 | wc->dlid_path_bits = cqe->dlid_path_bits; |
| 375 | wc->port_num = cqe->port_num; |
| 376 | wc->vendor_err = 0; |
| 377 | |
| 378 | /* Update shared ring state */ |
| 379 | pvrdma_idx_ring_inc(&cq->ring_state->rx.cons_head, cq->ibcq.cqe); |
| 380 | |
| 381 | return 0; |
| 382 | } |
| 383 | |
| 384 | /** |
| 385 | * pvrdma_poll_cq - poll for work completion queue entries |
| 386 | * @ibcq: completion queue |
| 387 | * @num_entries: the maximum number of entries |
| 388 | * @entry: pointer to work completion array |
| 389 | * |
| 390 | * @return: number of polled completion entries |
| 391 | */ |
| 392 | int pvrdma_poll_cq(struct ib_cq *ibcq, int num_entries, struct ib_wc *wc) |
| 393 | { |
| 394 | struct pvrdma_cq *cq = to_vcq(ibcq); |
| 395 | struct pvrdma_qp *cur_qp = NULL; |
| 396 | unsigned long flags; |
| 397 | int npolled; |
| 398 | |
| 399 | if (num_entries < 1 || wc == NULL) |
| 400 | return 0; |
| 401 | |
| 402 | spin_lock_irqsave(&cq->cq_lock, flags); |
| 403 | for (npolled = 0; npolled < num_entries; ++npolled) { |
| 404 | if (pvrdma_poll_one(cq, &cur_qp, wc + npolled)) |
| 405 | break; |
| 406 | } |
| 407 | |
| 408 | spin_unlock_irqrestore(&cq->cq_lock, flags); |
| 409 | |
| 410 | /* Ensure we do not return errors from poll_cq */ |
| 411 | return npolled; |
| 412 | } |
| 413 | |
/**
 * pvrdma_resize_cq - resize CQ
 * @ibcq: the completion queue
 * @entries: CQ entries
 * @udata: user data
 *
 * None of the arguments are used; the function unconditionally fails.
 *
 * @return: always -EOPNOTSUPP, as CQ resize is not supported.
 */
int pvrdma_resize_cq(struct ib_cq *ibcq, int entries, struct ib_udata *udata)
{
	return -EOPNOTSUPP;
}