Ralph Campbell | f931551 | 2010-05-23 21:44:54 -0700 | [diff] [blame] | 1 | /* |
| 2 | * Copyright (c) 2006, 2007, 2008, 2010 QLogic Corporation. All rights reserved. |
| 3 | * Copyright (c) 2005, 2006 PathScale, Inc. All rights reserved. |
| 4 | * |
| 5 | * This software is available to you under a choice of one of two |
| 6 | * licenses. You may choose to be licensed under the terms of the GNU |
| 7 | * General Public License (GPL) Version 2, available from the file |
| 8 | * COPYING in the main directory of this source tree, or the |
| 9 | * OpenIB.org BSD license below: |
| 10 | * |
| 11 | * Redistribution and use in source and binary forms, with or |
| 12 | * without modification, are permitted provided that the following |
| 13 | * conditions are met: |
| 14 | * |
| 15 | * - Redistributions of source code must retain the above |
| 16 | * copyright notice, this list of conditions and the following |
| 17 | * disclaimer. |
| 18 | * |
| 19 | * - Redistributions in binary form must reproduce the above |
| 20 | * copyright notice, this list of conditions and the following |
| 21 | * disclaimer in the documentation and/or other materials |
| 22 | * provided with the distribution. |
| 23 | * |
| 24 | * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, |
| 25 | * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF |
| 26 | * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND |
| 27 | * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS |
| 28 | * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN |
| 29 | * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN |
| 30 | * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE |
| 31 | * SOFTWARE. |
| 32 | */ |
| 33 | |
| 34 | #include <linux/err.h> |
| 35 | #include <linux/slab.h> |
| 36 | #include <linux/vmalloc.h> |
| 37 | |
| 38 | #include "qib_verbs.h" |
| 39 | |
| 40 | /** |
| 41 | * qib_cq_enter - add a new entry to the completion queue |
| 42 | * @cq: completion queue |
| 43 | * @entry: work completion entry to add |
| 44 | * @sig: true if @entry is a solicitated entry |
| 45 | * |
| 46 | * This may be called with qp->s_lock held. |
| 47 | */ |
| 48 | void qib_cq_enter(struct qib_cq *cq, struct ib_wc *entry, int solicited) |
| 49 | { |
| 50 | struct qib_cq_wc *wc; |
| 51 | unsigned long flags; |
| 52 | u32 head; |
| 53 | u32 next; |
| 54 | |
| 55 | spin_lock_irqsave(&cq->lock, flags); |
| 56 | |
| 57 | /* |
| 58 | * Note that the head pointer might be writable by user processes. |
| 59 | * Take care to verify it is a sane value. |
| 60 | */ |
| 61 | wc = cq->queue; |
| 62 | head = wc->head; |
| 63 | if (head >= (unsigned) cq->ibcq.cqe) { |
| 64 | head = cq->ibcq.cqe; |
| 65 | next = 0; |
| 66 | } else |
| 67 | next = head + 1; |
| 68 | if (unlikely(next == wc->tail)) { |
| 69 | spin_unlock_irqrestore(&cq->lock, flags); |
| 70 | if (cq->ibcq.event_handler) { |
| 71 | struct ib_event ev; |
| 72 | |
| 73 | ev.device = cq->ibcq.device; |
| 74 | ev.element.cq = &cq->ibcq; |
| 75 | ev.event = IB_EVENT_CQ_ERR; |
| 76 | cq->ibcq.event_handler(&ev, cq->ibcq.cq_context); |
| 77 | } |
| 78 | return; |
| 79 | } |
| 80 | if (cq->ip) { |
| 81 | wc->uqueue[head].wr_id = entry->wr_id; |
| 82 | wc->uqueue[head].status = entry->status; |
| 83 | wc->uqueue[head].opcode = entry->opcode; |
| 84 | wc->uqueue[head].vendor_err = entry->vendor_err; |
| 85 | wc->uqueue[head].byte_len = entry->byte_len; |
| 86 | wc->uqueue[head].ex.imm_data = |
| 87 | (__u32 __force)entry->ex.imm_data; |
| 88 | wc->uqueue[head].qp_num = entry->qp->qp_num; |
| 89 | wc->uqueue[head].src_qp = entry->src_qp; |
| 90 | wc->uqueue[head].wc_flags = entry->wc_flags; |
| 91 | wc->uqueue[head].pkey_index = entry->pkey_index; |
| 92 | wc->uqueue[head].slid = entry->slid; |
| 93 | wc->uqueue[head].sl = entry->sl; |
| 94 | wc->uqueue[head].dlid_path_bits = entry->dlid_path_bits; |
| 95 | wc->uqueue[head].port_num = entry->port_num; |
| 96 | /* Make sure entry is written before the head index. */ |
| 97 | smp_wmb(); |
| 98 | } else |
| 99 | wc->kqueue[head] = *entry; |
| 100 | wc->head = next; |
| 101 | |
| 102 | if (cq->notify == IB_CQ_NEXT_COMP || |
Mike Marciniszyn | a377acd | 2011-01-10 17:42:19 -0800 | [diff] [blame] | 103 | (cq->notify == IB_CQ_SOLICITED && |
| 104 | (solicited || entry->status != IB_WC_SUCCESS))) { |
Ralph Campbell | f931551 | 2010-05-23 21:44:54 -0700 | [diff] [blame] | 105 | cq->notify = IB_CQ_NONE; |
| 106 | cq->triggered++; |
| 107 | /* |
| 108 | * This will cause send_complete() to be called in |
| 109 | * another thread. |
| 110 | */ |
| 111 | queue_work(qib_cq_wq, &cq->comptask); |
| 112 | } |
| 113 | |
| 114 | spin_unlock_irqrestore(&cq->lock, flags); |
| 115 | } |
| 116 | |
| 117 | /** |
| 118 | * qib_poll_cq - poll for work completion entries |
| 119 | * @ibcq: the completion queue to poll |
| 120 | * @num_entries: the maximum number of entries to return |
| 121 | * @entry: pointer to array where work completions are placed |
| 122 | * |
| 123 | * Returns the number of completion entries polled. |
| 124 | * |
| 125 | * This may be called from interrupt context. Also called by ib_poll_cq() |
| 126 | * in the generic verbs code. |
| 127 | */ |
| 128 | int qib_poll_cq(struct ib_cq *ibcq, int num_entries, struct ib_wc *entry) |
| 129 | { |
| 130 | struct qib_cq *cq = to_icq(ibcq); |
| 131 | struct qib_cq_wc *wc; |
| 132 | unsigned long flags; |
| 133 | int npolled; |
| 134 | u32 tail; |
| 135 | |
| 136 | /* The kernel can only poll a kernel completion queue */ |
| 137 | if (cq->ip) { |
| 138 | npolled = -EINVAL; |
| 139 | goto bail; |
| 140 | } |
| 141 | |
| 142 | spin_lock_irqsave(&cq->lock, flags); |
| 143 | |
| 144 | wc = cq->queue; |
| 145 | tail = wc->tail; |
| 146 | if (tail > (u32) cq->ibcq.cqe) |
| 147 | tail = (u32) cq->ibcq.cqe; |
| 148 | for (npolled = 0; npolled < num_entries; ++npolled, ++entry) { |
| 149 | if (tail == wc->head) |
| 150 | break; |
| 151 | /* The kernel doesn't need a RMB since it has the lock. */ |
| 152 | *entry = wc->kqueue[tail]; |
| 153 | if (tail >= cq->ibcq.cqe) |
| 154 | tail = 0; |
| 155 | else |
| 156 | tail++; |
| 157 | } |
| 158 | wc->tail = tail; |
| 159 | |
| 160 | spin_unlock_irqrestore(&cq->lock, flags); |
| 161 | |
| 162 | bail: |
| 163 | return npolled; |
| 164 | } |
| 165 | |
| 166 | static void send_complete(struct work_struct *work) |
| 167 | { |
| 168 | struct qib_cq *cq = container_of(work, struct qib_cq, comptask); |
| 169 | |
| 170 | /* |
| 171 | * The completion handler will most likely rearm the notification |
| 172 | * and poll for all pending entries. If a new completion entry |
| 173 | * is added while we are in this routine, queue_work() |
| 174 | * won't call us again until we return so we check triggered to |
| 175 | * see if we need to call the handler again. |
| 176 | */ |
| 177 | for (;;) { |
| 178 | u8 triggered = cq->triggered; |
| 179 | |
| 180 | /* |
| 181 | * IPoIB connected mode assumes the callback is from a |
| 182 | * soft IRQ. We simulate this by blocking "bottom halves". |
| 183 | * See the implementation for ipoib_cm_handle_tx_wc(), |
| 184 | * netif_tx_lock_bh() and netif_tx_lock(). |
| 185 | */ |
| 186 | local_bh_disable(); |
| 187 | cq->ibcq.comp_handler(&cq->ibcq, cq->ibcq.cq_context); |
| 188 | local_bh_enable(); |
| 189 | |
| 190 | if (cq->triggered == triggered) |
| 191 | return; |
| 192 | } |
| 193 | } |
| 194 | |
| 195 | /** |
| 196 | * qib_create_cq - create a completion queue |
| 197 | * @ibdev: the device this completion queue is attached to |
| 198 | * @entries: the minimum size of the completion queue |
| 199 | * @context: unused by the QLogic_IB driver |
| 200 | * @udata: user data for libibverbs.so |
| 201 | * |
| 202 | * Returns a pointer to the completion queue or negative errno values |
| 203 | * for failure. |
| 204 | * |
| 205 | * Called by ib_create_cq() in the generic verbs code. |
| 206 | */ |
| 207 | struct ib_cq *qib_create_cq(struct ib_device *ibdev, int entries, |
| 208 | int comp_vector, struct ib_ucontext *context, |
| 209 | struct ib_udata *udata) |
| 210 | { |
| 211 | struct qib_ibdev *dev = to_idev(ibdev); |
| 212 | struct qib_cq *cq; |
| 213 | struct qib_cq_wc *wc; |
| 214 | struct ib_cq *ret; |
| 215 | u32 sz; |
| 216 | |
| 217 | if (entries < 1 || entries > ib_qib_max_cqes) { |
| 218 | ret = ERR_PTR(-EINVAL); |
| 219 | goto done; |
| 220 | } |
| 221 | |
| 222 | /* Allocate the completion queue structure. */ |
| 223 | cq = kmalloc(sizeof(*cq), GFP_KERNEL); |
| 224 | if (!cq) { |
| 225 | ret = ERR_PTR(-ENOMEM); |
| 226 | goto done; |
| 227 | } |
| 228 | |
| 229 | /* |
| 230 | * Allocate the completion queue entries and head/tail pointers. |
| 231 | * This is allocated separately so that it can be resized and |
| 232 | * also mapped into user space. |
| 233 | * We need to use vmalloc() in order to support mmap and large |
| 234 | * numbers of entries. |
| 235 | */ |
| 236 | sz = sizeof(*wc); |
| 237 | if (udata && udata->outlen >= sizeof(__u64)) |
| 238 | sz += sizeof(struct ib_uverbs_wc) * (entries + 1); |
| 239 | else |
| 240 | sz += sizeof(struct ib_wc) * (entries + 1); |
| 241 | wc = vmalloc_user(sz); |
| 242 | if (!wc) { |
| 243 | ret = ERR_PTR(-ENOMEM); |
| 244 | goto bail_cq; |
| 245 | } |
| 246 | |
| 247 | /* |
| 248 | * Return the address of the WC as the offset to mmap. |
| 249 | * See qib_mmap() for details. |
| 250 | */ |
| 251 | if (udata && udata->outlen >= sizeof(__u64)) { |
| 252 | int err; |
| 253 | |
| 254 | cq->ip = qib_create_mmap_info(dev, sz, context, wc); |
| 255 | if (!cq->ip) { |
| 256 | ret = ERR_PTR(-ENOMEM); |
| 257 | goto bail_wc; |
| 258 | } |
| 259 | |
| 260 | err = ib_copy_to_udata(udata, &cq->ip->offset, |
| 261 | sizeof(cq->ip->offset)); |
| 262 | if (err) { |
| 263 | ret = ERR_PTR(err); |
| 264 | goto bail_ip; |
| 265 | } |
| 266 | } else |
| 267 | cq->ip = NULL; |
| 268 | |
| 269 | spin_lock(&dev->n_cqs_lock); |
| 270 | if (dev->n_cqs_allocated == ib_qib_max_cqs) { |
| 271 | spin_unlock(&dev->n_cqs_lock); |
| 272 | ret = ERR_PTR(-ENOMEM); |
| 273 | goto bail_ip; |
| 274 | } |
| 275 | |
| 276 | dev->n_cqs_allocated++; |
| 277 | spin_unlock(&dev->n_cqs_lock); |
| 278 | |
| 279 | if (cq->ip) { |
| 280 | spin_lock_irq(&dev->pending_lock); |
| 281 | list_add(&cq->ip->pending_mmaps, &dev->pending_mmaps); |
| 282 | spin_unlock_irq(&dev->pending_lock); |
| 283 | } |
| 284 | |
| 285 | /* |
| 286 | * ib_create_cq() will initialize cq->ibcq except for cq->ibcq.cqe. |
| 287 | * The number of entries should be >= the number requested or return |
| 288 | * an error. |
| 289 | */ |
| 290 | cq->ibcq.cqe = entries; |
| 291 | cq->notify = IB_CQ_NONE; |
| 292 | cq->triggered = 0; |
| 293 | spin_lock_init(&cq->lock); |
| 294 | INIT_WORK(&cq->comptask, send_complete); |
| 295 | wc->head = 0; |
| 296 | wc->tail = 0; |
| 297 | cq->queue = wc; |
| 298 | |
| 299 | ret = &cq->ibcq; |
| 300 | |
| 301 | goto done; |
| 302 | |
| 303 | bail_ip: |
| 304 | kfree(cq->ip); |
| 305 | bail_wc: |
| 306 | vfree(wc); |
| 307 | bail_cq: |
| 308 | kfree(cq); |
| 309 | done: |
| 310 | return ret; |
| 311 | } |
| 312 | |
| 313 | /** |
| 314 | * qib_destroy_cq - destroy a completion queue |
| 315 | * @ibcq: the completion queue to destroy. |
| 316 | * |
| 317 | * Returns 0 for success. |
| 318 | * |
| 319 | * Called by ib_destroy_cq() in the generic verbs code. |
| 320 | */ |
| 321 | int qib_destroy_cq(struct ib_cq *ibcq) |
| 322 | { |
| 323 | struct qib_ibdev *dev = to_idev(ibcq->device); |
| 324 | struct qib_cq *cq = to_icq(ibcq); |
| 325 | |
| 326 | flush_work(&cq->comptask); |
| 327 | spin_lock(&dev->n_cqs_lock); |
| 328 | dev->n_cqs_allocated--; |
| 329 | spin_unlock(&dev->n_cqs_lock); |
| 330 | if (cq->ip) |
| 331 | kref_put(&cq->ip->ref, qib_release_mmap_info); |
| 332 | else |
| 333 | vfree(cq->queue); |
| 334 | kfree(cq); |
| 335 | |
| 336 | return 0; |
| 337 | } |
| 338 | |
| 339 | /** |
| 340 | * qib_req_notify_cq - change the notification type for a completion queue |
| 341 | * @ibcq: the completion queue |
| 342 | * @notify_flags: the type of notification to request |
| 343 | * |
| 344 | * Returns 0 for success. |
| 345 | * |
| 346 | * This may be called from interrupt context. Also called by |
| 347 | * ib_req_notify_cq() in the generic verbs code. |
| 348 | */ |
| 349 | int qib_req_notify_cq(struct ib_cq *ibcq, enum ib_cq_notify_flags notify_flags) |
| 350 | { |
| 351 | struct qib_cq *cq = to_icq(ibcq); |
| 352 | unsigned long flags; |
| 353 | int ret = 0; |
| 354 | |
| 355 | spin_lock_irqsave(&cq->lock, flags); |
| 356 | /* |
| 357 | * Don't change IB_CQ_NEXT_COMP to IB_CQ_SOLICITED but allow |
| 358 | * any other transitions (see C11-31 and C11-32 in ch. 11.4.2.2). |
| 359 | */ |
| 360 | if (cq->notify != IB_CQ_NEXT_COMP) |
| 361 | cq->notify = notify_flags & IB_CQ_SOLICITED_MASK; |
| 362 | |
| 363 | if ((notify_flags & IB_CQ_REPORT_MISSED_EVENTS) && |
| 364 | cq->queue->head != cq->queue->tail) |
| 365 | ret = 1; |
| 366 | |
| 367 | spin_unlock_irqrestore(&cq->lock, flags); |
| 368 | |
| 369 | return ret; |
| 370 | } |
| 371 | |
| 372 | /** |
| 373 | * qib_resize_cq - change the size of the CQ |
| 374 | * @ibcq: the completion queue |
| 375 | * |
| 376 | * Returns 0 for success. |
| 377 | */ |
| 378 | int qib_resize_cq(struct ib_cq *ibcq, int cqe, struct ib_udata *udata) |
| 379 | { |
| 380 | struct qib_cq *cq = to_icq(ibcq); |
| 381 | struct qib_cq_wc *old_wc; |
| 382 | struct qib_cq_wc *wc; |
| 383 | u32 head, tail, n; |
| 384 | int ret; |
| 385 | u32 sz; |
| 386 | |
| 387 | if (cqe < 1 || cqe > ib_qib_max_cqes) { |
| 388 | ret = -EINVAL; |
| 389 | goto bail; |
| 390 | } |
| 391 | |
| 392 | /* |
| 393 | * Need to use vmalloc() if we want to support large #s of entries. |
| 394 | */ |
| 395 | sz = sizeof(*wc); |
| 396 | if (udata && udata->outlen >= sizeof(__u64)) |
| 397 | sz += sizeof(struct ib_uverbs_wc) * (cqe + 1); |
| 398 | else |
| 399 | sz += sizeof(struct ib_wc) * (cqe + 1); |
| 400 | wc = vmalloc_user(sz); |
| 401 | if (!wc) { |
| 402 | ret = -ENOMEM; |
| 403 | goto bail; |
| 404 | } |
| 405 | |
| 406 | /* Check that we can write the offset to mmap. */ |
| 407 | if (udata && udata->outlen >= sizeof(__u64)) { |
| 408 | __u64 offset = 0; |
| 409 | |
| 410 | ret = ib_copy_to_udata(udata, &offset, sizeof(offset)); |
| 411 | if (ret) |
| 412 | goto bail_free; |
| 413 | } |
| 414 | |
| 415 | spin_lock_irq(&cq->lock); |
| 416 | /* |
| 417 | * Make sure head and tail are sane since they |
| 418 | * might be user writable. |
| 419 | */ |
| 420 | old_wc = cq->queue; |
| 421 | head = old_wc->head; |
| 422 | if (head > (u32) cq->ibcq.cqe) |
| 423 | head = (u32) cq->ibcq.cqe; |
| 424 | tail = old_wc->tail; |
| 425 | if (tail > (u32) cq->ibcq.cqe) |
| 426 | tail = (u32) cq->ibcq.cqe; |
| 427 | if (head < tail) |
| 428 | n = cq->ibcq.cqe + 1 + head - tail; |
| 429 | else |
| 430 | n = head - tail; |
| 431 | if (unlikely((u32)cqe < n)) { |
| 432 | ret = -EINVAL; |
| 433 | goto bail_unlock; |
| 434 | } |
| 435 | for (n = 0; tail != head; n++) { |
| 436 | if (cq->ip) |
| 437 | wc->uqueue[n] = old_wc->uqueue[tail]; |
| 438 | else |
| 439 | wc->kqueue[n] = old_wc->kqueue[tail]; |
| 440 | if (tail == (u32) cq->ibcq.cqe) |
| 441 | tail = 0; |
| 442 | else |
| 443 | tail++; |
| 444 | } |
| 445 | cq->ibcq.cqe = cqe; |
| 446 | wc->head = n; |
| 447 | wc->tail = 0; |
| 448 | cq->queue = wc; |
| 449 | spin_unlock_irq(&cq->lock); |
| 450 | |
| 451 | vfree(old_wc); |
| 452 | |
| 453 | if (cq->ip) { |
| 454 | struct qib_ibdev *dev = to_idev(ibcq->device); |
| 455 | struct qib_mmap_info *ip = cq->ip; |
| 456 | |
| 457 | qib_update_mmap_info(dev, ip, sz, wc); |
| 458 | |
| 459 | /* |
| 460 | * Return the offset to mmap. |
| 461 | * See qib_mmap() for details. |
| 462 | */ |
| 463 | if (udata && udata->outlen >= sizeof(__u64)) { |
| 464 | ret = ib_copy_to_udata(udata, &ip->offset, |
| 465 | sizeof(ip->offset)); |
| 466 | if (ret) |
| 467 | goto bail; |
| 468 | } |
| 469 | |
| 470 | spin_lock_irq(&dev->pending_lock); |
| 471 | if (list_empty(&ip->pending_mmaps)) |
| 472 | list_add(&ip->pending_mmaps, &dev->pending_mmaps); |
| 473 | spin_unlock_irq(&dev->pending_lock); |
| 474 | } |
| 475 | |
| 476 | ret = 0; |
| 477 | goto bail; |
| 478 | |
| 479 | bail_unlock: |
| 480 | spin_unlock_irq(&cq->lock); |
| 481 | bail_free: |
| 482 | vfree(wc); |
| 483 | bail: |
| 484 | return ret; |
| 485 | } |