/*
 * Intel MIC Platform Software Stack (MPSS)
 *
 * Copyright(c) 2014 Intel Corporation.
 *
 * This program is free software; you can redistribute it and/or modify
 * it under the terms of the GNU General Public License, version 2, as
 * published by the Free Software Foundation.
 *
 * This program is distributed in the hope that it will be useful, but
 * WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
 * General Public License for more details.
 *
 * Intel SCIF driver.
 *
 */
#include <linux/scif.h>
#include "scif_main.h"
#include "scif_map.h"

static const char * const scif_ep_states[] = {
	"Unbound",
	"Bound",
	"Listening",
	"Connected",
	"Connecting",
	"Mapping",
	"Closing",
	"Close Listening",
	"Disconnected",
	"Zombie"};

enum conn_async_state {
	ASYNC_CONN_IDLE = 1,	/* ep setup for async connect */
	ASYNC_CONN_INPROGRESS,	/* async connect in progress */
	ASYNC_CONN_FLUSH_WORK	/* async work flush in progress */
};

scif_epd_t scif_open(void)
{
	struct scif_endpt *ep;

	might_sleep();
	ep = kzalloc(sizeof(*ep), GFP_KERNEL);
	if (!ep)
		goto err_ep_alloc;

	ep->qp_info.qp = kzalloc(sizeof(*ep->qp_info.qp), GFP_KERNEL);
	if (!ep->qp_info.qp)
		goto err_qp_alloc;

	spin_lock_init(&ep->lock);
	mutex_init(&ep->sendlock);
	mutex_init(&ep->recvlock);

	ep->state = SCIFEP_UNBOUND;
	dev_dbg(scif_info.mdev.this_device,
		"SCIFAPI open: ep %p success\n", ep);
	return ep;

err_qp_alloc:
	kfree(ep);
err_ep_alloc:
	return NULL;
}
EXPORT_SYMBOL_GPL(scif_open);

/*
 * scif_disconnect_ep - Disconnects the endpoint if found
 * @epd: The end point returned from scif_open()
 */
static struct scif_endpt *scif_disconnect_ep(struct scif_endpt *ep)
{
	struct scifmsg msg;
	struct scif_endpt *fep = NULL;
	struct scif_endpt *tmpep;
	struct list_head *pos, *tmpq;
	int err;

	/*
	 * Wake up any threads blocked in send()/recv() before closing
	 * out the connection. Grabbing and releasing the send/recv lock
	 * will ensure that any blocked senders/receivers have exited for
	 * Ring 0 endpoints. It is a Ring 0 bug to call send/recv after
	 * close. Ring 3 endpoints are not affected since close will not
	 * be called while there are IOCTLs executing.
	 */
	wake_up_interruptible(&ep->sendwq);
	wake_up_interruptible(&ep->recvwq);
	mutex_lock(&ep->sendlock);
	mutex_unlock(&ep->sendlock);
	mutex_lock(&ep->recvlock);
	mutex_unlock(&ep->recvlock);

	/* Remove from the connected list */
	mutex_lock(&scif_info.connlock);
	list_for_each_safe(pos, tmpq, &scif_info.connected) {
		tmpep = list_entry(pos, struct scif_endpt, list);
		if (tmpep == ep) {
			list_del(pos);
			fep = tmpep;
			spin_lock(&ep->lock);
			break;
		}
	}

	if (!fep) {
		/*
		 * The other side has completed the disconnect before
		 * the end point could be removed from the list. Therefore
		 * the ep lock is not taken; traverse the disconnected
		 * list to find the endpoint and then release the conn lock.
		 */
		list_for_each_safe(pos, tmpq, &scif_info.disconnected) {
			tmpep = list_entry(pos, struct scif_endpt, list);
			if (tmpep == ep) {
				list_del(pos);
				break;
			}
		}
		mutex_unlock(&scif_info.connlock);
		return NULL;
	}

	init_completion(&ep->discon);
	msg.uop = SCIF_DISCNCT;
	msg.src = ep->port;
	msg.dst = ep->peer;
	msg.payload[0] = (u64)ep;
	msg.payload[1] = ep->remote_ep;

	err = scif_nodeqp_send(ep->remote_dev, &msg);
	spin_unlock(&ep->lock);
	mutex_unlock(&scif_info.connlock);

	if (!err)
		/* Wait for the remote node to respond with SCIF_DISCNT_ACK */
		wait_for_completion_timeout(&ep->discon,
					    SCIF_NODE_ALIVE_TIMEOUT);
	return ep;
}

int scif_close(scif_epd_t epd)
{
	struct scif_endpt *ep = (struct scif_endpt *)epd;
	struct scif_endpt *tmpep;
	struct list_head *pos, *tmpq;
	enum scif_epd_state oldstate;
	bool flush_conn;

	dev_dbg(scif_info.mdev.this_device, "SCIFAPI close: ep %p %s\n",
		ep, scif_ep_states[ep->state]);
	might_sleep();
	spin_lock(&ep->lock);
	flush_conn = (ep->conn_async_state == ASYNC_CONN_INPROGRESS);
	spin_unlock(&ep->lock);

	if (flush_conn)
		flush_work(&scif_info.conn_work);

	spin_lock(&ep->lock);
	oldstate = ep->state;

	ep->state = SCIFEP_CLOSING;

	switch (oldstate) {
	case SCIFEP_ZOMBIE:
	case SCIFEP_DISCONNECTED:
		spin_unlock(&ep->lock);
		/* Remove from the disconnected list */
		mutex_lock(&scif_info.connlock);
		list_for_each_safe(pos, tmpq, &scif_info.disconnected) {
			tmpep = list_entry(pos, struct scif_endpt, list);
			if (tmpep == ep) {
				list_del(pos);
				break;
			}
		}
		mutex_unlock(&scif_info.connlock);
		break;
	case SCIFEP_UNBOUND:
	case SCIFEP_BOUND:
	case SCIFEP_CONNECTING:
		spin_unlock(&ep->lock);
		break;
	case SCIFEP_MAPPING:
	case SCIFEP_CONNECTED:
	case SCIFEP_CLOSING:
	{
		spin_unlock(&ep->lock);
		scif_disconnect_ep(ep);
		break;
	}
	case SCIFEP_LISTENING:
	case SCIFEP_CLLISTEN:
	{
		struct scif_conreq *conreq;
		struct scifmsg msg;
		struct scif_endpt *aep;

		spin_unlock(&ep->lock);
		spin_lock(&scif_info.eplock);

		/* remove from listen list */
		list_for_each_safe(pos, tmpq, &scif_info.listen) {
			tmpep = list_entry(pos, struct scif_endpt, list);
			if (tmpep == ep)
				list_del(pos);
		}
		/* Remove any dangling accepts */
		while (ep->acceptcnt) {
			aep = list_first_entry(&ep->li_accept,
					       struct scif_endpt, liacceptlist);
			list_del(&aep->liacceptlist);
			scif_put_port(aep->port.port);
			list_for_each_safe(pos, tmpq, &scif_info.uaccept) {
				tmpep = list_entry(pos, struct scif_endpt,
						   miacceptlist);
				if (tmpep == aep) {
					list_del(pos);
					break;
				}
			}
			spin_unlock(&scif_info.eplock);
			mutex_lock(&scif_info.connlock);
			list_for_each_safe(pos, tmpq, &scif_info.connected) {
				tmpep = list_entry(pos,
						   struct scif_endpt, list);
				if (tmpep == aep) {
					list_del(pos);
					break;
				}
			}
			list_for_each_safe(pos, tmpq, &scif_info.disconnected) {
				tmpep = list_entry(pos,
						   struct scif_endpt, list);
				if (tmpep == aep) {
					list_del(pos);
					break;
				}
			}
			mutex_unlock(&scif_info.connlock);
			scif_teardown_ep(aep);
			spin_lock(&scif_info.eplock);
			scif_add_epd_to_zombie_list(aep, SCIF_EPLOCK_HELD);
			ep->acceptcnt--;
		}

		spin_lock(&ep->lock);
		spin_unlock(&scif_info.eplock);

		/* Remove and reject any pending connection requests. */
		while (ep->conreqcnt) {
			conreq = list_first_entry(&ep->conlist,
						  struct scif_conreq, list);
			list_del(&conreq->list);

			msg.uop = SCIF_CNCT_REJ;
			msg.dst.node = conreq->msg.src.node;
			msg.dst.port = conreq->msg.src.port;
			msg.payload[0] = conreq->msg.payload[0];
			msg.payload[1] = conreq->msg.payload[1];
			/*
			 * No error handling on purpose for scif_nodeqp_send().
			 * If the remote node is lost we still want to free the
			 * connection requests on the self node.
			 */
			scif_nodeqp_send(&scif_dev[conreq->msg.src.node],
					 &msg);
			ep->conreqcnt--;
			kfree(conreq);
		}

		spin_unlock(&ep->lock);
		/* If a kSCIF accept is waiting wake it up */
		wake_up_interruptible(&ep->conwq);
		break;
	}
	}
	scif_put_port(ep->port.port);
	scif_teardown_ep(ep);
	scif_add_epd_to_zombie_list(ep, !SCIF_EPLOCK_HELD);
	return 0;
}
EXPORT_SYMBOL_GPL(scif_close);

/**
 * scif_flush() - Wakes up any blocking accepts. The endpoint will no longer
 * accept new connections.
 * @epd: The end point returned from scif_open()
 */
int __scif_flush(scif_epd_t epd)
{
	struct scif_endpt *ep = (struct scif_endpt *)epd;

	switch (ep->state) {
	case SCIFEP_LISTENING:
	{
		ep->state = SCIFEP_CLLISTEN;

		/* If an accept is waiting wake it up */
		wake_up_interruptible(&ep->conwq);
		break;
	}
	default:
		break;
	}
	return 0;
}

int scif_bind(scif_epd_t epd, u16 pn)
{
	struct scif_endpt *ep = (struct scif_endpt *)epd;
	int ret = 0;
	int tmp;

	dev_dbg(scif_info.mdev.this_device,
		"SCIFAPI bind: ep %p %s requested port number %d\n",
		ep, scif_ep_states[ep->state], pn);
	if (pn) {
		/*
		 * Similar to IETF RFC 1700, SCIF ports below
		 * SCIF_ADMIN_PORT_END can only be bound by system (or root)
		 * processes or by processes executed by privileged users.
		 */
		if (pn < SCIF_ADMIN_PORT_END && !capable(CAP_SYS_ADMIN)) {
			ret = -EACCES;
			goto scif_bind_admin_exit;
		}
	}

	spin_lock(&ep->lock);
	if (ep->state == SCIFEP_BOUND) {
		ret = -EINVAL;
		goto scif_bind_exit;
	} else if (ep->state != SCIFEP_UNBOUND) {
		ret = -EISCONN;
		goto scif_bind_exit;
	}

	if (pn) {
		tmp = scif_rsrv_port(pn);
		if (tmp != pn) {
			ret = -EINVAL;
			goto scif_bind_exit;
		}
	} else {
		pn = scif_get_new_port();
		if (!pn) {
			ret = -ENOSPC;
			goto scif_bind_exit;
		}
	}

	ep->state = SCIFEP_BOUND;
	ep->port.node = scif_info.nodeid;
	ep->port.port = pn;
	ep->conn_async_state = ASYNC_CONN_IDLE;
	ret = pn;
	dev_dbg(scif_info.mdev.this_device,
		"SCIFAPI bind: bound to port number %d\n", pn);
scif_bind_exit:
	spin_unlock(&ep->lock);
scif_bind_admin_exit:
	return ret;
}
EXPORT_SYMBOL_GPL(scif_bind);
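
/*
 * A minimal kernel-mode usage sketch for scif_bind() (illustrative only,
 * not part of this driver). The port number 2000 is an arbitrary example;
 * ports below SCIF_ADMIN_PORT_END additionally require CAP_SYS_ADMIN, and
 * passing 0 asks the driver to pick a free port, which is then returned.
 *
 *	scif_epd_t epd = scif_open();
 *	int port;
 *
 *	if (!epd)
 *		return -ENOMEM;
 *	port = scif_bind(epd, 2000);
 *	if (port < 0) {
 *		scif_close(epd);
 *		return port;
 *	}
 */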

int scif_listen(scif_epd_t epd, int backlog)
{
	struct scif_endpt *ep = (struct scif_endpt *)epd;

	dev_dbg(scif_info.mdev.this_device,
		"SCIFAPI listen: ep %p %s\n", ep, scif_ep_states[ep->state]);
	spin_lock(&ep->lock);
	switch (ep->state) {
	case SCIFEP_ZOMBIE:
	case SCIFEP_CLOSING:
	case SCIFEP_CLLISTEN:
	case SCIFEP_UNBOUND:
	case SCIFEP_DISCONNECTED:
		spin_unlock(&ep->lock);
		return -EINVAL;
	case SCIFEP_LISTENING:
	case SCIFEP_CONNECTED:
	case SCIFEP_CONNECTING:
	case SCIFEP_MAPPING:
		spin_unlock(&ep->lock);
		return -EISCONN;
	case SCIFEP_BOUND:
		break;
	}

	ep->state = SCIFEP_LISTENING;
	ep->backlog = backlog;

	ep->conreqcnt = 0;
	ep->acceptcnt = 0;
	INIT_LIST_HEAD(&ep->conlist);
	init_waitqueue_head(&ep->conwq);
	INIT_LIST_HEAD(&ep->li_accept);
	spin_unlock(&ep->lock);

	/*
	 * Listen setup is complete, so delete the QP information, which is
	 * not needed for a listening endpoint, before placing the endpoint
	 * on the list of listening endpoints.
	 */
	scif_teardown_ep(ep);
	ep->qp_info.qp = NULL;

	spin_lock(&scif_info.eplock);
	list_add_tail(&ep->list, &scif_info.listen);
	spin_unlock(&scif_info.eplock);
	return 0;
}
EXPORT_SYMBOL_GPL(scif_listen);
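
/*
 * Building on the sketch above, a hedged example of server-side setup:
 * bind to an example port (2000) and listen with an example backlog (16).
 *
 *	scif_epd_t lepd = scif_open();
 *	int err;
 *
 *	if (!lepd)
 *		return -ENOMEM;
 *	err = scif_bind(lepd, 2000);
 *	if (err < 0)
 *		goto close;
 *	err = scif_listen(lepd, 16);
 *	if (err)
 *		goto close;
 *	return 0;
 * close:
 *	scif_close(lepd);
 *	return err;
 */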

/*
 ************************************************************************
 * SCIF connection flow:
 *
 * 1) A SCIF listening endpoint can call scif_accept(..) to wait for SCIF
 *	connections via a SCIF_CNCT_REQ message
 * 2) A SCIF endpoint can initiate a SCIF connection by calling
 *	scif_connect(..) which calls scif_setup_qp_connect(..) which
 *	allocates the local qp for the endpoint ring buffer and then sends
 *	a SCIF_CNCT_REQ to the remote node and waits for a SCIF_CNCT_GNT or
 *	a SCIF_CNCT_REJ message
 * 3) The peer node handles a SCIF_CNCT_REQ via scif_cnctreq_resp(..) which
 *	wakes up any threads blocked in step 1 or sends a SCIF_CNCT_REJ
 *	message otherwise
 * 4) A thread blocked waiting for incoming connections allocates its local
 *	endpoint QP and ring buffer following which it sends a SCIF_CNCT_GNT
 *	and waits for a SCIF_CNCT_GNT(N)ACK. If the allocation fails then
 *	the node sends a SCIF_CNCT_REJ message
 * 5) Upon receipt of a SCIF_CNCT_GNT or a SCIF_CNCT_REJ message the
 *	connecting endpoint is woken up as part of handling
 *	scif_cnctgnt_resp(..) following which it maps the remote endpoint's
 *	QP, updates its outbound QP and sends a SCIF_CNCT_GNTACK message on
 *	success or a SCIF_CNCT_GNTNACK message on failure and completes
 *	the scif_connect(..) API
 * 6) Upon receipt of a SCIF_CNCT_GNT(N)ACK the accepting endpoint blocked
 *	in step 4 is woken up and completes the scif_accept(..) API
 * 7) The SCIF connection is now established between the two SCIF endpoints.
 */
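
/*
 * From a kernel client's perspective, steps 2) and 5) above collapse into a
 * single blocking scif_connect() call. A hedged sketch (remote node id 1 and
 * port 2000 are example values only):
 *
 *	struct scif_port_id dst = { .node = 1, .port = 2000 };
 *	scif_epd_t epd = scif_open();
 *	int err;
 *
 *	if (!epd)
 *		return -ENOMEM;
 *	err = scif_connect(epd, &dst);
 *	if (err < 0)
 *		scif_close(epd);
 */
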
static int scif_conn_func(struct scif_endpt *ep)
{
	int err = 0;
	struct scifmsg msg;
	struct device *spdev;

	/* Initiate the first part of the endpoint QP setup */
	err = scif_setup_qp_connect(ep->qp_info.qp, &ep->qp_info.qp_offset,
				    SCIF_ENDPT_QP_SIZE, ep->remote_dev);
	if (err) {
		dev_err(&ep->remote_dev->sdev->dev,
			"%s err %d qp_offset 0x%llx\n",
			__func__, err, ep->qp_info.qp_offset);
		ep->state = SCIFEP_BOUND;
		goto connect_error_simple;
	}

	spdev = scif_get_peer_dev(ep->remote_dev);
	if (IS_ERR(spdev)) {
		err = PTR_ERR(spdev);
		goto cleanup_qp;
	}
	/* Format connect message and send it */
	msg.src = ep->port;
	msg.dst = ep->conn_port;
	msg.uop = SCIF_CNCT_REQ;
	msg.payload[0] = (u64)ep;
	msg.payload[1] = ep->qp_info.qp_offset;
	err = _scif_nodeqp_send(ep->remote_dev, &msg);
	if (err)
		goto connect_error_dec;
	scif_put_peer_dev(spdev);
	/*
	 * Wait for the remote node to respond with SCIF_CNCT_GNT or
	 * SCIF_CNCT_REJ message.
	 */
	err = wait_event_timeout(ep->conwq, ep->state != SCIFEP_CONNECTING,
				 SCIF_NODE_ALIVE_TIMEOUT);
	if (!err) {
		dev_err(&ep->remote_dev->sdev->dev,
			"%s %d timeout\n", __func__, __LINE__);
		ep->state = SCIFEP_BOUND;
	}
	spdev = scif_get_peer_dev(ep->remote_dev);
	if (IS_ERR(spdev)) {
		err = PTR_ERR(spdev);
		goto cleanup_qp;
	}
	if (ep->state == SCIFEP_MAPPING) {
		err = scif_setup_qp_connect_response(ep->remote_dev,
						     ep->qp_info.qp,
						     ep->qp_info.gnt_pld);
		/*
		 * If the resources to map the queue are not available then
		 * we need to tell the other side to terminate the accept.
		 */
		if (err) {
			dev_err(&ep->remote_dev->sdev->dev,
				"%s %d err %d\n", __func__, __LINE__, err);
			msg.uop = SCIF_CNCT_GNTNACK;
			msg.payload[0] = ep->remote_ep;
			_scif_nodeqp_send(ep->remote_dev, &msg);
			ep->state = SCIFEP_BOUND;
			goto connect_error_dec;
		}

		msg.uop = SCIF_CNCT_GNTACK;
		msg.payload[0] = ep->remote_ep;
		err = _scif_nodeqp_send(ep->remote_dev, &msg);
		if (err) {
			ep->state = SCIFEP_BOUND;
			goto connect_error_dec;
		}
		ep->state = SCIFEP_CONNECTED;
		mutex_lock(&scif_info.connlock);
		list_add_tail(&ep->list, &scif_info.connected);
		mutex_unlock(&scif_info.connlock);
		dev_dbg(&ep->remote_dev->sdev->dev,
			"SCIFAPI connect: ep %p connected\n", ep);
	} else if (ep->state == SCIFEP_BOUND) {
		dev_dbg(&ep->remote_dev->sdev->dev,
			"SCIFAPI connect: ep %p connection refused\n", ep);
		err = -ECONNREFUSED;
		goto connect_error_dec;
	}
	scif_put_peer_dev(spdev);
	return err;
connect_error_dec:
	scif_put_peer_dev(spdev);
cleanup_qp:
	scif_cleanup_ep_qp(ep);
connect_error_simple:
	return err;
}

/*
 * scif_conn_handler:
 *
 * Workqueue handler for servicing non-blocking SCIF connect
 *
 */
void scif_conn_handler(struct work_struct *work)
{
	struct scif_endpt *ep;

	do {
		ep = NULL;
		spin_lock(&scif_info.nb_connect_lock);
		if (!list_empty(&scif_info.nb_connect_list)) {
			ep = list_first_entry(&scif_info.nb_connect_list,
					      struct scif_endpt, conn_list);
			list_del(&ep->conn_list);
		}
		spin_unlock(&scif_info.nb_connect_lock);
		if (ep)
			ep->conn_err = scif_conn_func(ep);
	} while (ep);
}

int __scif_connect(scif_epd_t epd, struct scif_port_id *dst, bool non_block)
{
	struct scif_endpt *ep = (struct scif_endpt *)epd;
	int err = 0;
	struct scif_dev *remote_dev;
	struct device *spdev;

	dev_dbg(scif_info.mdev.this_device, "SCIFAPI connect: ep %p %s\n", ep,
		scif_ep_states[ep->state]);

	if (!scif_dev || dst->node > scif_info.maxid)
		return -ENODEV;

	might_sleep();

	remote_dev = &scif_dev[dst->node];
	spdev = scif_get_peer_dev(remote_dev);
	if (IS_ERR(spdev)) {
		err = PTR_ERR(spdev);
		return err;
	}

	spin_lock(&ep->lock);
	switch (ep->state) {
	case SCIFEP_ZOMBIE:
	case SCIFEP_CLOSING:
		err = -EINVAL;
		break;
	case SCIFEP_DISCONNECTED:
		if (ep->conn_async_state == ASYNC_CONN_INPROGRESS)
			ep->conn_async_state = ASYNC_CONN_FLUSH_WORK;
		else
			err = -EINVAL;
		break;
	case SCIFEP_LISTENING:
	case SCIFEP_CLLISTEN:
		err = -EOPNOTSUPP;
		break;
	case SCIFEP_CONNECTING:
	case SCIFEP_MAPPING:
		if (ep->conn_async_state == ASYNC_CONN_INPROGRESS)
			err = -EINPROGRESS;
		else
			err = -EISCONN;
		break;
	case SCIFEP_CONNECTED:
		if (ep->conn_async_state == ASYNC_CONN_INPROGRESS)
			ep->conn_async_state = ASYNC_CONN_FLUSH_WORK;
		else
			err = -EISCONN;
		break;
	case SCIFEP_UNBOUND:
		ep->port.port = scif_get_new_port();
		if (!ep->port.port) {
			err = -ENOSPC;
		} else {
			ep->port.node = scif_info.nodeid;
			ep->conn_async_state = ASYNC_CONN_IDLE;
		}
		/* Fall through */
	case SCIFEP_BOUND:
		/*
		 * If a non-blocking connect has already been initiated
		 * (conn_async_state is either ASYNC_CONN_INPROGRESS or
		 * ASYNC_CONN_FLUSH_WORK), the end point could end up in
		 * SCIFEP_BOUND due to an error in the connection process
		 * (e.g., connection refused). If conn_async_state is
		 * ASYNC_CONN_INPROGRESS - transition to ASYNC_CONN_FLUSH_WORK
		 * so that the error status can be collected. If the state is
		 * already ASYNC_CONN_FLUSH_WORK - then set the error to
		 * EINPROGRESS since some other thread is waiting to collect
		 * the error status.
		 */
		if (ep->conn_async_state == ASYNC_CONN_INPROGRESS) {
			ep->conn_async_state = ASYNC_CONN_FLUSH_WORK;
		} else if (ep->conn_async_state == ASYNC_CONN_FLUSH_WORK) {
			err = -EINPROGRESS;
		} else {
			ep->conn_port = *dst;
			init_waitqueue_head(&ep->sendwq);
			init_waitqueue_head(&ep->recvwq);
			init_waitqueue_head(&ep->conwq);
			ep->conn_async_state = 0;

			if (unlikely(non_block))
				ep->conn_async_state = ASYNC_CONN_INPROGRESS;
		}
		break;
	}

	if (err || ep->conn_async_state == ASYNC_CONN_FLUSH_WORK)
		goto connect_simple_unlock1;

	ep->state = SCIFEP_CONNECTING;
	ep->remote_dev = &scif_dev[dst->node];
	ep->qp_info.qp->magic = SCIFEP_MAGIC;
	if (ep->conn_async_state == ASYNC_CONN_INPROGRESS) {
		spin_lock(&scif_info.nb_connect_lock);
		list_add_tail(&ep->conn_list, &scif_info.nb_connect_list);
		spin_unlock(&scif_info.nb_connect_lock);
		err = -EINPROGRESS;
		schedule_work(&scif_info.conn_work);
	}
connect_simple_unlock1:
	spin_unlock(&ep->lock);
	scif_put_peer_dev(spdev);
	if (err) {
		return err;
	} else if (ep->conn_async_state == ASYNC_CONN_FLUSH_WORK) {
		flush_work(&scif_info.conn_work);
		err = ep->conn_err;
		spin_lock(&ep->lock);
		ep->conn_async_state = ASYNC_CONN_IDLE;
		spin_unlock(&ep->lock);
	} else {
		err = scif_conn_func(ep);
	}
	return err;
}

int scif_connect(scif_epd_t epd, struct scif_port_id *dst)
{
	return __scif_connect(epd, dst, false);
}
EXPORT_SYMBOL_GPL(scif_connect);

/**
 * scif_accept() - Accept a connection request from the remote node
 *
 * The function accepts a connection request from the remote node. Successful
 * completion is indicated by a new end point being created and passed back
 * to the caller for future reference.
 *
 * Upon successful completion zero is returned and the peer information
 * is filled in.
 *
 * If the end point is not in the listening state, -EINVAL is returned.
 *
 * If resource allocation fails during the connection sequence, -ENOMEM
 * is returned.
 *
 * If the function is called without the SCIF_ACCEPT_SYNC flag and no
 * connection requests are pending, it returns -EAGAIN.
 *
 * If the remote side is not sending any connection requests the caller may
 * terminate this function with a signal. If so, -EINTR is returned.
 */
int scif_accept(scif_epd_t epd, struct scif_port_id *peer,
		scif_epd_t *newepd, int flags)
{
	struct scif_endpt *lep = (struct scif_endpt *)epd;
	struct scif_endpt *cep;
	struct scif_conreq *conreq;
	struct scifmsg msg;
	int err;
	struct device *spdev;

	dev_dbg(scif_info.mdev.this_device,
		"SCIFAPI accept: ep %p %s\n", lep, scif_ep_states[lep->state]);

	if (flags & ~SCIF_ACCEPT_SYNC)
		return -EINVAL;

	if (!peer || !newepd)
		return -EINVAL;

	might_sleep();
	spin_lock(&lep->lock);
	if (lep->state != SCIFEP_LISTENING) {
		spin_unlock(&lep->lock);
		return -EINVAL;
	}

	if (!lep->conreqcnt && !(flags & SCIF_ACCEPT_SYNC)) {
		/* No connection request present and we do not want to wait */
		spin_unlock(&lep->lock);
		return -EAGAIN;
	}

	lep->files = current->files;
retry_connection:
	spin_unlock(&lep->lock);
	/* Wait for the remote node to send us a SCIF_CNCT_REQ */
	err = wait_event_interruptible(lep->conwq,
				       (lep->conreqcnt ||
				       (lep->state != SCIFEP_LISTENING)));
	if (err)
		return err;

	if (lep->state != SCIFEP_LISTENING)
		return -EINTR;

	spin_lock(&lep->lock);

	if (!lep->conreqcnt)
		goto retry_connection;

	/* Get the first connect request off the list */
	conreq = list_first_entry(&lep->conlist, struct scif_conreq, list);
	list_del(&conreq->list);
	lep->conreqcnt--;
	spin_unlock(&lep->lock);

	/* Fill in the peer information */
	peer->node = conreq->msg.src.node;
	peer->port = conreq->msg.src.port;

	cep = kzalloc(sizeof(*cep), GFP_KERNEL);
	if (!cep) {
		err = -ENOMEM;
		goto scif_accept_error_epalloc;
	}
	spin_lock_init(&cep->lock);
	mutex_init(&cep->sendlock);
	mutex_init(&cep->recvlock);
	cep->state = SCIFEP_CONNECTING;
	cep->remote_dev = &scif_dev[peer->node];
	cep->remote_ep = conreq->msg.payload[0];

	cep->qp_info.qp = kzalloc(sizeof(*cep->qp_info.qp), GFP_KERNEL);
	if (!cep->qp_info.qp) {
		err = -ENOMEM;
		goto scif_accept_error_qpalloc;
	}

	cep->qp_info.qp->magic = SCIFEP_MAGIC;
	spdev = scif_get_peer_dev(cep->remote_dev);
	if (IS_ERR(spdev)) {
		err = PTR_ERR(spdev);
		goto scif_accept_error_map;
	}
	err = scif_setup_qp_accept(cep->qp_info.qp, &cep->qp_info.qp_offset,
				   conreq->msg.payload[1], SCIF_ENDPT_QP_SIZE,
				   cep->remote_dev);
	if (err) {
		dev_dbg(&cep->remote_dev->sdev->dev,
			"SCIFAPI accept: ep %p new %p scif_setup_qp_accept %d qp_offset 0x%llx\n",
			lep, cep, err, cep->qp_info.qp_offset);
		scif_put_peer_dev(spdev);
		goto scif_accept_error_map;
	}

	cep->port.node = lep->port.node;
	cep->port.port = lep->port.port;
	cep->peer.node = peer->node;
	cep->peer.port = peer->port;
	init_waitqueue_head(&cep->sendwq);
	init_waitqueue_head(&cep->recvwq);
	init_waitqueue_head(&cep->conwq);

	msg.uop = SCIF_CNCT_GNT;
	msg.src = cep->port;
	msg.payload[0] = cep->remote_ep;
	msg.payload[1] = cep->qp_info.qp_offset;
	msg.payload[2] = (u64)cep;

	err = _scif_nodeqp_send(cep->remote_dev, &msg);
	scif_put_peer_dev(spdev);
	if (err)
		goto scif_accept_error_map;
retry:
	/* Wait for the remote node to respond with SCIF_CNCT_GNT(N)ACK */
	err = wait_event_timeout(cep->conwq, cep->state != SCIFEP_CONNECTING,
				 SCIF_NODE_ACCEPT_TIMEOUT);
	if (!err && scifdev_alive(cep))
		goto retry;
	err = !err ? -ENODEV : 0;
	if (err)
		goto scif_accept_error_map;
	kfree(conreq);

	spin_lock(&cep->lock);

	if (cep->state == SCIFEP_CLOSING) {
		/*
		 * Remote failed to allocate resources and NAKed the grant.
		 * At this point nothing references the new end point.
		 */
		spin_unlock(&cep->lock);
		scif_teardown_ep(cep);
		kfree(cep);

		/* If called with the sync flag then go back and wait. */
		if (flags & SCIF_ACCEPT_SYNC) {
			spin_lock(&lep->lock);
			goto retry_connection;
		}
		return -EAGAIN;
	}

	scif_get_port(cep->port.port);
	*newepd = (scif_epd_t)cep;
	spin_unlock(&cep->lock);
	return 0;
scif_accept_error_map:
	scif_teardown_ep(cep);
scif_accept_error_qpalloc:
	kfree(cep);
scif_accept_error_epalloc:
	msg.uop = SCIF_CNCT_REJ;
	msg.dst.node = conreq->msg.src.node;
	msg.dst.port = conreq->msg.src.port;
	msg.payload[0] = conreq->msg.payload[0];
	msg.payload[1] = conreq->msg.payload[1];
	scif_nodeqp_send(&scif_dev[conreq->msg.src.node], &msg);
	kfree(conreq);
	return err;
}
EXPORT_SYMBOL_GPL(scif_accept);
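
/*
 * A hedged sketch of the accepting side: given a listening endpoint lepd
 * (as set up in the scif_listen() example above), SCIF_ACCEPT_SYNC blocks
 * until a connection request arrives and hands back a new endpoint.
 *
 *	struct scif_port_id peer;
 *	scif_epd_t nepd;
 *	int err;
 *
 *	err = scif_accept(lepd, &peer, &nepd, SCIF_ACCEPT_SYNC);
 *	if (!err)
 *		pr_debug("accepted connection from node %u port %u\n",
 *			 peer.node, peer.port);
 */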

/*
 * scif_msg_param_check:
 * @epd: The end point returned from scif_open()
 * @len: Length of the message
 * @flags: blocking or non blocking
 *
 * Validate parameters for messaging APIs scif_send(..)/scif_recv(..).
 */
static inline int scif_msg_param_check(scif_epd_t epd, int len, int flags)
{
	int ret = -EINVAL;

	if (len < 0)
		goto err_ret;
	if (flags && (!(flags & SCIF_RECV_BLOCK)))
		goto err_ret;
	ret = 0;
err_ret:
	return ret;
}

static int _scif_send(scif_epd_t epd, void *msg, int len, int flags)
{
	struct scif_endpt *ep = (struct scif_endpt *)epd;
	struct scifmsg notif_msg;
	int curr_xfer_len = 0, sent_len = 0, write_count;
	int ret = 0;
	struct scif_qp *qp = ep->qp_info.qp;

	if (flags & SCIF_SEND_BLOCK)
		might_sleep();

	spin_lock(&ep->lock);
	while (sent_len != len && SCIFEP_CONNECTED == ep->state) {
		write_count = scif_rb_space(&qp->outbound_q);
		if (write_count) {
			/* Best effort to send as much data as possible */
			curr_xfer_len = min(len - sent_len, write_count);
			ret = scif_rb_write(&qp->outbound_q, msg,
					    curr_xfer_len);
			if (ret < 0)
				break;
			/* Success. Update write pointer */
			scif_rb_commit(&qp->outbound_q);
			/*
			 * Send a notification to the peer about the
			 * produced data message.
			 */
			notif_msg.src = ep->port;
			notif_msg.uop = SCIF_CLIENT_SENT;
			notif_msg.payload[0] = ep->remote_ep;
			ret = _scif_nodeqp_send(ep->remote_dev, &notif_msg);
			if (ret)
				break;
			sent_len += curr_xfer_len;
			msg = msg + curr_xfer_len;
			continue;
		}
		curr_xfer_len = min(len - sent_len, SCIF_ENDPT_QP_SIZE - 1);
		/* Not enough RB space. Return for the non-blocking case */
		if (!(flags & SCIF_SEND_BLOCK))
			break;

		spin_unlock(&ep->lock);
		/* Wait for a SCIF_CLIENT_RCVD message in the blocking case */
		ret =
		wait_event_interruptible(ep->sendwq,
					 (SCIFEP_CONNECTED != ep->state) ||
					 (scif_rb_space(&qp->outbound_q) >=
					 curr_xfer_len));
		spin_lock(&ep->lock);
		if (ret)
			break;
	}
	if (sent_len)
		ret = sent_len;
	else if (!ret && SCIFEP_CONNECTED != ep->state)
		ret = SCIFEP_DISCONNECTED == ep->state ?
			-ECONNRESET : -ENOTCONN;
	spin_unlock(&ep->lock);
	return ret;
}

static int _scif_recv(scif_epd_t epd, void *msg, int len, int flags)
{
	int read_size;
	struct scif_endpt *ep = (struct scif_endpt *)epd;
	struct scifmsg notif_msg;
	int curr_recv_len = 0, remaining_len = len, read_count;
	int ret = 0;
	struct scif_qp *qp = ep->qp_info.qp;

	if (flags & SCIF_RECV_BLOCK)
		might_sleep();
	spin_lock(&ep->lock);
	while (remaining_len && (SCIFEP_CONNECTED == ep->state ||
				 SCIFEP_DISCONNECTED == ep->state)) {
		read_count = scif_rb_count(&qp->inbound_q, remaining_len);
		if (read_count) {
			/*
			 * Best effort to recv as much data as there
			 * are bytes to read in the RB, particularly
			 * important for the non-blocking case.
			 */
			curr_recv_len = min(remaining_len, read_count);
			read_size = scif_rb_get_next(&qp->inbound_q,
						     msg, curr_recv_len);
			if (ep->state == SCIFEP_CONNECTED) {
				/*
				 * Update the read pointer only if the endpoint
				 * is still connected else the read pointer
				 * might no longer exist since the peer has
				 * freed resources!
				 */
				scif_rb_update_read_ptr(&qp->inbound_q);
				/*
				 * Send a notification to the peer about the
				 * consumed data message only if the EP is in
				 * SCIFEP_CONNECTED state.
				 */
				notif_msg.src = ep->port;
				notif_msg.uop = SCIF_CLIENT_RCVD;
				notif_msg.payload[0] = ep->remote_ep;
				ret = _scif_nodeqp_send(ep->remote_dev,
							&notif_msg);
				if (ret)
					break;
			}
			remaining_len -= curr_recv_len;
			msg = msg + curr_recv_len;
			continue;
		}
		/*
		 * Bail out now if the EP is in SCIFEP_DISCONNECTED state else
		 * we will keep looping forever.
		 */
		if (ep->state == SCIFEP_DISCONNECTED)
			break;
		/*
		 * Return in the non-blocking case if there is no data
		 * to read in this iteration.
		 */
		if (!(flags & SCIF_RECV_BLOCK))
			break;
		curr_recv_len = min(remaining_len, SCIF_ENDPT_QP_SIZE - 1);
		spin_unlock(&ep->lock);
		/*
		 * Wait for a SCIF_CLIENT_SEND message in the blocking case
		 * or until the other side disconnects.
		 */
		ret =
		wait_event_interruptible(ep->recvwq,
					 SCIFEP_CONNECTED != ep->state ||
					 scif_rb_count(&qp->inbound_q,
						       curr_recv_len)
					 >= curr_recv_len);
		spin_lock(&ep->lock);
		if (ret)
			break;
	}
	if (len - remaining_len)
		ret = len - remaining_len;
	else if (!ret && ep->state != SCIFEP_CONNECTED)
		ret = ep->state == SCIFEP_DISCONNECTED ?
			-ECONNRESET : -ENOTCONN;
	spin_unlock(&ep->lock);
	return ret;
}

/**
 * scif_user_send() - Send data to connection queue
 * @epd: The end point returned from scif_open()
 * @msg: Address of the data to send
 * @len: Length to send
 * @flags: blocking or non blocking
 *
 * This function is called from the driver IOCTL entry point
 * only and is a wrapper for _scif_send().
 */
int scif_user_send(scif_epd_t epd, void __user *msg, int len, int flags)
{
	struct scif_endpt *ep = (struct scif_endpt *)epd;
	int err = 0;
	int sent_len = 0;
	char *tmp;
	int loop_len;
	int chunk_len = min(len, (1 << (MAX_ORDER + PAGE_SHIFT - 1)));

	dev_dbg(scif_info.mdev.this_device,
		"SCIFAPI send (U): ep %p %s\n", ep, scif_ep_states[ep->state]);
	if (!len)
		return 0;

	err = scif_msg_param_check(epd, len, flags);
	if (err)
		goto send_err;

	tmp = kmalloc(chunk_len, GFP_KERNEL);
	if (!tmp) {
		err = -ENOMEM;
		goto send_err;
	}
	/*
	 * Grabbing the lock before breaking up the transfer in
	 * multiple chunks is required to ensure that messages do
	 * not get fragmented and reordered.
	 */
	mutex_lock(&ep->sendlock);
	while (sent_len != len) {
		loop_len = len - sent_len;
		loop_len = min(chunk_len, loop_len);
		if (copy_from_user(tmp, msg, loop_len)) {
			err = -EFAULT;
			goto send_free_err;
		}
		err = _scif_send(epd, tmp, loop_len, flags);
		if (err < 0)
			goto send_free_err;
		sent_len += err;
		msg += err;
		if (err != loop_len)
			goto send_free_err;
	}
send_free_err:
	mutex_unlock(&ep->sendlock);
	kfree(tmp);
send_err:
	return err < 0 ? err : sent_len;
}

/**
 * scif_user_recv() - Receive data from connection queue
 * @epd: The end point returned from scif_open()
 * @msg: Address to place data
 * @len: Length to receive
 * @flags: blocking or non blocking
 *
 * This function is called from the driver IOCTL entry point
 * only and is a wrapper for _scif_recv().
 */
int scif_user_recv(scif_epd_t epd, void __user *msg, int len, int flags)
{
	struct scif_endpt *ep = (struct scif_endpt *)epd;
	int err = 0;
	int recv_len = 0;
	char *tmp;
	int loop_len;
	int chunk_len = min(len, (1 << (MAX_ORDER + PAGE_SHIFT - 1)));

	dev_dbg(scif_info.mdev.this_device,
		"SCIFAPI recv (U): ep %p %s\n", ep, scif_ep_states[ep->state]);
	if (!len)
		return 0;

	err = scif_msg_param_check(epd, len, flags);
	if (err)
		goto recv_err;

	tmp = kmalloc(chunk_len, GFP_KERNEL);
	if (!tmp) {
		err = -ENOMEM;
		goto recv_err;
	}
	/*
	 * Grabbing the lock before breaking up the transfer in
	 * multiple chunks is required to ensure that messages do
	 * not get fragmented and reordered.
	 */
	mutex_lock(&ep->recvlock);
	while (recv_len != len) {
		loop_len = len - recv_len;
		loop_len = min(chunk_len, loop_len);
		err = _scif_recv(epd, tmp, loop_len, flags);
		if (err < 0)
			goto recv_free_err;
		if (copy_to_user(msg, tmp, err)) {
			err = -EFAULT;
			goto recv_free_err;
		}
		recv_len += err;
		msg += err;
		if (err != loop_len)
			goto recv_free_err;
	}
recv_free_err:
	mutex_unlock(&ep->recvlock);
	kfree(tmp);
recv_err:
	return err < 0 ? err : recv_len;
}

/**
 * scif_send() - Send data to connection queue
 * @epd: The end point returned from scif_open()
 * @msg: Address of the data to send
 * @len: Length to send
 * @flags: blocking or non blocking
 *
 * This function is called from kernel mode only and is
 * a wrapper for _scif_send().
 */
int scif_send(scif_epd_t epd, void *msg, int len, int flags)
{
	struct scif_endpt *ep = (struct scif_endpt *)epd;
	int ret;

	dev_dbg(scif_info.mdev.this_device,
		"SCIFAPI send (K): ep %p %s\n", ep, scif_ep_states[ep->state]);
	if (!len)
		return 0;

	ret = scif_msg_param_check(epd, len, flags);
	if (ret)
		return ret;
	if (!ep->remote_dev)
		return -ENOTCONN;
	/*
	 * Grab the mutex lock in the blocking case only
	 * to ensure messages do not get fragmented/reordered.
	 * The non-blocking mode is protected using spin locks
	 * in _scif_send().
	 */
	if (flags & SCIF_SEND_BLOCK)
		mutex_lock(&ep->sendlock);

	ret = _scif_send(epd, msg, len, flags);

	if (flags & SCIF_SEND_BLOCK)
		mutex_unlock(&ep->sendlock);
	return ret;
}
EXPORT_SYMBOL_GPL(scif_send);
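
/*
 * Usage sketch for scif_send() on a connected endpoint epd (buffer and size
 * are illustrative). With SCIF_SEND_BLOCK the call returns only once all
 * bytes are queued or the connection is torn down; the return value is the
 * number of bytes sent or a negative error.
 *
 *	char buf[64] = "hello";
 *	int ret = scif_send(epd, buf, sizeof(buf), SCIF_SEND_BLOCK);
 *
 *	if (ret < 0)
 *		pr_err("scif_send failed: %d\n", ret);
 */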

/**
 * scif_recv() - Receive data from connection queue
 * @epd: The end point returned from scif_open()
 * @msg: Address to place data
 * @len: Length to receive
 * @flags: blocking or non blocking
 *
 * This function is called from kernel mode only and is
 * a wrapper for _scif_recv().
 */
int scif_recv(scif_epd_t epd, void *msg, int len, int flags)
{
	struct scif_endpt *ep = (struct scif_endpt *)epd;
	int ret;

	dev_dbg(scif_info.mdev.this_device,
		"SCIFAPI recv (K): ep %p %s\n", ep, scif_ep_states[ep->state]);
	if (!len)
		return 0;

	ret = scif_msg_param_check(epd, len, flags);
	if (ret)
		return ret;
	/*
	 * Grab the mutex lock in the blocking case only
	 * to ensure messages do not get fragmented/reordered.
	 * The non-blocking mode is protected using spin locks
	 * in _scif_recv().
	 */
	if (flags & SCIF_RECV_BLOCK)
		mutex_lock(&ep->recvlock);

	ret = _scif_recv(epd, msg, len, flags);

	if (flags & SCIF_RECV_BLOCK)
		mutex_unlock(&ep->recvlock);

	return ret;
}
EXPORT_SYMBOL_GPL(scif_recv);
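
/*
 * Matching receive sketch for the peer endpoint (again illustrative).
 * Without SCIF_RECV_BLOCK the call returns whatever is currently available,
 * possibly fewer bytes than requested; once the connection goes away it
 * returns -ECONNRESET or -ENOTCONN.
 *
 *	char buf[64];
 *	int ret = scif_recv(epd, buf, sizeof(buf), SCIF_RECV_BLOCK);
 *
 *	if (ret >= 0)
 *		pr_debug("received %d bytes\n", ret);
 */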

int scif_get_node_ids(u16 *nodes, int len, u16 *self)
{
	int online = 0;
	int offset = 0;
	int node;

	if (!scif_is_mgmt_node())
		scif_get_node_info();

	*self = scif_info.nodeid;
	mutex_lock(&scif_info.conflock);
	len = min_t(int, len, scif_info.total);
	for (node = 0; node <= scif_info.maxid; node++) {
		if (_scifdev_alive(&scif_dev[node])) {
			online++;
			if (offset < len)
				nodes[offset++] = node;
		}
	}
	dev_dbg(scif_info.mdev.this_device,
		"SCIFAPI get_node_ids total %d online %d filled in %d nodes\n",
		scif_info.total, online, offset);
	mutex_unlock(&scif_info.conflock);

	return online;
}
EXPORT_SYMBOL_GPL(scif_get_node_ids);
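
/*
 * Query sketch for scif_get_node_ids(): the return value is the number of
 * online nodes, which can exceed the number of ids copied when the array is
 * smaller. The array size of 8 below is an arbitrary example.
 *
 *	u16 nodes[8], self;
 *	int online = scif_get_node_ids(nodes, ARRAY_SIZE(nodes), &self);
 *
 *	pr_debug("self %u, %d node(s) online\n", self, online);
 */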