Ursula Braun | b38d732 | 2017-01-09 16:55:25 +0100 | [diff] [blame] | 1 | /* |
| 2 | * Shared Memory Communications over RDMA (SMC-R) and RoCE |
| 3 | * |
| 4 | * Socket Closing - normal and abnormal |
| 5 | * |
| 6 | * Copyright IBM Corp. 2016 |
| 7 | * |
| 8 | * Author(s): Ursula Braun <ubraun@linux.vnet.ibm.com> |
| 9 | */ |
| 10 | |
| 11 | #include <linux/workqueue.h> |
Ingo Molnar | c3edc40 | 2017-02-02 08:35:14 +0100 | [diff] [blame] | 12 | #include <linux/sched/signal.h> |
| 13 | |
Ursula Braun | b38d732 | 2017-01-09 16:55:25 +0100 | [diff] [blame] | 14 | #include <net/sock.h> |
| 15 | |
| 16 | #include "smc.h" |
| 17 | #include "smc_tx.h" |
| 18 | #include "smc_cdc.h" |
| 19 | #include "smc_close.h" |
| 20 | |
/* upper bound (in jiffies) for smc_close_wait_tx_pends() to wait until no
 * CDC transmission is pending any more
 */
#define SMC_CLOSE_WAIT_TX_PENDS_TIME		(5 * HZ)
| 22 | |
| 23 | static void smc_close_cleanup_listen(struct sock *parent) |
| 24 | { |
| 25 | struct sock *sk; |
| 26 | |
| 27 | /* Close non-accepted connections */ |
| 28 | while ((sk = smc_accept_dequeue(parent, NULL))) |
| 29 | smc_close_non_accepted(sk); |
| 30 | } |
| 31 | |
| 32 | static void smc_close_wait_tx_pends(struct smc_sock *smc) |
| 33 | { |
| 34 | DEFINE_WAIT_FUNC(wait, woken_wake_function); |
| 35 | struct sock *sk = &smc->sk; |
| 36 | signed long timeout; |
| 37 | |
| 38 | timeout = SMC_CLOSE_WAIT_TX_PENDS_TIME; |
| 39 | add_wait_queue(sk_sleep(sk), &wait); |
| 40 | while (!signal_pending(current) && timeout) { |
| 41 | int rc; |
| 42 | |
| 43 | rc = sk_wait_event(sk, &timeout, |
| 44 | !smc_cdc_tx_has_pending(&smc->conn), |
| 45 | &wait); |
| 46 | if (rc) |
| 47 | break; |
| 48 | } |
| 49 | remove_wait_queue(sk_sleep(sk), &wait); |
| 50 | } |
| 51 | |
| 52 | /* wait for sndbuf data being transmitted */ |
| 53 | static void smc_close_stream_wait(struct smc_sock *smc, long timeout) |
| 54 | { |
| 55 | DEFINE_WAIT_FUNC(wait, woken_wake_function); |
| 56 | struct sock *sk = &smc->sk; |
| 57 | |
| 58 | if (!timeout) |
| 59 | return; |
| 60 | |
| 61 | if (!smc_tx_prepared_sends(&smc->conn)) |
| 62 | return; |
| 63 | |
| 64 | smc->wait_close_tx_prepared = 1; |
| 65 | add_wait_queue(sk_sleep(sk), &wait); |
| 66 | while (!signal_pending(current) && timeout) { |
| 67 | int rc; |
| 68 | |
| 69 | rc = sk_wait_event(sk, &timeout, |
| 70 | !smc_tx_prepared_sends(&smc->conn) || |
| 71 | (sk->sk_err == ECONNABORTED) || |
| 72 | (sk->sk_err == ECONNRESET), |
| 73 | &wait); |
| 74 | if (rc) |
| 75 | break; |
| 76 | } |
| 77 | remove_wait_queue(sk_sleep(sk), &wait); |
| 78 | smc->wait_close_tx_prepared = 0; |
| 79 | } |
| 80 | |
| 81 | void smc_close_wake_tx_prepared(struct smc_sock *smc) |
| 82 | { |
| 83 | if (smc->wait_close_tx_prepared) |
| 84 | /* wake up socket closing */ |
| 85 | smc->sk.sk_state_change(&smc->sk); |
| 86 | } |
| 87 | |
| 88 | static int smc_close_wr(struct smc_connection *conn) |
| 89 | { |
| 90 | conn->local_tx_ctrl.conn_state_flags.peer_done_writing = 1; |
| 91 | |
| 92 | return smc_cdc_get_slot_and_msg_send(conn); |
| 93 | } |
| 94 | |
| 95 | static int smc_close_final(struct smc_connection *conn) |
| 96 | { |
| 97 | if (atomic_read(&conn->bytes_to_rcv)) |
| 98 | conn->local_tx_ctrl.conn_state_flags.peer_conn_abort = 1; |
| 99 | else |
| 100 | conn->local_tx_ctrl.conn_state_flags.peer_conn_closed = 1; |
| 101 | |
| 102 | return smc_cdc_get_slot_and_msg_send(conn); |
| 103 | } |
| 104 | |
| 105 | static int smc_close_abort(struct smc_connection *conn) |
| 106 | { |
| 107 | conn->local_tx_ctrl.conn_state_flags.peer_conn_abort = 1; |
| 108 | |
| 109 | return smc_cdc_get_slot_and_msg_send(conn); |
| 110 | } |
| 111 | |
| 112 | /* terminate smc socket abnormally - active abort |
| 113 | * RDMA communication no longer possible |
| 114 | */ |
| 115 | void smc_close_active_abort(struct smc_sock *smc) |
| 116 | { |
| 117 | struct smc_cdc_conn_state_flags *txflags = |
| 118 | &smc->conn.local_tx_ctrl.conn_state_flags; |
| 119 | |
| 120 | bh_lock_sock(&smc->sk); |
| 121 | smc->sk.sk_err = ECONNABORTED; |
| 122 | if (smc->clcsock && smc->clcsock->sk) { |
| 123 | smc->clcsock->sk->sk_err = ECONNABORTED; |
| 124 | smc->clcsock->sk->sk_state_change(smc->clcsock->sk); |
| 125 | } |
| 126 | switch (smc->sk.sk_state) { |
| 127 | case SMC_INIT: |
| 128 | smc->sk.sk_state = SMC_PEERABORTWAIT; |
| 129 | break; |
| 130 | case SMC_APPCLOSEWAIT1: |
| 131 | case SMC_APPCLOSEWAIT2: |
| 132 | txflags->peer_conn_abort = 1; |
| 133 | sock_release(smc->clcsock); |
| 134 | if (!smc_cdc_rxed_any_close(&smc->conn)) |
| 135 | smc->sk.sk_state = SMC_PEERABORTWAIT; |
| 136 | else |
| 137 | smc->sk.sk_state = SMC_CLOSED; |
| 138 | break; |
| 139 | case SMC_PEERCLOSEWAIT1: |
| 140 | case SMC_PEERCLOSEWAIT2: |
| 141 | if (!txflags->peer_conn_closed) { |
| 142 | smc->sk.sk_state = SMC_PEERABORTWAIT; |
| 143 | txflags->peer_conn_abort = 1; |
| 144 | sock_release(smc->clcsock); |
| 145 | } else { |
| 146 | smc->sk.sk_state = SMC_CLOSED; |
| 147 | } |
| 148 | break; |
| 149 | case SMC_PROCESSABORT: |
| 150 | case SMC_APPFINCLOSEWAIT: |
| 151 | if (!txflags->peer_conn_closed) { |
| 152 | txflags->peer_conn_abort = 1; |
| 153 | sock_release(smc->clcsock); |
| 154 | } |
| 155 | smc->sk.sk_state = SMC_CLOSED; |
| 156 | break; |
| 157 | case SMC_PEERFINCLOSEWAIT: |
| 158 | case SMC_PEERABORTWAIT: |
| 159 | case SMC_CLOSED: |
| 160 | break; |
| 161 | } |
| 162 | |
| 163 | sock_set_flag(&smc->sk, SOCK_DEAD); |
| 164 | bh_unlock_sock(&smc->sk); |
| 165 | smc->sk.sk_state_change(&smc->sk); |
| 166 | } |
| 167 | |
| 168 | int smc_close_active(struct smc_sock *smc) |
| 169 | { |
| 170 | struct smc_cdc_conn_state_flags *txflags = |
| 171 | &smc->conn.local_tx_ctrl.conn_state_flags; |
| 172 | long timeout = SMC_MAX_STREAM_WAIT_TIMEOUT; |
| 173 | struct smc_connection *conn = &smc->conn; |
| 174 | struct sock *sk = &smc->sk; |
| 175 | int old_state; |
| 176 | int rc = 0; |
| 177 | |
| 178 | if (sock_flag(sk, SOCK_LINGER) && |
| 179 | !(current->flags & PF_EXITING)) |
| 180 | timeout = sk->sk_lingertime; |
| 181 | |
| 182 | again: |
| 183 | old_state = sk->sk_state; |
| 184 | switch (old_state) { |
| 185 | case SMC_INIT: |
| 186 | sk->sk_state = SMC_CLOSED; |
| 187 | if (smc->smc_listen_work.func) |
| 188 | flush_work(&smc->smc_listen_work); |
| 189 | sock_put(sk); |
| 190 | break; |
| 191 | case SMC_LISTEN: |
| 192 | sk->sk_state = SMC_CLOSED; |
| 193 | sk->sk_state_change(sk); /* wake up accept */ |
| 194 | if (smc->clcsock && smc->clcsock->sk) { |
| 195 | rc = kernel_sock_shutdown(smc->clcsock, SHUT_RDWR); |
| 196 | /* wake up kernel_accept of smc_tcp_listen_worker */ |
| 197 | smc->clcsock->sk->sk_data_ready(smc->clcsock->sk); |
| 198 | } |
| 199 | release_sock(sk); |
| 200 | smc_close_cleanup_listen(sk); |
| 201 | flush_work(&smc->tcp_listen_work); |
| 202 | lock_sock(sk); |
| 203 | break; |
| 204 | case SMC_ACTIVE: |
| 205 | smc_close_stream_wait(smc, timeout); |
| 206 | release_sock(sk); |
| 207 | cancel_work_sync(&conn->tx_work); |
| 208 | lock_sock(sk); |
| 209 | if (sk->sk_state == SMC_ACTIVE) { |
| 210 | /* send close request */ |
| 211 | rc = smc_close_final(conn); |
| 212 | sk->sk_state = SMC_PEERCLOSEWAIT1; |
| 213 | } else { |
| 214 | /* peer event has changed the state */ |
| 215 | goto again; |
| 216 | } |
| 217 | break; |
| 218 | case SMC_APPFINCLOSEWAIT: |
| 219 | /* socket already shutdown wr or both (active close) */ |
| 220 | if (txflags->peer_done_writing && |
| 221 | !txflags->peer_conn_closed) { |
| 222 | /* just shutdown wr done, send close request */ |
| 223 | rc = smc_close_final(conn); |
| 224 | } |
| 225 | sk->sk_state = SMC_CLOSED; |
| 226 | smc_close_wait_tx_pends(smc); |
| 227 | break; |
| 228 | case SMC_APPCLOSEWAIT1: |
| 229 | case SMC_APPCLOSEWAIT2: |
| 230 | if (!smc_cdc_rxed_any_close(conn)) |
| 231 | smc_close_stream_wait(smc, timeout); |
| 232 | release_sock(sk); |
| 233 | cancel_work_sync(&conn->tx_work); |
| 234 | lock_sock(sk); |
| 235 | if (sk->sk_err != ECONNABORTED) { |
| 236 | /* confirm close from peer */ |
| 237 | rc = smc_close_final(conn); |
| 238 | if (rc) |
| 239 | break; |
| 240 | } |
| 241 | if (smc_cdc_rxed_any_close(conn)) |
| 242 | /* peer has closed the socket already */ |
| 243 | sk->sk_state = SMC_CLOSED; |
| 244 | else |
| 245 | /* peer has just issued a shutdown write */ |
| 246 | sk->sk_state = SMC_PEERFINCLOSEWAIT; |
| 247 | smc_close_wait_tx_pends(smc); |
| 248 | break; |
| 249 | case SMC_PEERCLOSEWAIT1: |
| 250 | case SMC_PEERCLOSEWAIT2: |
| 251 | case SMC_PEERFINCLOSEWAIT: |
| 252 | /* peer sending PeerConnectionClosed will cause transition */ |
| 253 | break; |
| 254 | case SMC_PROCESSABORT: |
| 255 | cancel_work_sync(&conn->tx_work); |
| 256 | smc_close_abort(conn); |
| 257 | sk->sk_state = SMC_CLOSED; |
| 258 | smc_close_wait_tx_pends(smc); |
| 259 | break; |
| 260 | case SMC_PEERABORTWAIT: |
| 261 | case SMC_CLOSED: |
| 262 | /* nothing to do, add tracing in future patch */ |
| 263 | break; |
| 264 | } |
| 265 | |
| 266 | if (old_state != sk->sk_state) |
| 267 | sk->sk_state_change(&smc->sk); |
| 268 | return rc; |
| 269 | } |
| 270 | |
| 271 | static void smc_close_passive_abort_received(struct smc_sock *smc) |
| 272 | { |
| 273 | struct smc_cdc_conn_state_flags *txflags = |
| 274 | &smc->conn.local_tx_ctrl.conn_state_flags; |
| 275 | struct sock *sk = &smc->sk; |
| 276 | |
| 277 | switch (sk->sk_state) { |
| 278 | case SMC_ACTIVE: |
| 279 | case SMC_APPFINCLOSEWAIT: |
| 280 | case SMC_APPCLOSEWAIT1: |
| 281 | case SMC_APPCLOSEWAIT2: |
| 282 | smc_close_abort(&smc->conn); |
| 283 | sk->sk_state = SMC_PROCESSABORT; |
| 284 | break; |
| 285 | case SMC_PEERCLOSEWAIT1: |
| 286 | case SMC_PEERCLOSEWAIT2: |
| 287 | if (txflags->peer_done_writing && |
| 288 | !txflags->peer_conn_closed) { |
| 289 | /* just shutdown, but not yet closed locally */ |
| 290 | smc_close_abort(&smc->conn); |
| 291 | sk->sk_state = SMC_PROCESSABORT; |
| 292 | } else { |
| 293 | sk->sk_state = SMC_CLOSED; |
| 294 | } |
| 295 | break; |
| 296 | case SMC_PEERFINCLOSEWAIT: |
| 297 | case SMC_PEERABORTWAIT: |
| 298 | sk->sk_state = SMC_CLOSED; |
| 299 | break; |
| 300 | case SMC_INIT: |
| 301 | case SMC_PROCESSABORT: |
| 302 | /* nothing to do, add tracing in future patch */ |
| 303 | break; |
| 304 | } |
| 305 | } |
| 306 | |
| 307 | /* Some kind of closing has been received: peer_conn_closed, peer_conn_abort, |
| 308 | * or peer_done_writing. |
| 309 | * Called under tasklet context. |
| 310 | */ |
| 311 | void smc_close_passive_received(struct smc_sock *smc) |
| 312 | { |
| 313 | struct smc_cdc_conn_state_flags *rxflags = |
| 314 | &smc->conn.local_rx_ctrl.conn_state_flags; |
| 315 | struct sock *sk = &smc->sk; |
| 316 | int old_state; |
| 317 | |
| 318 | sk->sk_shutdown |= RCV_SHUTDOWN; |
| 319 | if (smc->clcsock && smc->clcsock->sk) |
| 320 | smc->clcsock->sk->sk_shutdown |= RCV_SHUTDOWN; |
| 321 | sock_set_flag(&smc->sk, SOCK_DONE); |
| 322 | |
| 323 | old_state = sk->sk_state; |
| 324 | |
| 325 | if (rxflags->peer_conn_abort) { |
| 326 | smc_close_passive_abort_received(smc); |
| 327 | goto wakeup; |
| 328 | } |
| 329 | |
| 330 | switch (sk->sk_state) { |
| 331 | case SMC_INIT: |
| 332 | if (atomic_read(&smc->conn.bytes_to_rcv) || |
| 333 | (rxflags->peer_done_writing && |
| 334 | !rxflags->peer_conn_closed)) |
| 335 | sk->sk_state = SMC_APPCLOSEWAIT1; |
| 336 | else |
| 337 | sk->sk_state = SMC_CLOSED; |
| 338 | break; |
| 339 | case SMC_ACTIVE: |
| 340 | sk->sk_state = SMC_APPCLOSEWAIT1; |
| 341 | break; |
| 342 | case SMC_PEERCLOSEWAIT1: |
| 343 | if (rxflags->peer_done_writing) |
| 344 | sk->sk_state = SMC_PEERCLOSEWAIT2; |
| 345 | /* fall through to check for closing */ |
| 346 | case SMC_PEERCLOSEWAIT2: |
| 347 | case SMC_PEERFINCLOSEWAIT: |
| 348 | if (!smc_cdc_rxed_any_close(&smc->conn)) |
| 349 | break; |
| 350 | if (sock_flag(sk, SOCK_DEAD) && |
| 351 | (sk->sk_shutdown == SHUTDOWN_MASK)) { |
| 352 | /* smc_release has already been called locally */ |
| 353 | sk->sk_state = SMC_CLOSED; |
| 354 | } else { |
| 355 | /* just shutdown, but not yet closed locally */ |
| 356 | sk->sk_state = SMC_APPFINCLOSEWAIT; |
| 357 | } |
| 358 | break; |
| 359 | case SMC_APPCLOSEWAIT1: |
| 360 | case SMC_APPCLOSEWAIT2: |
| 361 | case SMC_APPFINCLOSEWAIT: |
| 362 | case SMC_PEERABORTWAIT: |
| 363 | case SMC_PROCESSABORT: |
| 364 | case SMC_CLOSED: |
| 365 | /* nothing to do, add tracing in future patch */ |
| 366 | break; |
| 367 | } |
| 368 | |
| 369 | wakeup: |
| 370 | if (old_state != sk->sk_state) |
| 371 | sk->sk_state_change(sk); |
| 372 | sk->sk_data_ready(sk); /* wakeup blocked rcvbuf consumers */ |
| 373 | sk->sk_write_space(sk); /* wakeup blocked sndbuf producers */ |
| 374 | |
| 375 | if ((sk->sk_state == SMC_CLOSED) && |
| 376 | (sock_flag(sk, SOCK_DEAD) || (old_state == SMC_INIT))) { |
| 377 | smc_conn_free(&smc->conn); |
| 378 | schedule_delayed_work(&smc->sock_put_work, |
| 379 | SMC_CLOSE_SOCK_PUT_DELAY); |
| 380 | } |
| 381 | } |
| 382 | |
| 383 | void smc_close_sock_put_work(struct work_struct *work) |
| 384 | { |
| 385 | struct smc_sock *smc = container_of(to_delayed_work(work), |
| 386 | struct smc_sock, |
| 387 | sock_put_work); |
| 388 | |
Ursula Braun | f16a7dd | 2017-01-09 16:55:26 +0100 | [diff] [blame] | 389 | smc->sk.sk_prot->unhash(&smc->sk); |
Ursula Braun | b38d732 | 2017-01-09 16:55:25 +0100 | [diff] [blame] | 390 | sock_put(&smc->sk); |
| 391 | } |
| 392 | |
| 393 | int smc_close_shutdown_write(struct smc_sock *smc) |
| 394 | { |
| 395 | struct smc_connection *conn = &smc->conn; |
| 396 | long timeout = SMC_MAX_STREAM_WAIT_TIMEOUT; |
| 397 | struct sock *sk = &smc->sk; |
| 398 | int old_state; |
| 399 | int rc = 0; |
| 400 | |
| 401 | if (sock_flag(sk, SOCK_LINGER)) |
| 402 | timeout = sk->sk_lingertime; |
| 403 | |
| 404 | again: |
| 405 | old_state = sk->sk_state; |
| 406 | switch (old_state) { |
| 407 | case SMC_ACTIVE: |
| 408 | smc_close_stream_wait(smc, timeout); |
| 409 | release_sock(sk); |
| 410 | cancel_work_sync(&conn->tx_work); |
| 411 | lock_sock(sk); |
| 412 | /* send close wr request */ |
| 413 | rc = smc_close_wr(conn); |
| 414 | if (sk->sk_state == SMC_ACTIVE) |
| 415 | sk->sk_state = SMC_PEERCLOSEWAIT1; |
| 416 | else |
| 417 | goto again; |
| 418 | break; |
| 419 | case SMC_APPCLOSEWAIT1: |
| 420 | /* passive close */ |
| 421 | if (!smc_cdc_rxed_any_close(conn)) |
| 422 | smc_close_stream_wait(smc, timeout); |
| 423 | release_sock(sk); |
| 424 | cancel_work_sync(&conn->tx_work); |
| 425 | lock_sock(sk); |
| 426 | /* confirm close from peer */ |
| 427 | rc = smc_close_wr(conn); |
| 428 | sk->sk_state = SMC_APPCLOSEWAIT2; |
| 429 | break; |
| 430 | case SMC_APPCLOSEWAIT2: |
| 431 | case SMC_PEERFINCLOSEWAIT: |
| 432 | case SMC_PEERCLOSEWAIT1: |
| 433 | case SMC_PEERCLOSEWAIT2: |
| 434 | case SMC_APPFINCLOSEWAIT: |
| 435 | case SMC_PROCESSABORT: |
| 436 | case SMC_PEERABORTWAIT: |
| 437 | /* nothing to do, add tracing in future patch */ |
| 438 | break; |
| 439 | } |
| 440 | |
| 441 | if (old_state != sk->sk_state) |
| 442 | sk->sk_state_change(&smc->sk); |
| 443 | return rc; |
| 444 | } |