Ursula Braun | b38d732 | 2017-01-09 16:55:25 +0100 | [diff] [blame] | 1 | /* |
| 2 | * Shared Memory Communications over RDMA (SMC-R) and RoCE |
| 3 | * |
| 4 | * Socket Closing - normal and abnormal |
| 5 | * |
| 6 | * Copyright IBM Corp. 2016 |
| 7 | * |
| 8 | * Author(s): Ursula Braun <ubraun@linux.vnet.ibm.com> |
| 9 | */ |
| 10 | |
| 11 | #include <linux/workqueue.h> |
Ingo Molnar | c3edc40 | 2017-02-02 08:35:14 +0100 | [diff] [blame] | 12 | #include <linux/sched/signal.h> |
| 13 | |
Ursula Braun | b38d732 | 2017-01-09 16:55:25 +0100 | [diff] [blame] | 14 | #include <net/sock.h> |
| 15 | |
| 16 | #include "smc.h" |
| 17 | #include "smc_tx.h" |
| 18 | #include "smc_cdc.h" |
| 19 | #include "smc_close.h" |
| 20 | |
/* timeout used by smc_close_wait_tx_pends() while waiting for pending
 * CDC transmissions during close
 */
#define SMC_CLOSE_WAIT_TX_PENDS_TIME		(5 * HZ)
| 22 | |
| 23 | static void smc_close_cleanup_listen(struct sock *parent) |
| 24 | { |
| 25 | struct sock *sk; |
| 26 | |
| 27 | /* Close non-accepted connections */ |
| 28 | while ((sk = smc_accept_dequeue(parent, NULL))) |
| 29 | smc_close_non_accepted(sk); |
| 30 | } |
| 31 | |
| 32 | static void smc_close_wait_tx_pends(struct smc_sock *smc) |
| 33 | { |
| 34 | DEFINE_WAIT_FUNC(wait, woken_wake_function); |
| 35 | struct sock *sk = &smc->sk; |
| 36 | signed long timeout; |
| 37 | |
| 38 | timeout = SMC_CLOSE_WAIT_TX_PENDS_TIME; |
| 39 | add_wait_queue(sk_sleep(sk), &wait); |
| 40 | while (!signal_pending(current) && timeout) { |
| 41 | int rc; |
| 42 | |
| 43 | rc = sk_wait_event(sk, &timeout, |
| 44 | !smc_cdc_tx_has_pending(&smc->conn), |
| 45 | &wait); |
| 46 | if (rc) |
| 47 | break; |
| 48 | } |
| 49 | remove_wait_queue(sk_sleep(sk), &wait); |
| 50 | } |
| 51 | |
| 52 | /* wait for sndbuf data being transmitted */ |
| 53 | static void smc_close_stream_wait(struct smc_sock *smc, long timeout) |
| 54 | { |
| 55 | DEFINE_WAIT_FUNC(wait, woken_wake_function); |
| 56 | struct sock *sk = &smc->sk; |
| 57 | |
| 58 | if (!timeout) |
| 59 | return; |
| 60 | |
| 61 | if (!smc_tx_prepared_sends(&smc->conn)) |
| 62 | return; |
| 63 | |
| 64 | smc->wait_close_tx_prepared = 1; |
| 65 | add_wait_queue(sk_sleep(sk), &wait); |
| 66 | while (!signal_pending(current) && timeout) { |
| 67 | int rc; |
| 68 | |
| 69 | rc = sk_wait_event(sk, &timeout, |
| 70 | !smc_tx_prepared_sends(&smc->conn) || |
| 71 | (sk->sk_err == ECONNABORTED) || |
| 72 | (sk->sk_err == ECONNRESET), |
| 73 | &wait); |
| 74 | if (rc) |
| 75 | break; |
| 76 | } |
| 77 | remove_wait_queue(sk_sleep(sk), &wait); |
| 78 | smc->wait_close_tx_prepared = 0; |
| 79 | } |
| 80 | |
| 81 | void smc_close_wake_tx_prepared(struct smc_sock *smc) |
| 82 | { |
| 83 | if (smc->wait_close_tx_prepared) |
| 84 | /* wake up socket closing */ |
| 85 | smc->sk.sk_state_change(&smc->sk); |
| 86 | } |
| 87 | |
| 88 | static int smc_close_wr(struct smc_connection *conn) |
| 89 | { |
| 90 | conn->local_tx_ctrl.conn_state_flags.peer_done_writing = 1; |
| 91 | |
| 92 | return smc_cdc_get_slot_and_msg_send(conn); |
| 93 | } |
| 94 | |
| 95 | static int smc_close_final(struct smc_connection *conn) |
| 96 | { |
| 97 | if (atomic_read(&conn->bytes_to_rcv)) |
| 98 | conn->local_tx_ctrl.conn_state_flags.peer_conn_abort = 1; |
| 99 | else |
| 100 | conn->local_tx_ctrl.conn_state_flags.peer_conn_closed = 1; |
| 101 | |
| 102 | return smc_cdc_get_slot_and_msg_send(conn); |
| 103 | } |
| 104 | |
| 105 | static int smc_close_abort(struct smc_connection *conn) |
| 106 | { |
| 107 | conn->local_tx_ctrl.conn_state_flags.peer_conn_abort = 1; |
| 108 | |
| 109 | return smc_cdc_get_slot_and_msg_send(conn); |
| 110 | } |
| 111 | |
| 112 | /* terminate smc socket abnormally - active abort |
| 113 | * RDMA communication no longer possible |
| 114 | */ |
| 115 | void smc_close_active_abort(struct smc_sock *smc) |
| 116 | { |
| 117 | struct smc_cdc_conn_state_flags *txflags = |
| 118 | &smc->conn.local_tx_ctrl.conn_state_flags; |
| 119 | |
Ursula Braun | b38d732 | 2017-01-09 16:55:25 +0100 | [diff] [blame] | 120 | smc->sk.sk_err = ECONNABORTED; |
| 121 | if (smc->clcsock && smc->clcsock->sk) { |
| 122 | smc->clcsock->sk->sk_err = ECONNABORTED; |
| 123 | smc->clcsock->sk->sk_state_change(smc->clcsock->sk); |
| 124 | } |
| 125 | switch (smc->sk.sk_state) { |
| 126 | case SMC_INIT: |
Ursula Braun | 46c28db | 2017-04-10 14:58:01 +0200 | [diff] [blame^] | 127 | case SMC_ACTIVE: |
Ursula Braun | b38d732 | 2017-01-09 16:55:25 +0100 | [diff] [blame] | 128 | smc->sk.sk_state = SMC_PEERABORTWAIT; |
| 129 | break; |
| 130 | case SMC_APPCLOSEWAIT1: |
| 131 | case SMC_APPCLOSEWAIT2: |
| 132 | txflags->peer_conn_abort = 1; |
| 133 | sock_release(smc->clcsock); |
| 134 | if (!smc_cdc_rxed_any_close(&smc->conn)) |
| 135 | smc->sk.sk_state = SMC_PEERABORTWAIT; |
| 136 | else |
| 137 | smc->sk.sk_state = SMC_CLOSED; |
| 138 | break; |
| 139 | case SMC_PEERCLOSEWAIT1: |
| 140 | case SMC_PEERCLOSEWAIT2: |
| 141 | if (!txflags->peer_conn_closed) { |
| 142 | smc->sk.sk_state = SMC_PEERABORTWAIT; |
| 143 | txflags->peer_conn_abort = 1; |
| 144 | sock_release(smc->clcsock); |
| 145 | } else { |
| 146 | smc->sk.sk_state = SMC_CLOSED; |
| 147 | } |
| 148 | break; |
| 149 | case SMC_PROCESSABORT: |
| 150 | case SMC_APPFINCLOSEWAIT: |
| 151 | if (!txflags->peer_conn_closed) { |
| 152 | txflags->peer_conn_abort = 1; |
| 153 | sock_release(smc->clcsock); |
| 154 | } |
| 155 | smc->sk.sk_state = SMC_CLOSED; |
| 156 | break; |
| 157 | case SMC_PEERFINCLOSEWAIT: |
| 158 | case SMC_PEERABORTWAIT: |
| 159 | case SMC_CLOSED: |
| 160 | break; |
| 161 | } |
| 162 | |
| 163 | sock_set_flag(&smc->sk, SOCK_DEAD); |
Ursula Braun | b38d732 | 2017-01-09 16:55:25 +0100 | [diff] [blame] | 164 | smc->sk.sk_state_change(&smc->sk); |
| 165 | } |
| 166 | |
| 167 | int smc_close_active(struct smc_sock *smc) |
| 168 | { |
| 169 | struct smc_cdc_conn_state_flags *txflags = |
| 170 | &smc->conn.local_tx_ctrl.conn_state_flags; |
| 171 | long timeout = SMC_MAX_STREAM_WAIT_TIMEOUT; |
| 172 | struct smc_connection *conn = &smc->conn; |
| 173 | struct sock *sk = &smc->sk; |
| 174 | int old_state; |
| 175 | int rc = 0; |
| 176 | |
| 177 | if (sock_flag(sk, SOCK_LINGER) && |
| 178 | !(current->flags & PF_EXITING)) |
| 179 | timeout = sk->sk_lingertime; |
| 180 | |
| 181 | again: |
| 182 | old_state = sk->sk_state; |
| 183 | switch (old_state) { |
| 184 | case SMC_INIT: |
| 185 | sk->sk_state = SMC_CLOSED; |
| 186 | if (smc->smc_listen_work.func) |
Ursula Braun | 46c28db | 2017-04-10 14:58:01 +0200 | [diff] [blame^] | 187 | cancel_work_sync(&smc->smc_listen_work); |
Ursula Braun | b38d732 | 2017-01-09 16:55:25 +0100 | [diff] [blame] | 188 | sock_put(sk); |
| 189 | break; |
| 190 | case SMC_LISTEN: |
| 191 | sk->sk_state = SMC_CLOSED; |
| 192 | sk->sk_state_change(sk); /* wake up accept */ |
| 193 | if (smc->clcsock && smc->clcsock->sk) { |
| 194 | rc = kernel_sock_shutdown(smc->clcsock, SHUT_RDWR); |
| 195 | /* wake up kernel_accept of smc_tcp_listen_worker */ |
| 196 | smc->clcsock->sk->sk_data_ready(smc->clcsock->sk); |
| 197 | } |
| 198 | release_sock(sk); |
| 199 | smc_close_cleanup_listen(sk); |
Ursula Braun | 46c28db | 2017-04-10 14:58:01 +0200 | [diff] [blame^] | 200 | cancel_work_sync(&smc->smc_listen_work); |
Ursula Braun | b38d732 | 2017-01-09 16:55:25 +0100 | [diff] [blame] | 201 | lock_sock(sk); |
| 202 | break; |
| 203 | case SMC_ACTIVE: |
| 204 | smc_close_stream_wait(smc, timeout); |
| 205 | release_sock(sk); |
| 206 | cancel_work_sync(&conn->tx_work); |
| 207 | lock_sock(sk); |
| 208 | if (sk->sk_state == SMC_ACTIVE) { |
| 209 | /* send close request */ |
| 210 | rc = smc_close_final(conn); |
| 211 | sk->sk_state = SMC_PEERCLOSEWAIT1; |
| 212 | } else { |
| 213 | /* peer event has changed the state */ |
| 214 | goto again; |
| 215 | } |
| 216 | break; |
| 217 | case SMC_APPFINCLOSEWAIT: |
| 218 | /* socket already shutdown wr or both (active close) */ |
| 219 | if (txflags->peer_done_writing && |
| 220 | !txflags->peer_conn_closed) { |
| 221 | /* just shutdown wr done, send close request */ |
| 222 | rc = smc_close_final(conn); |
| 223 | } |
| 224 | sk->sk_state = SMC_CLOSED; |
| 225 | smc_close_wait_tx_pends(smc); |
| 226 | break; |
| 227 | case SMC_APPCLOSEWAIT1: |
| 228 | case SMC_APPCLOSEWAIT2: |
| 229 | if (!smc_cdc_rxed_any_close(conn)) |
| 230 | smc_close_stream_wait(smc, timeout); |
| 231 | release_sock(sk); |
| 232 | cancel_work_sync(&conn->tx_work); |
| 233 | lock_sock(sk); |
| 234 | if (sk->sk_err != ECONNABORTED) { |
| 235 | /* confirm close from peer */ |
| 236 | rc = smc_close_final(conn); |
| 237 | if (rc) |
| 238 | break; |
| 239 | } |
| 240 | if (smc_cdc_rxed_any_close(conn)) |
| 241 | /* peer has closed the socket already */ |
| 242 | sk->sk_state = SMC_CLOSED; |
| 243 | else |
| 244 | /* peer has just issued a shutdown write */ |
| 245 | sk->sk_state = SMC_PEERFINCLOSEWAIT; |
| 246 | smc_close_wait_tx_pends(smc); |
| 247 | break; |
| 248 | case SMC_PEERCLOSEWAIT1: |
| 249 | case SMC_PEERCLOSEWAIT2: |
| 250 | case SMC_PEERFINCLOSEWAIT: |
| 251 | /* peer sending PeerConnectionClosed will cause transition */ |
| 252 | break; |
| 253 | case SMC_PROCESSABORT: |
| 254 | cancel_work_sync(&conn->tx_work); |
| 255 | smc_close_abort(conn); |
| 256 | sk->sk_state = SMC_CLOSED; |
| 257 | smc_close_wait_tx_pends(smc); |
| 258 | break; |
| 259 | case SMC_PEERABORTWAIT: |
| 260 | case SMC_CLOSED: |
| 261 | /* nothing to do, add tracing in future patch */ |
| 262 | break; |
| 263 | } |
| 264 | |
| 265 | if (old_state != sk->sk_state) |
| 266 | sk->sk_state_change(&smc->sk); |
| 267 | return rc; |
| 268 | } |
| 269 | |
| 270 | static void smc_close_passive_abort_received(struct smc_sock *smc) |
| 271 | { |
| 272 | struct smc_cdc_conn_state_flags *txflags = |
| 273 | &smc->conn.local_tx_ctrl.conn_state_flags; |
| 274 | struct sock *sk = &smc->sk; |
| 275 | |
| 276 | switch (sk->sk_state) { |
| 277 | case SMC_ACTIVE: |
| 278 | case SMC_APPFINCLOSEWAIT: |
| 279 | case SMC_APPCLOSEWAIT1: |
| 280 | case SMC_APPCLOSEWAIT2: |
| 281 | smc_close_abort(&smc->conn); |
| 282 | sk->sk_state = SMC_PROCESSABORT; |
| 283 | break; |
| 284 | case SMC_PEERCLOSEWAIT1: |
| 285 | case SMC_PEERCLOSEWAIT2: |
| 286 | if (txflags->peer_done_writing && |
| 287 | !txflags->peer_conn_closed) { |
| 288 | /* just shutdown, but not yet closed locally */ |
| 289 | smc_close_abort(&smc->conn); |
| 290 | sk->sk_state = SMC_PROCESSABORT; |
| 291 | } else { |
| 292 | sk->sk_state = SMC_CLOSED; |
| 293 | } |
| 294 | break; |
| 295 | case SMC_PEERFINCLOSEWAIT: |
| 296 | case SMC_PEERABORTWAIT: |
| 297 | sk->sk_state = SMC_CLOSED; |
| 298 | break; |
| 299 | case SMC_INIT: |
| 300 | case SMC_PROCESSABORT: |
| 301 | /* nothing to do, add tracing in future patch */ |
| 302 | break; |
| 303 | } |
| 304 | } |
| 305 | |
| 306 | /* Some kind of closing has been received: peer_conn_closed, peer_conn_abort, |
| 307 | * or peer_done_writing. |
Ursula Braun | b38d732 | 2017-01-09 16:55:25 +0100 | [diff] [blame] | 308 | */ |
Ursula Braun | 46c28db | 2017-04-10 14:58:01 +0200 | [diff] [blame^] | 309 | static void smc_close_passive_work(struct work_struct *work) |
Ursula Braun | b38d732 | 2017-01-09 16:55:25 +0100 | [diff] [blame] | 310 | { |
Ursula Braun | 46c28db | 2017-04-10 14:58:01 +0200 | [diff] [blame^] | 311 | struct smc_connection *conn = container_of(work, |
| 312 | struct smc_connection, |
| 313 | close_work); |
| 314 | struct smc_sock *smc = container_of(conn, struct smc_sock, conn); |
| 315 | struct smc_cdc_conn_state_flags *rxflags; |
Ursula Braun | b38d732 | 2017-01-09 16:55:25 +0100 | [diff] [blame] | 316 | struct sock *sk = &smc->sk; |
| 317 | int old_state; |
| 318 | |
Ursula Braun | 46c28db | 2017-04-10 14:58:01 +0200 | [diff] [blame^] | 319 | lock_sock(&smc->sk); |
Ursula Braun | b38d732 | 2017-01-09 16:55:25 +0100 | [diff] [blame] | 320 | old_state = sk->sk_state; |
| 321 | |
Ursula Braun | 46c28db | 2017-04-10 14:58:01 +0200 | [diff] [blame^] | 322 | if (!conn->alert_token_local) { |
| 323 | /* abnormal termination */ |
| 324 | smc_close_active_abort(smc); |
| 325 | goto wakeup; |
| 326 | } |
| 327 | |
| 328 | rxflags = &smc->conn.local_rx_ctrl.conn_state_flags; |
Ursula Braun | b38d732 | 2017-01-09 16:55:25 +0100 | [diff] [blame] | 329 | if (rxflags->peer_conn_abort) { |
| 330 | smc_close_passive_abort_received(smc); |
| 331 | goto wakeup; |
| 332 | } |
| 333 | |
| 334 | switch (sk->sk_state) { |
| 335 | case SMC_INIT: |
| 336 | if (atomic_read(&smc->conn.bytes_to_rcv) || |
| 337 | (rxflags->peer_done_writing && |
| 338 | !rxflags->peer_conn_closed)) |
| 339 | sk->sk_state = SMC_APPCLOSEWAIT1; |
| 340 | else |
| 341 | sk->sk_state = SMC_CLOSED; |
| 342 | break; |
| 343 | case SMC_ACTIVE: |
| 344 | sk->sk_state = SMC_APPCLOSEWAIT1; |
| 345 | break; |
| 346 | case SMC_PEERCLOSEWAIT1: |
| 347 | if (rxflags->peer_done_writing) |
| 348 | sk->sk_state = SMC_PEERCLOSEWAIT2; |
| 349 | /* fall through to check for closing */ |
| 350 | case SMC_PEERCLOSEWAIT2: |
| 351 | case SMC_PEERFINCLOSEWAIT: |
| 352 | if (!smc_cdc_rxed_any_close(&smc->conn)) |
| 353 | break; |
| 354 | if (sock_flag(sk, SOCK_DEAD) && |
| 355 | (sk->sk_shutdown == SHUTDOWN_MASK)) { |
| 356 | /* smc_release has already been called locally */ |
| 357 | sk->sk_state = SMC_CLOSED; |
| 358 | } else { |
| 359 | /* just shutdown, but not yet closed locally */ |
| 360 | sk->sk_state = SMC_APPFINCLOSEWAIT; |
| 361 | } |
| 362 | break; |
| 363 | case SMC_APPCLOSEWAIT1: |
| 364 | case SMC_APPCLOSEWAIT2: |
| 365 | case SMC_APPFINCLOSEWAIT: |
| 366 | case SMC_PEERABORTWAIT: |
| 367 | case SMC_PROCESSABORT: |
| 368 | case SMC_CLOSED: |
| 369 | /* nothing to do, add tracing in future patch */ |
| 370 | break; |
| 371 | } |
| 372 | |
| 373 | wakeup: |
| 374 | if (old_state != sk->sk_state) |
| 375 | sk->sk_state_change(sk); |
| 376 | sk->sk_data_ready(sk); /* wakeup blocked rcvbuf consumers */ |
| 377 | sk->sk_write_space(sk); /* wakeup blocked sndbuf producers */ |
| 378 | |
| 379 | if ((sk->sk_state == SMC_CLOSED) && |
Ursula Braun | 46c28db | 2017-04-10 14:58:01 +0200 | [diff] [blame^] | 380 | (sock_flag(sk, SOCK_DEAD) || !sk->sk_socket)) { |
Ursula Braun | b38d732 | 2017-01-09 16:55:25 +0100 | [diff] [blame] | 381 | smc_conn_free(&smc->conn); |
| 382 | schedule_delayed_work(&smc->sock_put_work, |
| 383 | SMC_CLOSE_SOCK_PUT_DELAY); |
| 384 | } |
Ursula Braun | 46c28db | 2017-04-10 14:58:01 +0200 | [diff] [blame^] | 385 | release_sock(&smc->sk); |
Ursula Braun | b38d732 | 2017-01-09 16:55:25 +0100 | [diff] [blame] | 386 | } |
| 387 | |
| 388 | void smc_close_sock_put_work(struct work_struct *work) |
| 389 | { |
| 390 | struct smc_sock *smc = container_of(to_delayed_work(work), |
| 391 | struct smc_sock, |
| 392 | sock_put_work); |
| 393 | |
Ursula Braun | f16a7dd | 2017-01-09 16:55:26 +0100 | [diff] [blame] | 394 | smc->sk.sk_prot->unhash(&smc->sk); |
Ursula Braun | b38d732 | 2017-01-09 16:55:25 +0100 | [diff] [blame] | 395 | sock_put(&smc->sk); |
| 396 | } |
| 397 | |
| 398 | int smc_close_shutdown_write(struct smc_sock *smc) |
| 399 | { |
| 400 | struct smc_connection *conn = &smc->conn; |
| 401 | long timeout = SMC_MAX_STREAM_WAIT_TIMEOUT; |
| 402 | struct sock *sk = &smc->sk; |
| 403 | int old_state; |
| 404 | int rc = 0; |
| 405 | |
| 406 | if (sock_flag(sk, SOCK_LINGER)) |
| 407 | timeout = sk->sk_lingertime; |
| 408 | |
| 409 | again: |
| 410 | old_state = sk->sk_state; |
| 411 | switch (old_state) { |
| 412 | case SMC_ACTIVE: |
| 413 | smc_close_stream_wait(smc, timeout); |
| 414 | release_sock(sk); |
| 415 | cancel_work_sync(&conn->tx_work); |
| 416 | lock_sock(sk); |
| 417 | /* send close wr request */ |
| 418 | rc = smc_close_wr(conn); |
| 419 | if (sk->sk_state == SMC_ACTIVE) |
| 420 | sk->sk_state = SMC_PEERCLOSEWAIT1; |
| 421 | else |
| 422 | goto again; |
| 423 | break; |
| 424 | case SMC_APPCLOSEWAIT1: |
| 425 | /* passive close */ |
| 426 | if (!smc_cdc_rxed_any_close(conn)) |
| 427 | smc_close_stream_wait(smc, timeout); |
| 428 | release_sock(sk); |
| 429 | cancel_work_sync(&conn->tx_work); |
| 430 | lock_sock(sk); |
| 431 | /* confirm close from peer */ |
| 432 | rc = smc_close_wr(conn); |
| 433 | sk->sk_state = SMC_APPCLOSEWAIT2; |
| 434 | break; |
| 435 | case SMC_APPCLOSEWAIT2: |
| 436 | case SMC_PEERFINCLOSEWAIT: |
| 437 | case SMC_PEERCLOSEWAIT1: |
| 438 | case SMC_PEERCLOSEWAIT2: |
| 439 | case SMC_APPFINCLOSEWAIT: |
| 440 | case SMC_PROCESSABORT: |
| 441 | case SMC_PEERABORTWAIT: |
| 442 | /* nothing to do, add tracing in future patch */ |
| 443 | break; |
| 444 | } |
| 445 | |
| 446 | if (old_state != sk->sk_state) |
| 447 | sk->sk_state_change(&smc->sk); |
| 448 | return rc; |
| 449 | } |
Ursula Braun | 46c28db | 2017-04-10 14:58:01 +0200 | [diff] [blame^] | 450 | |
| 451 | /* Initialize close properties on connection establishment. */ |
| 452 | void smc_close_init(struct smc_sock *smc) |
| 453 | { |
| 454 | INIT_WORK(&smc->conn.close_work, smc_close_passive_work); |
| 455 | } |