Ursula Braun | 0cfdd8f | 2017-01-09 16:55:17 +0100 | [diff] [blame] | 1 | /* |
| 2 | * Shared Memory Communications over RDMA (SMC-R) and RoCE |
| 3 | * |
| 4 | * Basic Transport Functions exploiting Infiniband API |
| 5 | * |
| 6 | * Copyright IBM Corp. 2016 |
| 7 | * |
| 8 | * Author(s): Ursula Braun <ubraun@linux.vnet.ibm.com> |
| 9 | */ |
| 10 | |
| 11 | #include <linux/socket.h> |
| 12 | #include <linux/if_vlan.h> |
| 13 | #include <linux/random.h> |
| 14 | #include <linux/workqueue.h> |
| 15 | #include <net/tcp.h> |
| 16 | #include <net/sock.h> |
| 17 | #include <rdma/ib_verbs.h> |
| 18 | |
| 19 | #include "smc.h" |
| 20 | #include "smc_clc.h" |
| 21 | #include "smc_core.h" |
| 22 | #include "smc_ib.h" |
Ursula Braun | f38ba179 | 2017-01-09 16:55:19 +0100 | [diff] [blame] | 23 | #include "smc_wr.h" |
Ursula Braun | 9bf9abe | 2017-01-09 16:55:21 +0100 | [diff] [blame] | 24 | #include "smc_llc.h" |
Ursula Braun | 5f08318 | 2017-01-09 16:55:22 +0100 | [diff] [blame] | 25 | #include "smc_cdc.h" |
Ursula Braun | b38d732 | 2017-01-09 16:55:25 +0100 | [diff] [blame] | 26 | #include "smc_close.h" |
Ursula Braun | 0cfdd8f | 2017-01-09 16:55:17 +0100 | [diff] [blame] | 27 | |
Ursula Braun | 9bf9abe | 2017-01-09 16:55:21 +0100 | [diff] [blame] | 28 | #define SMC_LGR_NUM_INCR 256 |
Ursula Braun | 0cfdd8f | 2017-01-09 16:55:17 +0100 | [diff] [blame] | 29 | #define SMC_LGR_FREE_DELAY (600 * HZ) |
| 30 | |
Ursula Braun | 9bf9abe | 2017-01-09 16:55:21 +0100 | [diff] [blame] | 31 | static u32 smc_lgr_num; /* unique link group number */ |
| 32 | |
Ursula Braun | 0cfdd8f | 2017-01-09 16:55:17 +0100 | [diff] [blame] | 33 | /* Register connection's alert token in our lookup structure. |
| 34 | * To use rbtrees we have to implement our own insert core. |
| 35 | * Requires @conns_lock |
| 36 | * @smc connection to register |
| 37 | * Returns 0 on success, != otherwise. |
| 38 | */ |
| 39 | static void smc_lgr_add_alert_token(struct smc_connection *conn) |
| 40 | { |
| 41 | struct rb_node **link, *parent = NULL; |
| 42 | u32 token = conn->alert_token_local; |
| 43 | |
| 44 | link = &conn->lgr->conns_all.rb_node; |
| 45 | while (*link) { |
| 46 | struct smc_connection *cur = rb_entry(*link, |
| 47 | struct smc_connection, alert_node); |
| 48 | |
| 49 | parent = *link; |
| 50 | if (cur->alert_token_local > token) |
| 51 | link = &parent->rb_left; |
| 52 | else |
| 53 | link = &parent->rb_right; |
| 54 | } |
| 55 | /* Put the new node there */ |
| 56 | rb_link_node(&conn->alert_node, parent, link); |
| 57 | rb_insert_color(&conn->alert_node, &conn->lgr->conns_all); |
| 58 | } |
| 59 | |
| 60 | /* Register connection in link group by assigning an alert token |
| 61 | * registered in a search tree. |
| 62 | * Requires @conns_lock |
| 63 | * Note that '0' is a reserved value and not assigned. |
| 64 | */ |
| 65 | static void smc_lgr_register_conn(struct smc_connection *conn) |
| 66 | { |
| 67 | struct smc_sock *smc = container_of(conn, struct smc_sock, conn); |
| 68 | static atomic_t nexttoken = ATOMIC_INIT(0); |
| 69 | |
| 70 | /* find a new alert_token_local value not yet used by some connection |
| 71 | * in this link group |
| 72 | */ |
| 73 | sock_hold(&smc->sk); /* sock_put in smc_lgr_unregister_conn() */ |
| 74 | while (!conn->alert_token_local) { |
| 75 | conn->alert_token_local = atomic_inc_return(&nexttoken); |
| 76 | if (smc_lgr_find_conn(conn->alert_token_local, conn->lgr)) |
| 77 | conn->alert_token_local = 0; |
| 78 | } |
| 79 | smc_lgr_add_alert_token(conn); |
| 80 | conn->lgr->conns_num++; |
| 81 | } |
| 82 | |
| 83 | /* Unregister connection and reset the alert token of the given connection< |
| 84 | */ |
| 85 | static void __smc_lgr_unregister_conn(struct smc_connection *conn) |
| 86 | { |
| 87 | struct smc_sock *smc = container_of(conn, struct smc_sock, conn); |
| 88 | struct smc_link_group *lgr = conn->lgr; |
| 89 | |
| 90 | rb_erase(&conn->alert_node, &lgr->conns_all); |
| 91 | lgr->conns_num--; |
| 92 | conn->alert_token_local = 0; |
| 93 | conn->lgr = NULL; |
| 94 | sock_put(&smc->sk); /* sock_hold in smc_lgr_register_conn() */ |
| 95 | } |
| 96 | |
| 97 | /* Unregister connection and trigger lgr freeing if applicable |
| 98 | */ |
| 99 | static void smc_lgr_unregister_conn(struct smc_connection *conn) |
| 100 | { |
| 101 | struct smc_link_group *lgr = conn->lgr; |
| 102 | int reduced = 0; |
| 103 | |
| 104 | write_lock_bh(&lgr->conns_lock); |
| 105 | if (conn->alert_token_local) { |
| 106 | reduced = 1; |
| 107 | __smc_lgr_unregister_conn(conn); |
| 108 | } |
| 109 | write_unlock_bh(&lgr->conns_lock); |
| 110 | if (reduced && !lgr->conns_num) |
| 111 | schedule_delayed_work(&lgr->free_work, SMC_LGR_FREE_DELAY); |
| 112 | } |
| 113 | |
| 114 | static void smc_lgr_free_work(struct work_struct *work) |
| 115 | { |
| 116 | struct smc_link_group *lgr = container_of(to_delayed_work(work), |
| 117 | struct smc_link_group, |
| 118 | free_work); |
| 119 | bool conns; |
| 120 | |
| 121 | spin_lock_bh(&smc_lgr_list.lock); |
| 122 | read_lock_bh(&lgr->conns_lock); |
| 123 | conns = RB_EMPTY_ROOT(&lgr->conns_all); |
| 124 | read_unlock_bh(&lgr->conns_lock); |
| 125 | if (!conns) { /* number of lgr connections is no longer zero */ |
| 126 | spin_unlock_bh(&smc_lgr_list.lock); |
| 127 | return; |
| 128 | } |
| 129 | list_del_init(&lgr->list); /* remove from smc_lgr_list */ |
| 130 | spin_unlock_bh(&smc_lgr_list.lock); |
| 131 | smc_lgr_free(lgr); |
| 132 | } |
| 133 | |
| 134 | /* create a new SMC link group */ |
| 135 | static int smc_lgr_create(struct smc_sock *smc, __be32 peer_in_addr, |
| 136 | struct smc_ib_device *smcibdev, u8 ibport, |
| 137 | char *peer_systemid, unsigned short vlan_id) |
| 138 | { |
| 139 | struct smc_link_group *lgr; |
| 140 | struct smc_link *lnk; |
| 141 | u8 rndvec[3]; |
| 142 | int rc = 0; |
Ursula Braun | cd6851f | 2017-01-09 16:55:18 +0100 | [diff] [blame] | 143 | int i; |
Ursula Braun | 0cfdd8f | 2017-01-09 16:55:17 +0100 | [diff] [blame] | 144 | |
| 145 | lgr = kzalloc(sizeof(*lgr), GFP_KERNEL); |
| 146 | if (!lgr) { |
| 147 | rc = -ENOMEM; |
| 148 | goto out; |
| 149 | } |
| 150 | lgr->role = smc->listen_smc ? SMC_SERV : SMC_CLNT; |
| 151 | lgr->sync_err = false; |
| 152 | lgr->daddr = peer_in_addr; |
| 153 | memcpy(lgr->peer_systemid, peer_systemid, SMC_SYSTEMID_LEN); |
| 154 | lgr->vlan_id = vlan_id; |
Ursula Braun | cd6851f | 2017-01-09 16:55:18 +0100 | [diff] [blame] | 155 | rwlock_init(&lgr->sndbufs_lock); |
| 156 | rwlock_init(&lgr->rmbs_lock); |
| 157 | for (i = 0; i < SMC_RMBE_SIZES; i++) { |
| 158 | INIT_LIST_HEAD(&lgr->sndbufs[i]); |
| 159 | INIT_LIST_HEAD(&lgr->rmbs[i]); |
| 160 | } |
Ursula Braun | 9bf9abe | 2017-01-09 16:55:21 +0100 | [diff] [blame] | 161 | smc_lgr_num += SMC_LGR_NUM_INCR; |
| 162 | memcpy(&lgr->id, (u8 *)&smc_lgr_num, SMC_LGR_ID_SIZE); |
Ursula Braun | 0cfdd8f | 2017-01-09 16:55:17 +0100 | [diff] [blame] | 163 | INIT_DELAYED_WORK(&lgr->free_work, smc_lgr_free_work); |
| 164 | lgr->conns_all = RB_ROOT; |
| 165 | |
| 166 | lnk = &lgr->lnk[SMC_SINGLE_LINK]; |
| 167 | /* initialize link */ |
| 168 | lnk->smcibdev = smcibdev; |
| 169 | lnk->ibport = ibport; |
| 170 | lnk->path_mtu = smcibdev->pattr[ibport - 1].active_mtu; |
Ursula Braun | bd4ad57 | 2017-01-09 16:55:20 +0100 | [diff] [blame] | 171 | if (!smcibdev->initialized) |
| 172 | smc_ib_setup_per_ibdev(smcibdev); |
Ursula Braun | 0cfdd8f | 2017-01-09 16:55:17 +0100 | [diff] [blame] | 173 | get_random_bytes(rndvec, sizeof(rndvec)); |
| 174 | lnk->psn_initial = rndvec[0] + (rndvec[1] << 8) + (rndvec[2] << 16); |
Ursula Braun | f38ba179 | 2017-01-09 16:55:19 +0100 | [diff] [blame] | 175 | rc = smc_wr_alloc_link_mem(lnk); |
| 176 | if (rc) |
| 177 | goto free_lgr; |
Ursula Braun | bd4ad57 | 2017-01-09 16:55:20 +0100 | [diff] [blame] | 178 | rc = smc_ib_create_protection_domain(lnk); |
| 179 | if (rc) |
| 180 | goto free_link_mem; |
| 181 | rc = smc_ib_create_queue_pair(lnk); |
| 182 | if (rc) |
| 183 | goto dealloc_pd; |
| 184 | rc = smc_wr_create_link(lnk); |
| 185 | if (rc) |
| 186 | goto destroy_qp; |
Ursula Braun | 9bf9abe | 2017-01-09 16:55:21 +0100 | [diff] [blame] | 187 | init_completion(&lnk->llc_confirm); |
| 188 | init_completion(&lnk->llc_confirm_resp); |
Ursula Braun | 0cfdd8f | 2017-01-09 16:55:17 +0100 | [diff] [blame] | 189 | |
| 190 | smc->conn.lgr = lgr; |
| 191 | rwlock_init(&lgr->conns_lock); |
| 192 | spin_lock_bh(&smc_lgr_list.lock); |
| 193 | list_add(&lgr->list, &smc_lgr_list.list); |
| 194 | spin_unlock_bh(&smc_lgr_list.lock); |
Ursula Braun | f38ba179 | 2017-01-09 16:55:19 +0100 | [diff] [blame] | 195 | return 0; |
| 196 | |
Ursula Braun | bd4ad57 | 2017-01-09 16:55:20 +0100 | [diff] [blame] | 197 | destroy_qp: |
| 198 | smc_ib_destroy_queue_pair(lnk); |
| 199 | dealloc_pd: |
| 200 | smc_ib_dealloc_protection_domain(lnk); |
| 201 | free_link_mem: |
| 202 | smc_wr_free_link_mem(lnk); |
Ursula Braun | f38ba179 | 2017-01-09 16:55:19 +0100 | [diff] [blame] | 203 | free_lgr: |
| 204 | kfree(lgr); |
Ursula Braun | 0cfdd8f | 2017-01-09 16:55:17 +0100 | [diff] [blame] | 205 | out: |
| 206 | return rc; |
| 207 | } |
| 208 | |
Ursula Braun | 3e03472 | 2017-07-28 13:56:20 +0200 | [diff] [blame] | 209 | static void smc_buf_unuse(struct smc_connection *conn) |
Ursula Braun | cd6851f | 2017-01-09 16:55:18 +0100 | [diff] [blame] | 210 | { |
| 211 | if (conn->sndbuf_desc) { |
| 212 | conn->sndbuf_desc->used = 0; |
| 213 | conn->sndbuf_size = 0; |
| 214 | } |
Ursula Braun | cd6851f | 2017-01-09 16:55:18 +0100 | [diff] [blame] | 215 | if (conn->rmb_desc) { |
Ursula Braun | 897e1c2 | 2017-07-28 13:56:16 +0200 | [diff] [blame] | 216 | conn->rmb_desc->reused = true; |
Ursula Braun | cd6851f | 2017-01-09 16:55:18 +0100 | [diff] [blame] | 217 | conn->rmb_desc->used = 0; |
| 218 | conn->rmbe_size = 0; |
| 219 | } |
| 220 | } |
| 221 | |
Ursula Braun | 0cfdd8f | 2017-01-09 16:55:17 +0100 | [diff] [blame] | 222 | /* remove a finished connection from its link group */ |
| 223 | void smc_conn_free(struct smc_connection *conn) |
| 224 | { |
| 225 | struct smc_link_group *lgr = conn->lgr; |
| 226 | |
| 227 | if (!lgr) |
| 228 | return; |
Ursula Braun | 5f08318 | 2017-01-09 16:55:22 +0100 | [diff] [blame] | 229 | smc_cdc_tx_dismiss_slots(conn); |
Ursula Braun | 0cfdd8f | 2017-01-09 16:55:17 +0100 | [diff] [blame] | 230 | smc_lgr_unregister_conn(conn); |
Ursula Braun | 3e03472 | 2017-07-28 13:56:20 +0200 | [diff] [blame] | 231 | smc_buf_unuse(conn); |
Ursula Braun | 0cfdd8f | 2017-01-09 16:55:17 +0100 | [diff] [blame] | 232 | } |
| 233 | |
| 234 | static void smc_link_clear(struct smc_link *lnk) |
| 235 | { |
| 236 | lnk->peer_qpn = 0; |
Ursula Braun | bd4ad57 | 2017-01-09 16:55:20 +0100 | [diff] [blame] | 237 | smc_ib_modify_qp_reset(lnk); |
Ursula Braun | f38ba179 | 2017-01-09 16:55:19 +0100 | [diff] [blame] | 238 | smc_wr_free_link(lnk); |
Ursula Braun | bd4ad57 | 2017-01-09 16:55:20 +0100 | [diff] [blame] | 239 | smc_ib_destroy_queue_pair(lnk); |
| 240 | smc_ib_dealloc_protection_domain(lnk); |
Ursula Braun | f38ba179 | 2017-01-09 16:55:19 +0100 | [diff] [blame] | 241 | smc_wr_free_link_mem(lnk); |
Ursula Braun | 0cfdd8f | 2017-01-09 16:55:17 +0100 | [diff] [blame] | 242 | } |
| 243 | |
Ursula Braun | 3e03472 | 2017-07-28 13:56:20 +0200 | [diff] [blame] | 244 | static void smc_buf_free(struct smc_buf_desc *buf_desc, struct smc_link *lnk, |
| 245 | bool is_rmb) |
| 246 | { |
| 247 | if (is_rmb) { |
| 248 | if (buf_desc->mr_rx[SMC_SINGLE_LINK]) |
| 249 | smc_ib_put_memory_region( |
| 250 | buf_desc->mr_rx[SMC_SINGLE_LINK]); |
| 251 | smc_ib_buf_unmap_sg(lnk->smcibdev, buf_desc, |
| 252 | DMA_FROM_DEVICE); |
| 253 | } else { |
| 254 | smc_ib_buf_unmap_sg(lnk->smcibdev, buf_desc, |
| 255 | DMA_TO_DEVICE); |
| 256 | } |
| 257 | sg_free_table(&buf_desc->sgt[SMC_SINGLE_LINK]); |
| 258 | if (buf_desc->cpu_addr) |
| 259 | free_pages((unsigned long)buf_desc->cpu_addr, buf_desc->order); |
| 260 | kfree(buf_desc); |
| 261 | } |
| 262 | |
| 263 | static void __smc_lgr_free_bufs(struct smc_link_group *lgr, bool is_rmb) |
Ursula Braun | cd6851f | 2017-01-09 16:55:18 +0100 | [diff] [blame] | 264 | { |
Ursula Braun | 9d8fb61 | 2017-07-28 13:56:19 +0200 | [diff] [blame] | 265 | struct smc_link *lnk = &lgr->lnk[SMC_SINGLE_LINK]; |
Ursula Braun | 3e03472 | 2017-07-28 13:56:20 +0200 | [diff] [blame] | 266 | struct smc_buf_desc *buf_desc, *bf_desc; |
| 267 | struct list_head *buf_list; |
Ursula Braun | cd6851f | 2017-01-09 16:55:18 +0100 | [diff] [blame] | 268 | int i; |
| 269 | |
| 270 | for (i = 0; i < SMC_RMBE_SIZES; i++) { |
Ursula Braun | 3e03472 | 2017-07-28 13:56:20 +0200 | [diff] [blame] | 271 | if (is_rmb) |
| 272 | buf_list = &lgr->rmbs[i]; |
| 273 | else |
| 274 | buf_list = &lgr->sndbufs[i]; |
| 275 | list_for_each_entry_safe(buf_desc, bf_desc, buf_list, |
Ursula Braun | cd6851f | 2017-01-09 16:55:18 +0100 | [diff] [blame] | 276 | list) { |
Ursula Braun | 3e03472 | 2017-07-28 13:56:20 +0200 | [diff] [blame] | 277 | list_del(&buf_desc->list); |
| 278 | smc_buf_free(buf_desc, lnk, is_rmb); |
Ursula Braun | cd6851f | 2017-01-09 16:55:18 +0100 | [diff] [blame] | 279 | } |
| 280 | } |
| 281 | } |
| 282 | |
Ursula Braun | 3e03472 | 2017-07-28 13:56:20 +0200 | [diff] [blame] | 283 | static void smc_lgr_free_bufs(struct smc_link_group *lgr) |
Ursula Braun | cd6851f | 2017-01-09 16:55:18 +0100 | [diff] [blame] | 284 | { |
Ursula Braun | 3e03472 | 2017-07-28 13:56:20 +0200 | [diff] [blame] | 285 | /* free send buffers */ |
| 286 | __smc_lgr_free_bufs(lgr, false); |
| 287 | /* free rmbs */ |
| 288 | __smc_lgr_free_bufs(lgr, true); |
Ursula Braun | cd6851f | 2017-01-09 16:55:18 +0100 | [diff] [blame] | 289 | } |
| 290 | |
Ursula Braun | 0cfdd8f | 2017-01-09 16:55:17 +0100 | [diff] [blame] | 291 | /* remove a link group */ |
| 292 | void smc_lgr_free(struct smc_link_group *lgr) |
| 293 | { |
Ursula Braun | 3e03472 | 2017-07-28 13:56:20 +0200 | [diff] [blame] | 294 | smc_lgr_free_bufs(lgr); |
Ursula Braun | 0cfdd8f | 2017-01-09 16:55:17 +0100 | [diff] [blame] | 295 | smc_link_clear(&lgr->lnk[SMC_SINGLE_LINK]); |
| 296 | kfree(lgr); |
| 297 | } |
| 298 | |
| 299 | /* terminate linkgroup abnormally */ |
| 300 | void smc_lgr_terminate(struct smc_link_group *lgr) |
| 301 | { |
| 302 | struct smc_connection *conn; |
Ursula Braun | b38d732 | 2017-01-09 16:55:25 +0100 | [diff] [blame] | 303 | struct smc_sock *smc; |
Ursula Braun | 0cfdd8f | 2017-01-09 16:55:17 +0100 | [diff] [blame] | 304 | struct rb_node *node; |
| 305 | |
| 306 | spin_lock_bh(&smc_lgr_list.lock); |
| 307 | if (list_empty(&lgr->list)) { |
| 308 | /* termination already triggered */ |
| 309 | spin_unlock_bh(&smc_lgr_list.lock); |
| 310 | return; |
| 311 | } |
| 312 | /* do not use this link group for new connections */ |
| 313 | list_del_init(&lgr->list); |
| 314 | spin_unlock_bh(&smc_lgr_list.lock); |
| 315 | |
| 316 | write_lock_bh(&lgr->conns_lock); |
| 317 | node = rb_first(&lgr->conns_all); |
| 318 | while (node) { |
| 319 | conn = rb_entry(node, struct smc_connection, alert_node); |
Ursula Braun | b38d732 | 2017-01-09 16:55:25 +0100 | [diff] [blame] | 320 | smc = container_of(conn, struct smc_sock, conn); |
| 321 | sock_hold(&smc->sk); |
Ursula Braun | 0cfdd8f | 2017-01-09 16:55:17 +0100 | [diff] [blame] | 322 | __smc_lgr_unregister_conn(conn); |
Ursula Braun | 46c28db | 2017-04-10 14:58:01 +0200 | [diff] [blame] | 323 | schedule_work(&conn->close_work); |
Ursula Braun | b38d732 | 2017-01-09 16:55:25 +0100 | [diff] [blame] | 324 | sock_put(&smc->sk); |
Ursula Braun | 0cfdd8f | 2017-01-09 16:55:17 +0100 | [diff] [blame] | 325 | node = rb_first(&lgr->conns_all); |
| 326 | } |
| 327 | write_unlock_bh(&lgr->conns_lock); |
Ursula Braun | 0cfdd8f | 2017-01-09 16:55:17 +0100 | [diff] [blame] | 328 | } |
| 329 | |
| 330 | /* Determine vlan of internal TCP socket. |
| 331 | * @vlan_id: address to store the determined vlan id into |
| 332 | */ |
| 333 | static int smc_vlan_by_tcpsk(struct socket *clcsock, unsigned short *vlan_id) |
| 334 | { |
| 335 | struct dst_entry *dst = sk_dst_get(clcsock->sk); |
| 336 | int rc = 0; |
| 337 | |
| 338 | *vlan_id = 0; |
| 339 | if (!dst) { |
| 340 | rc = -ENOTCONN; |
| 341 | goto out; |
| 342 | } |
| 343 | if (!dst->dev) { |
| 344 | rc = -ENODEV; |
| 345 | goto out_rel; |
| 346 | } |
| 347 | |
| 348 | if (is_vlan_dev(dst->dev)) |
| 349 | *vlan_id = vlan_dev_vlan_id(dst->dev); |
| 350 | |
| 351 | out_rel: |
| 352 | dst_release(dst); |
| 353 | out: |
| 354 | return rc; |
| 355 | } |
| 356 | |
| 357 | /* determine the link gid matching the vlan id of the link group */ |
| 358 | static int smc_link_determine_gid(struct smc_link_group *lgr) |
| 359 | { |
| 360 | struct smc_link *lnk = &lgr->lnk[SMC_SINGLE_LINK]; |
| 361 | struct ib_gid_attr gattr; |
| 362 | union ib_gid gid; |
| 363 | int i; |
| 364 | |
| 365 | if (!lgr->vlan_id) { |
| 366 | lnk->gid = lnk->smcibdev->gid[lnk->ibport - 1]; |
| 367 | return 0; |
| 368 | } |
| 369 | |
| 370 | for (i = 0; i < lnk->smcibdev->pattr[lnk->ibport - 1].gid_tbl_len; |
| 371 | i++) { |
| 372 | if (ib_query_gid(lnk->smcibdev->ibdev, lnk->ibport, i, &gid, |
| 373 | &gattr)) |
| 374 | continue; |
| 375 | if (gattr.ndev && |
| 376 | (vlan_dev_vlan_id(gattr.ndev) == lgr->vlan_id)) { |
| 377 | lnk->gid = gid; |
| 378 | return 0; |
| 379 | } |
| 380 | } |
| 381 | return -ENODEV; |
| 382 | } |
| 383 | |
| 384 | /* create a new SMC connection (and a new link group if necessary) */ |
| 385 | int smc_conn_create(struct smc_sock *smc, __be32 peer_in_addr, |
| 386 | struct smc_ib_device *smcibdev, u8 ibport, |
| 387 | struct smc_clc_msg_local *lcl, int srv_first_contact) |
| 388 | { |
| 389 | struct smc_connection *conn = &smc->conn; |
| 390 | struct smc_link_group *lgr; |
| 391 | unsigned short vlan_id; |
| 392 | enum smc_lgr_role role; |
| 393 | int local_contact = SMC_FIRST_CONTACT; |
| 394 | int rc = 0; |
| 395 | |
| 396 | role = smc->listen_smc ? SMC_SERV : SMC_CLNT; |
| 397 | rc = smc_vlan_by_tcpsk(smc->clcsock, &vlan_id); |
| 398 | if (rc) |
| 399 | return rc; |
| 400 | |
| 401 | if ((role == SMC_CLNT) && srv_first_contact) |
| 402 | /* create new link group as well */ |
| 403 | goto create; |
| 404 | |
| 405 | /* determine if an existing link group can be reused */ |
| 406 | spin_lock_bh(&smc_lgr_list.lock); |
| 407 | list_for_each_entry(lgr, &smc_lgr_list.list, list) { |
| 408 | write_lock_bh(&lgr->conns_lock); |
| 409 | if (!memcmp(lgr->peer_systemid, lcl->id_for_peer, |
| 410 | SMC_SYSTEMID_LEN) && |
| 411 | !memcmp(lgr->lnk[SMC_SINGLE_LINK].peer_gid, &lcl->gid, |
| 412 | SMC_GID_SIZE) && |
| 413 | !memcmp(lgr->lnk[SMC_SINGLE_LINK].peer_mac, lcl->mac, |
| 414 | sizeof(lcl->mac)) && |
| 415 | !lgr->sync_err && |
| 416 | (lgr->role == role) && |
Ursula Braun | cd6851f | 2017-01-09 16:55:18 +0100 | [diff] [blame] | 417 | (lgr->vlan_id == vlan_id) && |
| 418 | ((role == SMC_CLNT) || |
| 419 | (lgr->conns_num < SMC_RMBS_PER_LGR_MAX))) { |
Ursula Braun | 0cfdd8f | 2017-01-09 16:55:17 +0100 | [diff] [blame] | 420 | /* link group found */ |
| 421 | local_contact = SMC_REUSE_CONTACT; |
| 422 | conn->lgr = lgr; |
| 423 | smc_lgr_register_conn(conn); /* add smc conn to lgr */ |
| 424 | write_unlock_bh(&lgr->conns_lock); |
| 425 | break; |
| 426 | } |
| 427 | write_unlock_bh(&lgr->conns_lock); |
| 428 | } |
| 429 | spin_unlock_bh(&smc_lgr_list.lock); |
| 430 | |
| 431 | if (role == SMC_CLNT && !srv_first_contact && |
| 432 | (local_contact == SMC_FIRST_CONTACT)) { |
| 433 | /* Server reuses a link group, but Client wants to start |
| 434 | * a new one |
| 435 | * send out_of_sync decline, reason synchr. error |
| 436 | */ |
| 437 | return -ENOLINK; |
| 438 | } |
| 439 | |
| 440 | create: |
| 441 | if (local_contact == SMC_FIRST_CONTACT) { |
| 442 | rc = smc_lgr_create(smc, peer_in_addr, smcibdev, ibport, |
| 443 | lcl->id_for_peer, vlan_id); |
| 444 | if (rc) |
| 445 | goto out; |
| 446 | smc_lgr_register_conn(conn); /* add smc conn to lgr */ |
| 447 | rc = smc_link_determine_gid(conn->lgr); |
| 448 | } |
Ursula Braun | 5f08318 | 2017-01-09 16:55:22 +0100 | [diff] [blame] | 449 | conn->local_tx_ctrl.common.type = SMC_CDC_MSG_TYPE; |
| 450 | conn->local_tx_ctrl.len = sizeof(struct smc_cdc_msg); |
| 451 | #ifndef KERNEL_HAS_ATOMIC64 |
| 452 | spin_lock_init(&conn->acurs_lock); |
| 453 | #endif |
Ursula Braun | 0cfdd8f | 2017-01-09 16:55:17 +0100 | [diff] [blame] | 454 | |
| 455 | out: |
| 456 | return rc ? rc : local_contact; |
| 457 | } |
Ursula Braun | cd6851f | 2017-01-09 16:55:18 +0100 | [diff] [blame] | 458 | |
Ursula Braun | 3e03472 | 2017-07-28 13:56:20 +0200 | [diff] [blame] | 459 | /* try to reuse a sndbuf or rmb description slot for a certain |
| 460 | * buffer size; if not available, return NULL |
Ursula Braun | cd6851f | 2017-01-09 16:55:18 +0100 | [diff] [blame] | 461 | */ |
| 462 | static inline |
Ursula Braun | 3e03472 | 2017-07-28 13:56:20 +0200 | [diff] [blame] | 463 | struct smc_buf_desc *smc_buf_get_slot(struct smc_link_group *lgr, |
| 464 | int compressed_bufsize, |
| 465 | rwlock_t *lock, |
| 466 | struct list_head *buf_list) |
Ursula Braun | cd6851f | 2017-01-09 16:55:18 +0100 | [diff] [blame] | 467 | { |
Ursula Braun | 3e03472 | 2017-07-28 13:56:20 +0200 | [diff] [blame] | 468 | struct smc_buf_desc *buf_slot; |
Ursula Braun | cd6851f | 2017-01-09 16:55:18 +0100 | [diff] [blame] | 469 | |
Ursula Braun | 3e03472 | 2017-07-28 13:56:20 +0200 | [diff] [blame] | 470 | read_lock_bh(lock); |
| 471 | list_for_each_entry(buf_slot, buf_list, list) { |
| 472 | if (cmpxchg(&buf_slot->used, 0, 1) == 0) { |
| 473 | read_unlock_bh(lock); |
| 474 | return buf_slot; |
Ursula Braun | cd6851f | 2017-01-09 16:55:18 +0100 | [diff] [blame] | 475 | } |
| 476 | } |
Ursula Braun | 3e03472 | 2017-07-28 13:56:20 +0200 | [diff] [blame] | 477 | read_unlock_bh(lock); |
Ursula Braun | cd6851f | 2017-01-09 16:55:18 +0100 | [diff] [blame] | 478 | return NULL; |
| 479 | } |
| 480 | |
Ursula Braun | 952310c | 2017-01-09 16:55:24 +0100 | [diff] [blame] | 481 | /* one of the conditions for announcing a receiver's current window size is |
| 482 | * that it "results in a minimum increase in the window size of 10% of the |
| 483 | * receive buffer space" [RFC7609] |
| 484 | */ |
| 485 | static inline int smc_rmb_wnd_update_limit(int rmbe_size) |
| 486 | { |
| 487 | return min_t(int, rmbe_size / 10, SOCK_MIN_SNDBUF / 2); |
| 488 | } |
| 489 | |
Ursula Braun | b33982c | 2017-07-28 13:56:21 +0200 | [diff] [blame] | 490 | static struct smc_buf_desc *smc_new_buf_create(struct smc_link_group *lgr, |
| 491 | bool is_rmb, int bufsize) |
| 492 | { |
| 493 | struct smc_buf_desc *buf_desc; |
| 494 | struct smc_link *lnk; |
| 495 | int rc; |
| 496 | |
| 497 | /* try to alloc a new buffer */ |
| 498 | buf_desc = kzalloc(sizeof(*buf_desc), GFP_KERNEL); |
| 499 | if (!buf_desc) |
| 500 | return ERR_PTR(-ENOMEM); |
| 501 | |
| 502 | buf_desc->cpu_addr = |
| 503 | (void *)__get_free_pages(GFP_KERNEL | __GFP_NOWARN | |
| 504 | __GFP_NOMEMALLOC | |
| 505 | __GFP_NORETRY | __GFP_ZERO, |
| 506 | get_order(bufsize)); |
| 507 | if (!buf_desc->cpu_addr) { |
| 508 | kfree(buf_desc); |
| 509 | return ERR_PTR(-EAGAIN); |
| 510 | } |
| 511 | buf_desc->order = get_order(bufsize); |
| 512 | |
| 513 | /* build the sg table from the pages */ |
| 514 | lnk = &lgr->lnk[SMC_SINGLE_LINK]; |
| 515 | rc = sg_alloc_table(&buf_desc->sgt[SMC_SINGLE_LINK], 1, |
| 516 | GFP_KERNEL); |
| 517 | if (rc) { |
| 518 | smc_buf_free(buf_desc, lnk, is_rmb); |
| 519 | return ERR_PTR(rc); |
| 520 | } |
| 521 | sg_set_buf(buf_desc->sgt[SMC_SINGLE_LINK].sgl, |
| 522 | buf_desc->cpu_addr, bufsize); |
| 523 | |
| 524 | /* map sg table to DMA address */ |
| 525 | rc = smc_ib_buf_map_sg(lnk->smcibdev, buf_desc, |
| 526 | is_rmb ? DMA_FROM_DEVICE : DMA_TO_DEVICE); |
| 527 | /* SMC protocol depends on mapping to one DMA address only */ |
| 528 | if (rc != 1) { |
| 529 | smc_buf_free(buf_desc, lnk, is_rmb); |
| 530 | return ERR_PTR(-EAGAIN); |
| 531 | } |
| 532 | |
| 533 | /* create a new memory region for the RMB */ |
| 534 | if (is_rmb) { |
| 535 | rc = smc_ib_get_memory_region(lnk->roce_pd, |
| 536 | IB_ACCESS_REMOTE_WRITE | |
| 537 | IB_ACCESS_LOCAL_WRITE, |
| 538 | buf_desc); |
| 539 | if (rc) { |
| 540 | smc_buf_free(buf_desc, lnk, is_rmb); |
| 541 | return ERR_PTR(rc); |
| 542 | } |
| 543 | } |
| 544 | |
| 545 | return buf_desc; |
| 546 | } |
| 547 | |
Ursula Braun | 3e03472 | 2017-07-28 13:56:20 +0200 | [diff] [blame] | 548 | static int __smc_buf_create(struct smc_sock *smc, bool is_rmb) |
Ursula Braun | cd6851f | 2017-01-09 16:55:18 +0100 | [diff] [blame] | 549 | { |
| 550 | struct smc_connection *conn = &smc->conn; |
| 551 | struct smc_link_group *lgr = conn->lgr; |
Ursula Braun | 3e03472 | 2017-07-28 13:56:20 +0200 | [diff] [blame] | 552 | struct smc_buf_desc *buf_desc = NULL; |
| 553 | struct list_head *buf_list; |
Ursula Braun | c45abf3 | 2017-07-28 13:56:14 +0200 | [diff] [blame] | 554 | int bufsize, bufsize_short; |
Ursula Braun | 3e03472 | 2017-07-28 13:56:20 +0200 | [diff] [blame] | 555 | int sk_buf_size; |
| 556 | rwlock_t *lock; |
Ursula Braun | cd6851f | 2017-01-09 16:55:18 +0100 | [diff] [blame] | 557 | |
Ursula Braun | 3e03472 | 2017-07-28 13:56:20 +0200 | [diff] [blame] | 558 | if (is_rmb) |
| 559 | /* use socket recv buffer size (w/o overhead) as start value */ |
| 560 | sk_buf_size = smc->sk.sk_rcvbuf / 2; |
| 561 | else |
| 562 | /* use socket send buffer size (w/o overhead) as start value */ |
| 563 | sk_buf_size = smc->sk.sk_sndbuf / 2; |
| 564 | |
Ursula Braun | c45abf3 | 2017-07-28 13:56:14 +0200 | [diff] [blame] | 565 | for (bufsize_short = smc_compress_bufsize(smc->sk.sk_sndbuf / 2); |
| 566 | bufsize_short >= 0; bufsize_short--) { |
Ursula Braun | 3e03472 | 2017-07-28 13:56:20 +0200 | [diff] [blame] | 567 | |
| 568 | if (is_rmb) { |
| 569 | lock = &lgr->rmbs_lock; |
| 570 | buf_list = &lgr->rmbs[bufsize_short]; |
| 571 | } else { |
| 572 | lock = &lgr->sndbufs_lock; |
| 573 | buf_list = &lgr->sndbufs[bufsize_short]; |
| 574 | } |
Ursula Braun | c45abf3 | 2017-07-28 13:56:14 +0200 | [diff] [blame] | 575 | bufsize = smc_uncompress_bufsize(bufsize_short); |
Ursula Braun | 9d8fb61 | 2017-07-28 13:56:19 +0200 | [diff] [blame] | 576 | if ((1 << get_order(bufsize)) > SG_MAX_SINGLE_ALLOC) |
| 577 | continue; |
| 578 | |
Ursula Braun | 3e03472 | 2017-07-28 13:56:20 +0200 | [diff] [blame] | 579 | /* check for reusable slot in the link group */ |
| 580 | buf_desc = smc_buf_get_slot(lgr, bufsize_short, lock, buf_list); |
| 581 | if (buf_desc) { |
| 582 | memset(buf_desc->cpu_addr, 0, bufsize); |
Ursula Braun | cd6851f | 2017-01-09 16:55:18 +0100 | [diff] [blame] | 583 | break; /* found reusable slot */ |
| 584 | } |
Ursula Braun | 3e03472 | 2017-07-28 13:56:20 +0200 | [diff] [blame] | 585 | |
Ursula Braun | b33982c | 2017-07-28 13:56:21 +0200 | [diff] [blame] | 586 | buf_desc = smc_new_buf_create(lgr, is_rmb, bufsize); |
| 587 | if (PTR_ERR(buf_desc) == -ENOMEM) |
| 588 | break; |
| 589 | if (IS_ERR(buf_desc)) |
Ursula Braun | cd6851f | 2017-01-09 16:55:18 +0100 | [diff] [blame] | 590 | continue; |
Ursula Braun | 3e03472 | 2017-07-28 13:56:20 +0200 | [diff] [blame] | 591 | |
| 592 | buf_desc->used = 1; |
| 593 | write_lock_bh(lock); |
| 594 | list_add(&buf_desc->list, buf_list); |
| 595 | write_unlock_bh(lock); |
| 596 | break; /* found */ |
Ursula Braun | cd6851f | 2017-01-09 16:55:18 +0100 | [diff] [blame] | 597 | } |
Ursula Braun | 3e03472 | 2017-07-28 13:56:20 +0200 | [diff] [blame] | 598 | |
Ursula Braun | b33982c | 2017-07-28 13:56:21 +0200 | [diff] [blame] | 599 | if (IS_ERR(buf_desc)) |
Ursula Braun | 3e03472 | 2017-07-28 13:56:20 +0200 | [diff] [blame] | 600 | return -ENOMEM; |
| 601 | |
| 602 | if (is_rmb) { |
| 603 | conn->rmb_desc = buf_desc; |
Ursula Braun | c45abf3 | 2017-07-28 13:56:14 +0200 | [diff] [blame] | 604 | conn->rmbe_size = bufsize; |
| 605 | conn->rmbe_size_short = bufsize_short; |
| 606 | smc->sk.sk_rcvbuf = bufsize * 2; |
Ursula Braun | 5f08318 | 2017-01-09 16:55:22 +0100 | [diff] [blame] | 607 | atomic_set(&conn->bytes_to_rcv, 0); |
Ursula Braun | c45abf3 | 2017-07-28 13:56:14 +0200 | [diff] [blame] | 608 | conn->rmbe_update_limit = smc_rmb_wnd_update_limit(bufsize); |
Ursula Braun | cd6851f | 2017-01-09 16:55:18 +0100 | [diff] [blame] | 609 | } else { |
Ursula Braun | 3e03472 | 2017-07-28 13:56:20 +0200 | [diff] [blame] | 610 | conn->sndbuf_desc = buf_desc; |
| 611 | conn->sndbuf_size = bufsize; |
| 612 | smc->sk.sk_sndbuf = bufsize * 2; |
| 613 | atomic_set(&conn->sndbuf_space, bufsize); |
Ursula Braun | cd6851f | 2017-01-09 16:55:18 +0100 | [diff] [blame] | 614 | } |
Ursula Braun | 3e03472 | 2017-07-28 13:56:20 +0200 | [diff] [blame] | 615 | return 0; |
| 616 | } |
| 617 | |
Ursula Braun | 10428dd | 2017-07-28 13:56:22 +0200 | [diff] [blame^] | 618 | void smc_sndbuf_sync_sg_for_cpu(struct smc_connection *conn) |
| 619 | { |
| 620 | struct smc_link_group *lgr = conn->lgr; |
| 621 | |
| 622 | smc_ib_sync_sg_for_cpu(lgr->lnk[SMC_SINGLE_LINK].smcibdev, |
| 623 | conn->sndbuf_desc, DMA_TO_DEVICE); |
| 624 | } |
| 625 | |
| 626 | void smc_sndbuf_sync_sg_for_device(struct smc_connection *conn) |
| 627 | { |
| 628 | struct smc_link_group *lgr = conn->lgr; |
| 629 | |
| 630 | smc_ib_sync_sg_for_device(lgr->lnk[SMC_SINGLE_LINK].smcibdev, |
| 631 | conn->sndbuf_desc, DMA_TO_DEVICE); |
| 632 | } |
| 633 | |
| 634 | void smc_rmb_sync_sg_for_cpu(struct smc_connection *conn) |
| 635 | { |
| 636 | struct smc_link_group *lgr = conn->lgr; |
| 637 | |
| 638 | smc_ib_sync_sg_for_cpu(lgr->lnk[SMC_SINGLE_LINK].smcibdev, |
| 639 | conn->rmb_desc, DMA_FROM_DEVICE); |
| 640 | } |
| 641 | |
| 642 | void smc_rmb_sync_sg_for_device(struct smc_connection *conn) |
| 643 | { |
| 644 | struct smc_link_group *lgr = conn->lgr; |
| 645 | |
| 646 | smc_ib_sync_sg_for_device(lgr->lnk[SMC_SINGLE_LINK].smcibdev, |
| 647 | conn->rmb_desc, DMA_FROM_DEVICE); |
| 648 | } |
| 649 | |
Ursula Braun | 3e03472 | 2017-07-28 13:56:20 +0200 | [diff] [blame] | 650 | /* create the send and receive buffer for an SMC socket; |
| 651 | * receive buffers are called RMBs; |
| 652 | * (even though the SMC protocol allows more than one RMB-element per RMB, |
| 653 | * the Linux implementation uses just one RMB-element per RMB, i.e. uses an |
| 654 | * extra RMB for every connection in a link group |
| 655 | */ |
| 656 | int smc_buf_create(struct smc_sock *smc) |
| 657 | { |
| 658 | int rc; |
| 659 | |
| 660 | /* create send buffer */ |
| 661 | rc = __smc_buf_create(smc, false); |
| 662 | if (rc) |
| 663 | return rc; |
| 664 | /* create rmb */ |
| 665 | rc = __smc_buf_create(smc, true); |
| 666 | if (rc) |
| 667 | smc_buf_free(smc->conn.sndbuf_desc, |
| 668 | &smc->conn.lgr->lnk[SMC_SINGLE_LINK], false); |
| 669 | return rc; |
Ursula Braun | cd6851f | 2017-01-09 16:55:18 +0100 | [diff] [blame] | 670 | } |
Ursula Braun | bd4ad57 | 2017-01-09 16:55:20 +0100 | [diff] [blame] | 671 | |
| 672 | static inline int smc_rmb_reserve_rtoken_idx(struct smc_link_group *lgr) |
| 673 | { |
| 674 | int i; |
| 675 | |
| 676 | for_each_clear_bit(i, lgr->rtokens_used_mask, SMC_RMBS_PER_LGR_MAX) { |
| 677 | if (!test_and_set_bit(i, lgr->rtokens_used_mask)) |
| 678 | return i; |
| 679 | } |
| 680 | return -ENOSPC; |
| 681 | } |
| 682 | |
| 683 | /* save rkey and dma_addr received from peer during clc handshake */ |
| 684 | int smc_rmb_rtoken_handling(struct smc_connection *conn, |
| 685 | struct smc_clc_msg_accept_confirm *clc) |
| 686 | { |
| 687 | u64 dma_addr = be64_to_cpu(clc->rmb_dma_addr); |
| 688 | struct smc_link_group *lgr = conn->lgr; |
| 689 | u32 rkey = ntohl(clc->rmb_rkey); |
| 690 | int i; |
| 691 | |
| 692 | for (i = 0; i < SMC_RMBS_PER_LGR_MAX; i++) { |
| 693 | if ((lgr->rtokens[i][SMC_SINGLE_LINK].rkey == rkey) && |
Ursula Braun | 263eec9 | 2017-05-15 17:33:37 +0200 | [diff] [blame] | 694 | (lgr->rtokens[i][SMC_SINGLE_LINK].dma_addr == dma_addr) && |
Ursula Braun | bd4ad57 | 2017-01-09 16:55:20 +0100 | [diff] [blame] | 695 | test_bit(i, lgr->rtokens_used_mask)) { |
| 696 | conn->rtoken_idx = i; |
| 697 | return 0; |
| 698 | } |
| 699 | } |
| 700 | conn->rtoken_idx = smc_rmb_reserve_rtoken_idx(lgr); |
| 701 | if (conn->rtoken_idx < 0) |
| 702 | return conn->rtoken_idx; |
| 703 | lgr->rtokens[conn->rtoken_idx][SMC_SINGLE_LINK].rkey = rkey; |
| 704 | lgr->rtokens[conn->rtoken_idx][SMC_SINGLE_LINK].dma_addr = dma_addr; |
| 705 | return 0; |
| 706 | } |