Ursula Braun | 0cfdd8f | 2017-01-09 16:55:17 +0100 | [diff] [blame] | 1 | /* |
| 2 | * Shared Memory Communications over RDMA (SMC-R) and RoCE |
| 3 | * |
| 4 | * Basic Transport Functions exploiting Infiniband API |
| 5 | * |
| 6 | * Copyright IBM Corp. 2016 |
| 7 | * |
| 8 | * Author(s): Ursula Braun <ubraun@linux.vnet.ibm.com> |
| 9 | */ |
| 10 | |
| 11 | #include <linux/socket.h> |
| 12 | #include <linux/if_vlan.h> |
| 13 | #include <linux/random.h> |
| 14 | #include <linux/workqueue.h> |
| 15 | #include <net/tcp.h> |
| 16 | #include <net/sock.h> |
| 17 | #include <rdma/ib_verbs.h> |
| 18 | |
| 19 | #include "smc.h" |
| 20 | #include "smc_clc.h" |
| 21 | #include "smc_core.h" |
| 22 | #include "smc_ib.h" |
Ursula Braun | f38ba179 | 2017-01-09 16:55:19 +0100 | [diff] [blame] | 23 | #include "smc_wr.h" |
Ursula Braun | 0cfdd8f | 2017-01-09 16:55:17 +0100 | [diff] [blame] | 24 | |
/* delay, in jiffies, passed to schedule_delayed_work() for a link group's
 * free_work once the link group has no connections left or was terminated
 */
#define SMC_LGR_FREE_DELAY	(600 * HZ)
| 26 | |
| 27 | /* Register connection's alert token in our lookup structure. |
| 28 | * To use rbtrees we have to implement our own insert core. |
| 29 | * Requires @conns_lock |
| 30 | * @smc connection to register |
| 31 | * Returns 0 on success, != otherwise. |
| 32 | */ |
| 33 | static void smc_lgr_add_alert_token(struct smc_connection *conn) |
| 34 | { |
| 35 | struct rb_node **link, *parent = NULL; |
| 36 | u32 token = conn->alert_token_local; |
| 37 | |
| 38 | link = &conn->lgr->conns_all.rb_node; |
| 39 | while (*link) { |
| 40 | struct smc_connection *cur = rb_entry(*link, |
| 41 | struct smc_connection, alert_node); |
| 42 | |
| 43 | parent = *link; |
| 44 | if (cur->alert_token_local > token) |
| 45 | link = &parent->rb_left; |
| 46 | else |
| 47 | link = &parent->rb_right; |
| 48 | } |
| 49 | /* Put the new node there */ |
| 50 | rb_link_node(&conn->alert_node, parent, link); |
| 51 | rb_insert_color(&conn->alert_node, &conn->lgr->conns_all); |
| 52 | } |
| 53 | |
| 54 | /* Register connection in link group by assigning an alert token |
| 55 | * registered in a search tree. |
| 56 | * Requires @conns_lock |
| 57 | * Note that '0' is a reserved value and not assigned. |
| 58 | */ |
| 59 | static void smc_lgr_register_conn(struct smc_connection *conn) |
| 60 | { |
| 61 | struct smc_sock *smc = container_of(conn, struct smc_sock, conn); |
| 62 | static atomic_t nexttoken = ATOMIC_INIT(0); |
| 63 | |
| 64 | /* find a new alert_token_local value not yet used by some connection |
| 65 | * in this link group |
| 66 | */ |
| 67 | sock_hold(&smc->sk); /* sock_put in smc_lgr_unregister_conn() */ |
| 68 | while (!conn->alert_token_local) { |
| 69 | conn->alert_token_local = atomic_inc_return(&nexttoken); |
| 70 | if (smc_lgr_find_conn(conn->alert_token_local, conn->lgr)) |
| 71 | conn->alert_token_local = 0; |
| 72 | } |
| 73 | smc_lgr_add_alert_token(conn); |
| 74 | conn->lgr->conns_num++; |
| 75 | } |
| 76 | |
| 77 | /* Unregister connection and reset the alert token of the given connection< |
| 78 | */ |
| 79 | static void __smc_lgr_unregister_conn(struct smc_connection *conn) |
| 80 | { |
| 81 | struct smc_sock *smc = container_of(conn, struct smc_sock, conn); |
| 82 | struct smc_link_group *lgr = conn->lgr; |
| 83 | |
| 84 | rb_erase(&conn->alert_node, &lgr->conns_all); |
| 85 | lgr->conns_num--; |
| 86 | conn->alert_token_local = 0; |
| 87 | conn->lgr = NULL; |
| 88 | sock_put(&smc->sk); /* sock_hold in smc_lgr_register_conn() */ |
| 89 | } |
| 90 | |
| 91 | /* Unregister connection and trigger lgr freeing if applicable |
| 92 | */ |
| 93 | static void smc_lgr_unregister_conn(struct smc_connection *conn) |
| 94 | { |
| 95 | struct smc_link_group *lgr = conn->lgr; |
| 96 | int reduced = 0; |
| 97 | |
| 98 | write_lock_bh(&lgr->conns_lock); |
| 99 | if (conn->alert_token_local) { |
| 100 | reduced = 1; |
| 101 | __smc_lgr_unregister_conn(conn); |
| 102 | } |
| 103 | write_unlock_bh(&lgr->conns_lock); |
| 104 | if (reduced && !lgr->conns_num) |
| 105 | schedule_delayed_work(&lgr->free_work, SMC_LGR_FREE_DELAY); |
| 106 | } |
| 107 | |
| 108 | static void smc_lgr_free_work(struct work_struct *work) |
| 109 | { |
| 110 | struct smc_link_group *lgr = container_of(to_delayed_work(work), |
| 111 | struct smc_link_group, |
| 112 | free_work); |
| 113 | bool conns; |
| 114 | |
| 115 | spin_lock_bh(&smc_lgr_list.lock); |
| 116 | read_lock_bh(&lgr->conns_lock); |
| 117 | conns = RB_EMPTY_ROOT(&lgr->conns_all); |
| 118 | read_unlock_bh(&lgr->conns_lock); |
| 119 | if (!conns) { /* number of lgr connections is no longer zero */ |
| 120 | spin_unlock_bh(&smc_lgr_list.lock); |
| 121 | return; |
| 122 | } |
| 123 | list_del_init(&lgr->list); /* remove from smc_lgr_list */ |
| 124 | spin_unlock_bh(&smc_lgr_list.lock); |
| 125 | smc_lgr_free(lgr); |
| 126 | } |
| 127 | |
| 128 | /* create a new SMC link group */ |
| 129 | static int smc_lgr_create(struct smc_sock *smc, __be32 peer_in_addr, |
| 130 | struct smc_ib_device *smcibdev, u8 ibport, |
| 131 | char *peer_systemid, unsigned short vlan_id) |
| 132 | { |
| 133 | struct smc_link_group *lgr; |
| 134 | struct smc_link *lnk; |
| 135 | u8 rndvec[3]; |
| 136 | int rc = 0; |
Ursula Braun | cd6851f | 2017-01-09 16:55:18 +0100 | [diff] [blame] | 137 | int i; |
Ursula Braun | 0cfdd8f | 2017-01-09 16:55:17 +0100 | [diff] [blame] | 138 | |
| 139 | lgr = kzalloc(sizeof(*lgr), GFP_KERNEL); |
| 140 | if (!lgr) { |
| 141 | rc = -ENOMEM; |
| 142 | goto out; |
| 143 | } |
| 144 | lgr->role = smc->listen_smc ? SMC_SERV : SMC_CLNT; |
| 145 | lgr->sync_err = false; |
| 146 | lgr->daddr = peer_in_addr; |
| 147 | memcpy(lgr->peer_systemid, peer_systemid, SMC_SYSTEMID_LEN); |
| 148 | lgr->vlan_id = vlan_id; |
Ursula Braun | cd6851f | 2017-01-09 16:55:18 +0100 | [diff] [blame] | 149 | rwlock_init(&lgr->sndbufs_lock); |
| 150 | rwlock_init(&lgr->rmbs_lock); |
| 151 | for (i = 0; i < SMC_RMBE_SIZES; i++) { |
| 152 | INIT_LIST_HEAD(&lgr->sndbufs[i]); |
| 153 | INIT_LIST_HEAD(&lgr->rmbs[i]); |
| 154 | } |
Ursula Braun | 0cfdd8f | 2017-01-09 16:55:17 +0100 | [diff] [blame] | 155 | INIT_DELAYED_WORK(&lgr->free_work, smc_lgr_free_work); |
| 156 | lgr->conns_all = RB_ROOT; |
| 157 | |
| 158 | lnk = &lgr->lnk[SMC_SINGLE_LINK]; |
| 159 | /* initialize link */ |
| 160 | lnk->smcibdev = smcibdev; |
| 161 | lnk->ibport = ibport; |
| 162 | lnk->path_mtu = smcibdev->pattr[ibport - 1].active_mtu; |
Ursula Braun | bd4ad57 | 2017-01-09 16:55:20 +0100 | [diff] [blame^] | 163 | if (!smcibdev->initialized) |
| 164 | smc_ib_setup_per_ibdev(smcibdev); |
Ursula Braun | 0cfdd8f | 2017-01-09 16:55:17 +0100 | [diff] [blame] | 165 | get_random_bytes(rndvec, sizeof(rndvec)); |
| 166 | lnk->psn_initial = rndvec[0] + (rndvec[1] << 8) + (rndvec[2] << 16); |
Ursula Braun | f38ba179 | 2017-01-09 16:55:19 +0100 | [diff] [blame] | 167 | rc = smc_wr_alloc_link_mem(lnk); |
| 168 | if (rc) |
| 169 | goto free_lgr; |
| 170 | init_waitqueue_head(&lnk->wr_tx_wait); |
Ursula Braun | bd4ad57 | 2017-01-09 16:55:20 +0100 | [diff] [blame^] | 171 | rc = smc_ib_create_protection_domain(lnk); |
| 172 | if (rc) |
| 173 | goto free_link_mem; |
| 174 | rc = smc_ib_create_queue_pair(lnk); |
| 175 | if (rc) |
| 176 | goto dealloc_pd; |
| 177 | rc = smc_wr_create_link(lnk); |
| 178 | if (rc) |
| 179 | goto destroy_qp; |
Ursula Braun | 0cfdd8f | 2017-01-09 16:55:17 +0100 | [diff] [blame] | 180 | |
| 181 | smc->conn.lgr = lgr; |
| 182 | rwlock_init(&lgr->conns_lock); |
| 183 | spin_lock_bh(&smc_lgr_list.lock); |
| 184 | list_add(&lgr->list, &smc_lgr_list.list); |
| 185 | spin_unlock_bh(&smc_lgr_list.lock); |
Ursula Braun | f38ba179 | 2017-01-09 16:55:19 +0100 | [diff] [blame] | 186 | return 0; |
| 187 | |
Ursula Braun | bd4ad57 | 2017-01-09 16:55:20 +0100 | [diff] [blame^] | 188 | destroy_qp: |
| 189 | smc_ib_destroy_queue_pair(lnk); |
| 190 | dealloc_pd: |
| 191 | smc_ib_dealloc_protection_domain(lnk); |
| 192 | free_link_mem: |
| 193 | smc_wr_free_link_mem(lnk); |
Ursula Braun | f38ba179 | 2017-01-09 16:55:19 +0100 | [diff] [blame] | 194 | free_lgr: |
| 195 | kfree(lgr); |
Ursula Braun | 0cfdd8f | 2017-01-09 16:55:17 +0100 | [diff] [blame] | 196 | out: |
| 197 | return rc; |
| 198 | } |
| 199 | |
Ursula Braun | cd6851f | 2017-01-09 16:55:18 +0100 | [diff] [blame] | 200 | static void smc_sndbuf_unuse(struct smc_connection *conn) |
| 201 | { |
| 202 | if (conn->sndbuf_desc) { |
| 203 | conn->sndbuf_desc->used = 0; |
| 204 | conn->sndbuf_size = 0; |
| 205 | } |
| 206 | } |
| 207 | |
| 208 | static void smc_rmb_unuse(struct smc_connection *conn) |
| 209 | { |
| 210 | if (conn->rmb_desc) { |
| 211 | conn->rmb_desc->used = 0; |
| 212 | conn->rmbe_size = 0; |
| 213 | } |
| 214 | } |
| 215 | |
Ursula Braun | 0cfdd8f | 2017-01-09 16:55:17 +0100 | [diff] [blame] | 216 | /* remove a finished connection from its link group */ |
| 217 | void smc_conn_free(struct smc_connection *conn) |
| 218 | { |
| 219 | struct smc_link_group *lgr = conn->lgr; |
| 220 | |
| 221 | if (!lgr) |
| 222 | return; |
| 223 | smc_lgr_unregister_conn(conn); |
Ursula Braun | cd6851f | 2017-01-09 16:55:18 +0100 | [diff] [blame] | 224 | smc_rmb_unuse(conn); |
| 225 | smc_sndbuf_unuse(conn); |
Ursula Braun | 0cfdd8f | 2017-01-09 16:55:17 +0100 | [diff] [blame] | 226 | } |
| 227 | |
/* Tear down a link: reset the queue pair, then release the link's
 * resources in reverse order of their setup in smc_lgr_create()
 * (work request link, queue pair, protection domain, link memory).
 */
static void smc_link_clear(struct smc_link *lnk)
{
	lnk->peer_qpn = 0;
	smc_ib_modify_qp_reset(lnk);
	smc_wr_free_link(lnk);
	smc_ib_destroy_queue_pair(lnk);
	smc_ib_dealloc_protection_domain(lnk);
	smc_wr_free_link_mem(lnk);
}
| 237 | |
Ursula Braun | cd6851f | 2017-01-09 16:55:18 +0100 | [diff] [blame] | 238 | static void smc_lgr_free_sndbufs(struct smc_link_group *lgr) |
| 239 | { |
| 240 | struct smc_buf_desc *sndbuf_desc, *bf_desc; |
| 241 | int i; |
| 242 | |
| 243 | for (i = 0; i < SMC_RMBE_SIZES; i++) { |
| 244 | list_for_each_entry_safe(sndbuf_desc, bf_desc, &lgr->sndbufs[i], |
| 245 | list) { |
Ursula Braun | bd4ad57 | 2017-01-09 16:55:20 +0100 | [diff] [blame^] | 246 | list_del(&sndbuf_desc->list); |
| 247 | smc_ib_buf_unmap(lgr->lnk[SMC_SINGLE_LINK].smcibdev, |
| 248 | smc_uncompress_bufsize(i), |
| 249 | sndbuf_desc, DMA_TO_DEVICE); |
Ursula Braun | cd6851f | 2017-01-09 16:55:18 +0100 | [diff] [blame] | 250 | kfree(sndbuf_desc->cpu_addr); |
| 251 | kfree(sndbuf_desc); |
| 252 | } |
| 253 | } |
| 254 | } |
| 255 | |
| 256 | static void smc_lgr_free_rmbs(struct smc_link_group *lgr) |
| 257 | { |
| 258 | struct smc_buf_desc *rmb_desc, *bf_desc; |
Ursula Braun | bd4ad57 | 2017-01-09 16:55:20 +0100 | [diff] [blame^] | 259 | struct smc_link *lnk = &lgr->lnk[SMC_SINGLE_LINK]; |
Ursula Braun | cd6851f | 2017-01-09 16:55:18 +0100 | [diff] [blame] | 260 | int i; |
| 261 | |
| 262 | for (i = 0; i < SMC_RMBE_SIZES; i++) { |
| 263 | list_for_each_entry_safe(rmb_desc, bf_desc, &lgr->rmbs[i], |
| 264 | list) { |
Ursula Braun | bd4ad57 | 2017-01-09 16:55:20 +0100 | [diff] [blame^] | 265 | list_del(&rmb_desc->list); |
| 266 | smc_ib_buf_unmap(lnk->smcibdev, |
| 267 | smc_uncompress_bufsize(i), |
| 268 | rmb_desc, DMA_FROM_DEVICE); |
Ursula Braun | cd6851f | 2017-01-09 16:55:18 +0100 | [diff] [blame] | 269 | kfree(rmb_desc->cpu_addr); |
| 270 | kfree(rmb_desc); |
| 271 | } |
| 272 | } |
| 273 | } |
| 274 | |
/* Remove a link group: free its RMBs and send buffers, tear down its
 * single link and release the link group structure itself.
 * The buffers are unmapped via the link's smcibdev, so they are freed
 * before the link is cleared.
 */
void smc_lgr_free(struct smc_link_group *lgr)
{
	smc_lgr_free_rmbs(lgr);
	smc_lgr_free_sndbufs(lgr);
	smc_link_clear(&lgr->lnk[SMC_SINGLE_LINK]);
	kfree(lgr);
}
| 283 | |
| 284 | /* terminate linkgroup abnormally */ |
| 285 | void smc_lgr_terminate(struct smc_link_group *lgr) |
| 286 | { |
| 287 | struct smc_connection *conn; |
| 288 | struct rb_node *node; |
| 289 | |
| 290 | spin_lock_bh(&smc_lgr_list.lock); |
| 291 | if (list_empty(&lgr->list)) { |
| 292 | /* termination already triggered */ |
| 293 | spin_unlock_bh(&smc_lgr_list.lock); |
| 294 | return; |
| 295 | } |
| 296 | /* do not use this link group for new connections */ |
| 297 | list_del_init(&lgr->list); |
| 298 | spin_unlock_bh(&smc_lgr_list.lock); |
| 299 | |
| 300 | write_lock_bh(&lgr->conns_lock); |
| 301 | node = rb_first(&lgr->conns_all); |
| 302 | while (node) { |
| 303 | conn = rb_entry(node, struct smc_connection, alert_node); |
| 304 | __smc_lgr_unregister_conn(conn); |
| 305 | node = rb_first(&lgr->conns_all); |
| 306 | } |
| 307 | write_unlock_bh(&lgr->conns_lock); |
| 308 | schedule_delayed_work(&lgr->free_work, SMC_LGR_FREE_DELAY); |
| 309 | } |
| 310 | |
| 311 | /* Determine vlan of internal TCP socket. |
| 312 | * @vlan_id: address to store the determined vlan id into |
| 313 | */ |
| 314 | static int smc_vlan_by_tcpsk(struct socket *clcsock, unsigned short *vlan_id) |
| 315 | { |
| 316 | struct dst_entry *dst = sk_dst_get(clcsock->sk); |
| 317 | int rc = 0; |
| 318 | |
| 319 | *vlan_id = 0; |
| 320 | if (!dst) { |
| 321 | rc = -ENOTCONN; |
| 322 | goto out; |
| 323 | } |
| 324 | if (!dst->dev) { |
| 325 | rc = -ENODEV; |
| 326 | goto out_rel; |
| 327 | } |
| 328 | |
| 329 | if (is_vlan_dev(dst->dev)) |
| 330 | *vlan_id = vlan_dev_vlan_id(dst->dev); |
| 331 | |
| 332 | out_rel: |
| 333 | dst_release(dst); |
| 334 | out: |
| 335 | return rc; |
| 336 | } |
| 337 | |
| 338 | /* determine the link gid matching the vlan id of the link group */ |
| 339 | static int smc_link_determine_gid(struct smc_link_group *lgr) |
| 340 | { |
| 341 | struct smc_link *lnk = &lgr->lnk[SMC_SINGLE_LINK]; |
| 342 | struct ib_gid_attr gattr; |
| 343 | union ib_gid gid; |
| 344 | int i; |
| 345 | |
| 346 | if (!lgr->vlan_id) { |
| 347 | lnk->gid = lnk->smcibdev->gid[lnk->ibport - 1]; |
| 348 | return 0; |
| 349 | } |
| 350 | |
| 351 | for (i = 0; i < lnk->smcibdev->pattr[lnk->ibport - 1].gid_tbl_len; |
| 352 | i++) { |
| 353 | if (ib_query_gid(lnk->smcibdev->ibdev, lnk->ibport, i, &gid, |
| 354 | &gattr)) |
| 355 | continue; |
| 356 | if (gattr.ndev && |
| 357 | (vlan_dev_vlan_id(gattr.ndev) == lgr->vlan_id)) { |
| 358 | lnk->gid = gid; |
| 359 | return 0; |
| 360 | } |
| 361 | } |
| 362 | return -ENODEV; |
| 363 | } |
| 364 | |
| 365 | /* create a new SMC connection (and a new link group if necessary) */ |
| 366 | int smc_conn_create(struct smc_sock *smc, __be32 peer_in_addr, |
| 367 | struct smc_ib_device *smcibdev, u8 ibport, |
| 368 | struct smc_clc_msg_local *lcl, int srv_first_contact) |
| 369 | { |
| 370 | struct smc_connection *conn = &smc->conn; |
| 371 | struct smc_link_group *lgr; |
| 372 | unsigned short vlan_id; |
| 373 | enum smc_lgr_role role; |
| 374 | int local_contact = SMC_FIRST_CONTACT; |
| 375 | int rc = 0; |
| 376 | |
| 377 | role = smc->listen_smc ? SMC_SERV : SMC_CLNT; |
| 378 | rc = smc_vlan_by_tcpsk(smc->clcsock, &vlan_id); |
| 379 | if (rc) |
| 380 | return rc; |
| 381 | |
| 382 | if ((role == SMC_CLNT) && srv_first_contact) |
| 383 | /* create new link group as well */ |
| 384 | goto create; |
| 385 | |
| 386 | /* determine if an existing link group can be reused */ |
| 387 | spin_lock_bh(&smc_lgr_list.lock); |
| 388 | list_for_each_entry(lgr, &smc_lgr_list.list, list) { |
| 389 | write_lock_bh(&lgr->conns_lock); |
| 390 | if (!memcmp(lgr->peer_systemid, lcl->id_for_peer, |
| 391 | SMC_SYSTEMID_LEN) && |
| 392 | !memcmp(lgr->lnk[SMC_SINGLE_LINK].peer_gid, &lcl->gid, |
| 393 | SMC_GID_SIZE) && |
| 394 | !memcmp(lgr->lnk[SMC_SINGLE_LINK].peer_mac, lcl->mac, |
| 395 | sizeof(lcl->mac)) && |
| 396 | !lgr->sync_err && |
| 397 | (lgr->role == role) && |
Ursula Braun | cd6851f | 2017-01-09 16:55:18 +0100 | [diff] [blame] | 398 | (lgr->vlan_id == vlan_id) && |
| 399 | ((role == SMC_CLNT) || |
| 400 | (lgr->conns_num < SMC_RMBS_PER_LGR_MAX))) { |
Ursula Braun | 0cfdd8f | 2017-01-09 16:55:17 +0100 | [diff] [blame] | 401 | /* link group found */ |
| 402 | local_contact = SMC_REUSE_CONTACT; |
| 403 | conn->lgr = lgr; |
| 404 | smc_lgr_register_conn(conn); /* add smc conn to lgr */ |
| 405 | write_unlock_bh(&lgr->conns_lock); |
| 406 | break; |
| 407 | } |
| 408 | write_unlock_bh(&lgr->conns_lock); |
| 409 | } |
| 410 | spin_unlock_bh(&smc_lgr_list.lock); |
| 411 | |
| 412 | if (role == SMC_CLNT && !srv_first_contact && |
| 413 | (local_contact == SMC_FIRST_CONTACT)) { |
| 414 | /* Server reuses a link group, but Client wants to start |
| 415 | * a new one |
| 416 | * send out_of_sync decline, reason synchr. error |
| 417 | */ |
| 418 | return -ENOLINK; |
| 419 | } |
| 420 | |
| 421 | create: |
| 422 | if (local_contact == SMC_FIRST_CONTACT) { |
| 423 | rc = smc_lgr_create(smc, peer_in_addr, smcibdev, ibport, |
| 424 | lcl->id_for_peer, vlan_id); |
| 425 | if (rc) |
| 426 | goto out; |
| 427 | smc_lgr_register_conn(conn); /* add smc conn to lgr */ |
| 428 | rc = smc_link_determine_gid(conn->lgr); |
| 429 | } |
| 430 | |
| 431 | out: |
| 432 | return rc ? rc : local_contact; |
| 433 | } |
Ursula Braun | cd6851f | 2017-01-09 16:55:18 +0100 | [diff] [blame] | 434 | |
| 435 | /* try to reuse a sndbuf description slot of the sndbufs list for a certain |
| 436 | * buf_size; if not available, return NULL |
| 437 | */ |
| 438 | static inline |
| 439 | struct smc_buf_desc *smc_sndbuf_get_slot(struct smc_link_group *lgr, |
| 440 | int compressed_bufsize) |
| 441 | { |
| 442 | struct smc_buf_desc *sndbuf_slot; |
| 443 | |
| 444 | read_lock_bh(&lgr->sndbufs_lock); |
| 445 | list_for_each_entry(sndbuf_slot, &lgr->sndbufs[compressed_bufsize], |
| 446 | list) { |
| 447 | if (cmpxchg(&sndbuf_slot->used, 0, 1) == 0) { |
| 448 | read_unlock_bh(&lgr->sndbufs_lock); |
| 449 | return sndbuf_slot; |
| 450 | } |
| 451 | } |
| 452 | read_unlock_bh(&lgr->sndbufs_lock); |
| 453 | return NULL; |
| 454 | } |
| 455 | |
| 456 | /* try to reuse an rmb description slot of the rmbs list for a certain |
| 457 | * rmbe_size; if not available, return NULL |
| 458 | */ |
| 459 | static inline |
| 460 | struct smc_buf_desc *smc_rmb_get_slot(struct smc_link_group *lgr, |
| 461 | int compressed_bufsize) |
| 462 | { |
| 463 | struct smc_buf_desc *rmb_slot; |
| 464 | |
| 465 | read_lock_bh(&lgr->rmbs_lock); |
| 466 | list_for_each_entry(rmb_slot, &lgr->rmbs[compressed_bufsize], |
| 467 | list) { |
| 468 | if (cmpxchg(&rmb_slot->used, 0, 1) == 0) { |
| 469 | read_unlock_bh(&lgr->rmbs_lock); |
| 470 | return rmb_slot; |
| 471 | } |
| 472 | } |
| 473 | read_unlock_bh(&lgr->rmbs_lock); |
| 474 | return NULL; |
| 475 | } |
| 476 | |
| 477 | /* create the tx buffer for an SMC socket */ |
| 478 | int smc_sndbuf_create(struct smc_sock *smc) |
| 479 | { |
| 480 | struct smc_connection *conn = &smc->conn; |
| 481 | struct smc_link_group *lgr = conn->lgr; |
| 482 | int tmp_bufsize, tmp_bufsize_short; |
| 483 | struct smc_buf_desc *sndbuf_desc; |
| 484 | int rc; |
| 485 | |
| 486 | /* use socket send buffer size (w/o overhead) as start value */ |
| 487 | for (tmp_bufsize_short = smc_compress_bufsize(smc->sk.sk_sndbuf / 2); |
| 488 | tmp_bufsize_short >= 0; tmp_bufsize_short--) { |
| 489 | tmp_bufsize = smc_uncompress_bufsize(tmp_bufsize_short); |
| 490 | /* check for reusable sndbuf_slot in the link group */ |
| 491 | sndbuf_desc = smc_sndbuf_get_slot(lgr, tmp_bufsize_short); |
| 492 | if (sndbuf_desc) { |
| 493 | memset(sndbuf_desc->cpu_addr, 0, tmp_bufsize); |
| 494 | break; /* found reusable slot */ |
| 495 | } |
| 496 | /* try to alloc a new send buffer */ |
| 497 | sndbuf_desc = kzalloc(sizeof(*sndbuf_desc), GFP_KERNEL); |
| 498 | if (!sndbuf_desc) |
| 499 | break; /* give up with -ENOMEM */ |
| 500 | sndbuf_desc->cpu_addr = kzalloc(tmp_bufsize, |
| 501 | GFP_KERNEL | __GFP_NOWARN | |
| 502 | __GFP_NOMEMALLOC | |
| 503 | __GFP_NORETRY); |
| 504 | if (!sndbuf_desc->cpu_addr) { |
| 505 | kfree(sndbuf_desc); |
| 506 | /* if send buffer allocation has failed, |
| 507 | * try a smaller one |
| 508 | */ |
| 509 | continue; |
| 510 | } |
| 511 | rc = smc_ib_buf_map(lgr->lnk[SMC_SINGLE_LINK].smcibdev, |
| 512 | tmp_bufsize, sndbuf_desc, |
| 513 | DMA_TO_DEVICE); |
| 514 | if (rc) { |
| 515 | kfree(sndbuf_desc->cpu_addr); |
| 516 | kfree(sndbuf_desc); |
| 517 | continue; /* if mapping failed, try smaller one */ |
| 518 | } |
| 519 | sndbuf_desc->used = 1; |
| 520 | write_lock_bh(&lgr->sndbufs_lock); |
| 521 | list_add(&sndbuf_desc->list, |
| 522 | &lgr->sndbufs[tmp_bufsize_short]); |
| 523 | write_unlock_bh(&lgr->sndbufs_lock); |
| 524 | break; |
| 525 | } |
| 526 | if (sndbuf_desc && sndbuf_desc->cpu_addr) { |
| 527 | conn->sndbuf_desc = sndbuf_desc; |
| 528 | conn->sndbuf_size = tmp_bufsize; |
| 529 | smc->sk.sk_sndbuf = tmp_bufsize * 2; |
| 530 | return 0; |
| 531 | } else { |
| 532 | return -ENOMEM; |
| 533 | } |
| 534 | } |
| 535 | |
| 536 | /* create the RMB for an SMC socket (even though the SMC protocol |
| 537 | * allows more than one RMB-element per RMB, the Linux implementation |
| 538 | * uses just one RMB-element per RMB, i.e. uses an extra RMB for every |
| 539 | * connection in a link group |
| 540 | */ |
| 541 | int smc_rmb_create(struct smc_sock *smc) |
| 542 | { |
| 543 | struct smc_connection *conn = &smc->conn; |
| 544 | struct smc_link_group *lgr = conn->lgr; |
| 545 | int tmp_bufsize, tmp_bufsize_short; |
| 546 | struct smc_buf_desc *rmb_desc; |
| 547 | int rc; |
| 548 | |
| 549 | /* use socket recv buffer size (w/o overhead) as start value */ |
| 550 | for (tmp_bufsize_short = smc_compress_bufsize(smc->sk.sk_rcvbuf / 2); |
| 551 | tmp_bufsize_short >= 0; tmp_bufsize_short--) { |
| 552 | tmp_bufsize = smc_uncompress_bufsize(tmp_bufsize_short); |
| 553 | /* check for reusable rmb_slot in the link group */ |
| 554 | rmb_desc = smc_rmb_get_slot(lgr, tmp_bufsize_short); |
| 555 | if (rmb_desc) { |
| 556 | memset(rmb_desc->cpu_addr, 0, tmp_bufsize); |
| 557 | break; /* found reusable slot */ |
| 558 | } |
| 559 | /* try to alloc a new RMB */ |
| 560 | rmb_desc = kzalloc(sizeof(*rmb_desc), GFP_KERNEL); |
| 561 | if (!rmb_desc) |
| 562 | break; /* give up with -ENOMEM */ |
| 563 | rmb_desc->cpu_addr = kzalloc(tmp_bufsize, |
| 564 | GFP_KERNEL | __GFP_NOWARN | |
| 565 | __GFP_NOMEMALLOC | |
| 566 | __GFP_NORETRY); |
| 567 | if (!rmb_desc->cpu_addr) { |
| 568 | kfree(rmb_desc); |
| 569 | /* if RMB allocation has failed, |
| 570 | * try a smaller one |
| 571 | */ |
| 572 | continue; |
| 573 | } |
| 574 | rc = smc_ib_buf_map(lgr->lnk[SMC_SINGLE_LINK].smcibdev, |
| 575 | tmp_bufsize, rmb_desc, |
| 576 | DMA_FROM_DEVICE); |
| 577 | if (rc) { |
| 578 | kfree(rmb_desc->cpu_addr); |
| 579 | kfree(rmb_desc); |
| 580 | continue; /* if mapping failed, try smaller one */ |
| 581 | } |
Ursula Braun | bd4ad57 | 2017-01-09 16:55:20 +0100 | [diff] [blame^] | 582 | rc = smc_ib_get_memory_region(lgr->lnk[SMC_SINGLE_LINK].roce_pd, |
| 583 | IB_ACCESS_REMOTE_WRITE | |
| 584 | IB_ACCESS_LOCAL_WRITE, |
| 585 | &rmb_desc->mr_rx[SMC_SINGLE_LINK]); |
| 586 | if (rc) { |
| 587 | smc_ib_buf_unmap(lgr->lnk[SMC_SINGLE_LINK].smcibdev, |
| 588 | tmp_bufsize, rmb_desc, |
| 589 | DMA_FROM_DEVICE); |
| 590 | kfree(rmb_desc->cpu_addr); |
| 591 | kfree(rmb_desc); |
| 592 | continue; |
| 593 | } |
Ursula Braun | cd6851f | 2017-01-09 16:55:18 +0100 | [diff] [blame] | 594 | rmb_desc->used = 1; |
| 595 | write_lock_bh(&lgr->rmbs_lock); |
| 596 | list_add(&rmb_desc->list, |
| 597 | &lgr->rmbs[tmp_bufsize_short]); |
| 598 | write_unlock_bh(&lgr->rmbs_lock); |
| 599 | break; |
| 600 | } |
| 601 | if (rmb_desc && rmb_desc->cpu_addr) { |
| 602 | conn->rmb_desc = rmb_desc; |
| 603 | conn->rmbe_size = tmp_bufsize; |
| 604 | conn->rmbe_size_short = tmp_bufsize_short; |
| 605 | smc->sk.sk_rcvbuf = tmp_bufsize * 2; |
| 606 | return 0; |
| 607 | } else { |
| 608 | return -ENOMEM; |
| 609 | } |
| 610 | } |
Ursula Braun | bd4ad57 | 2017-01-09 16:55:20 +0100 | [diff] [blame^] | 611 | |
| 612 | static inline int smc_rmb_reserve_rtoken_idx(struct smc_link_group *lgr) |
| 613 | { |
| 614 | int i; |
| 615 | |
| 616 | for_each_clear_bit(i, lgr->rtokens_used_mask, SMC_RMBS_PER_LGR_MAX) { |
| 617 | if (!test_and_set_bit(i, lgr->rtokens_used_mask)) |
| 618 | return i; |
| 619 | } |
| 620 | return -ENOSPC; |
| 621 | } |
| 622 | |
| 623 | /* save rkey and dma_addr received from peer during clc handshake */ |
| 624 | int smc_rmb_rtoken_handling(struct smc_connection *conn, |
| 625 | struct smc_clc_msg_accept_confirm *clc) |
| 626 | { |
| 627 | u64 dma_addr = be64_to_cpu(clc->rmb_dma_addr); |
| 628 | struct smc_link_group *lgr = conn->lgr; |
| 629 | u32 rkey = ntohl(clc->rmb_rkey); |
| 630 | int i; |
| 631 | |
| 632 | for (i = 0; i < SMC_RMBS_PER_LGR_MAX; i++) { |
| 633 | if ((lgr->rtokens[i][SMC_SINGLE_LINK].rkey == rkey) && |
| 634 | test_bit(i, lgr->rtokens_used_mask)) { |
| 635 | conn->rtoken_idx = i; |
| 636 | return 0; |
| 637 | } |
| 638 | } |
| 639 | conn->rtoken_idx = smc_rmb_reserve_rtoken_idx(lgr); |
| 640 | if (conn->rtoken_idx < 0) |
| 641 | return conn->rtoken_idx; |
| 642 | lgr->rtokens[conn->rtoken_idx][SMC_SINGLE_LINK].rkey = rkey; |
| 643 | lgr->rtokens[conn->rtoken_idx][SMC_SINGLE_LINK].dma_addr = dma_addr; |
| 644 | return 0; |
| 645 | } |