Merge branch 'for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/roland/infiniband
* 'for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/roland/infiniband: (45 commits)
RDMA/cxgb3: Fix error paths in post_send and post_recv
RDMA/nes: Fix stale ARP issue
RDMA/nes: FIN during MPA startup causes timeout
RDMA/nes: Free kmap() resources
RDMA/nes: Check for zero STag
RDMA/nes: Fix Xansation test crash on cm_node ref_count
RDMA/nes: Abnormal listener exit causes loopback node crash
RDMA/nes: Fix crash in nes_accept()
RDMA/nes: Resource not freed for REJECTed connections
RDMA/nes: MPA request/response error checking
RDMA/nes: Fix query of ORD values
RDMA/nes: Fix MAX_CM_BUFFER define
RDMA/nes: Pass correct size to ioremap_nocache()
RDMA/nes: Update copyright and branding string
RDMA/nes: Add max_cqe check to nes_create_cq()
RDMA/nes: Clean up struct nes_qp
RDMA/nes: Implement IB_SIGNAL_ALL_WR as an iWARP extension
RDMA/nes: Add additional SFP+ PHY uC status check and PHY reset
RDMA/nes: Correct fast memory registration implementation
IB/ehca: Fix error paths in post_send and post_recv
...
diff --git a/Documentation/infiniband/ipoib.txt b/Documentation/infiniband/ipoib.txt
index 6d40f00..64eeb55 100644
--- a/Documentation/infiniband/ipoib.txt
+++ b/Documentation/infiniband/ipoib.txt
@@ -36,11 +36,11 @@
fabric with a 2K MTU, the IPoIB MTU will be 2048 - 4 = 2044 bytes.
In connected mode, the IB RC (Reliable Connected) transport is used.
- Connected mode is to takes advantage of the connected nature of the
- IB transport and allows an MTU up to the maximal IP packet size of
- 64K, which reduces the number of IP packets needed for handling
- large UDP datagrams, TCP segments, etc and increases the performance
- for large messages.
+ Connected mode takes advantage of the connected nature of the IB
+ transport and allows an MTU up to the maximal IP packet size of 64K,
+ which reduces the number of IP packets needed for handling large UDP
+ datagrams, TCP segments, etc and increases the performance for large
+ messages.
In connected mode, the interface's UD QP is still used for multicast
and communication with peers that don't support connected mode. In
diff --git a/drivers/infiniband/core/addr.c b/drivers/infiniband/core/addr.c
index bd07803..abbb069 100644
--- a/drivers/infiniband/core/addr.c
+++ b/drivers/infiniband/core/addr.c
@@ -36,7 +36,6 @@
#include <linux/mutex.h>
#include <linux/inetdevice.h>
#include <linux/workqueue.h>
-#include <linux/if_arp.h>
#include <net/arp.h>
#include <net/neighbour.h>
#include <net/route.h>
@@ -92,22 +91,12 @@
int rdma_copy_addr(struct rdma_dev_addr *dev_addr, struct net_device *dev,
const unsigned char *dst_dev_addr)
{
- switch (dev->type) {
- case ARPHRD_INFINIBAND:
- dev_addr->dev_type = RDMA_NODE_IB_CA;
- break;
- case ARPHRD_ETHER:
- dev_addr->dev_type = RDMA_NODE_RNIC;
- break;
- default:
- return -EADDRNOTAVAIL;
- }
-
+ dev_addr->dev_type = dev->type;
memcpy(dev_addr->src_dev_addr, dev->dev_addr, MAX_ADDR_LEN);
memcpy(dev_addr->broadcast, dev->broadcast, MAX_ADDR_LEN);
if (dst_dev_addr)
memcpy(dev_addr->dst_dev_addr, dst_dev_addr, MAX_ADDR_LEN);
- dev_addr->src_dev = dev;
+ dev_addr->bound_dev_if = dev->ifindex;
return 0;
}
EXPORT_SYMBOL(rdma_copy_addr);
@@ -117,6 +106,15 @@
struct net_device *dev;
int ret = -EADDRNOTAVAIL;
+ if (dev_addr->bound_dev_if) {
+ dev = dev_get_by_index(&init_net, dev_addr->bound_dev_if);
+ if (!dev)
+ return -ENODEV;
+ ret = rdma_copy_addr(dev_addr, dev, NULL);
+ dev_put(dev);
+ return ret;
+ }
+
switch (addr->sa_family) {
case AF_INET:
dev = ip_dev_find(&init_net,
@@ -131,6 +129,7 @@
#if defined(CONFIG_IPV6) || defined(CONFIG_IPV6_MODULE)
case AF_INET6:
+ read_lock(&dev_base_lock);
for_each_netdev(&init_net, dev) {
if (ipv6_chk_addr(&init_net,
&((struct sockaddr_in6 *) addr)->sin6_addr,
@@ -139,6 +138,7 @@
break;
}
}
+ read_unlock(&dev_base_lock);
break;
#endif
}
@@ -176,48 +176,9 @@
mutex_unlock(&lock);
}
-static void addr_send_arp(struct sockaddr *dst_in)
-{
- struct rtable *rt;
- struct flowi fl;
-
- memset(&fl, 0, sizeof fl);
-
- switch (dst_in->sa_family) {
- case AF_INET:
- fl.nl_u.ip4_u.daddr =
- ((struct sockaddr_in *) dst_in)->sin_addr.s_addr;
-
- if (ip_route_output_key(&init_net, &rt, &fl))
- return;
-
- neigh_event_send(rt->u.dst.neighbour, NULL);
- ip_rt_put(rt);
- break;
-
-#if defined(CONFIG_IPV6) || defined(CONFIG_IPV6_MODULE)
- case AF_INET6:
- {
- struct dst_entry *dst;
-
- fl.nl_u.ip6_u.daddr =
- ((struct sockaddr_in6 *) dst_in)->sin6_addr;
-
- dst = ip6_route_output(&init_net, NULL, &fl);
- if (!dst)
- return;
-
- neigh_event_send(dst->neighbour, NULL);
- dst_release(dst);
- break;
- }
-#endif
- }
-}
-
-static int addr4_resolve_remote(struct sockaddr_in *src_in,
- struct sockaddr_in *dst_in,
- struct rdma_dev_addr *addr)
+static int addr4_resolve(struct sockaddr_in *src_in,
+ struct sockaddr_in *dst_in,
+ struct rdma_dev_addr *addr)
{
__be32 src_ip = src_in->sin_addr.s_addr;
__be32 dst_ip = dst_in->sin_addr.s_addr;
@@ -229,10 +190,22 @@
memset(&fl, 0, sizeof fl);
fl.nl_u.ip4_u.daddr = dst_ip;
fl.nl_u.ip4_u.saddr = src_ip;
+ fl.oif = addr->bound_dev_if;
+
ret = ip_route_output_key(&init_net, &rt, &fl);
if (ret)
goto out;
+ src_in->sin_family = AF_INET;
+ src_in->sin_addr.s_addr = rt->rt_src;
+
+ if (rt->idev->dev->flags & IFF_LOOPBACK) {
+ ret = rdma_translate_ip((struct sockaddr *) dst_in, addr);
+ if (!ret)
+ memcpy(addr->dst_dev_addr, addr->src_dev_addr, MAX_ADDR_LEN);
+ goto put;
+ }
+
/* If the device does ARP internally, return 'done' */
if (rt->idev->dev->flags & IFF_NOARP) {
rdma_copy_addr(addr, rt->idev->dev, NULL);
@@ -240,21 +213,14 @@
}
neigh = neigh_lookup(&arp_tbl, &rt->rt_gateway, rt->idev->dev);
- if (!neigh) {
+ if (!neigh || !(neigh->nud_state & NUD_VALID)) {
+ neigh_event_send(rt->u.dst.neighbour, NULL);
ret = -ENODATA;
+ if (neigh)
+ goto release;
goto put;
}
- if (!(neigh->nud_state & NUD_VALID)) {
- ret = -ENODATA;
- goto release;
- }
-
- if (!src_ip) {
- src_in->sin_family = dst_in->sin_family;
- src_in->sin_addr.s_addr = rt->rt_src;
- }
-
ret = rdma_copy_addr(addr, neigh->dev, neigh->ha);
release:
neigh_release(neigh);
@@ -265,52 +231,77 @@
}
#if defined(CONFIG_IPV6) || defined(CONFIG_IPV6_MODULE)
-static int addr6_resolve_remote(struct sockaddr_in6 *src_in,
- struct sockaddr_in6 *dst_in,
- struct rdma_dev_addr *addr)
+static int addr6_resolve(struct sockaddr_in6 *src_in,
+ struct sockaddr_in6 *dst_in,
+ struct rdma_dev_addr *addr)
{
struct flowi fl;
struct neighbour *neigh;
struct dst_entry *dst;
- int ret = -ENODATA;
+ int ret;
memset(&fl, 0, sizeof fl);
- fl.nl_u.ip6_u.daddr = dst_in->sin6_addr;
- fl.nl_u.ip6_u.saddr = src_in->sin6_addr;
+ ipv6_addr_copy(&fl.fl6_dst, &dst_in->sin6_addr);
+ ipv6_addr_copy(&fl.fl6_src, &src_in->sin6_addr);
+ fl.oif = addr->bound_dev_if;
dst = ip6_route_output(&init_net, NULL, &fl);
- if (!dst)
- return ret;
+ if ((ret = dst->error))
+ goto put;
- if (dst->dev->flags & IFF_NOARP) {
- ret = rdma_copy_addr(addr, dst->dev, NULL);
- } else {
- neigh = dst->neighbour;
- if (neigh && (neigh->nud_state & NUD_VALID))
- ret = rdma_copy_addr(addr, neigh->dev, neigh->ha);
+ if (ipv6_addr_any(&fl.fl6_src)) {
+ ret = ipv6_dev_get_saddr(&init_net, ip6_dst_idev(dst)->dev,
+ &fl.fl6_dst, 0, &fl.fl6_src);
+ if (ret)
+ goto put;
+
+ src_in->sin6_family = AF_INET6;
+ ipv6_addr_copy(&src_in->sin6_addr, &fl.fl6_src);
}
+ if (dst->dev->flags & IFF_LOOPBACK) {
+ ret = rdma_translate_ip((struct sockaddr *) dst_in, addr);
+ if (!ret)
+ memcpy(addr->dst_dev_addr, addr->src_dev_addr, MAX_ADDR_LEN);
+ goto put;
+ }
+
+ /* If the device does ARP internally, return 'done' */
+ if (dst->dev->flags & IFF_NOARP) {
+ ret = rdma_copy_addr(addr, dst->dev, NULL);
+ goto put;
+ }
+
+ neigh = dst->neighbour;
+ if (!neigh || !(neigh->nud_state & NUD_VALID)) {
+ neigh_event_send(dst->neighbour, NULL);
+ ret = -ENODATA;
+ goto put;
+ }
+
+ ret = rdma_copy_addr(addr, dst->dev, neigh->ha);
+put:
dst_release(dst);
return ret;
}
#else
-static int addr6_resolve_remote(struct sockaddr_in6 *src_in,
- struct sockaddr_in6 *dst_in,
- struct rdma_dev_addr *addr)
+static int addr6_resolve(struct sockaddr_in6 *src_in,
+ struct sockaddr_in6 *dst_in,
+ struct rdma_dev_addr *addr)
{
return -EADDRNOTAVAIL;
}
#endif
-static int addr_resolve_remote(struct sockaddr *src_in,
- struct sockaddr *dst_in,
- struct rdma_dev_addr *addr)
+static int addr_resolve(struct sockaddr *src_in,
+ struct sockaddr *dst_in,
+ struct rdma_dev_addr *addr)
{
if (src_in->sa_family == AF_INET) {
- return addr4_resolve_remote((struct sockaddr_in *) src_in,
+ return addr4_resolve((struct sockaddr_in *) src_in,
(struct sockaddr_in *) dst_in, addr);
} else
- return addr6_resolve_remote((struct sockaddr_in6 *) src_in,
+ return addr6_resolve((struct sockaddr_in6 *) src_in,
(struct sockaddr_in6 *) dst_in, addr);
}
@@ -327,8 +318,7 @@
if (req->status == -ENODATA) {
src_in = (struct sockaddr *) &req->src_addr;
dst_in = (struct sockaddr *) &req->dst_addr;
- req->status = addr_resolve_remote(src_in, dst_in,
- req->addr);
+ req->status = addr_resolve(src_in, dst_in, req->addr);
if (req->status && time_after_eq(jiffies, req->timeout))
req->status = -ETIMEDOUT;
else if (req->status == -ENODATA)
@@ -352,82 +342,6 @@
}
}
-static int addr_resolve_local(struct sockaddr *src_in,
- struct sockaddr *dst_in,
- struct rdma_dev_addr *addr)
-{
- struct net_device *dev;
- int ret;
-
- switch (dst_in->sa_family) {
- case AF_INET:
- {
- __be32 src_ip = ((struct sockaddr_in *) src_in)->sin_addr.s_addr;
- __be32 dst_ip = ((struct sockaddr_in *) dst_in)->sin_addr.s_addr;
-
- dev = ip_dev_find(&init_net, dst_ip);
- if (!dev)
- return -EADDRNOTAVAIL;
-
- if (ipv4_is_zeronet(src_ip)) {
- src_in->sa_family = dst_in->sa_family;
- ((struct sockaddr_in *) src_in)->sin_addr.s_addr = dst_ip;
- ret = rdma_copy_addr(addr, dev, dev->dev_addr);
- } else if (ipv4_is_loopback(src_ip)) {
- ret = rdma_translate_ip(dst_in, addr);
- if (!ret)
- memcpy(addr->dst_dev_addr, dev->dev_addr, MAX_ADDR_LEN);
- } else {
- ret = rdma_translate_ip(src_in, addr);
- if (!ret)
- memcpy(addr->dst_dev_addr, dev->dev_addr, MAX_ADDR_LEN);
- }
- dev_put(dev);
- break;
- }
-
-#if defined(CONFIG_IPV6) || defined(CONFIG_IPV6_MODULE)
- case AF_INET6:
- {
- struct in6_addr *a;
-
- for_each_netdev(&init_net, dev)
- if (ipv6_chk_addr(&init_net,
- &((struct sockaddr_in6 *) dst_in)->sin6_addr,
- dev, 1))
- break;
-
- if (!dev)
- return -EADDRNOTAVAIL;
-
- a = &((struct sockaddr_in6 *) src_in)->sin6_addr;
-
- if (ipv6_addr_any(a)) {
- src_in->sa_family = dst_in->sa_family;
- ((struct sockaddr_in6 *) src_in)->sin6_addr =
- ((struct sockaddr_in6 *) dst_in)->sin6_addr;
- ret = rdma_copy_addr(addr, dev, dev->dev_addr);
- } else if (ipv6_addr_loopback(a)) {
- ret = rdma_translate_ip(dst_in, addr);
- if (!ret)
- memcpy(addr->dst_dev_addr, dev->dev_addr, MAX_ADDR_LEN);
- } else {
- ret = rdma_translate_ip(src_in, addr);
- if (!ret)
- memcpy(addr->dst_dev_addr, dev->dev_addr, MAX_ADDR_LEN);
- }
- break;
- }
-#endif
-
- default:
- ret = -EADDRNOTAVAIL;
- break;
- }
-
- return ret;
-}
-
int rdma_resolve_ip(struct rdma_addr_client *client,
struct sockaddr *src_addr, struct sockaddr *dst_addr,
struct rdma_dev_addr *addr, int timeout_ms,
@@ -443,22 +357,28 @@
if (!req)
return -ENOMEM;
- if (src_addr)
- memcpy(&req->src_addr, src_addr, ip_addr_size(src_addr));
- memcpy(&req->dst_addr, dst_addr, ip_addr_size(dst_addr));
+ src_in = (struct sockaddr *) &req->src_addr;
+ dst_in = (struct sockaddr *) &req->dst_addr;
+
+ if (src_addr) {
+ if (src_addr->sa_family != dst_addr->sa_family) {
+ ret = -EINVAL;
+ goto err;
+ }
+
+ memcpy(src_in, src_addr, ip_addr_size(src_addr));
+ } else {
+ src_in->sa_family = dst_addr->sa_family;
+ }
+
+ memcpy(dst_in, dst_addr, ip_addr_size(dst_addr));
req->addr = addr;
req->callback = callback;
req->context = context;
req->client = client;
atomic_inc(&client->refcount);
- src_in = (struct sockaddr *) &req->src_addr;
- dst_in = (struct sockaddr *) &req->dst_addr;
-
- req->status = addr_resolve_local(src_in, dst_in, addr);
- if (req->status == -EADDRNOTAVAIL)
- req->status = addr_resolve_remote(src_in, dst_in, addr);
-
+ req->status = addr_resolve(src_in, dst_in, addr);
switch (req->status) {
case 0:
req->timeout = jiffies;
@@ -467,15 +387,16 @@
case -ENODATA:
req->timeout = msecs_to_jiffies(timeout_ms) + jiffies;
queue_req(req);
- addr_send_arp(dst_in);
break;
default:
ret = req->status;
atomic_dec(&client->refcount);
- kfree(req);
- break;
+ goto err;
}
return ret;
+err:
+ kfree(req);
+ return ret;
}
EXPORT_SYMBOL(rdma_resolve_ip);
diff --git a/drivers/infiniband/core/cma.c b/drivers/infiniband/core/cma.c
index 0753178..fbdd731 100644
--- a/drivers/infiniband/core/cma.c
+++ b/drivers/infiniband/core/cma.c
@@ -330,17 +330,7 @@
union ib_gid gid;
int ret = -ENODEV;
- switch (rdma_node_get_transport(dev_addr->dev_type)) {
- case RDMA_TRANSPORT_IB:
- ib_addr_get_sgid(dev_addr, &gid);
- break;
- case RDMA_TRANSPORT_IWARP:
- iw_addr_get_sgid(dev_addr, &gid);
- break;
- default:
- return -ENODEV;
- }
-
+ rdma_addr_get_sgid(dev_addr, &gid);
list_for_each_entry(cma_dev, &dev_list, list) {
ret = ib_find_cached_gid(cma_dev->device, &gid,
&id_priv->id.port_num, NULL);
@@ -1032,11 +1022,17 @@
if (rt->num_paths == 2)
rt->path_rec[1] = *ib_event->param.req_rcvd.alternate_path;
- ib_addr_set_dgid(&rt->addr.dev_addr, &rt->path_rec[0].dgid);
- ret = rdma_translate_ip((struct sockaddr *) &id->route.addr.src_addr,
- &id->route.addr.dev_addr);
- if (ret)
- goto destroy_id;
+ if (cma_any_addr((struct sockaddr *) &rt->addr.src_addr)) {
+ rt->addr.dev_addr.dev_type = ARPHRD_INFINIBAND;
+ rdma_addr_set_sgid(&rt->addr.dev_addr, &rt->path_rec[0].sgid);
+ ib_addr_set_pkey(&rt->addr.dev_addr, rt->path_rec[0].pkey);
+ } else {
+ ret = rdma_translate_ip((struct sockaddr *) &rt->addr.src_addr,
+ &rt->addr.dev_addr);
+ if (ret)
+ goto destroy_id;
+ }
+ rdma_addr_set_dgid(&rt->addr.dev_addr, &rt->path_rec[0].dgid);
id_priv = container_of(id, struct rdma_id_private, id);
id_priv->state = CMA_CONNECT;
@@ -1071,10 +1067,12 @@
cma_save_net_info(&id->route.addr, &listen_id->route.addr,
ip_ver, port, src, dst);
- ret = rdma_translate_ip((struct sockaddr *) &id->route.addr.src_addr,
- &id->route.addr.dev_addr);
- if (ret)
- goto err;
+ if (!cma_any_addr((struct sockaddr *) &id->route.addr.src_addr)) {
+ ret = rdma_translate_ip((struct sockaddr *) &id->route.addr.src_addr,
+ &id->route.addr.dev_addr);
+ if (ret)
+ goto err;
+ }
id_priv = container_of(id, struct rdma_id_private, id);
id_priv->state = CMA_CONNECT;
@@ -1474,15 +1472,6 @@
mutex_unlock(&lock);
}
-static int cma_bind_any(struct rdma_cm_id *id, sa_family_t af)
-{
- struct sockaddr_storage addr_in;
-
- memset(&addr_in, 0, sizeof addr_in);
- addr_in.ss_family = af;
- return rdma_bind_addr(id, (struct sockaddr *) &addr_in);
-}
-
int rdma_listen(struct rdma_cm_id *id, int backlog)
{
struct rdma_id_private *id_priv;
@@ -1490,7 +1479,8 @@
id_priv = container_of(id, struct rdma_id_private, id);
if (id_priv->state == CMA_IDLE) {
- ret = cma_bind_any(id, AF_INET);
+ ((struct sockaddr *) &id->route.addr.src_addr)->sa_family = AF_INET;
+ ret = rdma_bind_addr(id, (struct sockaddr *) &id->route.addr.src_addr);
if (ret)
return ret;
}
@@ -1565,8 +1555,8 @@
struct sockaddr_in6 *sin6;
memset(&path_rec, 0, sizeof path_rec);
- ib_addr_get_sgid(&addr->dev_addr, &path_rec.sgid);
- ib_addr_get_dgid(&addr->dev_addr, &path_rec.dgid);
+ rdma_addr_get_sgid(&addr->dev_addr, &path_rec.sgid);
+ rdma_addr_get_dgid(&addr->dev_addr, &path_rec.dgid);
path_rec.pkey = cpu_to_be16(ib_addr_get_pkey(&addr->dev_addr));
path_rec.numb_path = 1;
path_rec.reversible = 1;
@@ -1781,7 +1771,11 @@
if (ret)
goto out;
- ib_addr_set_sgid(&id_priv->id.route.addr.dev_addr, &gid);
+ id_priv->id.route.addr.dev_addr.dev_type =
+ (rdma_node_get_transport(cma_dev->device->node_type) == RDMA_TRANSPORT_IB) ?
+ ARPHRD_INFINIBAND : ARPHRD_ETHER;
+
+ rdma_addr_set_sgid(&id_priv->id.route.addr.dev_addr, &gid);
ib_addr_set_pkey(&id_priv->id.route.addr.dev_addr, pkey);
id_priv->id.port_num = p;
cma_attach_to_dev(id_priv, cma_dev);
@@ -1839,7 +1833,7 @@
static int cma_resolve_loopback(struct rdma_id_private *id_priv)
{
struct cma_work *work;
- struct sockaddr_in *src_in, *dst_in;
+ struct sockaddr *src, *dst;
union ib_gid gid;
int ret;
@@ -1853,14 +1847,19 @@
goto err;
}
- ib_addr_get_sgid(&id_priv->id.route.addr.dev_addr, &gid);
- ib_addr_set_dgid(&id_priv->id.route.addr.dev_addr, &gid);
+ rdma_addr_get_sgid(&id_priv->id.route.addr.dev_addr, &gid);
+ rdma_addr_set_dgid(&id_priv->id.route.addr.dev_addr, &gid);
- if (cma_zero_addr((struct sockaddr *) &id_priv->id.route.addr.src_addr)) {
- src_in = (struct sockaddr_in *)&id_priv->id.route.addr.src_addr;
- dst_in = (struct sockaddr_in *)&id_priv->id.route.addr.dst_addr;
- src_in->sin_family = dst_in->sin_family;
- src_in->sin_addr.s_addr = dst_in->sin_addr.s_addr;
+ src = (struct sockaddr *) &id_priv->id.route.addr.src_addr;
+ if (cma_zero_addr(src)) {
+ dst = (struct sockaddr *) &id_priv->id.route.addr.dst_addr;
+ if ((src->sa_family = dst->sa_family) == AF_INET) {
+ ((struct sockaddr_in *) src)->sin_addr.s_addr =
+ ((struct sockaddr_in *) dst)->sin_addr.s_addr;
+ } else {
+ ipv6_addr_copy(&((struct sockaddr_in6 *) src)->sin6_addr,
+ &((struct sockaddr_in6 *) dst)->sin6_addr);
+ }
}
work->id = id_priv;
@@ -1878,10 +1877,14 @@
static int cma_bind_addr(struct rdma_cm_id *id, struct sockaddr *src_addr,
struct sockaddr *dst_addr)
{
- if (src_addr && src_addr->sa_family)
- return rdma_bind_addr(id, src_addr);
- else
- return cma_bind_any(id, dst_addr->sa_family);
+ if (!src_addr || !src_addr->sa_family) {
+ src_addr = (struct sockaddr *) &id->route.addr.src_addr;
+ if ((src_addr->sa_family = dst_addr->sa_family) == AF_INET6) {
+ ((struct sockaddr_in6 *) src_addr)->sin6_scope_id =
+ ((struct sockaddr_in6 *) dst_addr)->sin6_scope_id;
+ }
+ }
+ return rdma_bind_addr(id, src_addr);
}
int rdma_resolve_addr(struct rdma_cm_id *id, struct sockaddr *src_addr,
@@ -2077,6 +2080,25 @@
return ret;
}
+/*
+ * cma_check_linklocal - validate the scope of an IPv6 link-local address
+ * @dev_addr: device address whose bound_dev_if is set from the scope id
+ * @addr: address being bound; only AF_INET6 addresses are examined
+ *
+ * A link-local IPv6 address with a zero sin6_scope_id is rejected with
+ * -EINVAL.  For any other IPv6 address, sin6_scope_id (possibly zero) is
+ * stored in dev_addr->bound_dev_if.  Non-IPv6 addresses, and builds without
+ * IPv6 support, leave dev_addr untouched.
+ *
+ * Returns 0 on success or -EINVAL as described above.
+ */
+static int cma_check_linklocal(struct rdma_dev_addr *dev_addr,
+			       struct sockaddr *addr)
+{
+/*
+ * Kconfig symbols are case sensitive: the built-in option is CONFIG_IPV6.
+ * Testing "CONFIG_IPv6" silently compiled this check out unless IPv6 was
+ * built as a module.
+ */
+#if defined(CONFIG_IPV6) || defined(CONFIG_IPV6_MODULE)
+	struct sockaddr_in6 *sin6;
+
+	if (addr->sa_family != AF_INET6)
+		return 0;
+
+	sin6 = (struct sockaddr_in6 *) addr;
+	/* A link-local address cannot be bound without a scope id. */
+	if ((ipv6_addr_type(&sin6->sin6_addr) & IPV6_ADDR_LINKLOCAL) &&
+	    !sin6->sin6_scope_id)
+		return -EINVAL;
+
+	dev_addr->bound_dev_if = sin6->sin6_scope_id;
+#endif
+	return 0;
+}
+
int rdma_bind_addr(struct rdma_cm_id *id, struct sockaddr *addr)
{
struct rdma_id_private *id_priv;
@@ -2089,7 +2111,13 @@
if (!cma_comp_exch(id_priv, CMA_IDLE, CMA_ADDR_BOUND))
return -EINVAL;
- if (!cma_any_addr(addr)) {
+ ret = cma_check_linklocal(&id->route.addr.dev_addr, addr);
+ if (ret)
+ goto err1;
+
+ if (cma_loopback_addr(addr)) {
+ ret = cma_bind_loopback(id_priv);
+ } else if (!cma_zero_addr(addr)) {
ret = rdma_translate_ip(addr, &id->route.addr.dev_addr);
if (ret)
goto err1;
@@ -2108,7 +2136,7 @@
return 0;
err2:
- if (!cma_any_addr(addr)) {
+ if (id_priv->cma_dev) {
mutex_lock(&lock);
cma_detach_from_dev(id_priv);
mutex_unlock(&lock);
@@ -2687,10 +2715,15 @@
if (cma_any_addr(addr)) {
memset(mgid, 0, sizeof *mgid);
} else if ((addr->sa_family == AF_INET6) &&
- ((be32_to_cpu(sin6->sin6_addr.s6_addr32[0]) & 0xFF10A01B) ==
+ ((be32_to_cpu(sin6->sin6_addr.s6_addr32[0]) & 0xFFF0FFFF) ==
0xFF10A01B)) {
/* IPv6 address is an SA assigned MGID. */
memcpy(mgid, &sin6->sin6_addr, sizeof *mgid);
+ } else if ((addr->sa_family == AF_INET6)) {
+ ipv6_ib_mc_map(&sin6->sin6_addr, dev_addr->broadcast, mc_map);
+ if (id_priv->id.ps == RDMA_PS_UDP)
+ mc_map[7] = 0x01; /* Use RDMA CM signature */
+ *mgid = *(union ib_gid *) (mc_map + 4);
} else {
ip_ib_mc_map(sin->sin_addr.s_addr, dev_addr->broadcast, mc_map);
if (id_priv->id.ps == RDMA_PS_UDP)
@@ -2716,7 +2749,7 @@
cma_set_mgid(id_priv, (struct sockaddr *) &mc->addr, &rec.mgid);
if (id_priv->id.ps == RDMA_PS_UDP)
rec.qkey = cpu_to_be32(RDMA_UDP_QKEY);
- ib_addr_get_sgid(dev_addr, &rec.port_gid);
+ rdma_addr_get_sgid(dev_addr, &rec.port_gid);
rec.pkey = cpu_to_be16(ib_addr_get_pkey(dev_addr));
rec.join_state = 1;
@@ -2815,7 +2848,7 @@
dev_addr = &id_priv->id.route.addr.dev_addr;
- if ((dev_addr->src_dev == ndev) &&
+ if ((dev_addr->bound_dev_if == ndev->ifindex) &&
memcmp(dev_addr->src_dev_addr, ndev->dev_addr, ndev->addr_len)) {
printk(KERN_INFO "RDMA CM addr change for ndev %s used by id %p\n",
ndev->name, &id_priv->id);
diff --git a/drivers/infiniband/core/sa_query.c b/drivers/infiniband/core/sa_query.c
index 8254371..7e1ffd8c 100644
--- a/drivers/infiniband/core/sa_query.c
+++ b/drivers/infiniband/core/sa_query.c
@@ -604,6 +604,12 @@
return ret ? ret : id;
}
+/*
+ * ib_sa_unpack_path - unpack @attribute into @rec
+ *
+ * Hands both pointers to ib_unpack() together with path_rec_table and its
+ * element count.  Exported so that other modules can call it.
+ * NOTE(review): presumably @attribute is a wire-format SA path record -
+ * confirm against the callers.
+ */
+void ib_sa_unpack_path(void *attribute, struct ib_sa_path_rec *rec)
+{
+ ib_unpack(path_rec_table, ARRAY_SIZE(path_rec_table), attribute, rec);
+}
+EXPORT_SYMBOL(ib_sa_unpack_path);
+
static void ib_sa_path_rec_callback(struct ib_sa_query *sa_query,
int status,
struct ib_sa_mad *mad)
diff --git a/drivers/infiniband/core/ucma.c b/drivers/infiniband/core/ucma.c
index bb96d3c..b2e16c3 100644
--- a/drivers/infiniband/core/ucma.c
+++ b/drivers/infiniband/core/ucma.c
@@ -43,6 +43,7 @@
#include <rdma/rdma_user_cm.h>
#include <rdma/ib_marshall.h>
#include <rdma/rdma_cm.h>
+#include <rdma/rdma_cm_ib.h>
MODULE_AUTHOR("Sean Hefty");
MODULE_DESCRIPTION("RDMA Userspace Connection Manager Access");
@@ -562,10 +563,10 @@
switch (route->num_paths) {
case 0:
dev_addr = &route->addr.dev_addr;
- ib_addr_get_dgid(dev_addr,
- (union ib_gid *) &resp->ib_route[0].dgid);
- ib_addr_get_sgid(dev_addr,
- (union ib_gid *) &resp->ib_route[0].sgid);
+ rdma_addr_get_dgid(dev_addr,
+ (union ib_gid *) &resp->ib_route[0].dgid);
+ rdma_addr_get_sgid(dev_addr,
+ (union ib_gid *) &resp->ib_route[0].sgid);
resp->ib_route[0].pkey = cpu_to_be16(ib_addr_get_pkey(dev_addr));
break;
case 2:
@@ -812,6 +813,51 @@
return ret;
}
+/*
+ * Install a caller-supplied IB path record on the context's cm_id.
+ *
+ * @path_data points to @optlen bytes holding an array of ib_path_rec_data
+ * entries.  The first entry whose flags are exactly
+ * IB_PATH_GMP | IB_PATH_PRIMARY | IB_PATH_BIDIRECTIONAL is unpacked and
+ * passed to rdma_set_ib_paths(); an RDMA_CM_EVENT_ROUTE_RESOLVED event is
+ * then handed to ucma_event_handler().
+ *
+ * Returns -EINVAL when @optlen is not a whole number of entries or when no
+ * entry carries the required flags, the rdma_set_ib_paths() error if that
+ * call fails, and otherwise the result of ucma_event_handler().
+ */
+static int ucma_set_ib_path(struct ucma_context *ctx,
+			    struct ib_path_rec_data *path_data, size_t optlen)
+{
+	struct rdma_cm_event resolved;
+	struct ib_sa_path_rec unpacked;
+	size_t entries_left;
+	int err;
+
+	if (optlen % sizeof(*path_data) != 0)
+		return -EINVAL;
+
+	/* Skip entries until one carries exactly the required flag set. */
+	entries_left = optlen / sizeof(*path_data);
+	while (entries_left &&
+	       path_data->flags != (IB_PATH_GMP | IB_PATH_PRIMARY |
+				    IB_PATH_BIDIRECTIONAL)) {
+		path_data++;
+		entries_left--;
+	}
+
+	if (!entries_left)
+		return -EINVAL;
+
+	ib_sa_unpack_path(path_data->path_rec, &unpacked);
+	err = rdma_set_ib_paths(ctx->cm_id, &unpacked, 1);
+	if (err)
+		return err;
+
+	memset(&resolved, 0, sizeof resolved);
+	resolved.event = RDMA_CM_EVENT_ROUTE_RESOLVED;
+	return ucma_event_handler(ctx->cm_id, &resolved);
+}
+
+/*
+ * Dispatch an RDMA_OPTION_IB level option.
+ *
+ * RDMA_OPTION_IB_PATH is forwarded to ucma_set_ib_path() and its result is
+ * returned; every other @optname yields -ENOSYS.
+ */
+static int ucma_set_option_ib(struct ucma_context *ctx, int optname,
+			      void *optval, size_t optlen)
+{
+	if (optname == RDMA_OPTION_IB_PATH)
+		return ucma_set_ib_path(ctx, optval, optlen);
+
+	return -ENOSYS;
+}
+
static int ucma_set_option_level(struct ucma_context *ctx, int level,
int optname, void *optval, size_t optlen)
{
@@ -821,6 +867,9 @@
case RDMA_OPTION_ID:
ret = ucma_set_option_id(ctx, optname, optval, optlen);
break;
+ case RDMA_OPTION_IB:
+ ret = ucma_set_option_ib(ctx, optname, optval, optlen);
+ break;
default:
ret = -ENOSYS;
}
diff --git a/drivers/infiniband/core/uverbs_cmd.c b/drivers/infiniband/core/uverbs_cmd.c
index 56feab6..112d397 100644
--- a/drivers/infiniband/core/uverbs_cmd.c
+++ b/drivers/infiniband/core/uverbs_cmd.c
@@ -285,7 +285,7 @@
ucontext = ibdev->alloc_ucontext(ibdev, &udata);
if (IS_ERR(ucontext)) {
- ret = PTR_ERR(file->ucontext);
+ ret = PTR_ERR(ucontext);
goto err;
}
diff --git a/drivers/infiniband/hw/amso1100/c2_qp.c b/drivers/infiniband/hw/amso1100/c2_qp.c
index a6d8944..ad51886 100644
--- a/drivers/infiniband/hw/amso1100/c2_qp.c
+++ b/drivers/infiniband/hw/amso1100/c2_qp.c
@@ -798,8 +798,10 @@
u8 actual_sge_count;
u32 msg_size;
- if (qp->state > IB_QPS_RTS)
- return -EINVAL;
+ if (qp->state > IB_QPS_RTS) {
+ err = -EINVAL;
+ goto out;
+ }
while (ib_wr) {
@@ -930,6 +932,7 @@
ib_wr = ib_wr->next;
}
+out:
if (err)
*bad_wr = ib_wr;
return err;
@@ -944,8 +947,10 @@
unsigned long lock_flags;
int err = 0;
- if (qp->state > IB_QPS_RTS)
- return -EINVAL;
+ if (qp->state > IB_QPS_RTS) {
+ err = -EINVAL;
+ goto out;
+ }
/*
* Try and post each work request
@@ -998,6 +1003,7 @@
ib_wr = ib_wr->next;
}
+out:
if (err)
*bad_wr = ib_wr;
return err;
diff --git a/drivers/infiniband/hw/cxgb3/iwch_qp.c b/drivers/infiniband/hw/cxgb3/iwch_qp.c
index 1cecf98..3eb8cec 100644
--- a/drivers/infiniband/hw/cxgb3/iwch_qp.c
+++ b/drivers/infiniband/hw/cxgb3/iwch_qp.c
@@ -365,18 +365,19 @@
spin_lock_irqsave(&qhp->lock, flag);
if (qhp->attr.state > IWCH_QP_STATE_RTS) {
spin_unlock_irqrestore(&qhp->lock, flag);
- return -EINVAL;
+ err = -EINVAL;
+ goto out;
}
num_wrs = Q_FREECNT(qhp->wq.sq_rptr, qhp->wq.sq_wptr,
qhp->wq.sq_size_log2);
if (num_wrs <= 0) {
spin_unlock_irqrestore(&qhp->lock, flag);
- return -ENOMEM;
+ err = -ENOMEM;
+ goto out;
}
while (wr) {
if (num_wrs == 0) {
err = -ENOMEM;
- *bad_wr = wr;
break;
}
idx = Q_PTR2IDX(qhp->wq.wptr, qhp->wq.size_log2);
@@ -428,10 +429,8 @@
wr->opcode);
err = -EINVAL;
}
- if (err) {
- *bad_wr = wr;
+ if (err)
break;
- }
wqe->send.wrid.id0.hi = qhp->wq.sq_wptr;
sqp->wr_id = wr->wr_id;
sqp->opcode = wr2opcode(t3_wr_opcode);
@@ -454,6 +453,10 @@
}
spin_unlock_irqrestore(&qhp->lock, flag);
ring_doorbell(qhp->wq.doorbell, qhp->wq.qpid);
+
+out:
+ if (err)
+ *bad_wr = wr;
return err;
}
@@ -471,18 +474,19 @@
spin_lock_irqsave(&qhp->lock, flag);
if (qhp->attr.state > IWCH_QP_STATE_RTS) {
spin_unlock_irqrestore(&qhp->lock, flag);
- return -EINVAL;
+ err = -EINVAL;
+ goto out;
}
num_wrs = Q_FREECNT(qhp->wq.rq_rptr, qhp->wq.rq_wptr,
qhp->wq.rq_size_log2) - 1;
if (!wr) {
spin_unlock_irqrestore(&qhp->lock, flag);
- return -EINVAL;
+ err = -ENOMEM;
+ goto out;
}
while (wr) {
if (wr->num_sge > T3_MAX_SGE) {
err = -EINVAL;
- *bad_wr = wr;
break;
}
idx = Q_PTR2IDX(qhp->wq.wptr, qhp->wq.size_log2);
@@ -494,10 +498,10 @@
err = build_zero_stag_recv(qhp, wqe, wr);
else
err = -ENOMEM;
- if (err) {
- *bad_wr = wr;
+
+ if (err)
break;
- }
+
build_fw_riwrh((void *) wqe, T3_WR_RCV, T3_COMPLETION_FLAG,
Q_GENBIT(qhp->wq.wptr, qhp->wq.size_log2),
0, sizeof(struct t3_receive_wr) >> 3, T3_SOPEOP);
@@ -511,6 +515,10 @@
}
spin_unlock_irqrestore(&qhp->lock, flag);
ring_doorbell(qhp->wq.doorbell, qhp->wq.qpid);
+
+out:
+ if (err)
+ *bad_wr = wr;
return err;
}
diff --git a/drivers/infiniband/hw/ehca/ehca_classes.h b/drivers/infiniband/hw/ehca/ehca_classes.h
index c825142..0136abd 100644
--- a/drivers/infiniband/hw/ehca/ehca_classes.h
+++ b/drivers/infiniband/hw/ehca/ehca_classes.h
@@ -375,6 +375,7 @@
extern rwlock_t ehca_cq_idr_lock;
extern struct idr ehca_qp_idr;
extern struct idr ehca_cq_idr;
+extern spinlock_t shca_list_lock;
extern int ehca_static_rate;
extern int ehca_port_act_time;
diff --git a/drivers/infiniband/hw/ehca/ehca_eq.c b/drivers/infiniband/hw/ehca/ehca_eq.c
index 523e733c..3b87589 100644
--- a/drivers/infiniband/hw/ehca/ehca_eq.c
+++ b/drivers/infiniband/hw/ehca/ehca_eq.c
@@ -169,12 +169,15 @@
unsigned long flags;
u64 h_ret;
- spin_lock_irqsave(&eq->spinlock, flags);
ibmebus_free_irq(eq->ist, (void *)shca);
- h_ret = hipz_h_destroy_eq(shca->ipz_hca_handle, eq);
+ spin_lock_irqsave(&shca_list_lock, flags);
+ eq->is_initialized = 0;
+ spin_unlock_irqrestore(&shca_list_lock, flags);
- spin_unlock_irqrestore(&eq->spinlock, flags);
+ tasklet_kill(&eq->interrupt_task);
+
+ h_ret = hipz_h_destroy_eq(shca->ipz_hca_handle, eq);
if (h_ret != H_SUCCESS) {
ehca_err(&shca->ib_device, "Can't free EQ resources.");
diff --git a/drivers/infiniband/hw/ehca/ehca_main.c b/drivers/infiniband/hw/ehca/ehca_main.c
index fb2d83c..129a6be 100644
--- a/drivers/infiniband/hw/ehca/ehca_main.c
+++ b/drivers/infiniband/hw/ehca/ehca_main.c
@@ -123,7 +123,7 @@
DEFINE_IDR(ehca_cq_idr);
static LIST_HEAD(shca_list); /* list of all registered ehcas */
-static DEFINE_SPINLOCK(shca_list_lock);
+DEFINE_SPINLOCK(shca_list_lock);
static struct timer_list poll_eqs_timer;
diff --git a/drivers/infiniband/hw/ehca/ehca_reqs.c b/drivers/infiniband/hw/ehca/ehca_reqs.c
index 8fd88cd..e3ec7fd 100644
--- a/drivers/infiniband/hw/ehca/ehca_reqs.c
+++ b/drivers/infiniband/hw/ehca/ehca_reqs.c
@@ -400,7 +400,6 @@
static inline int post_one_send(struct ehca_qp *my_qp,
struct ib_send_wr *cur_send_wr,
- struct ib_send_wr **bad_send_wr,
int hidden)
{
struct ehca_wqe *wqe_p;
@@ -412,8 +411,6 @@
wqe_p = ipz_qeit_get_inc(&my_qp->ipz_squeue);
if (unlikely(!wqe_p)) {
/* too many posted work requests: queue overflow */
- if (bad_send_wr)
- *bad_send_wr = cur_send_wr;
ehca_err(my_qp->ib_qp.device, "Too many posted WQEs "
"qp_num=%x", my_qp->ib_qp.qp_num);
return -ENOMEM;
@@ -433,8 +430,6 @@
*/
if (unlikely(ret)) {
my_qp->ipz_squeue.current_q_offset = start_offset;
- if (bad_send_wr)
- *bad_send_wr = cur_send_wr;
ehca_err(my_qp->ib_qp.device, "Could not write WQE "
"qp_num=%x", my_qp->ib_qp.qp_num);
return -EINVAL;
@@ -448,7 +443,6 @@
struct ib_send_wr **bad_send_wr)
{
struct ehca_qp *my_qp = container_of(qp, struct ehca_qp, ib_qp);
- struct ib_send_wr *cur_send_wr;
int wqe_cnt = 0;
int ret = 0;
unsigned long flags;
@@ -457,7 +451,8 @@
if (unlikely(my_qp->state < IB_QPS_RTS)) {
ehca_err(qp->device, "Invalid QP state qp_state=%d qpn=%x",
my_qp->state, qp->qp_num);
- return -EINVAL;
+ ret = -EINVAL;
+ goto out;
}
/* LOCK the QUEUE */
@@ -476,24 +471,21 @@
struct ib_send_wr circ_wr;
memset(&circ_wr, 0, sizeof(circ_wr));
circ_wr.opcode = IB_WR_RDMA_READ;
- post_one_send(my_qp, &circ_wr, NULL, 1); /* ignore retcode */
+ post_one_send(my_qp, &circ_wr, 1); /* ignore retcode */
wqe_cnt++;
ehca_dbg(qp->device, "posted circ wr qp_num=%x", qp->qp_num);
my_qp->message_count = my_qp->packet_count = 0;
}
/* loop processes list of send reqs */
- for (cur_send_wr = send_wr; cur_send_wr != NULL;
- cur_send_wr = cur_send_wr->next) {
- ret = post_one_send(my_qp, cur_send_wr, bad_send_wr, 0);
+ while (send_wr) {
+ ret = post_one_send(my_qp, send_wr, 0);
if (unlikely(ret)) {
- /* if one or more WQEs were successful, don't fail */
- if (wqe_cnt)
- ret = 0;
goto post_send_exit0;
}
wqe_cnt++;
- } /* eof for cur_send_wr */
+ send_wr = send_wr->next;
+ }
post_send_exit0:
iosync(); /* serialize GAL register access */
@@ -503,6 +495,10 @@
my_qp, qp->qp_num, wqe_cnt, ret);
my_qp->message_count += wqe_cnt;
spin_unlock_irqrestore(&my_qp->spinlock_s, flags);
+
+out:
+ if (ret)
+ *bad_send_wr = send_wr;
return ret;
}
@@ -511,7 +507,6 @@
struct ib_recv_wr *recv_wr,
struct ib_recv_wr **bad_recv_wr)
{
- struct ib_recv_wr *cur_recv_wr;
struct ehca_wqe *wqe_p;
int wqe_cnt = 0;
int ret = 0;
@@ -522,27 +517,23 @@
if (unlikely(!HAS_RQ(my_qp))) {
ehca_err(dev, "QP has no RQ ehca_qp=%p qp_num=%x ext_type=%d",
my_qp, my_qp->real_qp_num, my_qp->ext_type);
- return -ENODEV;
+ ret = -ENODEV;
+ goto out;
}
/* LOCK the QUEUE */
spin_lock_irqsave(&my_qp->spinlock_r, flags);
- /* loop processes list of send reqs */
- for (cur_recv_wr = recv_wr; cur_recv_wr != NULL;
- cur_recv_wr = cur_recv_wr->next) {
+ /* loop processes list of recv reqs */
+ while (recv_wr) {
u64 start_offset = my_qp->ipz_rqueue.current_q_offset;
/* get pointer next to free WQE */
wqe_p = ipz_qeit_get_inc(&my_qp->ipz_rqueue);
if (unlikely(!wqe_p)) {
/* too many posted work requests: queue overflow */
- if (bad_recv_wr)
- *bad_recv_wr = cur_recv_wr;
- if (wqe_cnt == 0) {
- ret = -ENOMEM;
- ehca_err(dev, "Too many posted WQEs "
- "qp_num=%x", my_qp->real_qp_num);
- }
+ ret = -ENOMEM;
+ ehca_err(dev, "Too many posted WQEs "
+ "qp_num=%x", my_qp->real_qp_num);
goto post_recv_exit0;
}
/*
@@ -552,7 +543,7 @@
rq_map_idx = start_offset / my_qp->ipz_rqueue.qe_size;
/* write a RECV WQE into the QUEUE */
- ret = ehca_write_rwqe(&my_qp->ipz_rqueue, wqe_p, cur_recv_wr,
+ ret = ehca_write_rwqe(&my_qp->ipz_rqueue, wqe_p, recv_wr,
rq_map_idx);
/*
* if something failed,
@@ -560,22 +551,20 @@
*/
if (unlikely(ret)) {
my_qp->ipz_rqueue.current_q_offset = start_offset;
- *bad_recv_wr = cur_recv_wr;
- if (wqe_cnt == 0) {
- ret = -EINVAL;
- ehca_err(dev, "Could not write WQE "
- "qp_num=%x", my_qp->real_qp_num);
- }
+ ret = -EINVAL;
+ ehca_err(dev, "Could not write WQE "
+ "qp_num=%x", my_qp->real_qp_num);
goto post_recv_exit0;
}
qmap_entry = &my_qp->rq_map.map[rq_map_idx];
- qmap_entry->app_wr_id = get_app_wr_id(cur_recv_wr->wr_id);
+ qmap_entry->app_wr_id = get_app_wr_id(recv_wr->wr_id);
qmap_entry->reported = 0;
qmap_entry->cqe_req = 1;
wqe_cnt++;
- } /* eof for cur_recv_wr */
+ recv_wr = recv_wr->next;
+ } /* eof for recv_wr */
post_recv_exit0:
iosync(); /* serialize GAL register access */
@@ -584,6 +573,11 @@
ehca_dbg(dev, "ehca_qp=%p qp_num=%x wqe_cnt=%d ret=%i",
my_qp, my_qp->real_qp_num, wqe_cnt, ret);
spin_unlock_irqrestore(&my_qp->spinlock_r, flags);
+
+out:
+ if (ret)
+ *bad_recv_wr = recv_wr;
+
return ret;
}
@@ -597,6 +591,7 @@
if (unlikely(my_qp->state == IB_QPS_RESET)) {
ehca_err(qp->device, "Invalid QP state qp_state=%d qpn=%x",
my_qp->state, qp->qp_num);
+ *bad_recv_wr = recv_wr;
return -EINVAL;
}
diff --git a/drivers/infiniband/hw/ipath/ipath_driver.c b/drivers/infiniband/hw/ipath/ipath_driver.c
index 013d1380..d2787fe 100644
--- a/drivers/infiniband/hw/ipath/ipath_driver.c
+++ b/drivers/infiniband/hw/ipath/ipath_driver.c
@@ -39,6 +39,7 @@
#include <linux/delay.h>
#include <linux/netdevice.h>
#include <linux/vmalloc.h>
+#include <linux/bitmap.h>
#include "ipath_kernel.h"
#include "ipath_verbs.h"
@@ -1697,7 +1698,7 @@
unsigned len, int avail)
{
unsigned long flags;
- unsigned end, cnt = 0, next;
+ unsigned end, cnt = 0;
/* There are two bits per send buffer (busy and generation) */
start *= 2;
@@ -1748,12 +1749,7 @@
if (dd->ipath_pioupd_thresh) {
end = 2 * (dd->ipath_piobcnt2k + dd->ipath_piobcnt4k);
- next = find_first_bit(dd->ipath_pioavailkernel, end);
- while (next < end) {
- cnt++;
- next = find_next_bit(dd->ipath_pioavailkernel, end,
- next + 1);
- }
+ cnt = bitmap_weight(dd->ipath_pioavailkernel, end);
}
spin_unlock_irqrestore(&ipath_pioavail_lock, flags);
diff --git a/drivers/infiniband/hw/mlx4/main.c b/drivers/infiniband/hw/mlx4/main.c
index 3cb3f47..e596537 100644
--- a/drivers/infiniband/hw/mlx4/main.c
+++ b/drivers/infiniband/hw/mlx4/main.c
@@ -103,7 +103,7 @@
props->device_cap_flags |= IB_DEVICE_UD_AV_PORT_ENFORCE;
if (dev->dev->caps.flags & MLX4_DEV_CAP_FLAG_IPOIB_CSUM)
props->device_cap_flags |= IB_DEVICE_UD_IP_CSUM;
- if (dev->dev->caps.max_gso_sz)
+ if (dev->dev->caps.max_gso_sz && dev->dev->caps.flags & MLX4_DEV_CAP_FLAG_BLH)
props->device_cap_flags |= IB_DEVICE_UD_TSO;
if (dev->dev->caps.bmme_flags & MLX4_BMME_FLAG_RESERVED_LKEY)
props->device_cap_flags |= IB_DEVICE_LOCAL_DMA_LKEY;
diff --git a/drivers/infiniband/hw/mlx4/qp.c b/drivers/infiniband/hw/mlx4/qp.c
index 256a00c..989555c 100644
--- a/drivers/infiniband/hw/mlx4/qp.c
+++ b/drivers/infiniband/hw/mlx4/qp.c
@@ -54,7 +54,8 @@
/*
* Largest possible UD header: send with GRH and immediate data.
*/
- MLX4_IB_UD_HEADER_SIZE = 72
+ MLX4_IB_UD_HEADER_SIZE = 72,
+ MLX4_IB_LSO_HEADER_SPARE = 128,
};
struct mlx4_ib_sqp {
@@ -67,7 +68,8 @@
};
enum {
- MLX4_IB_MIN_SQ_STRIDE = 6
+ MLX4_IB_MIN_SQ_STRIDE = 6,
+ MLX4_IB_CACHE_LINE_SIZE = 64,
};
static const __be32 mlx4_ib_opcode[] = {
@@ -261,7 +263,7 @@
case IB_QPT_UD:
return sizeof (struct mlx4_wqe_ctrl_seg) +
sizeof (struct mlx4_wqe_datagram_seg) +
- ((flags & MLX4_IB_QP_LSO) ? 64 : 0);
+ ((flags & MLX4_IB_QP_LSO) ? MLX4_IB_LSO_HEADER_SPARE : 0);
case IB_QPT_UC:
return sizeof (struct mlx4_wqe_ctrl_seg) +
sizeof (struct mlx4_wqe_raddr_seg);
@@ -897,7 +899,6 @@
context->flags = cpu_to_be32((to_mlx4_state(new_state) << 28) |
(to_mlx4_st(ibqp->qp_type) << 16));
- context->flags |= cpu_to_be32(1 << 8); /* DE? */
if (!(attr_mask & IB_QP_PATH_MIG_STATE))
context->flags |= cpu_to_be32(MLX4_QP_PM_MIGRATED << 11);
@@ -1467,16 +1468,12 @@
static int build_lso_seg(struct mlx4_wqe_lso_seg *wqe, struct ib_send_wr *wr,
struct mlx4_ib_qp *qp, unsigned *lso_seg_len,
- __be32 *lso_hdr_sz)
+ __be32 *lso_hdr_sz, __be32 *blh)
{
unsigned halign = ALIGN(sizeof *wqe + wr->wr.ud.hlen, 16);
- /*
- * This is a temporary limitation and will be removed in
- * a forthcoming FW release:
- */
- if (unlikely(halign > 64))
- return -EINVAL;
+ if (unlikely(halign > MLX4_IB_CACHE_LINE_SIZE))
+ *blh = cpu_to_be32(1 << 6);
if (unlikely(!(qp->flags & MLX4_IB_QP_LSO) &&
wr->num_sge > qp->sq.max_gs - (halign >> 4)))
@@ -1522,6 +1519,7 @@
__be32 dummy;
__be32 *lso_wqe;
__be32 uninitialized_var(lso_hdr_sz);
+ __be32 blh;
int i;
spin_lock_irqsave(&qp->sq.lock, flags);
@@ -1530,6 +1528,7 @@
for (nreq = 0; wr; ++nreq, wr = wr->next) {
lso_wqe = &dummy;
+ blh = 0;
if (mlx4_wq_overflow(&qp->sq, nreq, qp->ibqp.send_cq)) {
err = -ENOMEM;
@@ -1616,7 +1615,7 @@
size += sizeof (struct mlx4_wqe_datagram_seg) / 16;
if (wr->opcode == IB_WR_LSO) {
- err = build_lso_seg(wqe, wr, qp, &seglen, &lso_hdr_sz);
+ err = build_lso_seg(wqe, wr, qp, &seglen, &lso_hdr_sz, &blh);
if (unlikely(err)) {
*bad_wr = wr;
goto out;
@@ -1687,7 +1686,7 @@
}
ctrl->owner_opcode = mlx4_ib_opcode[wr->opcode] |
- (ind & qp->sq.wqe_cnt ? cpu_to_be32(1 << 31) : 0);
+ (ind & qp->sq.wqe_cnt ? cpu_to_be32(1 << 31) : 0) | blh;
stamp = ind + qp->sq_spare_wqes;
ind += DIV_ROUND_UP(size * 16, 1U << qp->sq.wqe_shift);
diff --git a/drivers/infiniband/hw/nes/Kconfig b/drivers/infiniband/hw/nes/Kconfig
index d449eb6..846dc97 100644
--- a/drivers/infiniband/hw/nes/Kconfig
+++ b/drivers/infiniband/hw/nes/Kconfig
@@ -4,14 +4,13 @@
select LIBCRC32C
select INET_LRO
---help---
- This is a low-level driver for NetEffect RDMA enabled
- Network Interface Cards (RNIC).
+ This is the RDMA Network Interface Card (RNIC) driver for
+ NetEffect Ethernet Cluster Server Adapters.
config INFINIBAND_NES_DEBUG
bool "Verbose debugging output"
depends on INFINIBAND_NES
default n
---help---
- This option causes the NetEffect RNIC driver to produce debug
- messages. Select this if you are developing the driver
- or trying to diagnose a problem.
+ This option enables debug messages from the NetEffect RNIC
+ driver. Select this if you are diagnosing a problem.
diff --git a/drivers/infiniband/hw/nes/nes.c b/drivers/infiniband/hw/nes/nes.c
index cbde0cf..b9d09ba 100644
--- a/drivers/infiniband/hw/nes/nes.c
+++ b/drivers/infiniband/hw/nes/nes.c
@@ -1,5 +1,5 @@
/*
- * Copyright (c) 2006 - 2009 Intel-NE, Inc. All rights reserved.
+ * Copyright (c) 2006 - 2009 Intel Corporation. All rights reserved.
* Copyright (c) 2005 Open Grid Computing, Inc. All rights reserved.
*
* This software is available to you under a choice of one of two
@@ -521,7 +521,8 @@
spin_lock_init(&nesdev->indexed_regs_lock);
/* Remap the PCI registers in adapter BAR0 to kernel VA space */
- mmio_regs = ioremap_nocache(pci_resource_start(pcidev, BAR_0), sizeof(mmio_regs));
+ mmio_regs = ioremap_nocache(pci_resource_start(pcidev, BAR_0),
+ pci_resource_len(pcidev, BAR_0));
if (mmio_regs == NULL) {
printk(KERN_ERR PFX "Unable to remap BAR0\n");
ret = -EIO;
diff --git a/drivers/infiniband/hw/nes/nes.h b/drivers/infiniband/hw/nes/nes.h
index bcc6abc..9884056 100644
--- a/drivers/infiniband/hw/nes/nes.h
+++ b/drivers/infiniband/hw/nes/nes.h
@@ -1,5 +1,5 @@
/*
- * Copyright (c) 2006 - 2009 Intel-NE, Inc. All rights reserved.
+ * Copyright (c) 2006 - 2009 Intel Corporation. All rights reserved.
* Copyright (c) 2005 Open Grid Computing, Inc. All rights reserved.
*
* This software is available to you under a choice of one of two
diff --git a/drivers/infiniband/hw/nes/nes_cm.c b/drivers/infiniband/hw/nes/nes_cm.c
index 73473db..39468c27 100644
--- a/drivers/infiniband/hw/nes/nes_cm.c
+++ b/drivers/infiniband/hw/nes/nes_cm.c
@@ -1,5 +1,5 @@
/*
- * Copyright (c) 2006 - 2009 Intel-NE, Inc. All rights reserved.
+ * Copyright (c) 2006 - 2009 Intel Corporation. All rights reserved.
*
* This software is available to you under a choice of one of two
* licenses. You may choose to be licensed under the terms of the GNU
@@ -52,6 +52,7 @@
#include <linux/random.h>
#include <linux/list.h>
#include <linux/threads.h>
+#include <linux/highmem.h>
#include <net/arp.h>
#include <net/neighbour.h>
#include <net/route.h>
@@ -251,6 +252,33 @@
mpa_frame = (struct ietf_mpa_frame *)buffer;
cm_node->mpa_frame_size = ntohs(mpa_frame->priv_data_len);
+ /* make sure mpa private data len does not exceed 512 bytes */
+ if (cm_node->mpa_frame_size > IETF_MAX_PRIV_DATA_LEN) {
+ nes_debug(NES_DBG_CM, "The received Length of Private"
+ " Data field exceeds 512 octets\n");
+ return -EINVAL;
+ }
+ /*
+ * make sure the MPA receiver can interoperate with the
+ * received MPA version and MPA key information
+ *
+ */
+ if (mpa_frame->rev != mpa_version) {
+ nes_debug(NES_DBG_CM, "The received mpa version"
+ " can not be interoperated\n");
+ return -EINVAL;
+ }
+ if (cm_node->state != NES_CM_STATE_MPAREQ_SENT) {
+ if (memcmp(mpa_frame->key, IEFT_MPA_KEY_REQ, IETF_MPA_KEY_SIZE)) {
+ nes_debug(NES_DBG_CM, "Unexpected MPA Key received \n");
+ return -EINVAL;
+ }
+ } else {
+ if (memcmp(mpa_frame->key, IEFT_MPA_KEY_REP, IETF_MPA_KEY_SIZE)) {
+ nes_debug(NES_DBG_CM, "Unexpected MPA Key received \n");
+ return -EINVAL;
+ }
+ }
if (cm_node->mpa_frame_size + sizeof(struct ietf_mpa_frame) != len) {
nes_debug(NES_DBG_CM, "The received ietf buffer was not right"
@@ -486,6 +514,8 @@
send_reset(cm_node, NULL);
break;
default:
+ add_ref_cm_node(cm_node);
+ send_reset(cm_node, NULL);
create_event(cm_node, NES_CM_EVENT_ABORTED);
}
}
@@ -949,6 +979,7 @@
reset_entry);
{
struct nes_cm_node *loopback = cm_node->loopbackpartner;
+ enum nes_cm_node_state old_state;
if (NES_CM_STATE_FIN_WAIT1 <= cm_node->state) {
rem_ref_cm_node(cm_node->cm_core, cm_node);
} else {
@@ -960,11 +991,12 @@
NES_CM_STATE_CLOSED;
WARN_ON(1);
} else {
- cm_node->state =
- NES_CM_STATE_CLOSED;
- rem_ref_cm_node(
- cm_node->cm_core,
- cm_node);
+ old_state = cm_node->state;
+ cm_node->state = NES_CM_STATE_LISTENER_DESTROYED;
+ if (old_state != NES_CM_STATE_MPAREQ_RCVD)
+ rem_ref_cm_node(
+ cm_node->cm_core,
+ cm_node);
}
} else {
struct nes_cm_event event;
@@ -980,20 +1012,9 @@
loopback->loc_port;
event.cm_info.cm_id = loopback->cm_id;
cm_event_connect_error(&event);
+ cm_node->state = NES_CM_STATE_LISTENER_DESTROYED;
loopback->state = NES_CM_STATE_CLOSED;
- event.cm_node = cm_node;
- event.cm_info.rem_addr =
- cm_node->rem_addr;
- event.cm_info.loc_addr =
- cm_node->loc_addr;
- event.cm_info.rem_port =
- cm_node->rem_port;
- event.cm_info.loc_port =
- cm_node->loc_port;
- event.cm_info.cm_id = cm_node->cm_id;
- cm_event_reset(&event);
-
rem_ref_cm_node(cm_node->cm_core,
cm_node);
@@ -1077,12 +1098,13 @@
/**
* nes_addr_resolve_neigh
*/
-static int nes_addr_resolve_neigh(struct nes_vnic *nesvnic, u32 dst_ip)
+static int nes_addr_resolve_neigh(struct nes_vnic *nesvnic, u32 dst_ip, int arpindex)
{
struct rtable *rt;
struct flowi fl;
struct neighbour *neigh;
- int rc = -1;
+ int rc = arpindex;
+ struct nes_adapter *nesadapter = nesvnic->nesdev->nesadapter;
memset(&fl, 0, sizeof fl);
fl.nl_u.ip4_u.daddr = htonl(dst_ip);
@@ -1098,6 +1120,21 @@
nes_debug(NES_DBG_CM, "Neighbor MAC address for 0x%08X"
" is %pM, Gateway is 0x%08X \n", dst_ip,
neigh->ha, ntohl(rt->rt_gateway));
+
+ if (arpindex >= 0) {
+ if (!memcmp(nesadapter->arp_table[arpindex].mac_addr,
+ neigh->ha, ETH_ALEN)){
+ /* Mac address same as in nes_arp_table */
+ neigh_release(neigh);
+ ip_rt_put(rt);
+ return rc;
+ }
+
+ nes_manage_arp_cache(nesvnic->netdev,
+ nesadapter->arp_table[arpindex].mac_addr,
+ dst_ip, NES_ARP_DELETE);
+ }
+
nes_manage_arp_cache(nesvnic->netdev, neigh->ha,
dst_ip, NES_ARP_ADD);
rc = nes_arp_table(nesvnic->nesdev, dst_ip, NULL,
@@ -1113,7 +1150,6 @@
return rc;
}
-
/**
* make_cm_node - create a new instance of a cm node
*/
@@ -1123,6 +1159,7 @@
{
struct nes_cm_node *cm_node;
struct timespec ts;
+ int oldarpindex = 0;
int arpindex = 0;
struct nes_device *nesdev;
struct nes_adapter *nesadapter;
@@ -1176,17 +1213,18 @@
nesadapter = nesdev->nesadapter;
cm_node->loopbackpartner = NULL;
+
/* get the mac addr for the remote node */
if (ipv4_is_loopback(htonl(cm_node->rem_addr)))
arpindex = nes_arp_table(nesdev, ntohl(nesvnic->local_ipaddr), NULL, NES_ARP_RESOLVE);
- else
- arpindex = nes_arp_table(nesdev, cm_node->rem_addr, NULL, NES_ARP_RESOLVE);
+ else {
+ oldarpindex = nes_arp_table(nesdev, cm_node->rem_addr, NULL, NES_ARP_RESOLVE);
+ arpindex = nes_addr_resolve_neigh(nesvnic, cm_info->rem_addr, oldarpindex);
+
+ }
if (arpindex < 0) {
- arpindex = nes_addr_resolve_neigh(nesvnic, cm_info->rem_addr);
- if (arpindex < 0) {
- kfree(cm_node);
- return NULL;
- }
+ kfree(cm_node);
+ return NULL;
}
/* copy the mac addr to node context */
@@ -1333,13 +1371,20 @@
case NES_CM_STATE_SYN_RCVD:
case NES_CM_STATE_SYN_SENT:
case NES_CM_STATE_ESTABLISHED:
- case NES_CM_STATE_MPAREQ_SENT:
case NES_CM_STATE_MPAREJ_RCVD:
cm_node->tcp_cntxt.rcv_nxt++;
cleanup_retrans_entry(cm_node);
cm_node->state = NES_CM_STATE_LAST_ACK;
send_fin(cm_node, NULL);
break;
+ case NES_CM_STATE_MPAREQ_SENT:
+ create_event(cm_node, NES_CM_EVENT_ABORTED);
+ cm_node->tcp_cntxt.rcv_nxt++;
+ cleanup_retrans_entry(cm_node);
+ cm_node->state = NES_CM_STATE_CLOSED;
+ add_ref_cm_node(cm_node);
+ send_reset(cm_node, NULL);
+ break;
case NES_CM_STATE_FIN_WAIT1:
cm_node->tcp_cntxt.rcv_nxt++;
cleanup_retrans_entry(cm_node);
@@ -1590,6 +1635,7 @@
break;
case NES_CM_STATE_CLOSED:
cleanup_retrans_entry(cm_node);
+ add_ref_cm_node(cm_node);
send_reset(cm_node, skb);
break;
case NES_CM_STATE_TSA:
@@ -1641,9 +1687,15 @@
passive_open_err(cm_node, skb, 1);
break;
case NES_CM_STATE_LISTENING:
+ cm_node->tcp_cntxt.loc_seq_num = ntohl(tcph->ack_seq);
+ cleanup_retrans_entry(cm_node);
+ cm_node->state = NES_CM_STATE_CLOSED;
+ send_reset(cm_node, skb);
+ break;
case NES_CM_STATE_CLOSED:
cm_node->tcp_cntxt.loc_seq_num = ntohl(tcph->ack_seq);
cleanup_retrans_entry(cm_node);
+ add_ref_cm_node(cm_node);
send_reset(cm_node, skb);
break;
case NES_CM_STATE_ESTABLISHED:
@@ -1712,8 +1764,13 @@
dev_kfree_skb_any(skb);
break;
case NES_CM_STATE_LISTENING:
+ cleanup_retrans_entry(cm_node);
+ cm_node->state = NES_CM_STATE_CLOSED;
+ send_reset(cm_node, skb);
+ break;
case NES_CM_STATE_CLOSED:
cleanup_retrans_entry(cm_node);
+ add_ref_cm_node(cm_node);
send_reset(cm_node, skb);
break;
case NES_CM_STATE_LAST_ACK:
@@ -1974,7 +2031,7 @@
if (!cm_node)
return NULL;
mpa_frame = &cm_node->mpa_frame;
- strcpy(mpa_frame->key, IEFT_MPA_KEY_REQ);
+ memcpy(mpa_frame->key, IEFT_MPA_KEY_REQ, IETF_MPA_KEY_SIZE);
mpa_frame->flags = IETF_MPA_FLAGS_CRC;
mpa_frame->rev = IETF_MPA_VERSION;
mpa_frame->priv_data_len = htons(private_data_len);
@@ -2102,30 +2159,39 @@
cm_node->state = NES_CM_STATE_CLOSED;
rem_ref_cm_node(cm_core, cm_node);
} else {
- ret = send_mpa_reject(cm_node);
- if (ret) {
- cm_node->state = NES_CM_STATE_CLOSED;
- err = send_reset(cm_node, NULL);
- if (err)
- WARN_ON(1);
- } else
- cm_id->add_ref(cm_id);
+ if (cm_node->state == NES_CM_STATE_LISTENER_DESTROYED) {
+ rem_ref_cm_node(cm_core, cm_node);
+ } else {
+ ret = send_mpa_reject(cm_node);
+ if (ret) {
+ cm_node->state = NES_CM_STATE_CLOSED;
+ err = send_reset(cm_node, NULL);
+ if (err)
+ WARN_ON(1);
+ } else
+ cm_id->add_ref(cm_id);
+ }
}
} else {
cm_node->cm_id = NULL;
- event.cm_node = loopback;
- event.cm_info.rem_addr = loopback->rem_addr;
- event.cm_info.loc_addr = loopback->loc_addr;
- event.cm_info.rem_port = loopback->rem_port;
- event.cm_info.loc_port = loopback->loc_port;
- event.cm_info.cm_id = loopback->cm_id;
- cm_event_mpa_reject(&event);
- rem_ref_cm_node(cm_core, cm_node);
- loopback->state = NES_CM_STATE_CLOSING;
+ if (cm_node->state == NES_CM_STATE_LISTENER_DESTROYED) {
+ rem_ref_cm_node(cm_core, cm_node);
+ rem_ref_cm_node(cm_core, loopback);
+ } else {
+ event.cm_node = loopback;
+ event.cm_info.rem_addr = loopback->rem_addr;
+ event.cm_info.loc_addr = loopback->loc_addr;
+ event.cm_info.rem_port = loopback->rem_port;
+ event.cm_info.loc_port = loopback->loc_port;
+ event.cm_info.cm_id = loopback->cm_id;
+ cm_event_mpa_reject(&event);
+ rem_ref_cm_node(cm_core, cm_node);
+ loopback->state = NES_CM_STATE_CLOSING;
- cm_id = loopback->cm_id;
- rem_ref_cm_node(cm_core, loopback);
- cm_id->rem_ref(cm_id);
+ cm_id = loopback->cm_id;
+ rem_ref_cm_node(cm_core, loopback);
+ cm_id->rem_ref(cm_id);
+ }
}
return ret;
@@ -2164,11 +2230,15 @@
case NES_CM_STATE_CLOSING:
ret = -1;
break;
- case NES_CM_STATE_MPAREJ_RCVD:
case NES_CM_STATE_LISTENING:
+ cleanup_retrans_entry(cm_node);
+ send_reset(cm_node, NULL);
+ break;
+ case NES_CM_STATE_MPAREJ_RCVD:
case NES_CM_STATE_UNKNOWN:
case NES_CM_STATE_INITED:
case NES_CM_STATE_CLOSED:
+ case NES_CM_STATE_LISTENER_DESTROYED:
ret = rem_ref_cm_node(cm_core, cm_node);
break;
case NES_CM_STATE_TSA:
@@ -2687,8 +2757,6 @@
struct nes_pd *nespd;
u64 tagged_offset;
-
-
ibqp = nes_get_qp(cm_id->device, conn_param->qpn);
if (!ibqp)
return -EINVAL;
@@ -2704,6 +2772,13 @@
"%s\n", cm_node, nesvnic, nesvnic->netdev,
nesvnic->netdev->name);
+ if (NES_CM_STATE_LISTENER_DESTROYED == cm_node->state) {
+ if (cm_node->loopbackpartner)
+ rem_ref_cm_node(cm_node->cm_core, cm_node->loopbackpartner);
+ rem_ref_cm_node(cm_node->cm_core, cm_node);
+ return -EINVAL;
+ }
+
/* associate the node with the QP */
nesqp->cm_node = (void *)cm_node;
cm_node->nesqp = nesqp;
@@ -2786,6 +2861,10 @@
cpu_to_le32(conn_param->private_data_len +
sizeof(struct ietf_mpa_frame));
wqe->wqe_words[NES_IWARP_SQ_WQE_STAG0_IDX] = ibmr->lkey;
+ if (nesqp->sq_kmapped) {
+ nesqp->sq_kmapped = 0;
+ kunmap(nesqp->page);
+ }
nesqp->nesqp_context->ird_ord_sizes |=
cpu_to_le32(NES_QPCONTEXT_ORDIRD_LSMM_PRESENT |
@@ -2929,7 +3008,7 @@
if (cm_node->mpa_frame_size > MAX_CM_BUFFER)
return -EINVAL;
- strcpy(&cm_node->mpa_frame.key[0], IEFT_MPA_KEY_REP);
+ memcpy(&cm_node->mpa_frame.key[0], IEFT_MPA_KEY_REP, IETF_MPA_KEY_SIZE);
if (loopback) {
memcpy(&loopback->mpa_frame.priv_data, pdata, pdata_len);
loopback->mpa_frame.priv_data_len = pdata_len;
@@ -2974,6 +3053,9 @@
if (!nesdev)
return -EINVAL;
+ if (!(cm_id->local_addr.sin_port) || !(cm_id->remote_addr.sin_port))
+ return -EINVAL;
+
nes_debug(NES_DBG_CM, "QP%u, current IP = 0x%08X, Destination IP = "
"0x%08X:0x%04X, local = 0x%08X:0x%04X.\n", nesqp->hwqp.qp_id,
ntohl(nesvnic->local_ipaddr),
@@ -3251,6 +3333,11 @@
wqe->wqe_words[NES_IWARP_SQ_WQE_LENGTH0_IDX] = 0;
wqe->wqe_words[NES_IWARP_SQ_WQE_STAG0_IDX] = 0;
+ if (nesqp->sq_kmapped) {
+ nesqp->sq_kmapped = 0;
+ kunmap(nesqp->page);
+ }
+
/* use the reserved spot on the WQ for the extra first WQE */
nesqp->nesqp_context->ird_ord_sizes &=
cpu_to_le32(~(NES_QPCONTEXT_ORDIRD_LSMM_PRESENT |
@@ -3346,7 +3433,7 @@
nesqp->cm_id = NULL;
cm_id->provider_data = NULL;
cm_event.event = IW_CM_EVENT_CONNECT_REPLY;
- cm_event.status = IW_CM_EVENT_STATUS_REJECTED;
+ cm_event.status = -ECONNRESET;
cm_event.provider_data = cm_id->provider_data;
cm_event.local_addr = cm_id->local_addr;
cm_event.remote_addr = cm_id->remote_addr;
@@ -3390,6 +3477,8 @@
nes_debug(NES_DBG_CM, "%p - cm_id = %p\n", event->cm_node, cm_id);
nesqp = cm_id->provider_data;
+ if (!nesqp)
+ return;
nesqp->cm_id = NULL;
/* cm_id->provider_data = NULL; */
@@ -3401,8 +3490,8 @@
cm_event.private_data = NULL;
cm_event.private_data_len = 0;
- ret = cm_id->event_handler(cm_id, &cm_event);
cm_id->add_ref(cm_id);
+ ret = cm_id->event_handler(cm_id, &cm_event);
atomic_inc(&cm_closes);
cm_event.event = IW_CM_EVENT_CLOSE;
cm_event.status = IW_CM_EVENT_STATUS_OK;
diff --git a/drivers/infiniband/hw/nes/nes_cm.h b/drivers/infiniband/hw/nes/nes_cm.h
index 90e8e4d..d9825fd 100644
--- a/drivers/infiniband/hw/nes/nes_cm.h
+++ b/drivers/infiniband/hw/nes/nes_cm.h
@@ -1,5 +1,5 @@
/*
- * Copyright (c) 2006 - 2009 Intel-NE, Inc. All rights reserved.
+ * Copyright (c) 2006 - 2009 Intel Corporation. All rights reserved.
*
* This software is available to you under a choice of one of two
* licenses. You may choose to be licensed under the terms of the GNU
@@ -47,6 +47,8 @@
#define IEFT_MPA_KEY_REP "MPA ID Rep Frame"
#define IETF_MPA_KEY_SIZE 16
#define IETF_MPA_VERSION 1
+#define IETF_MAX_PRIV_DATA_LEN 512
+#define IETF_MPA_FRAME_SIZE 20
enum ietf_mpa_flags {
IETF_MPA_FLAGS_MARKERS = 0x80, /* receive Markers */
@@ -169,7 +171,7 @@
#define NES_CM_DEF_SEQ2 0x18ed5740
#define NES_CM_DEF_LOCAL_ID2 0xb807
-#define MAX_CM_BUFFER 512
+#define MAX_CM_BUFFER (IETF_MPA_FRAME_SIZE + IETF_MAX_PRIV_DATA_LEN)
typedef u32 nes_addr_t;
@@ -198,6 +200,7 @@
NES_CM_STATE_TIME_WAIT,
NES_CM_STATE_LAST_ACK,
NES_CM_STATE_CLOSING,
+ NES_CM_STATE_LISTENER_DESTROYED,
NES_CM_STATE_CLOSED
};
diff --git a/drivers/infiniband/hw/nes/nes_context.h b/drivers/infiniband/hw/nes/nes_context.h
index 0fb8d81..b4393a1 100644
--- a/drivers/infiniband/hw/nes/nes_context.h
+++ b/drivers/infiniband/hw/nes/nes_context.h
@@ -1,5 +1,5 @@
/*
- * Copyright (c) 2006 - 2009 Intel-NE, Inc. All rights reserved.
+ * Copyright (c) 2006 - 2009 Intel Corporation. All rights reserved.
*
* This software is available to you under a choice of one of two
* licenses. You may choose to be licensed under the terms of the GNU
diff --git a/drivers/infiniband/hw/nes/nes_hw.c b/drivers/infiniband/hw/nes/nes_hw.c
index 3512d6d..b1c2cbb 100644
--- a/drivers/infiniband/hw/nes/nes_hw.c
+++ b/drivers/infiniband/hw/nes/nes_hw.c
@@ -1,5 +1,5 @@
/*
- * Copyright (c) 2006 - 2009 Intel-NE, Inc. All rights reserved.
+ * Copyright (c) 2006 - 2009 Intel Corporation. All rights reserved.
*
* This software is available to you under a choice of one of two
* licenses. You may choose to be licensed under the terms of the GNU
@@ -424,8 +424,9 @@
nesadapter->base_pd = 1;
- nesadapter->device_cap_flags =
- IB_DEVICE_LOCAL_DMA_LKEY | IB_DEVICE_MEM_WINDOW;
+ nesadapter->device_cap_flags = IB_DEVICE_LOCAL_DMA_LKEY |
+ IB_DEVICE_MEM_WINDOW |
+ IB_DEVICE_MEM_MGT_EXTENSIONS;
nesadapter->allocated_qps = (unsigned long *)&(((unsigned char *)nesadapter)
[(sizeof(struct nes_adapter)+(sizeof(unsigned long)-1))&(~(sizeof(unsigned long)-1))]);
@@ -436,11 +437,12 @@
nesadapter->qp_table = (struct nes_qp **)(&nesadapter->allocated_arps[BITS_TO_LONGS(arp_table_size)]);
- /* mark the usual suspect QPs and CQs as in use */
+ /* mark the usual suspect QPs, MR and CQs as in use */
for (u32temp = 0; u32temp < NES_FIRST_QPN; u32temp++) {
set_bit(u32temp, nesadapter->allocated_qps);
set_bit(u32temp, nesadapter->allocated_cqs);
}
+ set_bit(0, nesadapter->allocated_mrs);
for (u32temp = 0; u32temp < 20; u32temp++)
set_bit(u32temp, nesadapter->allocated_pds);
@@ -481,7 +483,7 @@
nesadapter->max_irrq_wr = (u32temp >> 16) & 3;
nesadapter->max_sge = 4;
- nesadapter->max_cqe = 32767;
+ nesadapter->max_cqe = 32766;
if (nes_read_eeprom_values(nesdev, nesadapter)) {
printk(KERN_ERR PFX "Unable to read EEPROM data.\n");
@@ -1355,6 +1357,8 @@
}
if ((phy_type == NES_PHY_TYPE_ARGUS) ||
(phy_type == NES_PHY_TYPE_SFP_D)) {
+ u32 first_time = 1;
+
/* Check firmware heartbeat */
nes_read_10G_phy_reg(nesdev, phy_index, 0x3, 0xd7ee);
temp_phy_data = (u16)nes_read_indexed(nesdev, NES_IDX_MAC_MDIO_CONTROL);
@@ -1362,8 +1366,13 @@
nes_read_10G_phy_reg(nesdev, phy_index, 0x3, 0xd7ee);
temp_phy_data2 = (u16)nes_read_indexed(nesdev, NES_IDX_MAC_MDIO_CONTROL);
- if (temp_phy_data != temp_phy_data2)
- return 0;
+ if (temp_phy_data != temp_phy_data2) {
+ nes_read_10G_phy_reg(nesdev, phy_index, 0x3, 0xd7fd);
+ temp_phy_data = (u16)nes_read_indexed(nesdev, NES_IDX_MAC_MDIO_CONTROL);
+ if ((temp_phy_data & 0xff) > 0x20)
+ return 0;
+ printk(PFX "Reinitializing PHY\n");
+ }
/* no heartbeat, configure the PHY */
nes_write_10G_phy_reg(nesdev, phy_index, 0x1, 0x0000, 0x8000);
@@ -1399,7 +1408,7 @@
temp_phy_data = (u16)nes_read_indexed(nesdev, NES_IDX_MAC_MDIO_CONTROL);
do {
if (counter++ > 150) {
- nes_debug(NES_DBG_PHY, "No PHY heartbeat\n");
+ printk(PFX "No PHY heartbeat\n");
break;
}
mdelay(1);
@@ -1413,11 +1422,20 @@
nes_read_10G_phy_reg(nesdev, phy_index, 0x3, 0xd7fd);
temp_phy_data = (u16)nes_read_indexed(nesdev, NES_IDX_MAC_MDIO_CONTROL);
if (counter++ > 300) {
- nes_debug(NES_DBG_PHY, "PHY did not track\n");
- break;
+ if (((temp_phy_data & 0xff) == 0x0) && first_time) {
+ first_time = 0;
+ counter = 0;
+ /* reset AMCC PHY and try again */
+ nes_write_10G_phy_reg(nesdev, phy_index, 0x3, 0xe854, 0x00c0);
+ nes_write_10G_phy_reg(nesdev, phy_index, 0x3, 0xe854, 0x0040);
+ continue;
+ } else {
+ printk(PFX "PHY did not track\n");
+ break;
+ }
}
mdelay(10);
- } while (((temp_phy_data & 0xff) != 0x50) && ((temp_phy_data & 0xff) != 0x70));
+ } while ((temp_phy_data & 0xff) < 0x30);
/* setup signal integrity */
nes_write_10G_phy_reg(nesdev, phy_index, 0x1, 0xd003, 0x0000);
diff --git a/drivers/infiniband/hw/nes/nes_hw.h b/drivers/infiniband/hw/nes/nes_hw.h
index f28a41b..084be0e 100644
--- a/drivers/infiniband/hw/nes/nes_hw.h
+++ b/drivers/infiniband/hw/nes/nes_hw.h
@@ -1,5 +1,5 @@
/*
-* Copyright (c) 2006 - 2009 Intel-NE, Inc. All rights reserved.
+* Copyright (c) 2006 - 2009 Intel Corporation. All rights reserved.
*
* This software is available to you under a choice of one of two
* licenses. You may choose to be licensed under the terms of the GNU
@@ -546,11 +546,23 @@
NES_IWARP_SQ_FMR_WQE_PBL_LENGTH_IDX = 14,
};
+enum nes_iwarp_sq_fmr_opcodes {
+ NES_IWARP_SQ_FMR_WQE_ZERO_BASED = (1<<6),
+ NES_IWARP_SQ_FMR_WQE_PAGE_SIZE_4K = (0<<7),
+ NES_IWARP_SQ_FMR_WQE_PAGE_SIZE_2M = (1<<7),
+ NES_IWARP_SQ_FMR_WQE_RIGHTS_ENABLE_LOCAL_READ = (1<<16),
+ NES_IWARP_SQ_FMR_WQE_RIGHTS_ENABLE_LOCAL_WRITE = (1<<17),
+ NES_IWARP_SQ_FMR_WQE_RIGHTS_ENABLE_REMOTE_READ = (1<<18),
+ NES_IWARP_SQ_FMR_WQE_RIGHTS_ENABLE_REMOTE_WRITE = (1<<19),
+ NES_IWARP_SQ_FMR_WQE_RIGHTS_ENABLE_WINDOW_BIND = (1<<20),
+};
+
+#define NES_IWARP_SQ_FMR_WQE_MR_LENGTH_HIGH_MASK 0xFF;
+
enum nes_iwarp_sq_locinv_wqe_word_idx {
NES_IWARP_SQ_LOCINV_WQE_INV_STAG_IDX = 6,
};
-
enum nes_iwarp_rq_wqe_word_idx {
NES_IWARP_RQ_WQE_TOTAL_PAYLOAD_IDX = 1,
NES_IWARP_RQ_WQE_COMP_CTX_LOW_IDX = 2,
@@ -1153,6 +1165,19 @@
/* TODO: need to add list for two level tables */
};
+#define NES_4K_PBL_CHUNK_SIZE 4096
+
+struct nes_fast_mr_wqe_pbl {
+ u64 *kva;
+ dma_addr_t paddr;
+};
+
+struct nes_ib_fast_reg_page_list {
+ struct ib_fast_reg_page_list ibfrpl;
+ struct nes_fast_mr_wqe_pbl nes_wqe_pbl;
+ u64 pbl;
+};
+
struct nes_listener {
struct work_struct work;
struct workqueue_struct *wq;
diff --git a/drivers/infiniband/hw/nes/nes_nic.c b/drivers/infiniband/hw/nes/nes_nic.c
index de18fdf..ab11027 100644
--- a/drivers/infiniband/hw/nes/nes_nic.c
+++ b/drivers/infiniband/hw/nes/nes_nic.c
@@ -1,5 +1,5 @@
/*
- * Copyright (c) 2006 - 2009 Intel-NE, Inc. All rights reserved.
+ * Copyright (c) 2006 - 2009 Intel Corporation. All rights reserved.
*
* This software is available to you under a choice of one of two
* licenses. You may choose to be licensed under the terms of the GNU
diff --git a/drivers/infiniband/hw/nes/nes_user.h b/drivers/infiniband/hw/nes/nes_user.h
index cc90c14..71e133a 100644
--- a/drivers/infiniband/hw/nes/nes_user.h
+++ b/drivers/infiniband/hw/nes/nes_user.h
@@ -1,5 +1,5 @@
/*
- * Copyright (c) 2006 - 2009 Intel-NE, Inc. All rights reserved.
+ * Copyright (c) 2006 - 2009 Intel Corporation. All rights reserved.
* Copyright (c) 2005 Topspin Communications. All rights reserved.
* Copyright (c) 2005 Cisco Systems. All rights reserved.
* Copyright (c) 2005 Open Grid Computing, Inc. All rights reserved.
@@ -86,6 +86,7 @@
IWNES_MEMREG_TYPE_CQ = 0x0002,
IWNES_MEMREG_TYPE_MW = 0x0003,
IWNES_MEMREG_TYPE_FMR = 0x0004,
+ IWNES_MEMREG_TYPE_FMEM = 0x0005,
};
struct nes_mem_reg_req {
diff --git a/drivers/infiniband/hw/nes/nes_utils.c b/drivers/infiniband/hw/nes/nes_utils.c
index 9687c39..729d525 100644
--- a/drivers/infiniband/hw/nes/nes_utils.c
+++ b/drivers/infiniband/hw/nes/nes_utils.c
@@ -1,5 +1,5 @@
/*
- * Copyright (c) 2006 - 2009 Intel-NE, Inc. All rights reserved.
+ * Copyright (c) 2006 - 2009 Intel Corporation. All rights reserved.
*
* This software is available to you under a choice of one of two
* licenses. You may choose to be licensed under the terms of the GNU
diff --git a/drivers/infiniband/hw/nes/nes_verbs.c b/drivers/infiniband/hw/nes/nes_verbs.c
index a680c42..64d3136 100644
--- a/drivers/infiniband/hw/nes/nes_verbs.c
+++ b/drivers/infiniband/hw/nes/nes_verbs.c
@@ -1,5 +1,5 @@
/*
- * Copyright (c) 2006 - 2009 Intel-NE, Inc. All rights reserved.
+ * Copyright (c) 2006 - 2009 Intel Corporation. All rights reserved.
*
* This software is available to you under a choice of one of two
* licenses. You may choose to be licensed under the terms of the GNU
@@ -275,342 +275,236 @@
}
-/**
- * nes_alloc_fmr
+/*
+ * alloc_fast_reg_mr
*/
-static struct ib_fmr *nes_alloc_fmr(struct ib_pd *ibpd,
- int ibmr_access_flags,
- struct ib_fmr_attr *ibfmr_attr)
+static int alloc_fast_reg_mr(struct nes_device *nesdev, struct nes_pd *nespd,
+ u32 stag, u32 page_count)
{
- unsigned long flags;
- struct nes_pd *nespd = to_nespd(ibpd);
- struct nes_vnic *nesvnic = to_nesvnic(ibpd->device);
- struct nes_device *nesdev = nesvnic->nesdev;
- struct nes_adapter *nesadapter = nesdev->nesadapter;
- struct nes_fmr *nesfmr;
- struct nes_cqp_request *cqp_request;
struct nes_hw_cqp_wqe *cqp_wqe;
+ struct nes_cqp_request *cqp_request;
+ unsigned long flags;
int ret;
- u32 stag;
- u32 stag_index = 0;
- u32 next_stag_index = 0;
- u32 driver_key = 0;
+ struct nes_adapter *nesadapter = nesdev->nesadapter;
u32 opcode = 0;
- u8 stag_key = 0;
- int i=0;
- struct nes_vpbl vpbl;
+ u16 major_code;
+ u64 region_length = page_count * PAGE_SIZE;
- get_random_bytes(&next_stag_index, sizeof(next_stag_index));
- stag_key = (u8)next_stag_index;
- driver_key = 0;
-
- next_stag_index >>= 8;
- next_stag_index %= nesadapter->max_mr;
-
- ret = nes_alloc_resource(nesadapter, nesadapter->allocated_mrs,
- nesadapter->max_mr, &stag_index, &next_stag_index);
- if (ret) {
- goto failed_resource_alloc;
- }
-
- nesfmr = kzalloc(sizeof(*nesfmr), GFP_KERNEL);
- if (!nesfmr) {
- ret = -ENOMEM;
- goto failed_fmr_alloc;
- }
-
- nesfmr->nesmr.mode = IWNES_MEMREG_TYPE_FMR;
- if (ibfmr_attr->max_pages == 1) {
- /* use zero length PBL */
- nesfmr->nesmr.pbl_4k = 0;
- nesfmr->nesmr.pbls_used = 0;
- } else if (ibfmr_attr->max_pages <= 32) {
- /* use PBL 256 */
- nesfmr->nesmr.pbl_4k = 0;
- nesfmr->nesmr.pbls_used = 1;
- } else if (ibfmr_attr->max_pages <= 512) {
- /* use 4K PBLs */
- nesfmr->nesmr.pbl_4k = 1;
- nesfmr->nesmr.pbls_used = 1;
- } else {
- /* use two level 4K PBLs */
- /* add support for two level 256B PBLs */
- nesfmr->nesmr.pbl_4k = 1;
- nesfmr->nesmr.pbls_used = 1 + (ibfmr_attr->max_pages >> 9) +
- ((ibfmr_attr->max_pages & 511) ? 1 : 0);
- }
- /* Register the region with the adapter */
- spin_lock_irqsave(&nesadapter->pbl_lock, flags);
-
- /* track PBL resources */
- if (nesfmr->nesmr.pbls_used != 0) {
- if (nesfmr->nesmr.pbl_4k) {
- if (nesfmr->nesmr.pbls_used > nesadapter->free_4kpbl) {
- spin_unlock_irqrestore(&nesadapter->pbl_lock, flags);
- ret = -ENOMEM;
- goto failed_vpbl_avail;
- } else {
- nesadapter->free_4kpbl -= nesfmr->nesmr.pbls_used;
- }
- } else {
- if (nesfmr->nesmr.pbls_used > nesadapter->free_256pbl) {
- spin_unlock_irqrestore(&nesadapter->pbl_lock, flags);
- ret = -ENOMEM;
- goto failed_vpbl_avail;
- } else {
- nesadapter->free_256pbl -= nesfmr->nesmr.pbls_used;
- }
- }
- }
-
- /* one level pbl */
- if (nesfmr->nesmr.pbls_used == 0) {
- nesfmr->root_vpbl.pbl_vbase = NULL;
- nes_debug(NES_DBG_MR, "zero level pbl \n");
- } else if (nesfmr->nesmr.pbls_used == 1) {
- /* can change it to kmalloc & dma_map_single */
- nesfmr->root_vpbl.pbl_vbase = pci_alloc_consistent(nesdev->pcidev, 4096,
- &nesfmr->root_vpbl.pbl_pbase);
- if (!nesfmr->root_vpbl.pbl_vbase) {
- spin_unlock_irqrestore(&nesadapter->pbl_lock, flags);
- ret = -ENOMEM;
- goto failed_vpbl_alloc;
- }
- nesfmr->leaf_pbl_cnt = 0;
- nes_debug(NES_DBG_MR, "one level pbl, root_vpbl.pbl_vbase=%p \n",
- nesfmr->root_vpbl.pbl_vbase);
- }
- /* two level pbl */
- else {
- nesfmr->root_vpbl.pbl_vbase = pci_alloc_consistent(nesdev->pcidev, 8192,
- &nesfmr->root_vpbl.pbl_pbase);
- if (!nesfmr->root_vpbl.pbl_vbase) {
- spin_unlock_irqrestore(&nesadapter->pbl_lock, flags);
- ret = -ENOMEM;
- goto failed_vpbl_alloc;
- }
-
- nesfmr->leaf_pbl_cnt = nesfmr->nesmr.pbls_used-1;
- nesfmr->root_vpbl.leaf_vpbl = kzalloc(sizeof(*nesfmr->root_vpbl.leaf_vpbl)*1024, GFP_ATOMIC);
- if (!nesfmr->root_vpbl.leaf_vpbl) {
- spin_unlock_irqrestore(&nesadapter->pbl_lock, flags);
- ret = -ENOMEM;
- goto failed_leaf_vpbl_alloc;
- }
-
- nes_debug(NES_DBG_MR, "two level pbl, root_vpbl.pbl_vbase=%p"
- " leaf_pbl_cnt=%d root_vpbl.leaf_vpbl=%p\n",
- nesfmr->root_vpbl.pbl_vbase, nesfmr->leaf_pbl_cnt, nesfmr->root_vpbl.leaf_vpbl);
-
- for (i=0; i<nesfmr->leaf_pbl_cnt; i++)
- nesfmr->root_vpbl.leaf_vpbl[i].pbl_vbase = NULL;
-
- for (i=0; i<nesfmr->leaf_pbl_cnt; i++) {
- vpbl.pbl_vbase = pci_alloc_consistent(nesdev->pcidev, 4096,
- &vpbl.pbl_pbase);
-
- if (!vpbl.pbl_vbase) {
- ret = -ENOMEM;
- spin_unlock_irqrestore(&nesadapter->pbl_lock, flags);
- goto failed_leaf_vpbl_pages_alloc;
- }
-
- nesfmr->root_vpbl.pbl_vbase[i].pa_low = cpu_to_le32((u32)vpbl.pbl_pbase);
- nesfmr->root_vpbl.pbl_vbase[i].pa_high = cpu_to_le32((u32)((((u64)vpbl.pbl_pbase)>>32)));
- nesfmr->root_vpbl.leaf_vpbl[i] = vpbl;
-
- nes_debug(NES_DBG_MR, "pbase_low=0x%x, pbase_high=0x%x, vpbl=%p\n",
- nesfmr->root_vpbl.pbl_vbase[i].pa_low,
- nesfmr->root_vpbl.pbl_vbase[i].pa_high,
- &nesfmr->root_vpbl.leaf_vpbl[i]);
- }
- }
- nesfmr->ib_qp = NULL;
- nesfmr->access_rights =0;
-
- stag = stag_index << 8;
- stag |= driver_key;
- stag += (u32)stag_key;
-
- spin_unlock_irqrestore(&nesadapter->pbl_lock, flags);
cqp_request = nes_get_cqp_request(nesdev);
if (cqp_request == NULL) {
nes_debug(NES_DBG_MR, "Failed to get a cqp_request.\n");
- ret = -ENOMEM;
- goto failed_leaf_vpbl_pages_alloc;
+ return -ENOMEM;
}
+ nes_debug(NES_DBG_MR, "alloc_fast_reg_mr: page_count = %d, "
+ "region_length = %llu\n",
+ page_count, region_length);
cqp_request->waiting = 1;
cqp_wqe = &cqp_request->cqp_wqe;
- nes_debug(NES_DBG_MR, "Registering STag 0x%08X, index = 0x%08X\n",
- stag, stag_index);
-
- opcode = NES_CQP_ALLOCATE_STAG | NES_CQP_STAG_VA_TO | NES_CQP_STAG_MR;
-
- if (nesfmr->nesmr.pbl_4k == 1)
- opcode |= NES_CQP_STAG_PBL_BLK_SIZE;
-
- if (ibmr_access_flags & IB_ACCESS_REMOTE_WRITE) {
- opcode |= NES_CQP_STAG_RIGHTS_REMOTE_WRITE |
- NES_CQP_STAG_RIGHTS_LOCAL_WRITE | NES_CQP_STAG_REM_ACC_EN;
- nesfmr->access_rights |=
- NES_CQP_STAG_RIGHTS_REMOTE_WRITE | NES_CQP_STAG_RIGHTS_LOCAL_WRITE |
- NES_CQP_STAG_REM_ACC_EN;
+ spin_lock_irqsave(&nesadapter->pbl_lock, flags);
+ if (nesadapter->free_4kpbl > 0) {
+ nesadapter->free_4kpbl--;
+ spin_unlock_irqrestore(&nesadapter->pbl_lock, flags);
+ } else {
+ /* No 4kpbl's available: */
+ spin_unlock_irqrestore(&nesadapter->pbl_lock, flags);
+ nes_debug(NES_DBG_MR, "Out of Pbls\n");
+ nes_free_cqp_request(nesdev, cqp_request);
+ return -ENOMEM;
}
- if (ibmr_access_flags & IB_ACCESS_REMOTE_READ) {
- opcode |= NES_CQP_STAG_RIGHTS_REMOTE_READ |
- NES_CQP_STAG_RIGHTS_LOCAL_READ | NES_CQP_STAG_REM_ACC_EN;
- nesfmr->access_rights |=
- NES_CQP_STAG_RIGHTS_REMOTE_READ | NES_CQP_STAG_RIGHTS_LOCAL_READ |
- NES_CQP_STAG_REM_ACC_EN;
- }
+ opcode = NES_CQP_ALLOCATE_STAG | NES_CQP_STAG_MR |
+ NES_CQP_STAG_PBL_BLK_SIZE | NES_CQP_STAG_VA_TO |
+ NES_CQP_STAG_REM_ACC_EN;
+ /*
+ * The current OFED API does not support the zero based TO option.
+ * If it is added, the NES_CQP_STAG_VA* option will need to change. Also,
+ * the API does not support the ability to have the MR set for local
+ * access only when created and not allow the SQ op to override. Given
+ * this, the remote enable must be set here.
+ */
nes_fill_init_cqp_wqe(cqp_wqe, nesdev);
set_wqe_32bit_value(cqp_wqe->wqe_words, NES_CQP_WQE_OPCODE_IDX, opcode);
- set_wqe_32bit_value(cqp_wqe->wqe_words, NES_CQP_STAG_WQE_LEN_HIGH_PD_IDX, (nespd->pd_id & 0x00007fff));
- set_wqe_32bit_value(cqp_wqe->wqe_words, NES_CQP_STAG_WQE_STAG_IDX, stag);
+ set_wqe_32bit_value(cqp_wqe->wqe_words, NES_CQP_STAG_WQE_PBL_BLK_COUNT_IDX, 1);
- cqp_wqe->wqe_words[NES_CQP_STAG_WQE_PBL_BLK_COUNT_IDX] =
- cpu_to_le32((nesfmr->nesmr.pbls_used>1) ?
- (nesfmr->nesmr.pbls_used-1) : nesfmr->nesmr.pbls_used);
+ cqp_wqe->wqe_words[NES_CQP_STAG_WQE_LEN_HIGH_PD_IDX] =
+ cpu_to_le32((u32)(region_length >> 8) & 0xff000000);
+ cqp_wqe->wqe_words[NES_CQP_STAG_WQE_LEN_HIGH_PD_IDX] |=
+ cpu_to_le32(nespd->pd_id & 0x00007fff);
+
+ set_wqe_32bit_value(cqp_wqe->wqe_words, NES_CQP_STAG_WQE_STAG_IDX, stag);
+ set_wqe_64bit_value(cqp_wqe->wqe_words, NES_CQP_STAG_WQE_VA_LOW_IDX, 0);
+ set_wqe_32bit_value(cqp_wqe->wqe_words, NES_CQP_STAG_WQE_LEN_LOW_IDX, 0);
+ set_wqe_64bit_value(cqp_wqe->wqe_words, NES_CQP_STAG_WQE_PA_LOW_IDX, 0);
+ set_wqe_32bit_value(cqp_wqe->wqe_words, NES_CQP_STAG_WQE_PBL_LEN_IDX, (page_count * 8));
+ cqp_wqe->wqe_words[NES_CQP_WQE_OPCODE_IDX] |= cpu_to_le32(NES_CQP_STAG_PBL_BLK_SIZE);
+ barrier();
atomic_set(&cqp_request->refcount, 2);
nes_post_cqp_request(nesdev, cqp_request);
/* Wait for CQP */
- ret = wait_event_timeout(cqp_request->waitq, (cqp_request->request_done != 0),
- NES_EVENT_TIMEOUT);
- nes_debug(NES_DBG_MR, "Register STag 0x%08X completed, wait_event_timeout ret = %u,"
- " CQP Major:Minor codes = 0x%04X:0x%04X.\n",
- stag, ret, cqp_request->major_code, cqp_request->minor_code);
+ ret = wait_event_timeout(cqp_request->waitq,
+ (0 != cqp_request->request_done),
+ NES_EVENT_TIMEOUT);
- if ((!ret) || (cqp_request->major_code)) {
- nes_put_cqp_request(nesdev, cqp_request);
- ret = (!ret) ? -ETIME : -EIO;
- goto failed_leaf_vpbl_pages_alloc;
- }
+ nes_debug(NES_DBG_MR, "Allocate STag 0x%08X completed, "
+ "wait_event_timeout ret = %u, CQP Major:Minor codes = "
+ "0x%04X:0x%04X.\n", stag, ret, cqp_request->major_code,
+ cqp_request->minor_code);
+ major_code = cqp_request->major_code;
nes_put_cqp_request(nesdev, cqp_request);
- nesfmr->nesmr.ibfmr.lkey = stag;
- nesfmr->nesmr.ibfmr.rkey = stag;
- nesfmr->attr = *ibfmr_attr;
- return &nesfmr->nesmr.ibfmr;
-
- failed_leaf_vpbl_pages_alloc:
- /* unroll all allocated pages */
- for (i=0; i<nesfmr->leaf_pbl_cnt; i++) {
- if (nesfmr->root_vpbl.leaf_vpbl[i].pbl_vbase) {
- pci_free_consistent(nesdev->pcidev, 4096, nesfmr->root_vpbl.leaf_vpbl[i].pbl_vbase,
- nesfmr->root_vpbl.leaf_vpbl[i].pbl_pbase);
- }
- }
- if (nesfmr->root_vpbl.leaf_vpbl)
- kfree(nesfmr->root_vpbl.leaf_vpbl);
-
- failed_leaf_vpbl_alloc:
- if (nesfmr->leaf_pbl_cnt == 0) {
- if (nesfmr->root_vpbl.pbl_vbase)
- pci_free_consistent(nesdev->pcidev, 4096, nesfmr->root_vpbl.pbl_vbase,
- nesfmr->root_vpbl.pbl_pbase);
- } else
- pci_free_consistent(nesdev->pcidev, 8192, nesfmr->root_vpbl.pbl_vbase,
- nesfmr->root_vpbl.pbl_pbase);
-
- failed_vpbl_alloc:
- if (nesfmr->nesmr.pbls_used != 0) {
+ if (!ret || major_code) {
spin_lock_irqsave(&nesadapter->pbl_lock, flags);
- if (nesfmr->nesmr.pbl_4k)
- nesadapter->free_4kpbl += nesfmr->nesmr.pbls_used;
- else
- nesadapter->free_256pbl += nesfmr->nesmr.pbls_used;
+ nesadapter->free_4kpbl++;
spin_unlock_irqrestore(&nesadapter->pbl_lock, flags);
}
-failed_vpbl_avail:
- kfree(nesfmr);
-
- failed_fmr_alloc:
- nes_free_resource(nesadapter, nesadapter->allocated_mrs, stag_index);
-
- failed_resource_alloc:
- return ERR_PTR(ret);
+ if (!ret)
+ return -ETIME;
+ else if (major_code)
+ return -EIO;
+ return 0;
}
-
-/**
- * nes_dealloc_fmr
+/*
+ * nes_alloc_fast_reg_mr
*/
-static int nes_dealloc_fmr(struct ib_fmr *ibfmr)
+struct ib_mr *nes_alloc_fast_reg_mr(struct ib_pd *ibpd, int max_page_list_len)
{
- unsigned long flags;
- struct nes_mr *nesmr = to_nesmr_from_ibfmr(ibfmr);
- struct nes_fmr *nesfmr = to_nesfmr(nesmr);
- struct nes_vnic *nesvnic = to_nesvnic(ibfmr->device);
+ struct nes_pd *nespd = to_nespd(ibpd);
+ struct nes_vnic *nesvnic = to_nesvnic(ibpd->device);
struct nes_device *nesdev = nesvnic->nesdev;
struct nes_adapter *nesadapter = nesdev->nesadapter;
- int i = 0;
- int rc;
- /* free the resources */
- if (nesfmr->leaf_pbl_cnt == 0) {
- /* single PBL case */
- if (nesfmr->root_vpbl.pbl_vbase)
- pci_free_consistent(nesdev->pcidev, 4096, nesfmr->root_vpbl.pbl_vbase,
- nesfmr->root_vpbl.pbl_pbase);
+ u32 next_stag_index;
+ u8 stag_key = 0;
+ u32 driver_key = 0;
+ int err = 0;
+ u32 stag_index = 0;
+ struct nes_mr *nesmr;
+ u32 stag;
+ int ret;
+ struct ib_mr *ibmr;
+/*
+ * Note: Set to always use a fixed length single page entry PBL. This is to allow
+ * for the fast_reg_mr operation to always know the size of the PBL.
+ */
+ if (max_page_list_len > (NES_4K_PBL_CHUNK_SIZE / sizeof(u64)))
+ return ERR_PTR(-E2BIG);
+
+ get_random_bytes(&next_stag_index, sizeof(next_stag_index));
+ stag_key = (u8)next_stag_index;
+ next_stag_index >>= 8;
+ next_stag_index %= nesadapter->max_mr;
+
+ err = nes_alloc_resource(nesadapter, nesadapter->allocated_mrs,
+ nesadapter->max_mr, &stag_index,
+ &next_stag_index);
+ if (err)
+ return ERR_PTR(err);
+
+ nesmr = kzalloc(sizeof(*nesmr), GFP_KERNEL);
+ if (!nesmr) {
+ nes_free_resource(nesadapter, nesadapter->allocated_mrs, stag_index);
+ return ERR_PTR(-ENOMEM);
+ }
+
+ stag = stag_index << 8;
+ stag |= driver_key;
+ stag += (u32)stag_key;
+
+ nes_debug(NES_DBG_MR, "Allocating STag 0x%08X index = 0x%08X\n",
+ stag, stag_index);
+
+ ret = alloc_fast_reg_mr(nesdev, nespd, stag, max_page_list_len);
+
+ if (ret == 0) {
+ nesmr->ibmr.rkey = stag;
+ nesmr->ibmr.lkey = stag;
+ nesmr->mode = IWNES_MEMREG_TYPE_FMEM;
+ ibmr = &nesmr->ibmr;
} else {
- for (i = 0; i < nesfmr->leaf_pbl_cnt; i++) {
- pci_free_consistent(nesdev->pcidev, 4096, nesfmr->root_vpbl.leaf_vpbl[i].pbl_vbase,
- nesfmr->root_vpbl.leaf_vpbl[i].pbl_pbase);
- }
- kfree(nesfmr->root_vpbl.leaf_vpbl);
- pci_free_consistent(nesdev->pcidev, 8192, nesfmr->root_vpbl.pbl_vbase,
- nesfmr->root_vpbl.pbl_pbase);
+ kfree(nesmr);
+ nes_free_resource(nesadapter, nesadapter->allocated_mrs, stag_index);
+ ibmr = ERR_PTR(-ENOMEM);
}
- nesmr->ibmw.device = ibfmr->device;
- nesmr->ibmw.pd = ibfmr->pd;
- nesmr->ibmw.rkey = ibfmr->rkey;
- nesmr->ibmw.uobject = NULL;
+ return ibmr;
+}
- rc = nes_dealloc_mw(&nesmr->ibmw);
+/*
+ * nes_alloc_fast_reg_page_list
+ */
+static struct ib_fast_reg_page_list *nes_alloc_fast_reg_page_list(
+ struct ib_device *ibdev,
+ int page_list_len)
+{
+ struct nes_vnic *nesvnic = to_nesvnic(ibdev);
+ struct nes_device *nesdev = nesvnic->nesdev;
+ struct ib_fast_reg_page_list *pifrpl;
+ struct nes_ib_fast_reg_page_list *pnesfrpl;
- if ((rc == 0) && (nesfmr->nesmr.pbls_used != 0)) {
- spin_lock_irqsave(&nesadapter->pbl_lock, flags);
- if (nesfmr->nesmr.pbl_4k) {
- nesadapter->free_4kpbl += nesfmr->nesmr.pbls_used;
- WARN_ON(nesadapter->free_4kpbl > nesadapter->max_4kpbl);
- } else {
- nesadapter->free_256pbl += nesfmr->nesmr.pbls_used;
- WARN_ON(nesadapter->free_256pbl > nesadapter->max_256pbl);
- }
- spin_unlock_irqrestore(&nesadapter->pbl_lock, flags);
+ if (page_list_len > (NES_4K_PBL_CHUNK_SIZE / sizeof(u64)))
+ return ERR_PTR(-E2BIG);
+ /*
+ * Allocate the ib_fast_reg_page_list structure, the
+ * nes_fast_bpl structure, and the PBL table.
+ */
+ pnesfrpl = kmalloc(sizeof(struct nes_ib_fast_reg_page_list) +
+ page_list_len * sizeof(u64), GFP_KERNEL);
+
+ if (!pnesfrpl)
+ return ERR_PTR(-ENOMEM);
+
+ pifrpl = &pnesfrpl->ibfrpl;
+ pifrpl->page_list = &pnesfrpl->pbl;
+ pifrpl->max_page_list_len = page_list_len;
+ /*
+ * Allocate the WQE PBL
+ */
+ pnesfrpl->nes_wqe_pbl.kva = pci_alloc_consistent(nesdev->pcidev,
+ page_list_len * sizeof(u64),
+ &pnesfrpl->nes_wqe_pbl.paddr);
+
+ if (!pnesfrpl->nes_wqe_pbl.kva) {
+ kfree(pnesfrpl);
+ return ERR_PTR(-ENOMEM);
}
+ nes_debug(NES_DBG_MR, "nes_alloc_fast_reg_pbl: nes_frpl = %p, "
+ "ibfrpl = %p, ibfrpl.page_list = %p, pbl.kva = %p, "
+ "pbl.paddr= %p\n", pnesfrpl, &pnesfrpl->ibfrpl,
+ pnesfrpl->ibfrpl.page_list, pnesfrpl->nes_wqe_pbl.kva,
+ (void *)pnesfrpl->nes_wqe_pbl.paddr);
- return rc;
+ return pifrpl;
}
-
-/**
- * nes_map_phys_fmr
+/*
+ * nes_free_fast_reg_page_list
*/
-static int nes_map_phys_fmr(struct ib_fmr *ibfmr, u64 *page_list,
- int list_len, u64 iova)
+static void nes_free_fast_reg_page_list(struct ib_fast_reg_page_list *pifrpl)
{
- return 0;
+ struct nes_vnic *nesvnic = to_nesvnic(pifrpl->device);
+ struct nes_device *nesdev = nesvnic->nesdev;
+ struct nes_ib_fast_reg_page_list *pnesfrpl;
+
+ pnesfrpl = container_of(pifrpl, struct nes_ib_fast_reg_page_list, ibfrpl);
+ /*
+ * Free the WQE PBL.
+ */
+ pci_free_consistent(nesdev->pcidev,
+ pifrpl->max_page_list_len * sizeof(u64),
+ pnesfrpl->nes_wqe_pbl.kva,
+ pnesfrpl->nes_wqe_pbl.paddr);
+ /*
+ * Free the PBL structure
+ */
+ kfree(pnesfrpl);
}
-
-/**
- * nes_unmap_frm
- */
-static int nes_unmap_fmr(struct list_head *ibfmr_list)
-{
- return 0;
-}
-
-
-
/**
* nes_query_device
*/
@@ -633,23 +527,23 @@
props->max_qp_wr = nesdev->nesadapter->max_qp_wr - 2;
props->max_sge = nesdev->nesadapter->max_sge;
props->max_cq = nesibdev->max_cq;
- props->max_cqe = nesdev->nesadapter->max_cqe - 1;
+ props->max_cqe = nesdev->nesadapter->max_cqe;
props->max_mr = nesibdev->max_mr;
props->max_mw = nesibdev->max_mr;
props->max_pd = nesibdev->max_pd;
props->max_sge_rd = 1;
switch (nesdev->nesadapter->max_irrq_wr) {
case 0:
- props->max_qp_rd_atom = 1;
+ props->max_qp_rd_atom = 2;
break;
case 1:
- props->max_qp_rd_atom = 4;
+ props->max_qp_rd_atom = 8;
break;
case 2:
- props->max_qp_rd_atom = 16;
+ props->max_qp_rd_atom = 32;
break;
case 3:
- props->max_qp_rd_atom = 32;
+ props->max_qp_rd_atom = 64;
break;
default:
props->max_qp_rd_atom = 0;
@@ -1121,6 +1015,7 @@
kunmap(nesqp->page);
return -ENOMEM;
}
+ nesqp->sq_kmapped = 1;
nesqp->hwqp.q2_vbase = mem;
mem += 256;
memset(nesqp->hwqp.q2_vbase, 0, 256);
@@ -1198,7 +1093,10 @@
pci_free_consistent(nesdev->pcidev, nesqp->qp_mem_size, nesqp->hwqp.q2_vbase, nesqp->hwqp.q2_pbase);
pci_free_consistent(nesdev->pcidev, 256, nesqp->pbl_vbase, nesqp->pbl_pbase );
nesqp->pbl_vbase = NULL;
- kunmap(nesqp->page);
+ if (nesqp->sq_kmapped) {
+ nesqp->sq_kmapped = 0;
+ kunmap(nesqp->page);
+ }
}
}
@@ -1504,8 +1402,6 @@
nes_debug(NES_DBG_QP, "QP%u structure located @%p.Size = %u.\n",
nesqp->hwqp.qp_id, nesqp, (u32)sizeof(*nesqp));
spin_lock_init(&nesqp->lock);
- init_waitqueue_head(&nesqp->state_waitq);
- init_waitqueue_head(&nesqp->kick_waitq);
nes_add_ref(&nesqp->ibqp);
break;
default:
@@ -1513,6 +1409,8 @@
return ERR_PTR(-EINVAL);
}
+ nesqp->sig_all = (init_attr->sq_sig_type == IB_SIGNAL_ALL_WR);
+
/* update the QP table */
nesdev->nesadapter->qp_table[nesqp->hwqp.qp_id-NES_FIRST_QPN] = nesqp;
nes_debug(NES_DBG_QP, "netdev refcnt=%u\n",
@@ -1607,8 +1505,10 @@
nes_ucontext->first_free_wq = nesqp->mmap_sq_db_index;
}
}
- if (nesqp->pbl_pbase)
+ if (nesqp->pbl_pbase && nesqp->sq_kmapped) {
+ nesqp->sq_kmapped = 0;
kunmap(nesqp->page);
+ }
} else {
/* Clean any pending completions from the cq(s) */
if (nesqp->nesscq)
@@ -1649,6 +1549,9 @@
unsigned long flags;
int ret;
+ if (entries > nesadapter->max_cqe)
+ return ERR_PTR(-EINVAL);
+
err = nes_alloc_resource(nesadapter, nesadapter->allocated_cqs,
nesadapter->max_cq, &cq_num, &nesadapter->next_cq);
if (err) {
@@ -2606,9 +2509,6 @@
stag = stag_index << 8;
stag |= driver_key;
stag += (u32)stag_key;
- if (stag == 0) {
- stag = 1;
- }
iova_start = virt;
/* Make the leaf PBL the root if only one PBL */
@@ -3109,7 +3009,6 @@
" already done based on hw state.\n",
nesqp->hwqp.qp_id);
issue_modify_qp = 0;
- nesqp->in_disconnect = 0;
}
switch (nesqp->hw_iwarp_state) {
case NES_AEQE_IWARP_STATE_CLOSING:
@@ -3122,7 +3021,6 @@
break;
default:
next_iwarp_state = NES_CQP_QP_IWARP_STATE_CLOSING;
- nesqp->in_disconnect = 1;
nesqp->hw_iwarp_state = NES_AEQE_IWARP_STATE_CLOSING;
break;
}
@@ -3139,7 +3037,6 @@
next_iwarp_state = NES_CQP_QP_IWARP_STATE_TERMINATE;
nesqp->hw_iwarp_state = NES_AEQE_IWARP_STATE_TERMINATE;
issue_modify_qp = 1;
- nesqp->in_disconnect = 1;
break;
case IB_QPS_ERR:
case IB_QPS_RESET:
@@ -3162,7 +3059,6 @@
if ((nesqp->hw_tcp_state > NES_AEQE_TCP_STATE_CLOSED) &&
(nesqp->hw_tcp_state != NES_AEQE_TCP_STATE_TIME_WAIT)) {
next_iwarp_state |= NES_CQP_QP_RESET;
- nesqp->in_disconnect = 1;
} else {
nes_debug(NES_DBG_MOD_QP, "QP%u NOT setting NES_CQP_QP_RESET since TCP state = %u\n",
nesqp->hwqp.qp_id, nesqp->hw_tcp_state);
@@ -3373,21 +3269,17 @@
struct nes_device *nesdev = nesvnic->nesdev;
struct nes_qp *nesqp = to_nesqp(ibqp);
struct nes_hw_qp_wqe *wqe;
- int err;
+ int err = 0;
u32 qsize = nesqp->hwqp.sq_size;
u32 head;
- u32 wqe_misc;
- u32 wqe_count;
+ u32 wqe_misc = 0;
+ u32 wqe_count = 0;
u32 counter;
- u32 total_payload_length;
- err = 0;
- wqe_misc = 0;
- wqe_count = 0;
- total_payload_length = 0;
-
- if (nesqp->ibqp_state > IB_QPS_RTS)
- return -EINVAL;
+ if (nesqp->ibqp_state > IB_QPS_RTS) {
+ err = -EINVAL;
+ goto out;
+ }
spin_lock_irqsave(&nesqp->lock, flags);
@@ -3413,94 +3305,208 @@
u64temp = (u64)(ib_wr->wr_id);
set_wqe_64bit_value(wqe->wqe_words, NES_IWARP_SQ_WQE_COMP_SCRATCH_LOW_IDX,
u64temp);
- switch (ib_wr->opcode) {
- case IB_WR_SEND:
- if (ib_wr->send_flags & IB_SEND_SOLICITED) {
- wqe_misc = NES_IWARP_SQ_OP_SENDSE;
- } else {
- wqe_misc = NES_IWARP_SQ_OP_SEND;
- }
- if (ib_wr->num_sge > nesdev->nesadapter->max_sge) {
- err = -EINVAL;
- break;
- }
- if (ib_wr->send_flags & IB_SEND_FENCE) {
- wqe_misc |= NES_IWARP_SQ_WQE_LOCAL_FENCE;
- }
- if ((ib_wr->send_flags & IB_SEND_INLINE) &&
- ((nes_drv_opt & NES_DRV_OPT_NO_INLINE_DATA) == 0) &&
- (ib_wr->sg_list[0].length <= 64)) {
- memcpy(&wqe->wqe_words[NES_IWARP_SQ_WQE_IMM_DATA_START_IDX],
- (void *)(unsigned long)ib_wr->sg_list[0].addr, ib_wr->sg_list[0].length);
- set_wqe_32bit_value(wqe->wqe_words, NES_IWARP_SQ_WQE_TOTAL_PAYLOAD_IDX,
- ib_wr->sg_list[0].length);
- wqe_misc |= NES_IWARP_SQ_WQE_IMM_DATA;
- } else {
- fill_wqe_sg_send(wqe, ib_wr, 1);
- }
+ switch (ib_wr->opcode) {
+ case IB_WR_SEND:
+ case IB_WR_SEND_WITH_INV:
+ if (IB_WR_SEND == ib_wr->opcode) {
+ if (ib_wr->send_flags & IB_SEND_SOLICITED)
+ wqe_misc = NES_IWARP_SQ_OP_SENDSE;
+ else
+ wqe_misc = NES_IWARP_SQ_OP_SEND;
+ } else {
+ if (ib_wr->send_flags & IB_SEND_SOLICITED)
+ wqe_misc = NES_IWARP_SQ_OP_SENDSEINV;
+ else
+ wqe_misc = NES_IWARP_SQ_OP_SENDINV;
- break;
- case IB_WR_RDMA_WRITE:
- wqe_misc = NES_IWARP_SQ_OP_RDMAW;
- if (ib_wr->num_sge > nesdev->nesadapter->max_sge) {
- nes_debug(NES_DBG_IW_TX, "Exceeded max sge, ib_wr=%u, max=%u\n",
- ib_wr->num_sge,
- nesdev->nesadapter->max_sge);
- err = -EINVAL;
- break;
- }
- if (ib_wr->send_flags & IB_SEND_FENCE) {
- wqe_misc |= NES_IWARP_SQ_WQE_LOCAL_FENCE;
- }
-
- set_wqe_32bit_value(wqe->wqe_words, NES_IWARP_SQ_WQE_RDMA_STAG_IDX,
- ib_wr->wr.rdma.rkey);
- set_wqe_64bit_value(wqe->wqe_words, NES_IWARP_SQ_WQE_RDMA_TO_LOW_IDX,
- ib_wr->wr.rdma.remote_addr);
-
- if ((ib_wr->send_flags & IB_SEND_INLINE) &&
- ((nes_drv_opt & NES_DRV_OPT_NO_INLINE_DATA) == 0) &&
- (ib_wr->sg_list[0].length <= 64)) {
- memcpy(&wqe->wqe_words[NES_IWARP_SQ_WQE_IMM_DATA_START_IDX],
- (void *)(unsigned long)ib_wr->sg_list[0].addr, ib_wr->sg_list[0].length);
- set_wqe_32bit_value(wqe->wqe_words, NES_IWARP_SQ_WQE_TOTAL_PAYLOAD_IDX,
- ib_wr->sg_list[0].length);
- wqe_misc |= NES_IWARP_SQ_WQE_IMM_DATA;
- } else {
- fill_wqe_sg_send(wqe, ib_wr, 1);
- }
- wqe->wqe_words[NES_IWARP_SQ_WQE_RDMA_LENGTH_IDX] =
- wqe->wqe_words[NES_IWARP_SQ_WQE_TOTAL_PAYLOAD_IDX];
- break;
- case IB_WR_RDMA_READ:
- /* iWARP only supports 1 sge for RDMA reads */
- if (ib_wr->num_sge > 1) {
- nes_debug(NES_DBG_IW_TX, "Exceeded max sge, ib_wr=%u, max=1\n",
- ib_wr->num_sge);
- err = -EINVAL;
- break;
- }
- wqe_misc = NES_IWARP_SQ_OP_RDMAR;
- set_wqe_64bit_value(wqe->wqe_words, NES_IWARP_SQ_WQE_RDMA_TO_LOW_IDX,
- ib_wr->wr.rdma.remote_addr);
- set_wqe_32bit_value(wqe->wqe_words, NES_IWARP_SQ_WQE_RDMA_STAG_IDX,
- ib_wr->wr.rdma.rkey);
- set_wqe_32bit_value(wqe->wqe_words, NES_IWARP_SQ_WQE_RDMA_LENGTH_IDX,
- ib_wr->sg_list->length);
- set_wqe_64bit_value(wqe->wqe_words, NES_IWARP_SQ_WQE_FRAG0_LOW_IDX,
- ib_wr->sg_list->addr);
- set_wqe_32bit_value(wqe->wqe_words, NES_IWARP_SQ_WQE_STAG0_IDX,
- ib_wr->sg_list->lkey);
- break;
- default:
- /* error */
- err = -EINVAL;
- break;
+ set_wqe_32bit_value(wqe->wqe_words, NES_IWARP_SQ_WQE_INV_STAG_LOW_IDX,
+ ib_wr->ex.invalidate_rkey);
}
- if (ib_wr->send_flags & IB_SEND_SIGNALED) {
- wqe_misc |= NES_IWARP_SQ_WQE_SIGNALED_COMPL;
+ if (ib_wr->num_sge > nesdev->nesadapter->max_sge) {
+ err = -EINVAL;
+ break;
+ }
+
+ if (ib_wr->send_flags & IB_SEND_FENCE)
+ wqe_misc |= NES_IWARP_SQ_WQE_LOCAL_FENCE;
+
+ if ((ib_wr->send_flags & IB_SEND_INLINE) &&
+ ((nes_drv_opt & NES_DRV_OPT_NO_INLINE_DATA) == 0) &&
+ (ib_wr->sg_list[0].length <= 64)) {
+ memcpy(&wqe->wqe_words[NES_IWARP_SQ_WQE_IMM_DATA_START_IDX],
+ (void *)(unsigned long)ib_wr->sg_list[0].addr, ib_wr->sg_list[0].length);
+ set_wqe_32bit_value(wqe->wqe_words, NES_IWARP_SQ_WQE_TOTAL_PAYLOAD_IDX,
+ ib_wr->sg_list[0].length);
+ wqe_misc |= NES_IWARP_SQ_WQE_IMM_DATA;
+ } else {
+ fill_wqe_sg_send(wqe, ib_wr, 1);
+ }
+
+ break;
+ case IB_WR_RDMA_WRITE:
+ wqe_misc = NES_IWARP_SQ_OP_RDMAW;
+ if (ib_wr->num_sge > nesdev->nesadapter->max_sge) {
+ nes_debug(NES_DBG_IW_TX, "Exceeded max sge, ib_wr=%u, max=%u\n",
+ ib_wr->num_sge, nesdev->nesadapter->max_sge);
+ err = -EINVAL;
+ break;
+ }
+
+ if (ib_wr->send_flags & IB_SEND_FENCE)
+ wqe_misc |= NES_IWARP_SQ_WQE_LOCAL_FENCE;
+
+ set_wqe_32bit_value(wqe->wqe_words, NES_IWARP_SQ_WQE_RDMA_STAG_IDX,
+ ib_wr->wr.rdma.rkey);
+ set_wqe_64bit_value(wqe->wqe_words, NES_IWARP_SQ_WQE_RDMA_TO_LOW_IDX,
+ ib_wr->wr.rdma.remote_addr);
+
+ if ((ib_wr->send_flags & IB_SEND_INLINE) &&
+ ((nes_drv_opt & NES_DRV_OPT_NO_INLINE_DATA) == 0) &&
+ (ib_wr->sg_list[0].length <= 64)) {
+ memcpy(&wqe->wqe_words[NES_IWARP_SQ_WQE_IMM_DATA_START_IDX],
+ (void *)(unsigned long)ib_wr->sg_list[0].addr, ib_wr->sg_list[0].length);
+ set_wqe_32bit_value(wqe->wqe_words, NES_IWARP_SQ_WQE_TOTAL_PAYLOAD_IDX,
+ ib_wr->sg_list[0].length);
+ wqe_misc |= NES_IWARP_SQ_WQE_IMM_DATA;
+ } else {
+ fill_wqe_sg_send(wqe, ib_wr, 1);
+ }
+
+ wqe->wqe_words[NES_IWARP_SQ_WQE_RDMA_LENGTH_IDX] =
+ wqe->wqe_words[NES_IWARP_SQ_WQE_TOTAL_PAYLOAD_IDX];
+ break;
+ case IB_WR_RDMA_READ:
+ case IB_WR_RDMA_READ_WITH_INV:
+ /* iWARP only supports 1 sge for RDMA reads */
+ if (ib_wr->num_sge > 1) {
+ nes_debug(NES_DBG_IW_TX, "Exceeded max sge, ib_wr=%u, max=1\n",
+ ib_wr->num_sge);
+ err = -EINVAL;
+ break;
+ }
+ if (ib_wr->opcode == IB_WR_RDMA_READ) {
+ wqe_misc = NES_IWARP_SQ_OP_RDMAR;
+ } else {
+ wqe_misc = NES_IWARP_SQ_OP_RDMAR_LOCINV;
+ set_wqe_32bit_value(wqe->wqe_words, NES_IWARP_SQ_WQE_INV_STAG_LOW_IDX,
+ ib_wr->ex.invalidate_rkey);
+ }
+
+ set_wqe_64bit_value(wqe->wqe_words, NES_IWARP_SQ_WQE_RDMA_TO_LOW_IDX,
+ ib_wr->wr.rdma.remote_addr);
+ set_wqe_32bit_value(wqe->wqe_words, NES_IWARP_SQ_WQE_RDMA_STAG_IDX,
+ ib_wr->wr.rdma.rkey);
+ set_wqe_32bit_value(wqe->wqe_words, NES_IWARP_SQ_WQE_RDMA_LENGTH_IDX,
+ ib_wr->sg_list->length);
+ set_wqe_64bit_value(wqe->wqe_words, NES_IWARP_SQ_WQE_FRAG0_LOW_IDX,
+ ib_wr->sg_list->addr);
+ set_wqe_32bit_value(wqe->wqe_words, NES_IWARP_SQ_WQE_STAG0_IDX,
+ ib_wr->sg_list->lkey);
+ break;
+ case IB_WR_LOCAL_INV:
+ wqe_misc = NES_IWARP_SQ_OP_LOCINV;
+ set_wqe_32bit_value(wqe->wqe_words,
+ NES_IWARP_SQ_LOCINV_WQE_INV_STAG_IDX,
+ ib_wr->ex.invalidate_rkey);
+ break;
+ case IB_WR_FAST_REG_MR:
+ {
+ int i;
+ int flags = ib_wr->wr.fast_reg.access_flags;
+ struct nes_ib_fast_reg_page_list *pnesfrpl =
+ container_of(ib_wr->wr.fast_reg.page_list,
+ struct nes_ib_fast_reg_page_list,
+ ibfrpl);
+ u64 *src_page_list = pnesfrpl->ibfrpl.page_list;
+ u64 *dst_page_list = pnesfrpl->nes_wqe_pbl.kva;
+
+ if (ib_wr->wr.fast_reg.page_list_len >
+ (NES_4K_PBL_CHUNK_SIZE / sizeof(u64))) {
+ nes_debug(NES_DBG_IW_TX, "SQ_FMR: bad page_list_len\n");
+ err = -EINVAL;
+ break;
+ }
+ wqe_misc = NES_IWARP_SQ_OP_FAST_REG;
+ set_wqe_64bit_value(wqe->wqe_words,
+ NES_IWARP_SQ_FMR_WQE_VA_FBO_LOW_IDX,
+ ib_wr->wr.fast_reg.iova_start);
+ set_wqe_32bit_value(wqe->wqe_words,
+ NES_IWARP_SQ_FMR_WQE_LENGTH_LOW_IDX,
+ ib_wr->wr.fast_reg.length);
+ set_wqe_32bit_value(wqe->wqe_words,
+ NES_IWARP_SQ_FMR_WQE_MR_STAG_IDX,
+ ib_wr->wr.fast_reg.rkey);
+ /* Set page size: */
+ if (ib_wr->wr.fast_reg.page_shift == 12) {
+ wqe_misc |= NES_IWARP_SQ_FMR_WQE_PAGE_SIZE_4K;
+ } else if (ib_wr->wr.fast_reg.page_shift == 21) {
+ wqe_misc |= NES_IWARP_SQ_FMR_WQE_PAGE_SIZE_2M;
+ } else {
+ nes_debug(NES_DBG_IW_TX, "Invalid page shift,"
+ " ib_wr=%u, max=1\n", ib_wr->num_sge);
+ err = -EINVAL;
+ break;
+ }
+ /* Set access_flags */
+ wqe_misc |= NES_IWARP_SQ_FMR_WQE_RIGHTS_ENABLE_LOCAL_READ;
+ if (flags & IB_ACCESS_LOCAL_WRITE)
+ wqe_misc |= NES_IWARP_SQ_FMR_WQE_RIGHTS_ENABLE_LOCAL_WRITE;
+
+ if (flags & IB_ACCESS_REMOTE_WRITE)
+ wqe_misc |= NES_IWARP_SQ_FMR_WQE_RIGHTS_ENABLE_REMOTE_WRITE;
+
+ if (flags & IB_ACCESS_REMOTE_READ)
+ wqe_misc |= NES_IWARP_SQ_FMR_WQE_RIGHTS_ENABLE_REMOTE_READ;
+
+ if (flags & IB_ACCESS_MW_BIND)
+ wqe_misc |= NES_IWARP_SQ_FMR_WQE_RIGHTS_ENABLE_WINDOW_BIND;
+
+ /* Fill in PBL info: */
+ if (ib_wr->wr.fast_reg.page_list_len >
+ pnesfrpl->ibfrpl.max_page_list_len) {
+ nes_debug(NES_DBG_IW_TX, "Invalid page list length,"
+ " ib_wr=%p, value=%u, max=%u\n",
+ ib_wr, ib_wr->wr.fast_reg.page_list_len,
+ pnesfrpl->ibfrpl.max_page_list_len);
+ err = -EINVAL;
+ break;
+ }
+
+ set_wqe_64bit_value(wqe->wqe_words,
+ NES_IWARP_SQ_FMR_WQE_PBL_ADDR_LOW_IDX,
+ pnesfrpl->nes_wqe_pbl.paddr);
+
+ set_wqe_32bit_value(wqe->wqe_words,
+ NES_IWARP_SQ_FMR_WQE_PBL_LENGTH_IDX,
+ ib_wr->wr.fast_reg.page_list_len * 8);
+
+ for (i = 0; i < ib_wr->wr.fast_reg.page_list_len; i++)
+ dst_page_list[i] = cpu_to_le64(src_page_list[i]);
+
+ nes_debug(NES_DBG_IW_TX, "SQ_FMR: iova_start: %p, "
+ "length: %d, rkey: %0x, pgl_paddr: %p, "
+ "page_list_len: %u, wqe_misc: %x\n",
+ (void *)ib_wr->wr.fast_reg.iova_start,
+ ib_wr->wr.fast_reg.length,
+ ib_wr->wr.fast_reg.rkey,
+ (void *)pnesfrpl->nes_wqe_pbl.paddr,
+ ib_wr->wr.fast_reg.page_list_len,
+ wqe_misc);
+ break;
}
+ default:
+ /* error */
+ err = -EINVAL;
+ break;
+ }
+
+ if (err)
+ break;
+
+ if ((ib_wr->send_flags & IB_SEND_SIGNALED) || nesqp->sig_all)
+ wqe_misc |= NES_IWARP_SQ_WQE_SIGNALED_COMPL;
+
wqe->wqe_words[NES_IWARP_SQ_WQE_MISC_IDX] = cpu_to_le32(wqe_misc);
ib_wr = ib_wr->next;
@@ -3522,6 +3528,7 @@
spin_unlock_irqrestore(&nesqp->lock, flags);
+out:
if (err)
*bad_wr = ib_wr;
return err;
@@ -3548,8 +3555,10 @@
u32 counter;
u32 total_payload_length;
- if (nesqp->ibqp_state > IB_QPS_RTS)
- return -EINVAL;
+ if (nesqp->ibqp_state > IB_QPS_RTS) {
+ err = -EINVAL;
+ goto out;
+ }
spin_lock_irqsave(&nesqp->lock, flags);
@@ -3612,6 +3621,7 @@
spin_unlock_irqrestore(&nesqp->lock, flags);
+out:
if (err)
*bad_wr = ib_wr;
return err;
@@ -3720,6 +3730,12 @@
nes_debug(NES_DBG_CQ, "Operation = Send.\n");
entry->opcode = IB_WC_SEND;
break;
+ case NES_IWARP_SQ_OP_LOCINV:
+ entry->opcode = IB_WR_LOCAL_INV;
+ break;
+ case NES_IWARP_SQ_OP_FAST_REG:
+ entry->opcode = IB_WC_FAST_REG_MR;
+ break;
}
nesqp->hwqp.sq_tail = (wqe_index+1)&(nesqp->hwqp.sq_size - 1);
@@ -3890,10 +3906,9 @@
nesibdev->ibdev.dealloc_mw = nes_dealloc_mw;
nesibdev->ibdev.bind_mw = nes_bind_mw;
- nesibdev->ibdev.alloc_fmr = nes_alloc_fmr;
- nesibdev->ibdev.unmap_fmr = nes_unmap_fmr;
- nesibdev->ibdev.dealloc_fmr = nes_dealloc_fmr;
- nesibdev->ibdev.map_phys_fmr = nes_map_phys_fmr;
+ nesibdev->ibdev.alloc_fast_reg_mr = nes_alloc_fast_reg_mr;
+ nesibdev->ibdev.alloc_fast_reg_page_list = nes_alloc_fast_reg_page_list;
+ nesibdev->ibdev.free_fast_reg_page_list = nes_free_fast_reg_page_list;
nesibdev->ibdev.attach_mcast = nes_multicast_attach;
nesibdev->ibdev.detach_mcast = nes_multicast_detach;
diff --git a/drivers/infiniband/hw/nes/nes_verbs.h b/drivers/infiniband/hw/nes/nes_verbs.h
index 89822d7..2df9993e 100644
--- a/drivers/infiniband/hw/nes/nes_verbs.h
+++ b/drivers/infiniband/hw/nes/nes_verbs.h
@@ -1,5 +1,5 @@
/*
- * Copyright (c) 2006 - 2009 Intel-NE, Inc. All rights reserved.
+ * Copyright (c) 2006 - 2009 Intel Corporation. All rights reserved.
* Copyright (c) 2005 Open Grid Computing, Inc. All rights reserved.
*
* This software is available to you under a choice of one of two
@@ -135,19 +135,15 @@
struct ib_qp ibqp;
void *allocated_buffer;
struct iw_cm_id *cm_id;
- struct workqueue_struct *wq;
struct nes_cq *nesscq;
struct nes_cq *nesrcq;
struct nes_pd *nespd;
void *cm_node; /* handle of the node this QP is associated with */
struct ietf_mpa_frame *ietf_frame;
dma_addr_t ietf_frame_pbase;
- wait_queue_head_t state_waitq;
struct ib_mr *lsmm_mr;
- unsigned long socket;
struct nes_hw_qp hwqp;
struct work_struct work;
- struct work_struct ae_work;
enum ib_qp_state ibqp_state;
u32 iwarp_state;
u32 hte_index;
@@ -165,19 +161,20 @@
struct page *page;
struct timer_list terminate_timer;
enum ib_event_type terminate_eventtype;
- wait_queue_head_t kick_waitq;
- u16 in_disconnect;
+ u16 active_conn:1;
+ u16 skip_lsmm:1;
+ u16 user_mode:1;
+ u16 hte_added:1;
+ u16 flush_issued:1;
+ u16 destroyed:1;
+ u16 sig_all:1;
+ u16 rsvd:9;
u16 private_data_len;
u16 term_sq_flush_code;
u16 term_rq_flush_code;
- u8 active_conn;
- u8 skip_lsmm;
- u8 user_mode;
- u8 hte_added;
u8 hw_iwarp_state;
- u8 flush_issued;
u8 hw_tcp_state;
u8 term_flags;
- u8 destroyed;
+ u8 sq_kmapped;
};
#endif /* NES_VERBS_H */
diff --git a/drivers/infiniband/ulp/ipoib/ipoib_main.c b/drivers/infiniband/ulp/ipoib/ipoib_main.c
index 2bf5116..df3eb8c 100644
--- a/drivers/infiniband/ulp/ipoib/ipoib_main.c
+++ b/drivers/infiniband/ulp/ipoib/ipoib_main.c
@@ -884,6 +884,7 @@
neigh->neighbour = neighbour;
neigh->dev = dev;
+ memset(&neigh->dgid.raw, 0, sizeof (union ib_gid));
*to_ipoib_neigh(neighbour) = neigh;
skb_queue_head_init(&neigh->queue);
ipoib_cm_set(neigh, NULL);
diff --git a/drivers/infiniband/ulp/iser/iser_memory.c b/drivers/infiniband/ulp/iser/iser_memory.c
index b9453d0..274c883 100644
--- a/drivers/infiniband/ulp/iser/iser_memory.c
+++ b/drivers/infiniband/ulp/iser/iser_memory.c
@@ -209,6 +209,8 @@
mem_copy->copy_buf = NULL;
}
+#define IS_4K_ALIGNED(addr) ((((unsigned long)(addr)) & ~MASK_4K) == 0)
+
/**
* iser_sg_to_page_vec - Translates scatterlist entries to physical addresses
* and returns the length of resulting physical address array (may be less than
@@ -221,62 +223,52 @@
* where --few fragments of the same page-- are present in the SG as
* consecutive elements. Also, it handles one entry SG.
*/
+
static int iser_sg_to_page_vec(struct iser_data_buf *data,
struct iser_page_vec *page_vec,
struct ib_device *ibdev)
{
- struct scatterlist *sgl = (struct scatterlist *)data->buf;
- struct scatterlist *sg;
- u64 first_addr, last_addr, page;
- int end_aligned;
- unsigned int cur_page = 0;
+ struct scatterlist *sg, *sgl = (struct scatterlist *)data->buf;
+ u64 start_addr, end_addr, page, chunk_start = 0;
unsigned long total_sz = 0;
- int i;
+ unsigned int dma_len;
+ int i, new_chunk, cur_page, last_ent = data->dma_nents - 1;
/* compute the offset of first element */
page_vec->offset = (u64) sgl[0].offset & ~MASK_4K;
+ new_chunk = 1;
+ cur_page = 0;
for_each_sg(sgl, sg, data->dma_nents, i) {
- unsigned int dma_len = ib_sg_dma_len(ibdev, sg);
-
+ start_addr = ib_sg_dma_address(ibdev, sg);
+ if (new_chunk)
+ chunk_start = start_addr;
+ dma_len = ib_sg_dma_len(ibdev, sg);
+ end_addr = start_addr + dma_len;
total_sz += dma_len;
- first_addr = ib_sg_dma_address(ibdev, sg);
- last_addr = first_addr + dma_len;
-
- end_aligned = !(last_addr & ~MASK_4K);
-
- /* continue to collect page fragments till aligned or SG ends */
- while (!end_aligned && (i + 1 < data->dma_nents)) {
- sg = sg_next(sg);
- i++;
- dma_len = ib_sg_dma_len(ibdev, sg);
- total_sz += dma_len;
- last_addr = ib_sg_dma_address(ibdev, sg) + dma_len;
- end_aligned = !(last_addr & ~MASK_4K);
+ /* collect page fragments until aligned or end of SG list */
+ if (!IS_4K_ALIGNED(end_addr) && i < last_ent) {
+ new_chunk = 0;
+ continue;
}
+ new_chunk = 1;
- /* handle the 1st page in the 1st DMA element */
- if (cur_page == 0) {
- page = first_addr & MASK_4K;
- page_vec->pages[cur_page] = page;
- cur_page++;
+ /* address of the first page in the contiguous chunk;
+ masking relevant for the very first SG entry,
+ which might be unaligned */
+ page = chunk_start & MASK_4K;
+ do {
+ page_vec->pages[cur_page++] = page;
page += SIZE_4K;
- } else
- page = first_addr;
-
- for (; page < last_addr; page += SIZE_4K) {
- page_vec->pages[cur_page] = page;
- cur_page++;
- }
-
+ } while (page < end_addr);
}
+
page_vec->data_size = total_sz;
iser_dbg("page_vec->data_size:%d cur_page %d\n", page_vec->data_size,cur_page);
return cur_page;
}
-#define IS_4K_ALIGNED(addr) ((((unsigned long)addr) & ~MASK_4K) == 0)
/**
* iser_data_buf_aligned_len - Tries to determine the maximal correctly aligned
@@ -284,42 +276,40 @@
* the number of entries which are aligned correctly. Supports the case where
* consecutive SG elements are actually fragments of the same physcial page.
*/
-static unsigned int iser_data_buf_aligned_len(struct iser_data_buf *data,
- struct ib_device *ibdev)
+static int iser_data_buf_aligned_len(struct iser_data_buf *data,
+ struct ib_device *ibdev)
{
- struct scatterlist *sgl, *sg;
- u64 end_addr, next_addr;
- int i, cnt;
- unsigned int ret_len = 0;
+ struct scatterlist *sgl, *sg, *next_sg = NULL;
+ u64 start_addr, end_addr;
+ int i, ret_len, start_check = 0;
+
+ if (data->dma_nents == 1)
+ return 1;
sgl = (struct scatterlist *)data->buf;
+ start_addr = ib_sg_dma_address(ibdev, sgl);
- cnt = 0;
for_each_sg(sgl, sg, data->dma_nents, i) {
- /* iser_dbg("Checking sg iobuf [%d]: phys=0x%08lX "
- "offset: %ld sz: %ld\n", i,
- (unsigned long)sg_phys(sg),
- (unsigned long)sg->offset,
- (unsigned long)sg->length); */
- end_addr = ib_sg_dma_address(ibdev, sg) +
- ib_sg_dma_len(ibdev, sg);
- /* iser_dbg("Checking sg iobuf end address "
- "0x%08lX\n", end_addr); */
- if (i + 1 < data->dma_nents) {
- next_addr = ib_sg_dma_address(ibdev, sg_next(sg));
- /* are i, i+1 fragments of the same page? */
- if (end_addr == next_addr) {
- cnt++;
- continue;
- } else if (!IS_4K_ALIGNED(end_addr)) {
- ret_len = cnt + 1;
- break;
- }
- }
- cnt++;
+ if (start_check && !IS_4K_ALIGNED(start_addr))
+ break;
+
+ next_sg = sg_next(sg);
+ if (!next_sg)
+ break;
+
+ end_addr = start_addr + ib_sg_dma_len(ibdev, sg);
+ start_addr = ib_sg_dma_address(ibdev, next_sg);
+
+ if (end_addr == start_addr) {
+ start_check = 0;
+ continue;
+ } else
+ start_check = 1;
+
+ if (!IS_4K_ALIGNED(end_addr))
+ break;
}
- if (i == data->dma_nents)
- ret_len = cnt; /* loop ended */
+ ret_len = (next_sg) ? i : i+1;
iser_dbg("Found %d aligned entries out of %d in sg:0x%p\n",
ret_len, data->dma_nents, data);
return ret_len;
diff --git a/drivers/net/mlx4/fw.c b/drivers/net/mlx4/fw.c
index 3c16602..04f42ae 100644
--- a/drivers/net/mlx4/fw.c
+++ b/drivers/net/mlx4/fw.c
@@ -90,6 +90,7 @@
[ 9] = "Q_Key violation counter",
[10] = "VMM",
[12] = "DPDP",
+ [15] = "Big LSO headers",
[16] = "MW support",
[17] = "APM support",
[18] = "Atomic ops support",
@@ -235,7 +236,7 @@
MLX4_GET(field, outbox, QUERY_DEV_CAP_MAX_MPT_OFFSET);
dev_cap->max_mpts = 1 << (field & 0x3f);
MLX4_GET(field, outbox, QUERY_DEV_CAP_RSVD_EQ_OFFSET);
- dev_cap->reserved_eqs = 1 << (field & 0xf);
+ dev_cap->reserved_eqs = field & 0xf;
MLX4_GET(field, outbox, QUERY_DEV_CAP_MAX_EQ_OFFSET);
dev_cap->max_eqs = 1 << (field & 0xf);
MLX4_GET(field, outbox, QUERY_DEV_CAP_RSVD_MTT_OFFSET);
diff --git a/include/linux/mlx4/device.h b/include/linux/mlx4/device.h
index ce7cc6c..e92d1bf 100644
--- a/include/linux/mlx4/device.h
+++ b/include/linux/mlx4/device.h
@@ -61,6 +61,7 @@
MLX4_DEV_CAP_FLAG_BAD_PKEY_CNTR = 1 << 8,
MLX4_DEV_CAP_FLAG_BAD_QKEY_CNTR = 1 << 9,
MLX4_DEV_CAP_FLAG_DPDP = 1 << 12,
+ MLX4_DEV_CAP_FLAG_BLH = 1 << 15,
MLX4_DEV_CAP_FLAG_MEM_WINDOW = 1 << 16,
MLX4_DEV_CAP_FLAG_APM = 1 << 17,
MLX4_DEV_CAP_FLAG_ATOMIC = 1 << 18,
diff --git a/include/rdma/ib_addr.h b/include/rdma/ib_addr.h
index 483057b..fa0d52b 100644
--- a/include/rdma/ib_addr.h
+++ b/include/rdma/ib_addr.h
@@ -36,6 +36,7 @@
#include <linux/in.h>
#include <linux/in6.h>
+#include <linux/if_arp.h>
#include <linux/netdevice.h>
#include <linux/socket.h>
#include <rdma/ib_verbs.h>
@@ -60,8 +61,8 @@
unsigned char src_dev_addr[MAX_ADDR_LEN];
unsigned char dst_dev_addr[MAX_ADDR_LEN];
unsigned char broadcast[MAX_ADDR_LEN];
- enum rdma_node_type dev_type;
- struct net_device *src_dev;
+ unsigned short dev_type;
+ int bound_dev_if;
};
/**
@@ -121,40 +122,29 @@
memcpy(gid, dev_addr->broadcast + 4, sizeof *gid);
}
-static inline void ib_addr_get_sgid(struct rdma_dev_addr *dev_addr,
- union ib_gid *gid)
+static inline int rdma_addr_gid_offset(struct rdma_dev_addr *dev_addr)
{
- memcpy(gid, dev_addr->src_dev_addr + 4, sizeof *gid);
+ return dev_addr->dev_type == ARPHRD_INFINIBAND ? 4 : 0;
}
-static inline void ib_addr_set_sgid(struct rdma_dev_addr *dev_addr,
- union ib_gid *gid)
+static inline void rdma_addr_get_sgid(struct rdma_dev_addr *dev_addr, union ib_gid *gid)
{
- memcpy(dev_addr->src_dev_addr + 4, gid, sizeof *gid);
+ memcpy(gid, dev_addr->src_dev_addr + rdma_addr_gid_offset(dev_addr), sizeof *gid);
}
-static inline void ib_addr_get_dgid(struct rdma_dev_addr *dev_addr,
- union ib_gid *gid)
+static inline void rdma_addr_set_sgid(struct rdma_dev_addr *dev_addr, union ib_gid *gid)
{
- memcpy(gid, dev_addr->dst_dev_addr + 4, sizeof *gid);
+ memcpy(dev_addr->src_dev_addr + rdma_addr_gid_offset(dev_addr), gid, sizeof *gid);
}
-static inline void ib_addr_set_dgid(struct rdma_dev_addr *dev_addr,
- union ib_gid *gid)
+static inline void rdma_addr_get_dgid(struct rdma_dev_addr *dev_addr, union ib_gid *gid)
{
- memcpy(dev_addr->dst_dev_addr + 4, gid, sizeof *gid);
+ memcpy(gid, dev_addr->dst_dev_addr + rdma_addr_gid_offset(dev_addr), sizeof *gid);
}
-static inline void iw_addr_get_sgid(struct rdma_dev_addr *dev_addr,
- union ib_gid *gid)
+static inline void rdma_addr_set_dgid(struct rdma_dev_addr *dev_addr, union ib_gid *gid)
{
- memcpy(gid, dev_addr->src_dev_addr, sizeof *gid);
-}
-
-static inline void iw_addr_get_dgid(struct rdma_dev_addr *dev_addr,
- union ib_gid *gid)
-{
- memcpy(gid, dev_addr->dst_dev_addr, sizeof *gid);
+ memcpy(dev_addr->dst_dev_addr + rdma_addr_gid_offset(dev_addr), gid, sizeof *gid);
}
#endif /* IB_ADDR_H */
diff --git a/include/rdma/ib_sa.h b/include/rdma/ib_sa.h
index 3841c1a..1082afa 100644
--- a/include/rdma/ib_sa.h
+++ b/include/rdma/ib_sa.h
@@ -379,4 +379,10 @@
struct ib_sa_path_rec *rec,
struct ib_ah_attr *ah_attr);
+/**
+ * ib_sa_unpack_path - Convert a path record from MAD format to struct
+ * ib_sa_path_rec.
+ */
+void ib_sa_unpack_path(void *attribute, struct ib_sa_path_rec *rec);
+
#endif /* IB_SA_H */
diff --git a/include/rdma/ib_user_sa.h b/include/rdma/ib_user_sa.h
index 6591201..cfc7c9b 100644
--- a/include/rdma/ib_user_sa.h
+++ b/include/rdma/ib_user_sa.h
@@ -35,6 +35,22 @@
#include <linux/types.h>
+enum {
+ IB_PATH_GMP = 1,
+ IB_PATH_PRIMARY = (1<<1),
+ IB_PATH_ALTERNATE = (1<<2),
+ IB_PATH_OUTBOUND = (1<<3),
+ IB_PATH_INBOUND = (1<<4),
+ IB_PATH_INBOUND_REVERSE = (1<<5),
+ IB_PATH_BIDIRECTIONAL = IB_PATH_OUTBOUND | IB_PATH_INBOUND_REVERSE
+};
+
+struct ib_path_rec_data {
+ __u32 flags;
+ __u32 reserved;
+ __u32 path_rec[16];
+};
+
struct ib_user_path_rec {
__u8 dgid[16];
__u8 sgid[16];
diff --git a/include/rdma/ib_verbs.h b/include/rdma/ib_verbs.h
index c179318..09509ed 100644
--- a/include/rdma/ib_verbs.h
+++ b/include/rdma/ib_verbs.h
@@ -1425,6 +1425,11 @@
* @send_wr: A list of work requests to post on the send queue.
* @bad_send_wr: On an immediate failure, this parameter will reference
* the work request that failed to be posted on the QP.
+ *
+ * While IBA Vol. 1 section 11.4.1.1 specifies that if an immediate
+ * error is returned, the QP state shall not be affected,
+ * ib_post_send() will return an immediate error after queueing any
+ * earlier work requests in the list.
*/
static inline int ib_post_send(struct ib_qp *qp,
struct ib_send_wr *send_wr,
diff --git a/include/rdma/rdma_user_cm.h b/include/rdma/rdma_user_cm.h
index c557054..1d16502 100644
--- a/include/rdma/rdma_user_cm.h
+++ b/include/rdma/rdma_user_cm.h
@@ -215,12 +215,14 @@
/* Option levels */
enum {
- RDMA_OPTION_ID = 0
+ RDMA_OPTION_ID = 0,
+ RDMA_OPTION_IB = 1
};
/* Option details */
enum {
- RDMA_OPTION_ID_TOS = 0
+ RDMA_OPTION_ID_TOS = 0,
+ RDMA_OPTION_IB_PATH = 1
};
struct rdma_ucm_set_option {
diff --git a/net/rds/ib.c b/net/rds/ib.c
index 536ebe5..3b89923 100644
--- a/net/rds/ib.c
+++ b/net/rds/ib.c
@@ -182,8 +182,8 @@
ic = conn->c_transport_data;
dev_addr = &ic->i_cm_id->route.addr.dev_addr;
- ib_addr_get_sgid(dev_addr, (union ib_gid *) &iinfo->src_gid);
- ib_addr_get_dgid(dev_addr, (union ib_gid *) &iinfo->dst_gid);
+ rdma_addr_get_sgid(dev_addr, (union ib_gid *) &iinfo->src_gid);
+ rdma_addr_get_dgid(dev_addr, (union ib_gid *) &iinfo->dst_gid);
rds_ibdev = ib_get_client_data(ic->i_cm_id->device, &rds_ib_client);
iinfo->max_send_wr = ic->i_send_ring.w_nr;
diff --git a/net/rds/iw.c b/net/rds/iw.c
index db224f7..b28fa85 100644
--- a/net/rds/iw.c
+++ b/net/rds/iw.c
@@ -184,8 +184,8 @@
ic = conn->c_transport_data;
dev_addr = &ic->i_cm_id->route.addr.dev_addr;
- ib_addr_get_sgid(dev_addr, (union ib_gid *) &iinfo->src_gid);
- ib_addr_get_dgid(dev_addr, (union ib_gid *) &iinfo->dst_gid);
+ rdma_addr_get_sgid(dev_addr, (union ib_gid *) &iinfo->src_gid);
+ rdma_addr_get_dgid(dev_addr, (union ib_gid *) &iinfo->dst_gid);
rds_iwdev = ib_get_client_data(ic->i_cm_id->device, &rds_iw_client);
iinfo->max_send_wr = ic->i_send_ring.w_nr;