Merge branches 'cxgb4', 'flowsteer', 'ipoib', 'iser', 'mlx4', 'ocrdma' and 'qib' into for-next
diff --git a/drivers/infiniband/core/uverbs.h b/drivers/infiniband/core/uverbs.h
index 0fcd7aa..d040b87 100644
--- a/drivers/infiniband/core/uverbs.h
+++ b/drivers/infiniband/core/uverbs.h
@@ -135,6 +135,7 @@
struct ib_uqp_object {
struct ib_uevent_object uevent;
struct list_head mcast_list;
+ struct ib_uxrcd_object *uxrcd;
};
struct ib_ucq_object {
@@ -155,6 +156,7 @@
extern struct idr ib_uverbs_qp_idr;
extern struct idr ib_uverbs_srq_idr;
extern struct idr ib_uverbs_xrcd_idr;
+extern struct idr ib_uverbs_rule_idr;
void idr_remove_uobj(struct idr *idp, struct ib_uobject *uobj);
@@ -215,5 +217,7 @@
IB_UVERBS_DECLARE_CMD(create_xsrq);
IB_UVERBS_DECLARE_CMD(open_xrcd);
IB_UVERBS_DECLARE_CMD(close_xrcd);
+IB_UVERBS_DECLARE_CMD(create_flow);
+IB_UVERBS_DECLARE_CMD(destroy_flow);
#endif /* UVERBS_H */
diff --git a/drivers/infiniband/core/uverbs_cmd.c b/drivers/infiniband/core/uverbs_cmd.c
index b3c07b0..f2b81b9 100644
--- a/drivers/infiniband/core/uverbs_cmd.c
+++ b/drivers/infiniband/core/uverbs_cmd.c
@@ -54,6 +54,7 @@
static struct uverbs_lock_class ah_lock_class = { .name = "AH-uobj" };
static struct uverbs_lock_class srq_lock_class = { .name = "SRQ-uobj" };
static struct uverbs_lock_class xrcd_lock_class = { .name = "XRCD-uobj" };
+static struct uverbs_lock_class rule_lock_class = { .name = "RULE-uobj" };
#define INIT_UDATA(udata, ibuf, obuf, ilen, olen) \
do { \
@@ -330,6 +331,7 @@
INIT_LIST_HEAD(&ucontext->srq_list);
INIT_LIST_HEAD(&ucontext->ah_list);
INIT_LIST_HEAD(&ucontext->xrcd_list);
+ INIT_LIST_HEAD(&ucontext->rule_list);
ucontext->closing = 0;
resp.num_comp_vectors = file->device->num_comp_vectors;
@@ -1526,7 +1528,7 @@
(unsigned long) cmd.response + sizeof resp,
in_len - sizeof cmd, out_len - sizeof resp);
- obj = kmalloc(sizeof *obj, GFP_KERNEL);
+ obj = kzalloc(sizeof *obj, GFP_KERNEL);
if (!obj)
return -ENOMEM;
@@ -1642,8 +1644,13 @@
goto err_copy;
}
- if (xrcd)
+ if (xrcd) {
+ obj->uxrcd = container_of(xrcd_uobj, struct ib_uxrcd_object,
+ uobject);
+ atomic_inc(&obj->uxrcd->refcnt);
put_xrcd_read(xrcd_uobj);
+ }
+
if (pd)
put_pd_read(pd);
if (scq)
@@ -1753,6 +1760,8 @@
goto err_remove;
}
+ obj->uxrcd = container_of(xrcd_uobj, struct ib_uxrcd_object, uobject);
+ atomic_inc(&obj->uxrcd->refcnt);
put_xrcd_read(xrcd_uobj);
mutex_lock(&file->mutex);
@@ -2019,6 +2028,9 @@
if (ret)
return ret;
+ if (obj->uxrcd)
+ atomic_dec(&obj->uxrcd->refcnt);
+
idr_remove_uobj(&ib_uverbs_qp_idr, uobj);
mutex_lock(&file->mutex);
@@ -2587,6 +2599,232 @@
return ret ? ret : in_len;
}
+static int kern_spec_to_ib_spec(struct ib_kern_spec *kern_spec,
+ union ib_flow_spec *ib_spec)
+{
+ ib_spec->type = kern_spec->type;
+
+ switch (ib_spec->type) {
+ case IB_FLOW_SPEC_ETH:
+ ib_spec->eth.size = sizeof(struct ib_flow_spec_eth);
+ if (ib_spec->eth.size != kern_spec->eth.size)
+ return -EINVAL;
+ memcpy(&ib_spec->eth.val, &kern_spec->eth.val,
+ sizeof(struct ib_flow_eth_filter));
+ memcpy(&ib_spec->eth.mask, &kern_spec->eth.mask,
+ sizeof(struct ib_flow_eth_filter));
+ break;
+ case IB_FLOW_SPEC_IPV4:
+ ib_spec->ipv4.size = sizeof(struct ib_flow_spec_ipv4);
+ if (ib_spec->ipv4.size != kern_spec->ipv4.size)
+ return -EINVAL;
+ memcpy(&ib_spec->ipv4.val, &kern_spec->ipv4.val,
+ sizeof(struct ib_flow_ipv4_filter));
+ memcpy(&ib_spec->ipv4.mask, &kern_spec->ipv4.mask,
+ sizeof(struct ib_flow_ipv4_filter));
+ break;
+ case IB_FLOW_SPEC_TCP:
+ case IB_FLOW_SPEC_UDP:
+ ib_spec->tcp_udp.size = sizeof(struct ib_flow_spec_tcp_udp);
+ if (ib_spec->tcp_udp.size != kern_spec->tcp_udp.size)
+ return -EINVAL;
+ memcpy(&ib_spec->tcp_udp.val, &kern_spec->tcp_udp.val,
+ sizeof(struct ib_flow_tcp_udp_filter));
+ memcpy(&ib_spec->tcp_udp.mask, &kern_spec->tcp_udp.mask,
+ sizeof(struct ib_flow_tcp_udp_filter));
+ break;
+ default:
+ return -EINVAL;
+ }
+ return 0;
+}
+
+ssize_t ib_uverbs_create_flow(struct ib_uverbs_file *file,
+ const char __user *buf, int in_len,
+ int out_len)
+{
+ struct ib_uverbs_create_flow cmd;
+ struct ib_uverbs_create_flow_resp resp;
+ struct ib_uobject *uobj;
+ struct ib_flow *flow_id;
+ struct ib_kern_flow_attr *kern_flow_attr;
+ struct ib_flow_attr *flow_attr;
+ struct ib_qp *qp;
+ int err = 0;
+ void *kern_spec;
+ void *ib_spec;
+ int i;
+ int kern_attr_size;
+
+ if (out_len < sizeof(resp))
+ return -ENOSPC;
+
+ if (copy_from_user(&cmd, buf, sizeof(cmd)))
+ return -EFAULT;
+
+ if (cmd.comp_mask)
+ return -EINVAL;
+
+ if ((cmd.flow_attr.type == IB_FLOW_ATTR_SNIFFER &&
+ !capable(CAP_NET_ADMIN)) || !capable(CAP_NET_RAW))
+ return -EPERM;
+
+ if (cmd.flow_attr.num_of_specs < 0 ||
+ cmd.flow_attr.num_of_specs > IB_FLOW_SPEC_SUPPORT_LAYERS)
+ return -EINVAL;
+
+ kern_attr_size = cmd.flow_attr.size - sizeof(cmd) -
+ sizeof(struct ib_uverbs_cmd_hdr_ex);
+
+ if (cmd.flow_attr.size < 0 || cmd.flow_attr.size > in_len ||
+ kern_attr_size < 0 || kern_attr_size >
+ (cmd.flow_attr.num_of_specs * sizeof(struct ib_kern_spec)))
+ return -EINVAL;
+
+ if (cmd.flow_attr.num_of_specs) {
+ kern_flow_attr = kmalloc(cmd.flow_attr.size, GFP_KERNEL);
+ if (!kern_flow_attr)
+ return -ENOMEM;
+
+ memcpy(kern_flow_attr, &cmd.flow_attr, sizeof(*kern_flow_attr));
+ if (copy_from_user(kern_flow_attr + 1, buf + sizeof(cmd),
+ kern_attr_size)) {
+ err = -EFAULT;
+ goto err_free_attr;
+ }
+ } else {
+ kern_flow_attr = &cmd.flow_attr;
+ kern_attr_size = sizeof(cmd.flow_attr);
+ }
+
+ uobj = kmalloc(sizeof(*uobj), GFP_KERNEL);
+ if (!uobj) {
+ err = -ENOMEM;
+ goto err_free_attr;
+ }
+ init_uobj(uobj, 0, file->ucontext, &rule_lock_class);
+ down_write(&uobj->mutex);
+
+ qp = idr_read_qp(cmd.qp_handle, file->ucontext);
+ if (!qp) {
+ err = -EINVAL;
+ goto err_uobj;
+ }
+
+ flow_attr = kmalloc(cmd.flow_attr.size, GFP_KERNEL);
+ if (!flow_attr) {
+ err = -ENOMEM;
+ goto err_put;
+ }
+
+ flow_attr->type = kern_flow_attr->type;
+ flow_attr->priority = kern_flow_attr->priority;
+ flow_attr->num_of_specs = kern_flow_attr->num_of_specs;
+ flow_attr->port = kern_flow_attr->port;
+ flow_attr->flags = kern_flow_attr->flags;
+ flow_attr->size = sizeof(*flow_attr);
+
+ kern_spec = kern_flow_attr + 1;
+ ib_spec = flow_attr + 1;
+ for (i = 0; i < flow_attr->num_of_specs && kern_attr_size > 0; i++) {
+ err = kern_spec_to_ib_spec(kern_spec, ib_spec);
+ if (err)
+ goto err_free;
+ flow_attr->size +=
+ ((union ib_flow_spec *) ib_spec)->size;
+ kern_attr_size -= ((struct ib_kern_spec *) kern_spec)->size;
+ kern_spec += ((struct ib_kern_spec *) kern_spec)->size;
+ ib_spec += ((union ib_flow_spec *) ib_spec)->size;
+ }
+ if (kern_attr_size) {
+ pr_warn("create flow failed, %d bytes left from uverb cmd\n",
+ kern_attr_size);
+ goto err_free;
+ }
+ flow_id = ib_create_flow(qp, flow_attr, IB_FLOW_DOMAIN_USER);
+ if (IS_ERR(flow_id)) {
+ err = PTR_ERR(flow_id);
+ goto err_free;
+ }
+ flow_id->qp = qp;
+ flow_id->uobject = uobj;
+ uobj->object = flow_id;
+
+ err = idr_add_uobj(&ib_uverbs_rule_idr, uobj);
+ if (err)
+ goto destroy_flow;
+
+ memset(&resp, 0, sizeof(resp));
+ resp.flow_handle = uobj->id;
+
+ if (copy_to_user((void __user *)(unsigned long) cmd.response,
+ &resp, sizeof(resp))) {
+ err = -EFAULT;
+ goto err_copy;
+ }
+
+ put_qp_read(qp);
+ mutex_lock(&file->mutex);
+ list_add_tail(&uobj->list, &file->ucontext->rule_list);
+ mutex_unlock(&file->mutex);
+
+ uobj->live = 1;
+
+ up_write(&uobj->mutex);
+ kfree(flow_attr);
+ if (cmd.flow_attr.num_of_specs)
+ kfree(kern_flow_attr);
+ return in_len;
+err_copy:
+ idr_remove_uobj(&ib_uverbs_rule_idr, uobj);
+destroy_flow:
+ ib_destroy_flow(flow_id);
+err_free:
+ kfree(flow_attr);
+err_put:
+ put_qp_read(qp);
+err_uobj:
+ put_uobj_write(uobj);
+err_free_attr:
+ if (cmd.flow_attr.num_of_specs)
+ kfree(kern_flow_attr);
+ return err;
+}
+
+ssize_t ib_uverbs_destroy_flow(struct ib_uverbs_file *file,
+ const char __user *buf, int in_len,
+ int out_len) {
+ struct ib_uverbs_destroy_flow cmd;
+ struct ib_flow *flow_id;
+ struct ib_uobject *uobj;
+ int ret;
+
+ if (copy_from_user(&cmd, buf, sizeof(cmd)))
+ return -EFAULT;
+
+ uobj = idr_write_uobj(&ib_uverbs_rule_idr, cmd.flow_handle,
+ file->ucontext);
+ if (!uobj)
+ return -EINVAL;
+ flow_id = uobj->object;
+
+ ret = ib_destroy_flow(flow_id);
+ if (!ret)
+ uobj->live = 0;
+
+ put_uobj_write(uobj);
+
+ idr_remove_uobj(&ib_uverbs_rule_idr, uobj);
+
+ mutex_lock(&file->mutex);
+ list_del(&uobj->list);
+ mutex_unlock(&file->mutex);
+
+ put_uobj(uobj);
+
+ return ret ? ret : in_len;
+}
+
static int __uverbs_create_xsrq(struct ib_uverbs_file *file,
struct ib_uverbs_create_xsrq *cmd,
struct ib_udata *udata)
@@ -2860,6 +3098,8 @@
struct ib_srq *srq;
struct ib_uevent_object *obj;
int ret = -EINVAL;
+ struct ib_usrq_object *us;
+ enum ib_srq_type srq_type;
if (copy_from_user(&cmd, buf, sizeof cmd))
return -EFAULT;
@@ -2869,6 +3109,7 @@
return -EINVAL;
srq = uobj->object;
obj = container_of(uobj, struct ib_uevent_object, uobject);
+ srq_type = srq->srq_type;
ret = ib_destroy_srq(srq);
if (!ret)
@@ -2879,6 +3120,11 @@
if (ret)
return ret;
+ if (srq_type == IB_SRQT_XRC) {
+ us = container_of(obj, struct ib_usrq_object, uevent);
+ atomic_dec(&us->uxrcd->refcnt);
+ }
+
idr_remove_uobj(&ib_uverbs_srq_idr, uobj);
mutex_lock(&file->mutex);
diff --git a/drivers/infiniband/core/uverbs_main.c b/drivers/infiniband/core/uverbs_main.c
index 2c6f0f2..75ad86c 100644
--- a/drivers/infiniband/core/uverbs_main.c
+++ b/drivers/infiniband/core/uverbs_main.c
@@ -73,6 +73,7 @@
DEFINE_IDR(ib_uverbs_qp_idr);
DEFINE_IDR(ib_uverbs_srq_idr);
DEFINE_IDR(ib_uverbs_xrcd_idr);
+DEFINE_IDR(ib_uverbs_rule_idr);
static DEFINE_SPINLOCK(map_lock);
static DECLARE_BITMAP(dev_map, IB_UVERBS_MAX_DEVICES);
@@ -113,7 +114,9 @@
[IB_USER_VERBS_CMD_OPEN_XRCD] = ib_uverbs_open_xrcd,
[IB_USER_VERBS_CMD_CLOSE_XRCD] = ib_uverbs_close_xrcd,
[IB_USER_VERBS_CMD_CREATE_XSRQ] = ib_uverbs_create_xsrq,
- [IB_USER_VERBS_CMD_OPEN_QP] = ib_uverbs_open_qp
+ [IB_USER_VERBS_CMD_OPEN_QP] = ib_uverbs_open_qp,
+ [IB_USER_VERBS_CMD_CREATE_FLOW] = ib_uverbs_create_flow,
+ [IB_USER_VERBS_CMD_DESTROY_FLOW] = ib_uverbs_destroy_flow
};
static void ib_uverbs_add_one(struct ib_device *device);
@@ -212,6 +215,14 @@
kfree(uobj);
}
+ list_for_each_entry_safe(uobj, tmp, &context->rule_list, list) {
+ struct ib_flow *flow_id = uobj->object;
+
+ idr_remove_uobj(&ib_uverbs_rule_idr, uobj);
+ ib_destroy_flow(flow_id);
+ kfree(uobj);
+ }
+
list_for_each_entry_safe(uobj, tmp, &context->qp_list, list) {
struct ib_qp *qp = uobj->object;
struct ib_uqp_object *uqp =
@@ -583,9 +594,6 @@
if (copy_from_user(&hdr, buf, sizeof hdr))
return -EFAULT;
- if (hdr.in_words * 4 != count)
- return -EINVAL;
-
if (hdr.command >= ARRAY_SIZE(uverbs_cmd_table) ||
!uverbs_cmd_table[hdr.command])
return -EINVAL;
@@ -597,8 +605,30 @@
if (!(file->device->ib_dev->uverbs_cmd_mask & (1ull << hdr.command)))
return -ENOSYS;
- return uverbs_cmd_table[hdr.command](file, buf + sizeof hdr,
- hdr.in_words * 4, hdr.out_words * 4);
+ if (hdr.command >= IB_USER_VERBS_CMD_THRESHOLD) {
+ struct ib_uverbs_cmd_hdr_ex hdr_ex;
+
+ if (copy_from_user(&hdr_ex, buf, sizeof(hdr_ex)))
+ return -EFAULT;
+
+ if (((hdr_ex.in_words + hdr_ex.provider_in_words) * 4) != count)
+ return -EINVAL;
+
+ return uverbs_cmd_table[hdr.command](file,
+ buf + sizeof(hdr_ex),
+ (hdr_ex.in_words +
+ hdr_ex.provider_in_words) * 4,
+ (hdr_ex.out_words +
+ hdr_ex.provider_out_words) * 4);
+ } else {
+ if (hdr.in_words * 4 != count)
+ return -EINVAL;
+
+ return uverbs_cmd_table[hdr.command](file,
+ buf + sizeof(hdr),
+ hdr.in_words * 4,
+ hdr.out_words * 4);
+ }
}
static int ib_uverbs_mmap(struct file *filp, struct vm_area_struct *vma)
diff --git a/drivers/infiniband/core/verbs.c b/drivers/infiniband/core/verbs.c
index 22192de..a321df2 100644
--- a/drivers/infiniband/core/verbs.c
+++ b/drivers/infiniband/core/verbs.c
@@ -346,10 +346,13 @@
static void __ib_shared_qp_event_handler(struct ib_event *event, void *context)
{
struct ib_qp *qp = context;
+ unsigned long flags;
+ spin_lock_irqsave(&qp->device->event_handler_lock, flags);
list_for_each_entry(event->element.qp, &qp->open_list, open_list)
if (event->element.qp->event_handler)
event->element.qp->event_handler(event, event->element.qp->qp_context);
+ spin_unlock_irqrestore(&qp->device->event_handler_lock, flags);
}
static void __ib_insert_xrcd_qp(struct ib_xrcd *xrcd, struct ib_qp *qp)
@@ -1254,3 +1257,30 @@
return xrcd->device->dealloc_xrcd(xrcd);
}
EXPORT_SYMBOL(ib_dealloc_xrcd);
+
+struct ib_flow *ib_create_flow(struct ib_qp *qp,
+ struct ib_flow_attr *flow_attr,
+ int domain)
+{
+ struct ib_flow *flow_id;
+ if (!qp->device->create_flow)
+ return ERR_PTR(-ENOSYS);
+
+ flow_id = qp->device->create_flow(qp, flow_attr, domain);
+ if (!IS_ERR(flow_id))
+ atomic_inc(&qp->usecnt);
+ return flow_id;
+}
+EXPORT_SYMBOL(ib_create_flow);
+
+int ib_destroy_flow(struct ib_flow *flow_id)
+{
+ int err;
+ struct ib_qp *qp = flow_id->qp;
+
+ err = qp->device->destroy_flow(flow_id);
+ if (!err)
+ atomic_dec(&qp->usecnt);
+ return err;
+}
+EXPORT_SYMBOL(ib_destroy_flow);
diff --git a/drivers/infiniband/hw/mlx4/main.c b/drivers/infiniband/hw/mlx4/main.c
index a188d31..d6c5a73 100644
--- a/drivers/infiniband/hw/mlx4/main.c
+++ b/drivers/infiniband/hw/mlx4/main.c
@@ -54,6 +54,8 @@
#define DRV_VERSION "1.0"
#define DRV_RELDATE "April 4, 2008"
+#define MLX4_IB_FLOW_MAX_PRIO 0xFFF
+
MODULE_AUTHOR("Roland Dreier");
MODULE_DESCRIPTION("Mellanox ConnectX HCA InfiniBand driver");
MODULE_LICENSE("Dual BSD/GPL");
@@ -88,6 +90,25 @@
static union ib_gid zgid;
+static int check_flow_steering_support(struct mlx4_dev *dev)
+{
+ int ib_num_ports = 0;
+ int i;
+
+ mlx4_foreach_port(i, dev, MLX4_PORT_TYPE_IB)
+ ib_num_ports++;
+
+ if (dev->caps.steering_mode == MLX4_STEERING_MODE_DEVICE_MANAGED) {
+ if (ib_num_ports || mlx4_is_mfunc(dev)) {
+ pr_warn("Device managed flow steering is unavailable "
+ "for IB ports or in multifunction env.\n");
+ return 0;
+ }
+ return 1;
+ }
+ return 0;
+}
+
static int mlx4_ib_query_device(struct ib_device *ibdev,
struct ib_device_attr *props)
{
@@ -144,6 +165,8 @@
props->device_cap_flags |= IB_DEVICE_MEM_WINDOW_TYPE_2B;
else
props->device_cap_flags |= IB_DEVICE_MEM_WINDOW_TYPE_2A;
+ if (check_flow_steering_support(dev->dev))
+ props->device_cap_flags |= IB_DEVICE_MANAGED_FLOW_STEERING;
}
props->vendor_id = be32_to_cpup((__be32 *) (out_mad->data + 36)) &
@@ -798,6 +821,209 @@
union ib_gid gid;
};
+static int parse_flow_attr(struct mlx4_dev *dev,
+ union ib_flow_spec *ib_spec,
+ struct _rule_hw *mlx4_spec)
+{
+ enum mlx4_net_trans_rule_id type;
+
+ switch (ib_spec->type) {
+ case IB_FLOW_SPEC_ETH:
+ type = MLX4_NET_TRANS_RULE_ID_ETH;
+ memcpy(mlx4_spec->eth.dst_mac, ib_spec->eth.val.dst_mac,
+ ETH_ALEN);
+ memcpy(mlx4_spec->eth.dst_mac_msk, ib_spec->eth.mask.dst_mac,
+ ETH_ALEN);
+ mlx4_spec->eth.vlan_tag = ib_spec->eth.val.vlan_tag;
+ mlx4_spec->eth.vlan_tag_msk = ib_spec->eth.mask.vlan_tag;
+ break;
+
+ case IB_FLOW_SPEC_IPV4:
+ type = MLX4_NET_TRANS_RULE_ID_IPV4;
+ mlx4_spec->ipv4.src_ip = ib_spec->ipv4.val.src_ip;
+ mlx4_spec->ipv4.src_ip_msk = ib_spec->ipv4.mask.src_ip;
+ mlx4_spec->ipv4.dst_ip = ib_spec->ipv4.val.dst_ip;
+ mlx4_spec->ipv4.dst_ip_msk = ib_spec->ipv4.mask.dst_ip;
+ break;
+
+ case IB_FLOW_SPEC_TCP:
+ case IB_FLOW_SPEC_UDP:
+ type = ib_spec->type == IB_FLOW_SPEC_TCP ?
+ MLX4_NET_TRANS_RULE_ID_TCP :
+ MLX4_NET_TRANS_RULE_ID_UDP;
+ mlx4_spec->tcp_udp.dst_port = ib_spec->tcp_udp.val.dst_port;
+ mlx4_spec->tcp_udp.dst_port_msk = ib_spec->tcp_udp.mask.dst_port;
+ mlx4_spec->tcp_udp.src_port = ib_spec->tcp_udp.val.src_port;
+ mlx4_spec->tcp_udp.src_port_msk = ib_spec->tcp_udp.mask.src_port;
+ break;
+
+ default:
+ return -EINVAL;
+ }
+ if (mlx4_map_sw_to_hw_steering_id(dev, type) < 0 ||
+ mlx4_hw_rule_sz(dev, type) < 0)
+ return -EINVAL;
+ mlx4_spec->id = cpu_to_be16(mlx4_map_sw_to_hw_steering_id(dev, type));
+ mlx4_spec->size = mlx4_hw_rule_sz(dev, type) >> 2;
+ return mlx4_hw_rule_sz(dev, type);
+}
+
+static int __mlx4_ib_create_flow(struct ib_qp *qp, struct ib_flow_attr *flow_attr,
+ int domain,
+ enum mlx4_net_trans_promisc_mode flow_type,
+ u64 *reg_id)
+{
+ int ret, i;
+ int size = 0;
+ void *ib_flow;
+ struct mlx4_ib_dev *mdev = to_mdev(qp->device);
+ struct mlx4_cmd_mailbox *mailbox;
+ struct mlx4_net_trans_rule_hw_ctrl *ctrl;
+ size_t rule_size = sizeof(struct mlx4_net_trans_rule_hw_ctrl) +
+ (sizeof(struct _rule_hw) * flow_attr->num_of_specs);
+
+ static const u16 __mlx4_domain[] = {
+ [IB_FLOW_DOMAIN_USER] = MLX4_DOMAIN_UVERBS,
+ [IB_FLOW_DOMAIN_ETHTOOL] = MLX4_DOMAIN_ETHTOOL,
+ [IB_FLOW_DOMAIN_RFS] = MLX4_DOMAIN_RFS,
+ [IB_FLOW_DOMAIN_NIC] = MLX4_DOMAIN_NIC,
+ };
+
+ if (flow_attr->priority > MLX4_IB_FLOW_MAX_PRIO) {
+ pr_err("Invalid priority value %d\n", flow_attr->priority);
+ return -EINVAL;
+ }
+
+ if (domain >= IB_FLOW_DOMAIN_NUM) {
+ pr_err("Invalid domain value %d\n", domain);
+ return -EINVAL;
+ }
+
+ if (mlx4_map_sw_to_hw_steering_mode(mdev->dev, flow_type) < 0)
+ return -EINVAL;
+
+ mailbox = mlx4_alloc_cmd_mailbox(mdev->dev);
+ if (IS_ERR(mailbox))
+ return PTR_ERR(mailbox);
+ memset(mailbox->buf, 0, rule_size);
+ ctrl = mailbox->buf;
+
+ ctrl->prio = cpu_to_be16(__mlx4_domain[domain] |
+ flow_attr->priority);
+ ctrl->type = mlx4_map_sw_to_hw_steering_mode(mdev->dev, flow_type);
+ ctrl->port = flow_attr->port;
+ ctrl->qpn = cpu_to_be32(qp->qp_num);
+
+ ib_flow = flow_attr + 1;
+ size += sizeof(struct mlx4_net_trans_rule_hw_ctrl);
+ for (i = 0; i < flow_attr->num_of_specs; i++) {
+ ret = parse_flow_attr(mdev->dev, ib_flow, mailbox->buf + size);
+ if (ret < 0) {
+ mlx4_free_cmd_mailbox(mdev->dev, mailbox);
+ return -EINVAL;
+ }
+ ib_flow += ((union ib_flow_spec *) ib_flow)->size;
+ size += ret;
+ }
+
+ ret = mlx4_cmd_imm(mdev->dev, mailbox->dma, reg_id, size >> 2, 0,
+ MLX4_QP_FLOW_STEERING_ATTACH, MLX4_CMD_TIME_CLASS_A,
+ MLX4_CMD_NATIVE);
+ if (ret == -ENOMEM)
+ pr_err("mcg table is full. Fail to register network rule.\n");
+ else if (ret == -ENXIO)
+ pr_err("Device managed flow steering is disabled. Fail to register network rule.\n");
+ else if (ret)
+ pr_err("Invalid argumant. Fail to register network rule.\n");
+
+ mlx4_free_cmd_mailbox(mdev->dev, mailbox);
+ return ret;
+}
+
+static int __mlx4_ib_destroy_flow(struct mlx4_dev *dev, u64 reg_id)
+{
+ int err;
+ err = mlx4_cmd(dev, reg_id, 0, 0,
+ MLX4_QP_FLOW_STEERING_DETACH, MLX4_CMD_TIME_CLASS_A,
+ MLX4_CMD_NATIVE);
+ if (err)
+ pr_err("Fail to detach network rule. registration id = 0x%llx\n",
+ reg_id);
+ return err;
+}
+
+static struct ib_flow *mlx4_ib_create_flow(struct ib_qp *qp,
+ struct ib_flow_attr *flow_attr,
+ int domain)
+{
+ int err = 0, i = 0;
+ struct mlx4_ib_flow *mflow;
+ enum mlx4_net_trans_promisc_mode type[2];
+
+ memset(type, 0, sizeof(type));
+
+ mflow = kzalloc(sizeof(*mflow), GFP_KERNEL);
+ if (!mflow) {
+ err = -ENOMEM;
+ goto err_free;
+ }
+
+ switch (flow_attr->type) {
+ case IB_FLOW_ATTR_NORMAL:
+ type[0] = MLX4_FS_REGULAR;
+ break;
+
+ case IB_FLOW_ATTR_ALL_DEFAULT:
+ type[0] = MLX4_FS_ALL_DEFAULT;
+ break;
+
+ case IB_FLOW_ATTR_MC_DEFAULT:
+ type[0] = MLX4_FS_MC_DEFAULT;
+ break;
+
+ case IB_FLOW_ATTR_SNIFFER:
+ type[0] = MLX4_FS_UC_SNIFFER;
+ type[1] = MLX4_FS_MC_SNIFFER;
+ break;
+
+ default:
+ err = -EINVAL;
+ goto err_free;
+ }
+
+ while (i < ARRAY_SIZE(type) && type[i]) {
+ err = __mlx4_ib_create_flow(qp, flow_attr, domain, type[i],
+ &mflow->reg_id[i]);
+ if (err)
+ goto err_free;
+ i++;
+ }
+
+ return &mflow->ibflow;
+
+err_free:
+ kfree(mflow);
+ return ERR_PTR(err);
+}
+
+static int mlx4_ib_destroy_flow(struct ib_flow *flow_id)
+{
+ int err, ret = 0;
+ int i = 0;
+ struct mlx4_ib_dev *mdev = to_mdev(flow_id->qp->device);
+ struct mlx4_ib_flow *mflow = to_mflow(flow_id);
+
+ while (i < ARRAY_SIZE(mflow->reg_id) && mflow->reg_id[i]) {
+ err = __mlx4_ib_destroy_flow(mdev->dev, mflow->reg_id[i]);
+ if (err)
+ ret = err;
+ i++;
+ }
+
+ kfree(mflow);
+ return ret;
+}
+
static int mlx4_ib_mcg_attach(struct ib_qp *ibqp, union ib_gid *gid, u16 lid)
{
int err;
@@ -1461,6 +1687,15 @@
(1ull << IB_USER_VERBS_CMD_CLOSE_XRCD);
}
+ if (check_flow_steering_support(dev)) {
+ ibdev->ib_dev.create_flow = mlx4_ib_create_flow;
+ ibdev->ib_dev.destroy_flow = mlx4_ib_destroy_flow;
+
+ ibdev->ib_dev.uverbs_cmd_mask |=
+ (1ull << IB_USER_VERBS_CMD_CREATE_FLOW) |
+ (1ull << IB_USER_VERBS_CMD_DESTROY_FLOW);
+ }
+
mlx4_ib_alloc_eqs(dev, ibdev);
spin_lock_init(&iboe->lock);
diff --git a/drivers/infiniband/hw/mlx4/mlx4_ib.h b/drivers/infiniband/hw/mlx4/mlx4_ib.h
index f61ec26..036b663 100644
--- a/drivers/infiniband/hw/mlx4/mlx4_ib.h
+++ b/drivers/infiniband/hw/mlx4/mlx4_ib.h
@@ -132,6 +132,12 @@
struct mlx4_fmr mfmr;
};
+struct mlx4_ib_flow {
+ struct ib_flow ibflow;
+ /* translating DMFS verbs sniffer rule to FW API requires two reg IDs */
+ u64 reg_id[2];
+};
+
struct mlx4_ib_wq {
u64 *wrid;
spinlock_t lock;
@@ -552,6 +558,12 @@
{
return container_of(ibfmr, struct mlx4_ib_fmr, ibfmr);
}
+
+static inline struct mlx4_ib_flow *to_mflow(struct ib_flow *ibflow)
+{
+ return container_of(ibflow, struct mlx4_ib_flow, ibflow);
+}
+
static inline struct mlx4_ib_qp *to_mqp(struct ib_qp *ibqp)
{
return container_of(ibqp, struct mlx4_ib_qp, ibqp);
diff --git a/drivers/infiniband/hw/ocrdma/ocrdma.h b/drivers/infiniband/hw/ocrdma/ocrdma.h
index d540180..adc11d1 100644
--- a/drivers/infiniband/hw/ocrdma/ocrdma.h
+++ b/drivers/infiniband/hw/ocrdma/ocrdma.h
@@ -56,10 +56,12 @@
u16 max_qp;
u16 max_wqe;
u16 max_rqe;
+ u16 max_srq;
u32 max_inline_data;
int max_send_sge;
int max_recv_sge;
int max_srq_sge;
+ int max_rdma_sge;
int max_mr;
u64 max_mr_size;
u32 max_num_mr_pbl;
@@ -130,8 +132,7 @@
struct ocrdma_cq **cq_tbl;
struct ocrdma_qp **qp_tbl;
- struct ocrdma_eq meq;
- struct ocrdma_eq *qp_eq_tbl;
+ struct ocrdma_eq *eq_tbl;
int eq_cnt;
u16 base_eqid;
u16 max_eq;
@@ -168,11 +169,12 @@
struct list_head entry;
struct rcu_head rcu;
int id;
+ u64 stag_arr[OCRDMA_MAX_STAG];
+ u16 pvid;
};
struct ocrdma_cq {
struct ib_cq ibcq;
- struct ocrdma_dev *dev;
struct ocrdma_cqe *va;
u32 phase;
u32 getp; /* pointer to pending wrs to
@@ -214,7 +216,6 @@
struct ocrdma_ah {
struct ib_ah ibah;
- struct ocrdma_dev *dev;
struct ocrdma_av *av;
u16 sgid_index;
u32 id;
@@ -234,7 +235,6 @@
struct ocrdma_srq {
struct ib_srq ibsrq;
- struct ocrdma_dev *dev;
u8 __iomem *db;
struct ocrdma_qp_hwq_info rq;
u64 *rqe_wr_id_tbl;
@@ -290,10 +290,11 @@
u32 qkey;
bool dpp_enabled;
u8 *ird_q_va;
+ bool signaled;
+ u16 db_cache;
};
struct ocrdma_hw_mr {
- struct ocrdma_dev *dev;
u32 lkey;
u8 fr_mr;
u8 remote_atomic;
@@ -317,15 +318,16 @@
struct ib_mr ibmr;
struct ib_umem *umem;
struct ocrdma_hw_mr hwmr;
- struct ocrdma_pd *pd;
};
struct ocrdma_ucontext {
struct ib_ucontext ibucontext;
- struct ocrdma_dev *dev;
struct list_head mm_head;
struct mutex mm_list_lock; /* protects list entries of mm type */
+ struct ocrdma_pd *cntxt_pd;
+ int pd_in_use;
+
struct {
u32 *va;
dma_addr_t pa;
@@ -386,14 +388,14 @@
static inline int ocrdma_get_num_posted_shift(struct ocrdma_qp *qp)
{
return ((qp->dev->nic_info.dev_family == OCRDMA_GEN2_FAMILY &&
- qp->id < 64) ? 24 : 16);
+ qp->id < 128) ? 24 : 16);
}
static inline int is_cqe_valid(struct ocrdma_cq *cq, struct ocrdma_cqe *cqe)
{
int cqe_valid;
cqe_valid = le32_to_cpu(cqe->flags_status_srcqpn) & OCRDMA_CQE_VALID;
- return ((cqe_valid == cq->phase) ? 1 : 0);
+ return (cqe_valid == cq->phase);
}
static inline int is_cqe_for_sq(struct ocrdma_cqe *cqe)
diff --git a/drivers/infiniband/hw/ocrdma/ocrdma_abi.h b/drivers/infiniband/hw/ocrdma/ocrdma_abi.h
index 517ab20..fbac8eb 100644
--- a/drivers/infiniband/hw/ocrdma/ocrdma_abi.h
+++ b/drivers/infiniband/hw/ocrdma/ocrdma_abi.h
@@ -28,6 +28,9 @@
#ifndef __OCRDMA_ABI_H__
#define __OCRDMA_ABI_H__
+#define OCRDMA_ABI_VERSION 1
+/* user kernel communication data structures. */
+
struct ocrdma_alloc_ucontext_resp {
u32 dev_id;
u32 wqe_size;
@@ -35,16 +38,16 @@
u32 dpp_wqe_size;
u64 ah_tbl_page;
u32 ah_tbl_len;
- u32 rsvd;
- u8 fw_ver[32];
u32 rqe_size;
+ u8 fw_ver[32];
+ /* for future use/new features in progress */
u64 rsvd1;
-} __packed;
+ u64 rsvd2;
+};
-/* user kernel communication data structures. */
struct ocrdma_alloc_pd_ureq {
u64 rsvd1;
-} __packed;
+};
struct ocrdma_alloc_pd_uresp {
u32 id;
@@ -52,12 +55,12 @@
u32 dpp_page_addr_hi;
u32 dpp_page_addr_lo;
u64 rsvd1;
-} __packed;
+};
struct ocrdma_create_cq_ureq {
u32 dpp_cq;
- u32 rsvd;
-} __packed;
+ u32 rsvd; /* pad */
+};
#define MAX_CQ_PAGES 8
struct ocrdma_create_cq_uresp {
@@ -69,9 +72,10 @@
u64 db_page_addr;
u32 db_page_size;
u32 phase_change;
+ /* for future use/new features in progress */
u64 rsvd1;
u64 rsvd2;
-} __packed;
+};
#define MAX_QP_PAGES 8
#define MAX_UD_AV_PAGES 8
@@ -80,14 +84,14 @@
u8 enable_dpp_cq;
u8 rsvd;
u16 dpp_cq_id;
- u32 rsvd1;
+ u32 rsvd1; /* pad */
};
struct ocrdma_create_qp_uresp {
u16 qp_id;
u16 sq_dbid;
u16 rq_dbid;
- u16 resv0;
+ u16 resv0; /* pad */
u32 sq_page_size;
u32 rq_page_size;
u32 num_sq_pages;
@@ -98,19 +102,19 @@
u32 db_page_size;
u32 dpp_credit;
u32 dpp_offset;
- u32 rsvd1;
u32 num_wqe_allocated;
u32 num_rqe_allocated;
u32 db_sq_offset;
u32 db_rq_offset;
u32 db_shift;
+ u64 rsvd1;
u64 rsvd2;
u64 rsvd3;
} __packed;
struct ocrdma_create_srq_uresp {
u16 rq_dbid;
- u16 resv0;
+ u16 resv0; /* pad */
u32 resv1;
u32 rq_page_size;
@@ -126,6 +130,6 @@
u64 rsvd2;
u64 rsvd3;
-} __packed;
+};
#endif /* __OCRDMA_ABI_H__ */
diff --git a/drivers/infiniband/hw/ocrdma/ocrdma_ah.c b/drivers/infiniband/hw/ocrdma/ocrdma_ah.c
index f4c587c..ee499d9 100644
--- a/drivers/infiniband/hw/ocrdma/ocrdma_ah.c
+++ b/drivers/infiniband/hw/ocrdma/ocrdma_ah.c
@@ -35,12 +35,11 @@
#include "ocrdma_ah.h"
#include "ocrdma_hw.h"
-static inline int set_av_attr(struct ocrdma_ah *ah,
+static inline int set_av_attr(struct ocrdma_dev *dev, struct ocrdma_ah *ah,
struct ib_ah_attr *attr, int pdid)
{
int status = 0;
u16 vlan_tag; bool vlan_enabled = false;
- struct ocrdma_dev *dev = ah->dev;
struct ocrdma_eth_vlan eth;
struct ocrdma_grh grh;
int eth_sz;
@@ -51,6 +50,8 @@
ah->sgid_index = attr->grh.sgid_index;
vlan_tag = rdma_get_vlan_id(&attr->grh.dgid);
+ if (!vlan_tag || (vlan_tag > 0xFFF))
+ vlan_tag = dev->pvid;
if (vlan_tag && (vlan_tag < 0x1000)) {
eth.eth_type = cpu_to_be16(0x8100);
eth.roce_eth_type = cpu_to_be16(OCRDMA_ROCE_ETH_TYPE);
@@ -92,7 +93,7 @@
int status;
struct ocrdma_ah *ah;
struct ocrdma_pd *pd = get_ocrdma_pd(ibpd);
- struct ocrdma_dev *dev = pd->dev;
+ struct ocrdma_dev *dev = get_ocrdma_dev(ibpd->device);
if (!(attr->ah_flags & IB_AH_GRH))
return ERR_PTR(-EINVAL);
@@ -100,12 +101,11 @@
ah = kzalloc(sizeof *ah, GFP_ATOMIC);
if (!ah)
return ERR_PTR(-ENOMEM);
- ah->dev = pd->dev;
status = ocrdma_alloc_av(dev, ah);
if (status)
goto av_err;
- status = set_av_attr(ah, attr, pd->id);
+ status = set_av_attr(dev, ah, attr, pd->id);
if (status)
goto av_conf_err;
@@ -126,7 +126,9 @@
int ocrdma_destroy_ah(struct ib_ah *ibah)
{
struct ocrdma_ah *ah = get_ocrdma_ah(ibah);
- ocrdma_free_av(ah->dev, ah);
+ struct ocrdma_dev *dev = get_ocrdma_dev(ibah->device);
+
+ ocrdma_free_av(dev, ah);
kfree(ah);
return 0;
}
diff --git a/drivers/infiniband/hw/ocrdma/ocrdma_hw.c b/drivers/infiniband/hw/ocrdma/ocrdma_hw.c
index 0965278..4ed8235 100644
--- a/drivers/infiniband/hw/ocrdma/ocrdma_hw.c
+++ b/drivers/infiniband/hw/ocrdma/ocrdma_hw.c
@@ -94,7 +94,7 @@
static inline void *ocrdma_get_eqe(struct ocrdma_eq *eq)
{
- return (u8 *)eq->q.va + (eq->q.tail * sizeof(struct ocrdma_eqe));
+ return eq->q.va + (eq->q.tail * sizeof(struct ocrdma_eqe));
}
static inline void ocrdma_eq_inc_tail(struct ocrdma_eq *eq)
@@ -105,8 +105,7 @@
static inline void *ocrdma_get_mcqe(struct ocrdma_dev *dev)
{
struct ocrdma_mcqe *cqe = (struct ocrdma_mcqe *)
- ((u8 *) dev->mq.cq.va +
- (dev->mq.cq.tail * sizeof(struct ocrdma_mcqe)));
+ (dev->mq.cq.va + (dev->mq.cq.tail * sizeof(struct ocrdma_mcqe)));
if (!(le32_to_cpu(cqe->valid_ae_cmpl_cons) & OCRDMA_MCQE_VALID_MASK))
return NULL;
@@ -120,9 +119,7 @@
static inline struct ocrdma_mqe *ocrdma_get_mqe(struct ocrdma_dev *dev)
{
- return (struct ocrdma_mqe *)((u8 *) dev->mq.sq.va +
- (dev->mq.sq.head *
- sizeof(struct ocrdma_mqe)));
+ return dev->mq.sq.va + (dev->mq.sq.head * sizeof(struct ocrdma_mqe));
}
static inline void ocrdma_mq_inc_head(struct ocrdma_dev *dev)
@@ -132,8 +129,7 @@
static inline void *ocrdma_get_mqe_rsp(struct ocrdma_dev *dev)
{
- return (void *)((u8 *) dev->mq.sq.va +
- (dev->mqe_ctx.tag * sizeof(struct ocrdma_mqe)));
+ return dev->mq.sq.va + (dev->mqe_ctx.tag * sizeof(struct ocrdma_mqe));
}
enum ib_qp_state get_ibqp_state(enum ocrdma_qp_state qps)
@@ -181,7 +177,7 @@
static int ocrdma_get_mbx_errno(u32 status)
{
- int err_num = -EFAULT;
+ int err_num;
u8 mbox_status = (status & OCRDMA_MBX_RSP_STATUS_MASK) >>
OCRDMA_MBX_RSP_STATUS_SHIFT;
u8 add_status = (status & OCRDMA_MBX_RSP_ASTATUS_MASK) >>
@@ -260,10 +256,11 @@
break;
case OCRDMA_MBX_CQE_STATUS_INSUFFICIENT_RESOURCES:
case OCRDMA_MBX_CQE_STATUS_QUEUE_FLUSHING:
- err_num = -EAGAIN;
+ err_num = -EINVAL;
break;
case OCRDMA_MBX_CQE_STATUS_DMA_FAILED:
- err_num = -EIO;
+ default:
+ err_num = -EINVAL;
break;
}
return err_num;
@@ -367,22 +364,6 @@
}
}
-static void ocrdma_assign_eq_vect_gen2(struct ocrdma_dev *dev,
- struct ocrdma_eq *eq)
-{
- /* assign vector and update vector id for next EQ */
- eq->vector = dev->nic_info.msix.start_vector;
- dev->nic_info.msix.start_vector += 1;
-}
-
-static void ocrdma_free_eq_vect_gen2(struct ocrdma_dev *dev)
-{
- /* this assumes that EQs are freed in exactly reverse order
- * as its allocation.
- */
- dev->nic_info.msix.start_vector -= 1;
-}
-
static int ocrdma_mbx_delete_q(struct ocrdma_dev *dev, struct ocrdma_queue_info *q,
int queue_type)
{
@@ -423,11 +404,8 @@
memset(cmd, 0, sizeof(*cmd));
ocrdma_init_mch(&cmd->req, OCRDMA_CMD_CREATE_EQ, OCRDMA_SUBSYS_COMMON,
sizeof(*cmd));
- if (dev->nic_info.dev_family == OCRDMA_GEN2_FAMILY)
- cmd->req.rsvd_version = 0;
- else
- cmd->req.rsvd_version = 2;
+ cmd->req.rsvd_version = 2;
cmd->num_pages = 4;
cmd->valid = OCRDMA_CREATE_EQ_VALID;
cmd->cnt = 4 << OCRDMA_CREATE_EQ_CNT_SHIFT;
@@ -438,12 +416,7 @@
NULL);
if (!status) {
eq->q.id = rsp->vector_eqid & 0xffff;
- if (dev->nic_info.dev_family == OCRDMA_GEN2_FAMILY)
- ocrdma_assign_eq_vect_gen2(dev, eq);
- else {
- eq->vector = (rsp->vector_eqid >> 16) & 0xffff;
- dev->nic_info.msix.start_vector += 1;
- }
+ eq->vector = (rsp->vector_eqid >> 16) & 0xffff;
eq->q.created = true;
}
return status;
@@ -486,8 +459,6 @@
{
if (eq->q.created) {
ocrdma_mbx_delete_q(dev, &eq->q, QTYPE_EQ);
- if (dev->nic_info.dev_family == OCRDMA_GEN2_FAMILY)
- ocrdma_free_eq_vect_gen2(dev);
ocrdma_free_q(dev, &eq->q);
}
}
@@ -506,13 +477,12 @@
_ocrdma_destroy_eq(dev, eq);
}
-static void ocrdma_destroy_qp_eqs(struct ocrdma_dev *dev)
+static void ocrdma_destroy_eqs(struct ocrdma_dev *dev)
{
int i;
- /* deallocate the data path eqs */
for (i = 0; i < dev->eq_cnt; i++)
- ocrdma_destroy_eq(dev, &dev->qp_eq_tbl[i]);
+ ocrdma_destroy_eq(dev, &dev->eq_tbl[i]);
}
static int ocrdma_mbx_mq_cq_create(struct ocrdma_dev *dev,
@@ -527,16 +497,21 @@
ocrdma_init_mch(&cmd->req, OCRDMA_CMD_CREATE_CQ,
OCRDMA_SUBSYS_COMMON, sizeof(*cmd));
- cmd->pgsz_pgcnt = PAGES_4K_SPANNED(cq->va, cq->size);
- cmd->ev_cnt_flags = OCRDMA_CREATE_CQ_DEF_FLAGS;
- cmd->eqn = (eq->id << OCRDMA_CREATE_CQ_EQID_SHIFT);
+ cmd->req.rsvd_version = OCRDMA_CREATE_CQ_VER2;
+ cmd->pgsz_pgcnt = (cq->size / OCRDMA_MIN_Q_PAGE_SIZE) <<
+ OCRDMA_CREATE_CQ_PAGE_SIZE_SHIFT;
+ cmd->pgsz_pgcnt |= PAGES_4K_SPANNED(cq->va, cq->size);
- ocrdma_build_q_pages(&cmd->pa[0], cmd->pgsz_pgcnt,
+ cmd->ev_cnt_flags = OCRDMA_CREATE_CQ_DEF_FLAGS;
+ cmd->eqn = eq->id;
+ cmd->cqe_count = cq->size / sizeof(struct ocrdma_mcqe);
+
+ ocrdma_build_q_pages(&cmd->pa[0], cq->size / OCRDMA_MIN_Q_PAGE_SIZE,
cq->dma, PAGE_SIZE_4K);
status = be_roce_mcc_cmd(dev->nic_info.netdev,
cmd, sizeof(*cmd), NULL, NULL);
if (!status) {
- cq->id = (rsp->cq_id & OCRDMA_CREATE_CQ_RSP_CQ_ID_MASK);
+ cq->id = (u16) (rsp->cq_id & OCRDMA_CREATE_CQ_RSP_CQ_ID_MASK);
cq->created = true;
}
return status;
@@ -569,7 +544,10 @@
cmd->cqid_pages = num_pages;
cmd->cqid_pages |= (cq->id << OCRDMA_CREATE_MQ_CQ_ID_SHIFT);
cmd->async_cqid_valid = OCRDMA_CREATE_MQ_ASYNC_CQ_VALID;
- cmd->async_event_bitmap = Bit(20);
+
+ cmd->async_event_bitmap = Bit(OCRDMA_ASYNC_GRP5_EVE_CODE);
+ cmd->async_event_bitmap |= Bit(OCRDMA_ASYNC_RDMA_EVE_CODE);
+
cmd->async_cqid_ringsize = cq->id;
cmd->async_cqid_ringsize |= (ocrdma_encoded_q_len(mq->len) <<
OCRDMA_CREATE_MQ_RING_SIZE_SHIFT);
@@ -596,7 +574,7 @@
if (status)
goto alloc_err;
- status = ocrdma_mbx_mq_cq_create(dev, &dev->mq.cq, &dev->meq.q);
+ status = ocrdma_mbx_mq_cq_create(dev, &dev->mq.cq, &dev->eq_tbl[0].q);
if (status)
goto mbx_cq_free;
@@ -653,7 +631,7 @@
if (qp == NULL)
BUG();
- ocrdma_qp_state_machine(qp, new_ib_qps, &old_ib_qps);
+ ocrdma_qp_state_change(qp, new_ib_qps, &old_ib_qps);
}
static void ocrdma_dispatch_ibevent(struct ocrdma_dev *dev,
@@ -746,11 +724,35 @@
qp->srq->ibsrq.event_handler(&ib_evt,
qp->srq->ibsrq.
srq_context);
- } else if (dev_event)
+ } else if (dev_event) {
ib_dispatch_event(&ib_evt);
+ }
}
+static void ocrdma_process_grp5_aync(struct ocrdma_dev *dev,
+ struct ocrdma_ae_mcqe *cqe)
+{
+ struct ocrdma_ae_pvid_mcqe *evt;
+ int type = (cqe->valid_ae_event & OCRDMA_AE_MCQE_EVENT_TYPE_MASK) >>
+ OCRDMA_AE_MCQE_EVENT_TYPE_SHIFT;
+
+ switch (type) {
+ case OCRDMA_ASYNC_EVENT_PVID_STATE:
+ evt = (struct ocrdma_ae_pvid_mcqe *)cqe;
+ if ((evt->tag_enabled & OCRDMA_AE_PVID_MCQE_ENABLED_MASK) >>
+ OCRDMA_AE_PVID_MCQE_ENABLED_SHIFT)
+ dev->pvid = ((evt->tag_enabled &
+ OCRDMA_AE_PVID_MCQE_TAG_MASK) >>
+ OCRDMA_AE_PVID_MCQE_TAG_SHIFT);
+ break;
+ default:
+ /* Not interested evts. */
+ break;
+ }
+}
+
+
static void ocrdma_process_acqe(struct ocrdma_dev *dev, void *ae_cqe)
{
/* async CQE processing */
@@ -758,8 +760,10 @@
u32 evt_code = (cqe->valid_ae_event & OCRDMA_AE_MCQE_EVENT_CODE_MASK) >>
OCRDMA_AE_MCQE_EVENT_CODE_SHIFT;
- if (evt_code == OCRDMA_ASYNC_EVE_CODE)
+ if (evt_code == OCRDMA_ASYNC_RDMA_EVE_CODE)
ocrdma_dispatch_ibevent(dev, cqe);
+ else if (evt_code == OCRDMA_ASYNC_GRP5_EVE_CODE)
+ ocrdma_process_grp5_aync(dev, cqe);
else
pr_err("%s(%d) invalid evt code=0x%x\n", __func__,
dev->id, evt_code);
@@ -957,9 +961,8 @@
rsp = ocrdma_get_mqe_rsp(dev);
ocrdma_copy_le32_to_cpu(mqe, rsp, (sizeof(*mqe)));
if (cqe_status || ext_status) {
- pr_err
- ("%s() opcode=0x%x, cqe_status=0x%x, ext_status=0x%x\n",
- __func__,
+ pr_err("%s() opcode=0x%x, cqe_status=0x%x, ext_status=0x%x\n",
+ __func__,
(rsp->u.rsp.subsys_op & OCRDMA_MBX_RSP_OPCODE_MASK) >>
OCRDMA_MBX_RSP_OPCODE_SHIFT, cqe_status, ext_status);
status = ocrdma_get_mbx_cqe_errno(cqe_status);
@@ -991,9 +994,15 @@
attr->max_srq_sge = (rsp->max_srq_rqe_sge &
OCRDMA_MBX_QUERY_CFG_MAX_SRQ_SGE_MASK) >>
OCRDMA_MBX_QUERY_CFG_MAX_SRQ_SGE_OFFSET;
+ attr->max_rdma_sge = (rsp->max_write_send_sge &
+ OCRDMA_MBX_QUERY_CFG_MAX_WRITE_SGE_MASK) >>
+ OCRDMA_MBX_QUERY_CFG_MAX_WRITE_SGE_SHIFT;
attr->max_ord_per_qp = (rsp->max_ird_ord_per_qp &
OCRDMA_MBX_QUERY_CFG_MAX_ORD_PER_QP_MASK) >>
OCRDMA_MBX_QUERY_CFG_MAX_ORD_PER_QP_SHIFT;
+ attr->max_srq =
+ (rsp->max_srq_rpir_qps & OCRDMA_MBX_QUERY_CFG_MAX_SRQ_MASK) >>
+ OCRDMA_MBX_QUERY_CFG_MAX_SRQ_OFFSET;
attr->max_ird_per_qp = (rsp->max_ird_ord_per_qp &
OCRDMA_MBX_QUERY_CFG_MAX_IRD_PER_QP_MASK) >>
OCRDMA_MBX_QUERY_CFG_MAX_IRD_PER_QP_SHIFT;
@@ -1013,6 +1022,9 @@
attr->max_num_mr_pbl = rsp->max_num_mr_pbl;
attr->max_cqe = rsp->max_cq_cqes_per_cq &
OCRDMA_MBX_QUERY_CFG_MAX_CQES_PER_CQ_MASK;
+ attr->max_cq = (rsp->max_cq_cqes_per_cq &
+ OCRDMA_MBX_QUERY_CFG_MAX_CQ_MASK) >>
+ OCRDMA_MBX_QUERY_CFG_MAX_CQ_OFFSET;
attr->wqe_size = ((rsp->wqe_rqe_stride_max_dpp_cqs &
OCRDMA_MBX_QUERY_CFG_MAX_WQE_SIZE_MASK) >>
OCRDMA_MBX_QUERY_CFG_MAX_WQE_SIZE_OFFSET) *
@@ -1045,7 +1057,6 @@
return -EINVAL;
dev->base_eqid = conf->base_eqid;
dev->max_eq = conf->max_eq;
- dev->attr.max_cq = OCRDMA_MAX_CQ - 1;
return 0;
}
@@ -1118,6 +1129,34 @@
return status;
}
+int ocrdma_mbx_get_link_speed(struct ocrdma_dev *dev, u8 *lnk_speed)
+{
+ int status = -ENOMEM;
+ struct ocrdma_get_link_speed_rsp *rsp;
+ struct ocrdma_mqe *cmd;
+
+ cmd = ocrdma_init_emb_mqe(OCRDMA_CMD_QUERY_NTWK_LINK_CONFIG_V1,
+ sizeof(*cmd));
+ if (!cmd)
+ return status;
+ ocrdma_init_mch((struct ocrdma_mbx_hdr *)&cmd->u.cmd[0],
+ OCRDMA_CMD_QUERY_NTWK_LINK_CONFIG_V1,
+ OCRDMA_SUBSYS_COMMON, sizeof(*cmd));
+
+ ((struct ocrdma_mbx_hdr *)cmd->u.cmd)->rsvd_version = 0x1;
+
+ status = ocrdma_mbx_cmd(dev, (struct ocrdma_mqe *)cmd);
+ if (status)
+ goto mbx_err;
+
+ rsp = (struct ocrdma_get_link_speed_rsp *)cmd;
+ *lnk_speed = rsp->phys_port_speed;
+
+mbx_err:
+ kfree(cmd);
+ return status;
+}
+
int ocrdma_mbx_alloc_pd(struct ocrdma_dev *dev, struct ocrdma_pd *pd)
{
int status = -ENOMEM;
@@ -1296,19 +1335,19 @@
u16 eq_id;
mutex_lock(&dev->dev_lock);
- cq_cnt = dev->qp_eq_tbl[0].cq_cnt;
- eq_id = dev->qp_eq_tbl[0].q.id;
+ cq_cnt = dev->eq_tbl[0].cq_cnt;
+ eq_id = dev->eq_tbl[0].q.id;
/* find the EQ which is has the least number of
* CQs associated with it.
*/
for (i = 0; i < dev->eq_cnt; i++) {
- if (dev->qp_eq_tbl[i].cq_cnt < cq_cnt) {
- cq_cnt = dev->qp_eq_tbl[i].cq_cnt;
- eq_id = dev->qp_eq_tbl[i].q.id;
+ if (dev->eq_tbl[i].cq_cnt < cq_cnt) {
+ cq_cnt = dev->eq_tbl[i].cq_cnt;
+ eq_id = dev->eq_tbl[i].q.id;
selected_eq = i;
}
}
- dev->qp_eq_tbl[selected_eq].cq_cnt += 1;
+ dev->eq_tbl[selected_eq].cq_cnt += 1;
mutex_unlock(&dev->dev_lock);
return eq_id;
}
@@ -1319,16 +1358,16 @@
mutex_lock(&dev->dev_lock);
for (i = 0; i < dev->eq_cnt; i++) {
- if (dev->qp_eq_tbl[i].q.id != eq_id)
+ if (dev->eq_tbl[i].q.id != eq_id)
continue;
- dev->qp_eq_tbl[i].cq_cnt -= 1;
+ dev->eq_tbl[i].cq_cnt -= 1;
break;
}
mutex_unlock(&dev->dev_lock);
}
int ocrdma_mbx_create_cq(struct ocrdma_dev *dev, struct ocrdma_cq *cq,
- int entries, int dpp_cq)
+ int entries, int dpp_cq, u16 pd_id)
{
int status = -ENOMEM; int max_hw_cqe;
struct pci_dev *pdev = dev->nic_info.pdev;
@@ -1336,8 +1375,6 @@
struct ocrdma_create_cq_rsp *rsp;
u32 hw_pages, cqe_size, page_size, cqe_count;
- if (dpp_cq)
- return -EINVAL;
if (entries > dev->attr.max_cqe) {
pr_err("%s(%d) max_cqe=0x%x, requester_cqe=0x%x\n",
__func__, dev->id, dev->attr.max_cqe, entries);
@@ -1377,15 +1414,13 @@
cmd->cmd.pgsz_pgcnt |= hw_pages;
cmd->cmd.ev_cnt_flags = OCRDMA_CREATE_CQ_DEF_FLAGS;
- if (dev->eq_cnt < 0)
- goto eq_err;
cq->eqn = ocrdma_bind_eq(dev);
- cmd->cmd.req.rsvd_version = OCRDMA_CREATE_CQ_VER2;
+ cmd->cmd.req.rsvd_version = OCRDMA_CREATE_CQ_VER3;
cqe_count = cq->len / cqe_size;
- if (cqe_count > 1024)
+ if (cqe_count > 1024) {
/* Set cnt to 3 to indicate more than 1024 cq entries */
cmd->cmd.ev_cnt_flags |= (0x3 << OCRDMA_CREATE_CQ_CNT_SHIFT);
- else {
+ } else {
u8 count = 0;
switch (cqe_count) {
case 256:
@@ -1416,6 +1451,7 @@
cq->phase_change = true;
}
+ cmd->cmd.pd_id = pd_id; /* valid only for v3 */
ocrdma_build_q_pages(&cmd->cmd.pa[0], hw_pages, cq->pa, page_size);
status = ocrdma_mbx_cmd(dev, (struct ocrdma_mqe *)cmd);
if (status)
@@ -1427,7 +1463,6 @@
return 0;
mbx_err:
ocrdma_unbind_eq(dev, cq->eqn);
-eq_err:
dma_free_coherent(&pdev->dev, cq->len, cq->va, cq->pa);
mem_err:
kfree(cmd);
@@ -1524,6 +1559,7 @@
return -ENOMEM;
cmd->num_pbl_pdid =
pdid | (hwmr->num_pbls << OCRDMA_REG_NSMR_NUM_PBL_SHIFT);
+ cmd->fr_mr = hwmr->fr_mr;
cmd->flags_hpage_pbe_sz |= (hwmr->remote_wr <<
OCRDMA_REG_NSMR_REMOTE_WR_SHIFT);
@@ -1678,8 +1714,16 @@
spin_unlock_irqrestore(&qp->dev->flush_q_lock, flags);
}
-int ocrdma_qp_state_machine(struct ocrdma_qp *qp, enum ib_qp_state new_ib_state,
- enum ib_qp_state *old_ib_state)
+static void ocrdma_init_hwq_ptr(struct ocrdma_qp *qp)
+{
+ qp->sq.head = 0;
+ qp->sq.tail = 0;
+ qp->rq.head = 0;
+ qp->rq.tail = 0;
+}
+
+int ocrdma_qp_state_change(struct ocrdma_qp *qp, enum ib_qp_state new_ib_state,
+ enum ib_qp_state *old_ib_state)
{
unsigned long flags;
int status = 0;
@@ -1696,96 +1740,15 @@
return 1;
}
- switch (qp->state) {
- case OCRDMA_QPS_RST:
- switch (new_state) {
- case OCRDMA_QPS_RST:
- case OCRDMA_QPS_INIT:
- break;
- default:
- status = -EINVAL;
- break;
- };
- break;
- case OCRDMA_QPS_INIT:
- /* qps: INIT->XXX */
- switch (new_state) {
- case OCRDMA_QPS_INIT:
- case OCRDMA_QPS_RTR:
- break;
- case OCRDMA_QPS_ERR:
- ocrdma_flush_qp(qp);
- break;
- default:
- status = -EINVAL;
- break;
- };
- break;
- case OCRDMA_QPS_RTR:
- /* qps: RTS->XXX */
- switch (new_state) {
- case OCRDMA_QPS_RTS:
- break;
- case OCRDMA_QPS_ERR:
- ocrdma_flush_qp(qp);
- break;
- default:
- status = -EINVAL;
- break;
- };
- break;
- case OCRDMA_QPS_RTS:
- /* qps: RTS->XXX */
- switch (new_state) {
- case OCRDMA_QPS_SQD:
- case OCRDMA_QPS_SQE:
- break;
- case OCRDMA_QPS_ERR:
- ocrdma_flush_qp(qp);
- break;
- default:
- status = -EINVAL;
- break;
- };
- break;
- case OCRDMA_QPS_SQD:
- /* qps: SQD->XXX */
- switch (new_state) {
- case OCRDMA_QPS_RTS:
- case OCRDMA_QPS_SQE:
- case OCRDMA_QPS_ERR:
- break;
- default:
- status = -EINVAL;
- break;
- };
- break;
- case OCRDMA_QPS_SQE:
- switch (new_state) {
- case OCRDMA_QPS_RTS:
- case OCRDMA_QPS_ERR:
- break;
- default:
- status = -EINVAL;
- break;
- };
- break;
- case OCRDMA_QPS_ERR:
- /* qps: ERR->XXX */
- switch (new_state) {
- case OCRDMA_QPS_RST:
- break;
- default:
- status = -EINVAL;
- break;
- };
- break;
- default:
- status = -EINVAL;
- break;
- };
- if (!status)
- qp->state = new_state;
+
+ if (new_state == OCRDMA_QPS_INIT) {
+ ocrdma_init_hwq_ptr(qp);
+ ocrdma_del_flush_qp(qp);
+ } else if (new_state == OCRDMA_QPS_ERR) {
+ ocrdma_flush_qp(qp);
+ }
+
+ qp->state = new_state;
spin_unlock_irqrestore(&qp->q_lock, flags);
return status;
@@ -1819,10 +1782,9 @@
u32 max_wqe_allocated;
u32 max_sges = attrs->cap.max_send_sge;
- max_wqe_allocated = attrs->cap.max_send_wr;
- /* need to allocate one extra to for GEN1 family */
- if (dev->nic_info.dev_family != OCRDMA_GEN2_FAMILY)
- max_wqe_allocated += 1;
+ /* QP1 may exceed 127 */
+ max_wqe_allocated = min_t(int, attrs->cap.max_send_wr + 1,
+ dev->attr.max_wqe);
status = ocrdma_build_q_conf(&max_wqe_allocated,
dev->attr.wqe_size, &hw_pages, &hw_page_size);
@@ -1934,6 +1896,8 @@
dma_addr_t pa = 0;
int ird_page_size = dev->attr.ird_page_size;
int ird_q_len = dev->attr.num_ird_pages * ird_page_size;
+ struct ocrdma_hdr_wqe *rqe;
+ int i = 0;
if (dev->attr.ird == 0)
return 0;
@@ -1945,6 +1909,15 @@
memset(qp->ird_q_va, 0, ird_q_len);
ocrdma_build_q_pages(&cmd->ird_addr[0], dev->attr.num_ird_pages,
pa, ird_page_size);
+ for (; i < ird_q_len / dev->attr.rqe_size; i++) {
+ rqe = (struct ocrdma_hdr_wqe *)(qp->ird_q_va +
+ (i * dev->attr.rqe_size));
+ rqe->cw = 0;
+ rqe->cw |= 2;
+ rqe->cw |= (OCRDMA_TYPE_LKEY << OCRDMA_WQE_TYPE_SHIFT);
+ rqe->cw |= (8 << OCRDMA_WQE_SIZE_SHIFT);
+ rqe->cw |= (8 << OCRDMA_WQE_NXT_WQE_SIZE_SHIFT);
+ }
return 0;
}
@@ -2057,9 +2030,10 @@
qp->rq_cq = cq;
if (pd->dpp_enabled && attrs->cap.max_inline_data && pd->num_dpp_qp &&
- (attrs->cap.max_inline_data <= dev->attr.max_inline_data))
+ (attrs->cap.max_inline_data <= dev->attr.max_inline_data)) {
ocrdma_set_create_qp_dpp_cmd(cmd, pd, qp, enable_dpp_cq,
dpp_cq_id);
+ }
status = ocrdma_mbx_cmd(dev, (struct ocrdma_mqe *)cmd);
if (status)
@@ -2108,38 +2082,48 @@
struct in6_addr in6;
memcpy(&in6, dgid, sizeof in6);
- if (rdma_is_multicast_addr(&in6))
+ if (rdma_is_multicast_addr(&in6)) {
rdma_get_mcast_mac(&in6, mac_addr);
- else if (rdma_link_local_addr(&in6))
+ } else if (rdma_link_local_addr(&in6)) {
rdma_get_ll_mac(&in6, mac_addr);
- else {
+ } else {
pr_err("%s() fail to resolve mac_addr.\n", __func__);
return -EINVAL;
}
return 0;
}
-static void ocrdma_set_av_params(struct ocrdma_qp *qp,
+static int ocrdma_set_av_params(struct ocrdma_qp *qp,
struct ocrdma_modify_qp *cmd,
struct ib_qp_attr *attrs)
{
+ int status;
struct ib_ah_attr *ah_attr = &attrs->ah_attr;
- union ib_gid sgid;
+ union ib_gid sgid, zgid;
u32 vlan_id;
u8 mac_addr[6];
+
if ((ah_attr->ah_flags & IB_AH_GRH) == 0)
- return;
+ return -EINVAL;
cmd->params.tclass_sq_psn |=
(ah_attr->grh.traffic_class << OCRDMA_QP_PARAMS_TCLASS_SHIFT);
cmd->params.rnt_rc_sl_fl |=
(ah_attr->grh.flow_label & OCRDMA_QP_PARAMS_FLOW_LABEL_MASK);
+ cmd->params.rnt_rc_sl_fl |= (ah_attr->sl << OCRDMA_QP_PARAMS_SL_SHIFT);
cmd->params.hop_lmt_rq_psn |=
(ah_attr->grh.hop_limit << OCRDMA_QP_PARAMS_HOP_LMT_SHIFT);
cmd->flags |= OCRDMA_QP_PARA_FLOW_LBL_VALID;
memcpy(&cmd->params.dgid[0], &ah_attr->grh.dgid.raw[0],
sizeof(cmd->params.dgid));
- ocrdma_query_gid(&qp->dev->ibdev, 1,
+ status = ocrdma_query_gid(&qp->dev->ibdev, 1,
ah_attr->grh.sgid_index, &sgid);
+ if (status)
+ return status;
+
+ memset(&zgid, 0, sizeof(zgid));
+ if (!memcmp(&sgid, &zgid, sizeof(zgid)))
+ return -EINVAL;
+
qp->sgid_idx = ah_attr->grh.sgid_index;
memcpy(&cmd->params.sgid[0], &sgid.raw[0], sizeof(cmd->params.sgid));
ocrdma_resolve_dgid(qp->dev, &ah_attr->grh.dgid, &mac_addr[0]);
@@ -2155,6 +2139,7 @@
vlan_id << OCRDMA_QP_PARAMS_VLAN_SHIFT;
cmd->flags |= OCRDMA_QP_PARA_VLAN_EN_VALID;
}
+ return 0;
}
static int ocrdma_set_qp_params(struct ocrdma_qp *qp,
@@ -2163,8 +2148,6 @@
enum ib_qp_state old_qps)
{
int status = 0;
- struct net_device *netdev = qp->dev->nic_info.netdev;
- int eth_mtu = iboe_get_mtu(netdev->mtu);
if (attr_mask & IB_QP_PKEY_INDEX) {
cmd->params.path_mtu_pkey_indx |= (attrs->pkey_index &
@@ -2176,9 +2159,11 @@
cmd->params.qkey = attrs->qkey;
cmd->flags |= OCRDMA_QP_PARA_QKEY_VALID;
}
- if (attr_mask & IB_QP_AV)
- ocrdma_set_av_params(qp, cmd, attrs);
- else if (qp->qp_type == IB_QPT_GSI || qp->qp_type == IB_QPT_UD) {
+ if (attr_mask & IB_QP_AV) {
+ status = ocrdma_set_av_params(qp, cmd, attrs);
+ if (status)
+ return status;
+ } else if (qp->qp_type == IB_QPT_GSI || qp->qp_type == IB_QPT_UD) {
/* set the default mac address for UD, GSI QPs */
cmd->params.dmac_b0_to_b3 = qp->dev->nic_info.mac_addr[0] |
(qp->dev->nic_info.mac_addr[1] << 8) |
@@ -2199,8 +2184,8 @@
cmd->flags |= OCRDMA_QP_PARA_DST_QPN_VALID;
}
if (attr_mask & IB_QP_PATH_MTU) {
- if (ib_mtu_enum_to_int(eth_mtu) <
- ib_mtu_enum_to_int(attrs->path_mtu)) {
+ if (attrs->path_mtu < IB_MTU_256 ||
+ attrs->path_mtu > IB_MTU_4096) {
status = -EINVAL;
goto pmtu_err;
}
@@ -2283,10 +2268,12 @@
OCRDMA_QP_PARAMS_STATE_SHIFT) &
OCRDMA_QP_PARAMS_STATE_MASK;
cmd->flags |= OCRDMA_QP_PARA_QPS_VALID;
- } else
+ } else {
cmd->params.max_sge_recv_flags |=
(qp->state << OCRDMA_QP_PARAMS_STATE_SHIFT) &
OCRDMA_QP_PARAMS_STATE_MASK;
+ }
+
status = ocrdma_set_qp_params(qp, cmd, attrs, attr_mask, old_qps);
if (status)
goto mbx_err;
@@ -2324,7 +2311,7 @@
return status;
}
-int ocrdma_mbx_create_srq(struct ocrdma_srq *srq,
+int ocrdma_mbx_create_srq(struct ocrdma_dev *dev, struct ocrdma_srq *srq,
struct ib_srq_init_attr *srq_attr,
struct ocrdma_pd *pd)
{
@@ -2334,7 +2321,6 @@
struct ocrdma_create_srq_rsp *rsp;
struct ocrdma_create_srq *cmd;
dma_addr_t pa;
- struct ocrdma_dev *dev = srq->dev;
struct pci_dev *pdev = dev->nic_info.pdev;
u32 max_rqe_allocated;
@@ -2404,13 +2390,16 @@
{
int status = -ENOMEM;
struct ocrdma_modify_srq *cmd;
- cmd = ocrdma_init_emb_mqe(OCRDMA_CMD_CREATE_SRQ, sizeof(*cmd));
+ struct ocrdma_pd *pd = srq->pd;
+ struct ocrdma_dev *dev = get_ocrdma_dev(pd->ibpd.device);
+
+ cmd = ocrdma_init_emb_mqe(OCRDMA_CMD_MODIFY_SRQ, sizeof(*cmd));
if (!cmd)
return status;
cmd->id = srq->id;
cmd->limit_max_rqe |= srq_attr->srq_limit <<
OCRDMA_MODIFY_SRQ_LIMIT_SHIFT;
- status = ocrdma_mbx_cmd(srq->dev, (struct ocrdma_mqe *)cmd);
+ status = ocrdma_mbx_cmd(dev, (struct ocrdma_mqe *)cmd);
kfree(cmd);
return status;
}
@@ -2419,11 +2408,13 @@
{
int status = -ENOMEM;
struct ocrdma_query_srq *cmd;
- cmd = ocrdma_init_emb_mqe(OCRDMA_CMD_CREATE_SRQ, sizeof(*cmd));
+ struct ocrdma_dev *dev = get_ocrdma_dev(srq->ibsrq.device);
+
+ cmd = ocrdma_init_emb_mqe(OCRDMA_CMD_QUERY_SRQ, sizeof(*cmd));
if (!cmd)
return status;
cmd->id = srq->rq.dbid;
- status = ocrdma_mbx_cmd(srq->dev, (struct ocrdma_mqe *)cmd);
+ status = ocrdma_mbx_cmd(dev, (struct ocrdma_mqe *)cmd);
if (status == 0) {
struct ocrdma_query_srq_rsp *rsp =
(struct ocrdma_query_srq_rsp *)cmd;
@@ -2448,7 +2439,7 @@
if (!cmd)
return status;
cmd->id = srq->id;
- status = ocrdma_mbx_cmd(srq->dev, (struct ocrdma_mqe *)cmd);
+ status = ocrdma_mbx_cmd(dev, (struct ocrdma_mqe *)cmd);
if (srq->rq.va)
dma_free_coherent(&pdev->dev, srq->rq.len,
srq->rq.va, srq->rq.pa);
@@ -2490,38 +2481,7 @@
return 0;
}
-static int ocrdma_create_mq_eq(struct ocrdma_dev *dev)
-{
- int status;
- int irq;
- unsigned long flags = 0;
- int num_eq = 0;
-
- if (dev->nic_info.intr_mode == BE_INTERRUPT_MODE_INTX)
- flags = IRQF_SHARED;
- else {
- num_eq = dev->nic_info.msix.num_vectors -
- dev->nic_info.msix.start_vector;
- /* minimum two vectors/eq are required for rdma to work.
- * one for control path and one for data path.
- */
- if (num_eq < 2)
- return -EBUSY;
- }
-
- status = ocrdma_create_eq(dev, &dev->meq, OCRDMA_EQ_LEN);
- if (status)
- return status;
- sprintf(dev->meq.irq_name, "ocrdma_mq%d", dev->id);
- irq = ocrdma_get_irq(dev, &dev->meq);
- status = request_irq(irq, ocrdma_irq_handler, flags, dev->meq.irq_name,
- &dev->meq);
- if (status)
- _ocrdma_destroy_eq(dev, &dev->meq);
- return status;
-}
-
-static int ocrdma_create_qp_eqs(struct ocrdma_dev *dev)
+static int ocrdma_create_eqs(struct ocrdma_dev *dev)
{
int num_eq, i, status = 0;
int irq;
@@ -2532,49 +2492,47 @@
if (dev->nic_info.intr_mode == BE_INTERRUPT_MODE_INTX) {
num_eq = 1;
flags = IRQF_SHARED;
- } else
+ } else {
num_eq = min_t(u32, num_eq, num_online_cpus());
- dev->qp_eq_tbl = kzalloc(sizeof(struct ocrdma_eq) * num_eq, GFP_KERNEL);
- if (!dev->qp_eq_tbl)
+ }
+
+ if (!num_eq)
+ return -EINVAL;
+
+ dev->eq_tbl = kzalloc(sizeof(struct ocrdma_eq) * num_eq, GFP_KERNEL);
+ if (!dev->eq_tbl)
return -ENOMEM;
for (i = 0; i < num_eq; i++) {
- status = ocrdma_create_eq(dev, &dev->qp_eq_tbl[i],
+ status = ocrdma_create_eq(dev, &dev->eq_tbl[i],
OCRDMA_EQ_LEN);
if (status) {
status = -EINVAL;
break;
}
- sprintf(dev->qp_eq_tbl[i].irq_name, "ocrdma_qp%d-%d",
+ sprintf(dev->eq_tbl[i].irq_name, "ocrdma%d-%d",
dev->id, i);
- irq = ocrdma_get_irq(dev, &dev->qp_eq_tbl[i]);
+ irq = ocrdma_get_irq(dev, &dev->eq_tbl[i]);
status = request_irq(irq, ocrdma_irq_handler, flags,
- dev->qp_eq_tbl[i].irq_name,
- &dev->qp_eq_tbl[i]);
- if (status) {
- _ocrdma_destroy_eq(dev, &dev->qp_eq_tbl[i]);
- status = -EINVAL;
- break;
- }
+ dev->eq_tbl[i].irq_name,
+ &dev->eq_tbl[i]);
+ if (status)
+ goto done;
dev->eq_cnt += 1;
}
/* one eq is sufficient for data path to work */
- if (dev->eq_cnt >= 1)
- return 0;
- if (status)
- ocrdma_destroy_qp_eqs(dev);
+ return 0;
+done:
+ ocrdma_destroy_eqs(dev);
return status;
}
int ocrdma_init_hw(struct ocrdma_dev *dev)
{
int status;
- /* set up control path eq */
- status = ocrdma_create_mq_eq(dev);
- if (status)
- return status;
- /* set up data path eq */
- status = ocrdma_create_qp_eqs(dev);
+
+ /* create the eqs */
+ status = ocrdma_create_eqs(dev);
if (status)
goto qpeq_err;
status = ocrdma_create_mq(dev);
@@ -2597,9 +2555,8 @@
conf_err:
ocrdma_destroy_mq(dev);
mq_err:
- ocrdma_destroy_qp_eqs(dev);
+ ocrdma_destroy_eqs(dev);
qpeq_err:
- ocrdma_destroy_eq(dev, &dev->meq);
pr_err("%s() status=%d\n", __func__, status);
return status;
}
@@ -2608,10 +2565,9 @@
{
ocrdma_mbx_delete_ah_tbl(dev);
- /* cleanup the data path eqs */
- ocrdma_destroy_qp_eqs(dev);
+ /* cleanup the eqs */
+ ocrdma_destroy_eqs(dev);
/* cleanup the control path */
ocrdma_destroy_mq(dev);
- ocrdma_destroy_eq(dev, &dev->meq);
}
diff --git a/drivers/infiniband/hw/ocrdma/ocrdma_hw.h b/drivers/infiniband/hw/ocrdma/ocrdma_hw.h
index be5db77..f2a89d4 100644
--- a/drivers/infiniband/hw/ocrdma/ocrdma_hw.h
+++ b/drivers/infiniband/hw/ocrdma/ocrdma_hw.h
@@ -78,6 +78,11 @@
#endif
}
+static inline u64 ocrdma_get_db_addr(struct ocrdma_dev *dev, u32 pdid)
+{
+ return dev->nic_info.unmapped_db + (pdid * dev->nic_info.db_page_size);
+}
+
int ocrdma_init_hw(struct ocrdma_dev *);
void ocrdma_cleanup_hw(struct ocrdma_dev *);
@@ -86,6 +91,7 @@
bool solicited, u16 cqe_popped);
/* verbs specific mailbox commands */
+int ocrdma_mbx_get_link_speed(struct ocrdma_dev *dev, u8 *lnk_speed);
int ocrdma_query_config(struct ocrdma_dev *,
struct ocrdma_mbx_query_config *config);
int ocrdma_resolve_dgid(struct ocrdma_dev *, union ib_gid *dgid, u8 *mac_addr);
@@ -100,7 +106,7 @@
int ocrdma_reg_mr(struct ocrdma_dev *, struct ocrdma_hw_mr *hwmr,
u32 pd_id, int acc);
int ocrdma_mbx_create_cq(struct ocrdma_dev *, struct ocrdma_cq *,
- int entries, int dpp_cq);
+ int entries, int dpp_cq, u16 pd_id);
int ocrdma_mbx_destroy_cq(struct ocrdma_dev *, struct ocrdma_cq *);
int ocrdma_mbx_create_qp(struct ocrdma_qp *, struct ib_qp_init_attr *attrs,
@@ -112,8 +118,7 @@
int ocrdma_mbx_query_qp(struct ocrdma_dev *, struct ocrdma_qp *,
struct ocrdma_qp_params *param);
int ocrdma_mbx_destroy_qp(struct ocrdma_dev *, struct ocrdma_qp *);
-
-int ocrdma_mbx_create_srq(struct ocrdma_srq *,
+int ocrdma_mbx_create_srq(struct ocrdma_dev *, struct ocrdma_srq *,
struct ib_srq_init_attr *,
struct ocrdma_pd *);
int ocrdma_mbx_modify_srq(struct ocrdma_srq *, struct ib_srq_attr *);
@@ -123,7 +128,7 @@
int ocrdma_alloc_av(struct ocrdma_dev *, struct ocrdma_ah *);
int ocrdma_free_av(struct ocrdma_dev *, struct ocrdma_ah *);
-int ocrdma_qp_state_machine(struct ocrdma_qp *, enum ib_qp_state new_state,
+int ocrdma_qp_state_change(struct ocrdma_qp *, enum ib_qp_state new_state,
enum ib_qp_state *old_ib_state);
bool ocrdma_is_qp_in_sq_flushlist(struct ocrdma_cq *, struct ocrdma_qp *);
bool ocrdma_is_qp_in_rq_flushlist(struct ocrdma_cq *, struct ocrdma_qp *);
diff --git a/drivers/infiniband/hw/ocrdma/ocrdma_main.c b/drivers/infiniband/hw/ocrdma/ocrdma_main.c
index ded416f1..56e0049 100644
--- a/drivers/infiniband/hw/ocrdma/ocrdma_main.c
+++ b/drivers/infiniband/hw/ocrdma/ocrdma_main.c
@@ -39,6 +39,7 @@
#include "ocrdma_ah.h"
#include "be_roce.h"
#include "ocrdma_hw.h"
+#include "ocrdma_abi.h"
MODULE_VERSION(OCRDMA_ROCE_DEV_VERSION);
MODULE_DESCRIPTION("Emulex RoCE HCA Driver");
@@ -265,6 +266,7 @@
memcpy(dev->ibdev.node_desc, OCRDMA_NODE_DESC,
sizeof(OCRDMA_NODE_DESC));
dev->ibdev.owner = THIS_MODULE;
+ dev->ibdev.uverbs_abi_ver = OCRDMA_ABI_VERSION;
dev->ibdev.uverbs_cmd_mask =
OCRDMA_UVERBS(GET_CONTEXT) |
OCRDMA_UVERBS(QUERY_DEVICE) |
@@ -326,9 +328,14 @@
dev->ibdev.req_notify_cq = ocrdma_arm_cq;
dev->ibdev.get_dma_mr = ocrdma_get_dma_mr;
+ dev->ibdev.reg_phys_mr = ocrdma_reg_kernel_mr;
dev->ibdev.dereg_mr = ocrdma_dereg_mr;
dev->ibdev.reg_user_mr = ocrdma_reg_user_mr;
+ dev->ibdev.alloc_fast_reg_mr = ocrdma_alloc_frmr;
+ dev->ibdev.alloc_fast_reg_page_list = ocrdma_alloc_frmr_page_list;
+ dev->ibdev.free_fast_reg_page_list = ocrdma_free_frmr_page_list;
+
/* mandatory to support user space verbs consumer. */
dev->ibdev.alloc_ucontext = ocrdma_alloc_ucontext;
dev->ibdev.dealloc_ucontext = ocrdma_dealloc_ucontext;
diff --git a/drivers/infiniband/hw/ocrdma/ocrdma_sli.h b/drivers/infiniband/hw/ocrdma/ocrdma_sli.h
index 36b062d..9f9570e 100644
--- a/drivers/infiniband/hw/ocrdma/ocrdma_sli.h
+++ b/drivers/infiniband/hw/ocrdma/ocrdma_sli.h
@@ -70,6 +70,7 @@
#define OCRDMA_SUBSYS_COMMON 1
enum {
+ OCRDMA_CMD_QUERY_NTWK_LINK_CONFIG_V1 = 5,
OCRDMA_CMD_CREATE_CQ = 12,
OCRDMA_CMD_CREATE_EQ = 13,
OCRDMA_CMD_CREATE_MQ = 21,
@@ -91,15 +92,15 @@
#define OCRDMA_MAX_QP 2048
#define OCRDMA_MAX_CQ 2048
+#define OCRDMA_MAX_STAG 8192
enum {
OCRDMA_DB_RQ_OFFSET = 0xE0,
- OCRDMA_DB_GEN2_RQ1_OFFSET = 0x100,
- OCRDMA_DB_GEN2_RQ2_OFFSET = 0xC0,
+ OCRDMA_DB_GEN2_RQ_OFFSET = 0x100,
OCRDMA_DB_SQ_OFFSET = 0x60,
OCRDMA_DB_GEN2_SQ_OFFSET = 0x1C0,
OCRDMA_DB_SRQ_OFFSET = OCRDMA_DB_RQ_OFFSET,
- OCRDMA_DB_GEN2_SRQ_OFFSET = OCRDMA_DB_GEN2_RQ1_OFFSET,
+ OCRDMA_DB_GEN2_SRQ_OFFSET = OCRDMA_DB_GEN2_RQ_OFFSET,
OCRDMA_DB_CQ_OFFSET = 0x120,
OCRDMA_DB_EQ_OFFSET = OCRDMA_DB_CQ_OFFSET,
OCRDMA_DB_MQ_OFFSET = 0x140
@@ -143,8 +144,11 @@
# 2: 16K Bytes
# 3: 32K Bytes
# 4: 64K Bytes
+# 5: 128K Bytes
+# 6: 256K Bytes
+# 7: 512K Bytes
*/
-#define OCRDMA_MAX_Q_PAGE_SIZE_CNT (5)
+#define OCRDMA_MAX_Q_PAGE_SIZE_CNT (8)
#define OCRDMA_Q_PAGE_BASE_SIZE (OCRDMA_MIN_Q_PAGE_SIZE * OCRDMA_MAX_Q_PAGES)
#define MAX_OCRDMA_QP_PAGES (8)
@@ -177,7 +181,7 @@
u32 timeout; /* in seconds */
u32 cmd_len;
u32 rsvd_version;
-} __packed;
+};
enum {
OCRDMA_MBX_RSP_OPCODE_SHIFT = 0,
@@ -197,7 +201,7 @@
u32 status;
u32 rsp_len;
u32 add_rsp_len;
-} __packed;
+};
enum {
OCRDMA_MQE_EMBEDDED = 1,
@@ -208,7 +212,7 @@
u32 pa_lo;
u32 pa_hi;
u32 len;
-} __packed;
+};
enum {
OCRDMA_MQE_HDR_EMB_SHIFT = 0,
@@ -225,12 +229,12 @@
u32 tag_lo;
u32 tag_hi;
u32 rsvd3;
-} __packed;
+};
struct ocrdma_mqe_emb_cmd {
struct ocrdma_mbx_hdr mch;
u8 pyld[220];
-} __packed;
+};
struct ocrdma_mqe {
struct ocrdma_mqe_hdr hdr;
@@ -242,7 +246,7 @@
u8 cmd[236];
struct ocrdma_mbx_rsp rsp;
} u;
-} __packed;
+};
#define OCRDMA_EQ_LEN 4096
#define OCRDMA_MQ_CQ_LEN 256
@@ -259,12 +263,12 @@
struct ocrdma_delete_q_req {
struct ocrdma_mbx_hdr req;
u32 id;
-} __packed;
+};
struct ocrdma_pa {
u32 lo;
u32 hi;
-} __packed;
+};
#define MAX_OCRDMA_EQ_PAGES (8)
struct ocrdma_create_eq_req {
@@ -275,7 +279,7 @@
u32 delay;
u32 rsvd;
struct ocrdma_pa pa[MAX_OCRDMA_EQ_PAGES];
-} __packed;
+};
enum {
OCRDMA_CREATE_EQ_VALID = Bit(29),
@@ -310,7 +314,7 @@
u32 tag_lo;
u32 tag_hi;
u32 valid_ae_cmpl_cons;
-} __packed;
+};
enum {
OCRDMA_AE_MCQE_QPVALID = Bit(31),
@@ -332,7 +336,21 @@
u32 cqvalid_cqid;
u32 evt_tag;
u32 valid_ae_event;
-} __packed;
+};
+
+enum {
+ OCRDMA_AE_PVID_MCQE_ENABLED_SHIFT = 0,
+ OCRDMA_AE_PVID_MCQE_ENABLED_MASK = 0xFF,
+ OCRDMA_AE_PVID_MCQE_TAG_SHIFT = 16,
+ OCRDMA_AE_PVID_MCQE_TAG_MASK = 0xFFFF << OCRDMA_AE_PVID_MCQE_TAG_SHIFT
+};
+
+struct ocrdma_ae_pvid_mcqe {
+ u32 tag_enabled;
+ u32 event_tag;
+ u32 rsvd1;
+ u32 rsvd2;
+};
enum {
OCRDMA_AE_MPA_MCQE_REQ_ID_SHIFT = 16,
@@ -356,7 +374,7 @@
u32 w1;
u32 w2;
u32 valid_ae_event;
-} __packed;
+};
enum {
OCRDMA_AE_QP_MCQE_NEW_QP_STATE_SHIFT = 0,
@@ -382,9 +400,11 @@
u32 w1;
u32 w2;
u32 valid_ae_event;
-} __packed;
+};
-#define OCRDMA_ASYNC_EVE_CODE 0x14
+#define OCRDMA_ASYNC_RDMA_EVE_CODE 0x14
+#define OCRDMA_ASYNC_GRP5_EVE_CODE 0x5
+#define OCRDMA_ASYNC_EVENT_PVID_STATE 0x3
enum OCRDMA_ASYNC_EVENT_TYPE {
OCRDMA_CQ_ERROR = 0x00,
@@ -487,7 +507,8 @@
u32 max_ird_ord_per_qp;
u32 max_shared_ird_ord;
u32 max_mr;
- u64 max_mr_size;
+ u32 max_mr_size_lo;
+ u32 max_mr_size_hi;
u32 max_num_mr_pbl;
u32 max_mw;
u32 max_fmr;
@@ -502,14 +523,14 @@
u32 max_wqes_rqes_per_q;
u32 max_cq_cqes_per_cq;
u32 max_srq_rqe_sge;
-} __packed;
+};
struct ocrdma_fw_ver_rsp {
struct ocrdma_mqe_hdr hdr;
struct ocrdma_mbx_rsp rsp;
u8 running_ver[32];
-} __packed;
+};
struct ocrdma_fw_conf_rsp {
struct ocrdma_mqe_hdr hdr;
@@ -535,14 +556,41 @@
u32 base_eqid;
u32 max_eq;
-} __packed;
+};
enum {
OCRDMA_FN_MODE_RDMA = 0x4
};
+struct ocrdma_get_link_speed_rsp {
+ struct ocrdma_mqe_hdr hdr;
+ struct ocrdma_mbx_rsp rsp;
+
+ u8 pt_port_num;
+ u8 link_duplex;
+ u8 phys_port_speed;
+ u8 phys_port_fault;
+ u16 rsvd1;
+ u16 qos_lnk_speed;
+ u8 logical_lnk_status;
+ u8 rsvd2[3];
+};
+
+enum {
+ OCRDMA_PHYS_LINK_SPEED_ZERO = 0x0,
+ OCRDMA_PHYS_LINK_SPEED_10MBPS = 0x1,
+ OCRDMA_PHYS_LINK_SPEED_100MBPS = 0x2,
+ OCRDMA_PHYS_LINK_SPEED_1GBPS = 0x3,
+ OCRDMA_PHYS_LINK_SPEED_10GBPS = 0x4,
+ OCRDMA_PHYS_LINK_SPEED_20GBPS = 0x5,
+ OCRDMA_PHYS_LINK_SPEED_25GBPS = 0x6,
+ OCRDMA_PHYS_LINK_SPEED_40GBPS = 0x7,
+ OCRDMA_PHYS_LINK_SPEED_100GBPS = 0x8
+};
+
enum {
OCRDMA_CREATE_CQ_VER2 = 2,
+ OCRDMA_CREATE_CQ_VER3 = 3,
OCRDMA_CREATE_CQ_PAGE_CNT_MASK = 0xFFFF,
OCRDMA_CREATE_CQ_PAGE_SIZE_SHIFT = 16,
@@ -576,7 +624,8 @@
u32 pgsz_pgcnt;
u32 ev_cnt_flags;
u32 eqn;
- u32 cqe_count;
+ u16 cqe_count;
+ u16 pd_id;
u32 rsvd6;
struct ocrdma_pa pa[OCRDMA_CREATE_CQ_MAX_PAGES];
};
@@ -584,7 +633,7 @@
struct ocrdma_create_cq {
struct ocrdma_mqe_hdr hdr;
struct ocrdma_create_cq_cmd cmd;
-} __packed;
+};
enum {
OCRDMA_CREATE_CQ_RSP_CQ_ID_MASK = 0xFFFF
@@ -593,12 +642,12 @@
struct ocrdma_create_cq_cmd_rsp {
struct ocrdma_mbx_rsp rsp;
u32 cq_id;
-} __packed;
+};
struct ocrdma_create_cq_rsp {
struct ocrdma_mqe_hdr hdr;
struct ocrdma_create_cq_cmd_rsp rsp;
-} __packed;
+};
enum {
OCRDMA_CREATE_MQ_V0_CQ_ID_SHIFT = 22,
@@ -617,12 +666,12 @@
u32 async_cqid_valid;
u32 rsvd;
struct ocrdma_pa pa[8];
-} __packed;
+};
struct ocrdma_create_mq_rsp {
struct ocrdma_mbx_rsp rsp;
u32 id;
-} __packed;
+};
enum {
OCRDMA_DESTROY_CQ_QID_SHIFT = 0,
@@ -637,12 +686,12 @@
struct ocrdma_mbx_hdr req;
u32 bypass_flush_qid;
-} __packed;
+};
struct ocrdma_destroy_cq_rsp {
struct ocrdma_mqe_hdr hdr;
struct ocrdma_mbx_rsp rsp;
-} __packed;
+};
enum {
OCRDMA_QPT_GSI = 1,
@@ -766,7 +815,7 @@
u32 dpp_credits_cqid;
u32 rpir_lkey;
struct ocrdma_pa ird_addr[MAX_OCRDMA_IRD_PAGES];
-} __packed;
+};
enum {
OCRDMA_CREATE_QP_RSP_QP_ID_SHIFT = 0,
@@ -820,18 +869,18 @@
u32 max_ord_ird;
u32 sq_rq_id;
u32 dpp_response;
-} __packed;
+};
struct ocrdma_destroy_qp {
struct ocrdma_mqe_hdr hdr;
struct ocrdma_mbx_hdr req;
u32 qp_id;
-} __packed;
+};
struct ocrdma_destroy_qp_rsp {
struct ocrdma_mqe_hdr hdr;
struct ocrdma_mbx_rsp rsp;
-} __packed;
+};
enum {
OCRDMA_MODIFY_QP_ID_SHIFT = 0,
@@ -975,7 +1024,7 @@
u32 dmac_b0_to_b3;
u32 vlan_dmac_b4_to_b5;
u32 qkey;
-} __packed;
+};
struct ocrdma_modify_qp {
@@ -986,7 +1035,7 @@
u32 flags;
u32 rdma_flags;
u32 num_outstanding_atomic_rd;
-} __packed;
+};
enum {
OCRDMA_MODIFY_QP_RSP_MAX_RQE_SHIFT = 0,
@@ -1007,7 +1056,7 @@
u32 max_wqe_rqe;
u32 max_ord_ird;
-} __packed;
+};
struct ocrdma_query_qp {
struct ocrdma_mqe_hdr hdr;
@@ -1016,13 +1065,13 @@
#define OCRDMA_QUERY_UP_QP_ID_SHIFT 0
#define OCRDMA_QUERY_UP_QP_ID_MASK 0xFFFFFF
u32 qp_id;
-} __packed;
+};
struct ocrdma_query_qp_rsp {
struct ocrdma_mqe_hdr hdr;
struct ocrdma_mbx_rsp rsp;
struct ocrdma_qp_params params;
-} __packed;
+};
enum {
OCRDMA_CREATE_SRQ_PD_ID_SHIFT = 0,
@@ -1051,7 +1100,7 @@
u32 max_sge_rqe;
u32 pages_rqe_sz;
struct ocrdma_pa rq_addr[MAX_OCRDMA_SRQ_PAGES];
-} __packed;
+};
enum {
OCRDMA_CREATE_SRQ_RSP_SRQ_ID_SHIFT = 0,
@@ -1070,7 +1119,7 @@
u32 id;
u32 max_sge_rqe_allocated;
-} __packed;
+};
enum {
OCRDMA_MODIFY_SRQ_ID_SHIFT = 0,
@@ -1089,7 +1138,7 @@
u32 id;
u32 limit_max_rqe;
-} __packed;
+};
enum {
OCRDMA_QUERY_SRQ_ID_SHIFT = 0,
@@ -1101,7 +1150,7 @@
struct ocrdma_mbx_rsp req;
u32 id;
-} __packed;
+};
enum {
OCRDMA_QUERY_SRQ_RSP_PD_ID_SHIFT = 0,
@@ -1123,7 +1172,7 @@
u32 max_rqe_pdid;
u32 srq_lmt_max_sge;
-} __packed;
+};
enum {
OCRDMA_DESTROY_SRQ_ID_SHIFT = 0,
@@ -1135,7 +1184,7 @@
struct ocrdma_mbx_rsp req;
u32 id;
-} __packed;
+};
enum {
OCRDMA_ALLOC_PD_ENABLE_DPP = BIT(16),
@@ -1147,7 +1196,7 @@
struct ocrdma_mqe_hdr hdr;
struct ocrdma_mbx_hdr req;
u32 enable_dpp_rsvd;
-} __packed;
+};
enum {
OCRDMA_ALLOC_PD_RSP_DPP = Bit(16),
@@ -1159,18 +1208,18 @@
struct ocrdma_mqe_hdr hdr;
struct ocrdma_mbx_rsp rsp;
u32 dpp_page_pdid;
-} __packed;
+};
struct ocrdma_dealloc_pd {
struct ocrdma_mqe_hdr hdr;
struct ocrdma_mbx_hdr req;
u32 id;
-} __packed;
+};
struct ocrdma_dealloc_pd_rsp {
struct ocrdma_mqe_hdr hdr;
struct ocrdma_mbx_rsp rsp;
-} __packed;
+};
enum {
OCRDMA_ADDR_CHECK_ENABLE = 1,
@@ -1206,7 +1255,7 @@
u32 pdid;
u32 pbl_sz_flags;
-} __packed;
+};
struct ocrdma_alloc_lkey_rsp {
struct ocrdma_mqe_hdr hdr;
@@ -1214,7 +1263,7 @@
u32 lrkey;
u32 num_pbl_rsvd;
-} __packed;
+};
struct ocrdma_dealloc_lkey {
struct ocrdma_mqe_hdr hdr;
@@ -1222,12 +1271,12 @@
u32 lkey;
u32 rsvd_frmr;
-} __packed;
+};
struct ocrdma_dealloc_lkey_rsp {
struct ocrdma_mqe_hdr hdr;
struct ocrdma_mbx_rsp rsp;
-} __packed;
+};
#define MAX_OCRDMA_NSMR_PBL (u32)22
#define MAX_OCRDMA_PBL_SIZE 65536
@@ -1273,7 +1322,7 @@
struct ocrdma_mqe_hdr hdr;
struct ocrdma_mbx_hdr cmd;
- u32 lrkey_key_index;
+ u32 fr_mr;
u32 num_pbl_pdid;
u32 flags_hpage_pbe_sz;
u32 totlen_low;
@@ -1283,7 +1332,7 @@
u32 va_loaddr;
u32 va_hiaddr;
struct ocrdma_pa pbl[MAX_OCRDMA_NSMR_PBL];
-} __packed;
+};
enum {
OCRDMA_REG_NSMR_CONT_PBL_SHIFT = 0,
@@ -1305,12 +1354,12 @@
u32 last;
struct ocrdma_pa pbl[MAX_OCRDMA_NSMR_PBL];
-} __packed;
+};
struct ocrdma_pbe {
u32 pa_hi;
u32 pa_lo;
-} __packed;
+};
enum {
OCRDMA_REG_NSMR_RSP_NUM_PBL_SHIFT = 16,
@@ -1322,7 +1371,7 @@
u32 lrkey;
u32 num_pbl;
-} __packed;
+};
enum {
OCRDMA_REG_NSMR_CONT_RSP_LRKEY_INDEX_SHIFT = 0,
@@ -1342,7 +1391,7 @@
u32 lrkey_key_index;
u32 num_pbl;
-} __packed;
+};
enum {
OCRDMA_ALLOC_MW_PD_ID_SHIFT = 0,
@@ -1354,7 +1403,7 @@
struct ocrdma_mbx_hdr req;
u32 pdid;
-} __packed;
+};
enum {
OCRDMA_ALLOC_MW_RSP_LRKEY_INDEX_SHIFT = 0,
@@ -1366,7 +1415,7 @@
struct ocrdma_mbx_rsp rsp;
u32 lrkey_index;
-} __packed;
+};
struct ocrdma_attach_mcast {
struct ocrdma_mqe_hdr hdr;
@@ -1375,12 +1424,12 @@
u8 mgid[16];
u32 mac_b0_to_b3;
u32 vlan_mac_b4_to_b5;
-} __packed;
+};
struct ocrdma_attach_mcast_rsp {
struct ocrdma_mqe_hdr hdr;
struct ocrdma_mbx_rsp rsp;
-} __packed;
+};
struct ocrdma_detach_mcast {
struct ocrdma_mqe_hdr hdr;
@@ -1389,12 +1438,12 @@
u8 mgid[16];
u32 mac_b0_to_b3;
u32 vlan_mac_b4_to_b5;
-} __packed;
+};
struct ocrdma_detach_mcast_rsp {
struct ocrdma_mqe_hdr hdr;
struct ocrdma_mbx_rsp rsp;
-} __packed;
+};
enum {
OCRDMA_CREATE_AH_NUM_PAGES_SHIFT = 19,
@@ -1418,24 +1467,24 @@
u32 ah_conf;
struct ocrdma_pa tbl_addr[8];
-} __packed;
+};
struct ocrdma_create_ah_tbl_rsp {
struct ocrdma_mqe_hdr hdr;
struct ocrdma_mbx_rsp rsp;
u32 ahid;
-} __packed;
+};
struct ocrdma_delete_ah_tbl {
struct ocrdma_mqe_hdr hdr;
struct ocrdma_mbx_hdr req;
u32 ahid;
-} __packed;
+};
struct ocrdma_delete_ah_tbl_rsp {
struct ocrdma_mqe_hdr hdr;
struct ocrdma_mbx_rsp rsp;
-} __packed;
+};
enum {
OCRDMA_EQE_VALID_SHIFT = 0,
@@ -1448,7 +1497,7 @@
struct ocrdma_eqe {
u32 id_valid;
-} __packed;
+};
enum OCRDMA_CQE_STATUS {
OCRDMA_CQE_SUCCESS = 0,
@@ -1532,14 +1581,14 @@
} cmn;
};
u32 flags_status_srcqpn; /* w3 */
-} __packed;
+};
struct ocrdma_sge {
u32 addr_hi;
u32 addr_lo;
u32 lrkey;
u32 len;
-} __packed;
+};
enum {
OCRDMA_FLAG_SIG = 0x1,
@@ -1563,6 +1612,7 @@
OCRDMA_SEND = 0x00,
OCRDMA_CMP_SWP = 0x14,
OCRDMA_BIND_MW = 0x10,
+ OCRDMA_FR_MR = 0x11,
OCRDMA_RESV1 = 0x0A,
OCRDMA_LKEY_INV = 0x15,
OCRDMA_FETCH_ADD = 0x13,
@@ -1600,14 +1650,26 @@
u32 lkey;
};
u32 total_len;
-} __packed;
+};
struct ocrdma_ewqe_ud_hdr {
u32 rsvd_dest_qpn;
u32 qkey;
u32 rsvd_ahid;
u32 rsvd;
-} __packed;
+};
+
+/* extended wqe followed by hdr_wqe for Fast Memory register */
+struct ocrdma_ewqe_fr {
+ u32 va_hi;
+ u32 va_lo;
+ u32 fbo_hi;
+ u32 fbo_lo;
+ u32 size_sge;
+ u32 num_sges;
+ u32 rsvd;
+ u32 rsvd2;
+};
struct ocrdma_eth_basic {
u8 dmac[6];
diff --git a/drivers/infiniband/hw/ocrdma/ocrdma_verbs.c b/drivers/infiniband/hw/ocrdma/ocrdma_verbs.c
index f36630e..6e982bb 100644
--- a/drivers/infiniband/hw/ocrdma/ocrdma_verbs.c
+++ b/drivers/infiniband/hw/ocrdma/ocrdma_verbs.c
@@ -75,14 +75,15 @@
attr->vendor_part_id = dev->nic_info.pdev->device;
attr->hw_ver = 0;
attr->max_qp = dev->attr.max_qp;
- attr->max_ah = dev->attr.max_qp;
+ attr->max_ah = OCRDMA_MAX_AH;
attr->max_qp_wr = dev->attr.max_wqe;
attr->device_cap_flags = IB_DEVICE_CURR_QP_STATE_MOD |
IB_DEVICE_RC_RNR_NAK_GEN |
IB_DEVICE_SHUTDOWN_PORT |
IB_DEVICE_SYS_IMAGE_GUID |
- IB_DEVICE_LOCAL_DMA_LKEY;
+ IB_DEVICE_LOCAL_DMA_LKEY |
+ IB_DEVICE_MEM_MGT_EXTENSIONS;
attr->max_sge = min(dev->attr.max_send_sge, dev->attr.max_srq_sge);
attr->max_sge_rd = 0;
attr->max_cq = dev->attr.max_cq;
@@ -96,7 +97,7 @@
attr->max_qp_rd_atom =
min(dev->attr.max_ord_per_qp, dev->attr.max_ird_per_qp);
attr->max_qp_init_rd_atom = dev->attr.max_ord_per_qp;
- attr->max_srq = (dev->attr.max_qp - 1);
+ attr->max_srq = dev->attr.max_srq;
attr->max_srq_sge = dev->attr.max_srq_sge;
attr->max_srq_wr = dev->attr.max_rqe;
attr->local_ca_ack_delay = dev->attr.local_ca_ack_delay;
@@ -105,6 +106,45 @@
return 0;
}
+static inline void get_link_speed_and_width(struct ocrdma_dev *dev,
+ u8 *ib_speed, u8 *ib_width)
+{
+ int status;
+ u8 speed;
+
+ status = ocrdma_mbx_get_link_speed(dev, &speed);
+ if (status)
+ speed = OCRDMA_PHYS_LINK_SPEED_ZERO;
+
+ switch (speed) {
+ case OCRDMA_PHYS_LINK_SPEED_1GBPS:
+ *ib_speed = IB_SPEED_SDR;
+ *ib_width = IB_WIDTH_1X;
+ break;
+
+ case OCRDMA_PHYS_LINK_SPEED_10GBPS:
+ *ib_speed = IB_SPEED_QDR;
+ *ib_width = IB_WIDTH_1X;
+ break;
+
+ case OCRDMA_PHYS_LINK_SPEED_20GBPS:
+ *ib_speed = IB_SPEED_DDR;
+ *ib_width = IB_WIDTH_4X;
+ break;
+
+ case OCRDMA_PHYS_LINK_SPEED_40GBPS:
+ *ib_speed = IB_SPEED_QDR;
+ *ib_width = IB_WIDTH_4X;
+ break;
+
+ default:
+ /* Unsupported */
+ *ib_speed = IB_SPEED_SDR;
+ *ib_width = IB_WIDTH_1X;
+ };
+}
+
+
int ocrdma_query_port(struct ib_device *ibdev,
u8 port, struct ib_port_attr *props)
{
@@ -141,8 +181,8 @@
props->pkey_tbl_len = 1;
props->bad_pkey_cntr = 0;
props->qkey_viol_cntr = 0;
- props->active_width = IB_WIDTH_1X;
- props->active_speed = 4;
+ get_link_speed_and_width(dev, &props->active_speed,
+ &props->active_width);
props->max_msg_sz = 0x80000000;
props->max_vl_num = 4;
return 0;
@@ -186,7 +226,7 @@
mutex_lock(&uctx->mm_list_lock);
list_for_each_entry_safe(mm, tmp, &uctx->mm_head, entry) {
- if (len != mm->key.len || phy_addr != mm->key.phy_addr)
+ if (len != mm->key.len && phy_addr != mm->key.phy_addr)
continue;
list_del(&mm->entry);
@@ -204,7 +244,7 @@
mutex_lock(&uctx->mm_list_lock);
list_for_each_entry(mm, &uctx->mm_head, entry) {
- if (len != mm->key.len || phy_addr != mm->key.phy_addr)
+ if (len != mm->key.len && phy_addr != mm->key.phy_addr)
continue;
found = true;
@@ -214,6 +254,108 @@
return found;
}
+static struct ocrdma_pd *_ocrdma_alloc_pd(struct ocrdma_dev *dev,
+ struct ocrdma_ucontext *uctx,
+ struct ib_udata *udata)
+{
+ struct ocrdma_pd *pd = NULL;
+ int status = 0;
+
+ pd = kzalloc(sizeof(*pd), GFP_KERNEL);
+ if (!pd)
+ return ERR_PTR(-ENOMEM);
+
+ if (udata && uctx) {
+ pd->dpp_enabled =
+ dev->nic_info.dev_family == OCRDMA_GEN2_FAMILY;
+ pd->num_dpp_qp =
+ pd->dpp_enabled ? OCRDMA_PD_MAX_DPP_ENABLED_QP : 0;
+ }
+
+retry:
+ status = ocrdma_mbx_alloc_pd(dev, pd);
+ if (status) {
+ if (pd->dpp_enabled) {
+ pd->dpp_enabled = false;
+ pd->num_dpp_qp = 0;
+ goto retry;
+ } else {
+ kfree(pd);
+ return ERR_PTR(status);
+ }
+ }
+
+ return pd;
+}
+
+static inline int is_ucontext_pd(struct ocrdma_ucontext *uctx,
+ struct ocrdma_pd *pd)
+{
+ return (uctx->cntxt_pd == pd ? true : false);
+}
+
+static int _ocrdma_dealloc_pd(struct ocrdma_dev *dev,
+ struct ocrdma_pd *pd)
+{
+ int status = 0;
+
+ status = ocrdma_mbx_dealloc_pd(dev, pd);
+ kfree(pd);
+ return status;
+}
+
+static int ocrdma_alloc_ucontext_pd(struct ocrdma_dev *dev,
+ struct ocrdma_ucontext *uctx,
+ struct ib_udata *udata)
+{
+ int status = 0;
+
+ uctx->cntxt_pd = _ocrdma_alloc_pd(dev, uctx, udata);
+ if (IS_ERR(uctx->cntxt_pd)) {
+ status = PTR_ERR(uctx->cntxt_pd);
+ uctx->cntxt_pd = NULL;
+ goto err;
+ }
+
+ uctx->cntxt_pd->uctx = uctx;
+ uctx->cntxt_pd->ibpd.device = &dev->ibdev;
+err:
+ return status;
+}
+
+static int ocrdma_dealloc_ucontext_pd(struct ocrdma_ucontext *uctx)
+{
+ int status = 0;
+ struct ocrdma_pd *pd = uctx->cntxt_pd;
+ struct ocrdma_dev *dev = get_ocrdma_dev(pd->ibpd.device);
+
+ BUG_ON(uctx->pd_in_use);
+ uctx->cntxt_pd = NULL;
+ status = _ocrdma_dealloc_pd(dev, pd);
+ return status;
+}
+
+static struct ocrdma_pd *ocrdma_get_ucontext_pd(struct ocrdma_ucontext *uctx)
+{
+ struct ocrdma_pd *pd = NULL;
+
+ mutex_lock(&uctx->mm_list_lock);
+ if (!uctx->pd_in_use) {
+ uctx->pd_in_use = true;
+ pd = uctx->cntxt_pd;
+ }
+ mutex_unlock(&uctx->mm_list_lock);
+
+ return pd;
+}
+
+static void ocrdma_release_ucontext_pd(struct ocrdma_ucontext *uctx)
+{
+ mutex_lock(&uctx->mm_list_lock);
+ uctx->pd_in_use = false;
+ mutex_unlock(&uctx->mm_list_lock);
+}
+
struct ib_ucontext *ocrdma_alloc_ucontext(struct ib_device *ibdev,
struct ib_udata *udata)
{
@@ -229,7 +371,6 @@
ctx = kzalloc(sizeof(*ctx), GFP_KERNEL);
if (!ctx)
return ERR_PTR(-ENOMEM);
- ctx->dev = dev;
INIT_LIST_HEAD(&ctx->mm_head);
mutex_init(&ctx->mm_list_lock);
@@ -249,6 +390,11 @@
status = ocrdma_add_mmap(ctx, resp.ah_tbl_page, resp.ah_tbl_len);
if (status)
goto map_err;
+
+ status = ocrdma_alloc_ucontext_pd(dev, ctx, udata);
+ if (status)
+ goto pd_err;
+
resp.dev_id = dev->id;
resp.max_inline_data = dev->attr.max_inline_data;
resp.wqe_size = dev->attr.wqe_size;
@@ -262,6 +408,7 @@
return &ctx->ibucontext;
cpy_err:
+pd_err:
ocrdma_del_mmap(ctx, ctx->ah_tbl.pa, ctx->ah_tbl.len);
map_err:
dma_free_coherent(&pdev->dev, ctx->ah_tbl.len, ctx->ah_tbl.va,
@@ -272,9 +419,13 @@
int ocrdma_dealloc_ucontext(struct ib_ucontext *ibctx)
{
+ int status = 0;
struct ocrdma_mm *mm, *tmp;
struct ocrdma_ucontext *uctx = get_ocrdma_ucontext(ibctx);
- struct pci_dev *pdev = uctx->dev->nic_info.pdev;
+ struct ocrdma_dev *dev = get_ocrdma_dev(ibctx->device);
+ struct pci_dev *pdev = dev->nic_info.pdev;
+
+ status = ocrdma_dealloc_ucontext_pd(uctx);
ocrdma_del_mmap(uctx, uctx->ah_tbl.pa, uctx->ah_tbl.len);
dma_free_coherent(&pdev->dev, uctx->ah_tbl.len, uctx->ah_tbl.va,
@@ -285,13 +436,13 @@
kfree(mm);
}
kfree(uctx);
- return 0;
+ return status;
}
int ocrdma_mmap(struct ib_ucontext *context, struct vm_area_struct *vma)
{
struct ocrdma_ucontext *ucontext = get_ocrdma_ucontext(context);
- struct ocrdma_dev *dev = ucontext->dev;
+ struct ocrdma_dev *dev = get_ocrdma_dev(context->device);
unsigned long vm_page = vma->vm_pgoff << PAGE_SHIFT;
u64 unmapped_db = (u64) dev->nic_info.unmapped_db;
unsigned long len = (vma->vm_end - vma->vm_start);
@@ -307,7 +458,10 @@
if ((vm_page >= unmapped_db) && (vm_page <= (unmapped_db +
dev->nic_info.db_total_size)) &&
(len <= dev->nic_info.db_page_size)) {
- /* doorbell mapping */
+ if (vma->vm_flags & VM_READ)
+ return -EPERM;
+
+ vma->vm_page_prot = pgprot_noncached(vma->vm_page_prot);
status = io_remap_pfn_range(vma, vma->vm_start, vma->vm_pgoff,
len, vma->vm_page_prot);
} else if (dev->nic_info.dpp_unmapped_len &&
@@ -315,19 +469,20 @@
(vm_page <= (u64) (dev->nic_info.dpp_unmapped_addr +
dev->nic_info.dpp_unmapped_len)) &&
(len <= dev->nic_info.dpp_unmapped_len)) {
- /* dpp area mapping */
+ if (vma->vm_flags & VM_READ)
+ return -EPERM;
+
vma->vm_page_prot = pgprot_writecombine(vma->vm_page_prot);
status = io_remap_pfn_range(vma, vma->vm_start, vma->vm_pgoff,
len, vma->vm_page_prot);
} else {
- /* queue memory mapping */
status = remap_pfn_range(vma, vma->vm_start,
vma->vm_pgoff, len, vma->vm_page_prot);
}
return status;
}
-static int ocrdma_copy_pd_uresp(struct ocrdma_pd *pd,
+static int ocrdma_copy_pd_uresp(struct ocrdma_dev *dev, struct ocrdma_pd *pd,
struct ib_ucontext *ib_ctx,
struct ib_udata *udata)
{
@@ -341,19 +496,18 @@
memset(&rsp, 0, sizeof(rsp));
rsp.id = pd->id;
rsp.dpp_enabled = pd->dpp_enabled;
- db_page_addr = pd->dev->nic_info.unmapped_db +
- (pd->id * pd->dev->nic_info.db_page_size);
- db_page_size = pd->dev->nic_info.db_page_size;
+ db_page_addr = ocrdma_get_db_addr(dev, pd->id);
+ db_page_size = dev->nic_info.db_page_size;
status = ocrdma_add_mmap(uctx, db_page_addr, db_page_size);
if (status)
return status;
if (pd->dpp_enabled) {
- dpp_page_addr = pd->dev->nic_info.dpp_unmapped_addr +
- (pd->id * OCRDMA_DPP_PAGE_SIZE);
+ dpp_page_addr = dev->nic_info.dpp_unmapped_addr +
+ (pd->id * PAGE_SIZE);
status = ocrdma_add_mmap(uctx, dpp_page_addr,
- OCRDMA_DPP_PAGE_SIZE);
+ PAGE_SIZE);
if (status)
goto dpp_map_err;
rsp.dpp_page_addr_hi = upper_32_bits(dpp_page_addr);
@@ -369,7 +523,7 @@
ucopy_err:
if (pd->dpp_enabled)
- ocrdma_del_mmap(pd->uctx, dpp_page_addr, OCRDMA_DPP_PAGE_SIZE);
+ ocrdma_del_mmap(pd->uctx, dpp_page_addr, PAGE_SIZE);
dpp_map_err:
ocrdma_del_mmap(pd->uctx, db_page_addr, db_page_size);
return status;
@@ -381,76 +535,75 @@
{
struct ocrdma_dev *dev = get_ocrdma_dev(ibdev);
struct ocrdma_pd *pd;
+ struct ocrdma_ucontext *uctx = NULL;
int status;
-
- pd = kzalloc(sizeof(*pd), GFP_KERNEL);
- if (!pd)
- return ERR_PTR(-ENOMEM);
- pd->dev = dev;
- if (udata && context) {
- pd->dpp_enabled = (dev->nic_info.dev_family ==
- OCRDMA_GEN2_FAMILY) ? true : false;
- pd->num_dpp_qp =
- pd->dpp_enabled ? OCRDMA_PD_MAX_DPP_ENABLED_QP : 0;
- }
- status = ocrdma_mbx_alloc_pd(dev, pd);
- if (status) {
- kfree(pd);
- return ERR_PTR(status);
- }
+ u8 is_uctx_pd = false;
if (udata && context) {
- status = ocrdma_copy_pd_uresp(pd, context, udata);
+ uctx = get_ocrdma_ucontext(context);
+ pd = ocrdma_get_ucontext_pd(uctx);
+ if (pd) {
+ is_uctx_pd = true;
+ goto pd_mapping;
+ }
+ }
+
+ pd = _ocrdma_alloc_pd(dev, uctx, udata);
+ if (IS_ERR(pd)) {
+ status = PTR_ERR(pd);
+ goto exit;
+ }
+
+pd_mapping:
+ if (udata && context) {
+ status = ocrdma_copy_pd_uresp(dev, pd, context, udata);
if (status)
goto err;
}
return &pd->ibpd;
err:
- ocrdma_dealloc_pd(&pd->ibpd);
+ if (is_uctx_pd) {
+ ocrdma_release_ucontext_pd(uctx);
+ } else {
+ status = ocrdma_mbx_dealloc_pd(dev, pd);
+ kfree(pd);
+ }
+exit:
return ERR_PTR(status);
}
int ocrdma_dealloc_pd(struct ib_pd *ibpd)
{
struct ocrdma_pd *pd = get_ocrdma_pd(ibpd);
- struct ocrdma_dev *dev = pd->dev;
- int status;
+ struct ocrdma_dev *dev = get_ocrdma_dev(ibpd->device);
+ struct ocrdma_ucontext *uctx = NULL;
+ int status = 0;
u64 usr_db;
- status = ocrdma_mbx_dealloc_pd(dev, pd);
- if (pd->uctx) {
+ uctx = pd->uctx;
+ if (uctx) {
u64 dpp_db = dev->nic_info.dpp_unmapped_addr +
- (pd->id * OCRDMA_DPP_PAGE_SIZE);
+ (pd->id * PAGE_SIZE);
if (pd->dpp_enabled)
- ocrdma_del_mmap(pd->uctx, dpp_db, OCRDMA_DPP_PAGE_SIZE);
- usr_db = dev->nic_info.unmapped_db +
- (pd->id * dev->nic_info.db_page_size);
+ ocrdma_del_mmap(pd->uctx, dpp_db, PAGE_SIZE);
+ usr_db = ocrdma_get_db_addr(dev, pd->id);
ocrdma_del_mmap(pd->uctx, usr_db, dev->nic_info.db_page_size);
+
+ if (is_ucontext_pd(uctx, pd)) {
+ ocrdma_release_ucontext_pd(uctx);
+ return status;
+ }
}
- kfree(pd);
+ status = _ocrdma_dealloc_pd(dev, pd);
return status;
}
-static struct ocrdma_mr *ocrdma_alloc_lkey(struct ib_pd *ibpd,
- int acc, u32 num_pbls,
- u32 addr_check)
+static int ocrdma_alloc_lkey(struct ocrdma_dev *dev, struct ocrdma_mr *mr,
+ u32 pdid, int acc, u32 num_pbls, u32 addr_check)
{
int status;
- struct ocrdma_mr *mr;
- struct ocrdma_pd *pd = get_ocrdma_pd(ibpd);
- struct ocrdma_dev *dev = pd->dev;
- if (acc & IB_ACCESS_REMOTE_WRITE && !(acc & IB_ACCESS_LOCAL_WRITE)) {
- pr_err("%s(%d) leaving err, invalid access rights\n",
- __func__, dev->id);
- return ERR_PTR(-EINVAL);
- }
-
- mr = kzalloc(sizeof(*mr), GFP_KERNEL);
- if (!mr)
- return ERR_PTR(-ENOMEM);
- mr->hwmr.dev = dev;
mr->hwmr.fr_mr = 0;
mr->hwmr.local_rd = 1;
mr->hwmr.remote_rd = (acc & IB_ACCESS_REMOTE_READ) ? 1 : 0;
@@ -460,25 +613,38 @@
mr->hwmr.remote_atomic = (acc & IB_ACCESS_REMOTE_ATOMIC) ? 1 : 0;
mr->hwmr.num_pbls = num_pbls;
- status = ocrdma_mbx_alloc_lkey(dev, &mr->hwmr, pd->id, addr_check);
- if (status) {
- kfree(mr);
- return ERR_PTR(-ENOMEM);
- }
- mr->pd = pd;
+ status = ocrdma_mbx_alloc_lkey(dev, &mr->hwmr, pdid, addr_check);
+ if (status)
+ return status;
+
mr->ibmr.lkey = mr->hwmr.lkey;
if (mr->hwmr.remote_wr || mr->hwmr.remote_rd)
mr->ibmr.rkey = mr->hwmr.lkey;
- return mr;
+ return 0;
}
struct ib_mr *ocrdma_get_dma_mr(struct ib_pd *ibpd, int acc)
{
+ int status;
struct ocrdma_mr *mr;
+ struct ocrdma_pd *pd = get_ocrdma_pd(ibpd);
+ struct ocrdma_dev *dev = get_ocrdma_dev(ibpd->device);
- mr = ocrdma_alloc_lkey(ibpd, acc, 0, OCRDMA_ADDR_CHECK_DISABLE);
- if (IS_ERR(mr))
- return ERR_CAST(mr);
+ if (acc & IB_ACCESS_REMOTE_WRITE && !(acc & IB_ACCESS_LOCAL_WRITE)) {
+ pr_err("%s err, invalid access rights\n", __func__);
+ return ERR_PTR(-EINVAL);
+ }
+
+ mr = kzalloc(sizeof(*mr), GFP_KERNEL);
+ if (!mr)
+ return ERR_PTR(-ENOMEM);
+
+ status = ocrdma_alloc_lkey(dev, mr, pd->id, acc, 0,
+ OCRDMA_ADDR_CHECK_DISABLE);
+ if (status) {
+ kfree(mr);
+ return ERR_PTR(status);
+ }
return &mr->ibmr;
}
@@ -502,7 +668,8 @@
}
}
-static int ocrdma_get_pbl_info(struct ocrdma_mr *mr, u32 num_pbes)
+static int ocrdma_get_pbl_info(struct ocrdma_dev *dev, struct ocrdma_mr *mr,
+ u32 num_pbes)
{
u32 num_pbls = 0;
u32 idx = 0;
@@ -518,7 +685,7 @@
num_pbls = roundup(num_pbes, (pbl_size / sizeof(u64)));
num_pbls = num_pbls / (pbl_size / sizeof(u64));
idx++;
- } while (num_pbls >= mr->hwmr.dev->attr.max_num_mr_pbl);
+ } while (num_pbls >= dev->attr.max_num_mr_pbl);
mr->hwmr.num_pbes = num_pbes;
mr->hwmr.num_pbls = num_pbls;
@@ -613,13 +780,12 @@
u64 usr_addr, int acc, struct ib_udata *udata)
{
int status = -ENOMEM;
- struct ocrdma_dev *dev;
+ struct ocrdma_dev *dev = get_ocrdma_dev(ibpd->device);
struct ocrdma_mr *mr;
struct ocrdma_pd *pd;
u32 num_pbes;
pd = get_ocrdma_pd(ibpd);
- dev = pd->dev;
if (acc & IB_ACCESS_REMOTE_WRITE && !(acc & IB_ACCESS_LOCAL_WRITE))
return ERR_PTR(-EINVAL);
@@ -627,14 +793,13 @@
mr = kzalloc(sizeof(*mr), GFP_KERNEL);
if (!mr)
return ERR_PTR(status);
- mr->hwmr.dev = dev;
mr->umem = ib_umem_get(ibpd->uobject->context, start, len, acc, 0);
if (IS_ERR(mr->umem)) {
status = -EFAULT;
goto umem_err;
}
num_pbes = ib_umem_page_count(mr->umem);
- status = ocrdma_get_pbl_info(mr, num_pbes);
+ status = ocrdma_get_pbl_info(dev, mr, num_pbes);
if (status)
goto umem_err;
@@ -654,7 +819,6 @@
status = ocrdma_reg_mr(dev, &mr->hwmr, pd->id, acc);
if (status)
goto mbx_err;
- mr->pd = pd;
mr->ibmr.lkey = mr->hwmr.lkey;
if (mr->hwmr.remote_wr || mr->hwmr.remote_rd)
mr->ibmr.rkey = mr->hwmr.lkey;
@@ -671,7 +835,7 @@
int ocrdma_dereg_mr(struct ib_mr *ib_mr)
{
struct ocrdma_mr *mr = get_ocrdma_mr(ib_mr);
- struct ocrdma_dev *dev = mr->hwmr.dev;
+ struct ocrdma_dev *dev = get_ocrdma_dev(ib_mr->device);
int status;
status = ocrdma_mbx_dealloc_lkey(dev, mr->hwmr.fr_mr, mr->hwmr.lkey);
@@ -686,29 +850,29 @@
return status;
}
-static int ocrdma_copy_cq_uresp(struct ocrdma_cq *cq, struct ib_udata *udata,
+static int ocrdma_copy_cq_uresp(struct ocrdma_dev *dev, struct ocrdma_cq *cq,
+ struct ib_udata *udata,
struct ib_ucontext *ib_ctx)
{
int status;
- struct ocrdma_ucontext *uctx;
+ struct ocrdma_ucontext *uctx = get_ocrdma_ucontext(ib_ctx);
struct ocrdma_create_cq_uresp uresp;
memset(&uresp, 0, sizeof(uresp));
uresp.cq_id = cq->id;
- uresp.page_size = cq->len;
+ uresp.page_size = PAGE_ALIGN(cq->len);
uresp.num_pages = 1;
uresp.max_hw_cqe = cq->max_hw_cqe;
uresp.page_addr[0] = cq->pa;
- uresp.db_page_addr = cq->dev->nic_info.unmapped_db;
- uresp.db_page_size = cq->dev->nic_info.db_page_size;
+ uresp.db_page_addr = ocrdma_get_db_addr(dev, uctx->cntxt_pd->id);
+ uresp.db_page_size = dev->nic_info.db_page_size;
uresp.phase_change = cq->phase_change ? 1 : 0;
status = ib_copy_to_udata(udata, &uresp, sizeof(uresp));
if (status) {
pr_err("%s(%d) copy error cqid=0x%x.\n",
- __func__, cq->dev->id, cq->id);
+ __func__, dev->id, cq->id);
goto err;
}
- uctx = get_ocrdma_ucontext(ib_ctx);
status = ocrdma_add_mmap(uctx, uresp.db_page_addr, uresp.db_page_size);
if (status)
goto err;
@@ -728,6 +892,8 @@
{
struct ocrdma_cq *cq;
struct ocrdma_dev *dev = get_ocrdma_dev(ibdev);
+ struct ocrdma_ucontext *uctx = NULL;
+ u16 pd_id = 0;
int status;
struct ocrdma_create_cq_ureq ureq;
@@ -744,15 +910,19 @@
spin_lock_init(&cq->comp_handler_lock);
INIT_LIST_HEAD(&cq->sq_head);
INIT_LIST_HEAD(&cq->rq_head);
- cq->dev = dev;
- status = ocrdma_mbx_create_cq(dev, cq, entries, ureq.dpp_cq);
+ if (ib_ctx) {
+ uctx = get_ocrdma_ucontext(ib_ctx);
+ pd_id = uctx->cntxt_pd->id;
+ }
+
+ status = ocrdma_mbx_create_cq(dev, cq, entries, ureq.dpp_cq, pd_id);
if (status) {
kfree(cq);
return ERR_PTR(status);
}
if (ib_ctx) {
- status = ocrdma_copy_cq_uresp(cq, udata, ib_ctx);
+ status = ocrdma_copy_cq_uresp(dev, cq, udata, ib_ctx);
if (status)
goto ctx_err;
}
@@ -786,13 +956,17 @@
{
int status;
struct ocrdma_cq *cq = get_ocrdma_cq(ibcq);
- struct ocrdma_dev *dev = cq->dev;
+ struct ocrdma_dev *dev = get_ocrdma_dev(ibcq->device);
+ int pdid = 0;
status = ocrdma_mbx_destroy_cq(dev, cq);
if (cq->ucontext) {
- ocrdma_del_mmap(cq->ucontext, (u64) cq->pa, cq->len);
- ocrdma_del_mmap(cq->ucontext, dev->nic_info.unmapped_db,
+ pdid = cq->ucontext->cntxt_pd->id;
+ ocrdma_del_mmap(cq->ucontext, (u64) cq->pa,
+ PAGE_ALIGN(cq->len));
+ ocrdma_del_mmap(cq->ucontext,
+ ocrdma_get_db_addr(dev, pdid),
dev->nic_info.db_page_size);
}
dev->cq_tbl[cq->id] = NULL;
@@ -820,14 +994,17 @@
static int ocrdma_check_qp_params(struct ib_pd *ibpd, struct ocrdma_dev *dev,
struct ib_qp_init_attr *attrs)
{
- if (attrs->qp_type != IB_QPT_GSI &&
- attrs->qp_type != IB_QPT_RC &&
- attrs->qp_type != IB_QPT_UD) {
+ if ((attrs->qp_type != IB_QPT_GSI) &&
+ (attrs->qp_type != IB_QPT_RC) &&
+ (attrs->qp_type != IB_QPT_UC) &&
+ (attrs->qp_type != IB_QPT_UD)) {
pr_err("%s(%d) unsupported qp type=0x%x requested\n",
__func__, dev->id, attrs->qp_type);
return -EINVAL;
}
- if (attrs->cap.max_send_wr > dev->attr.max_wqe) {
+ /* Skip the check for QP1 to support CM size of 128 */
+ if ((attrs->qp_type != IB_QPT_GSI) &&
+ (attrs->cap.max_send_wr > dev->attr.max_wqe)) {
pr_err("%s(%d) unsupported send_wr=0x%x requested\n",
__func__, dev->id, attrs->cap.max_send_wr);
pr_err("%s(%d) supported send_wr=0x%x\n",
@@ -878,11 +1055,9 @@
/* verify consumer QPs are not trying to use GSI QP's CQ */
if ((attrs->qp_type != IB_QPT_GSI) && (dev->gsi_qp_created)) {
if ((dev->gsi_sqcq == get_ocrdma_cq(attrs->send_cq)) ||
- (dev->gsi_sqcq == get_ocrdma_cq(attrs->recv_cq)) ||
- (dev->gsi_rqcq == get_ocrdma_cq(attrs->send_cq)) ||
- (dev->gsi_rqcq == get_ocrdma_cq(attrs->recv_cq))) {
+ (dev->gsi_rqcq == get_ocrdma_cq(attrs->recv_cq))) {
pr_err("%s(%d) Consumer QP cannot use GSI CQs.\n",
- __func__, dev->id);
+ __func__, dev->id);
return -EINVAL;
}
}
@@ -905,13 +1080,13 @@
uresp.qp_id = qp->id;
uresp.sq_dbid = qp->sq.dbid;
uresp.num_sq_pages = 1;
- uresp.sq_page_size = qp->sq.len;
+ uresp.sq_page_size = PAGE_ALIGN(qp->sq.len);
uresp.sq_page_addr[0] = qp->sq.pa;
uresp.num_wqe_allocated = qp->sq.max_cnt;
if (!srq) {
uresp.rq_dbid = qp->rq.dbid;
uresp.num_rq_pages = 1;
- uresp.rq_page_size = qp->rq.len;
+ uresp.rq_page_size = PAGE_ALIGN(qp->rq.len);
uresp.rq_page_addr[0] = qp->rq.pa;
uresp.num_rqe_allocated = qp->rq.max_cnt;
}
@@ -919,9 +1094,8 @@
uresp.db_page_size = dev->nic_info.db_page_size;
if (dev->nic_info.dev_family == OCRDMA_GEN2_FAMILY) {
uresp.db_sq_offset = OCRDMA_DB_GEN2_SQ_OFFSET;
- uresp.db_rq_offset = ((qp->id & 0xFFFF) < 128) ?
- OCRDMA_DB_GEN2_RQ1_OFFSET : OCRDMA_DB_GEN2_RQ2_OFFSET;
- uresp.db_shift = (qp->id < 128) ? 24 : 16;
+ uresp.db_rq_offset = OCRDMA_DB_GEN2_RQ_OFFSET;
+ uresp.db_shift = 24;
} else {
uresp.db_sq_offset = OCRDMA_DB_SQ_OFFSET;
uresp.db_rq_offset = OCRDMA_DB_RQ_OFFSET;
@@ -964,8 +1138,7 @@
OCRDMA_DB_GEN2_SQ_OFFSET;
qp->rq_db = dev->nic_info.db +
(pd->id * dev->nic_info.db_page_size) +
- ((qp->id < 128) ?
- OCRDMA_DB_GEN2_RQ1_OFFSET : OCRDMA_DB_GEN2_RQ2_OFFSET);
+ OCRDMA_DB_GEN2_RQ_OFFSET;
} else {
qp->sq_db = dev->nic_info.db +
(pd->id * dev->nic_info.db_page_size) +
@@ -1006,6 +1179,7 @@
qp->sq.max_sges = attrs->cap.max_send_sge;
qp->rq.max_sges = attrs->cap.max_recv_sge;
qp->state = OCRDMA_QPS_RST;
+ qp->signaled = (attrs->sq_sig_type == IB_SIGNAL_ALL_WR) ? true : false;
}
@@ -1026,7 +1200,7 @@
int status;
struct ocrdma_pd *pd = get_ocrdma_pd(ibpd);
struct ocrdma_qp *qp;
- struct ocrdma_dev *dev = pd->dev;
+ struct ocrdma_dev *dev = get_ocrdma_dev(ibpd->device);
struct ocrdma_create_qp_ureq ureq;
u16 dpp_credit_lmt, dpp_offset;
@@ -1046,6 +1220,9 @@
}
qp->dev = dev;
ocrdma_set_qp_init_params(qp, pd, attrs);
+ if (udata == NULL)
+ qp->cap_flags |= (OCRDMA_QP_MW_BIND | OCRDMA_QP_LKEY0 |
+ OCRDMA_QP_FAST_REG);
mutex_lock(&dev->dev_lock);
status = ocrdma_mbx_create_qp(qp, attrs, ureq.enable_dpp_cq,
@@ -1056,8 +1233,6 @@
/* user space QP's wr_id table are managed in library */
if (udata == NULL) {
- qp->cap_flags |= (OCRDMA_QP_MW_BIND | OCRDMA_QP_LKEY0 |
- OCRDMA_QP_FAST_REG);
status = ocrdma_alloc_wr_id_tbl(qp);
if (status)
goto map_err;
@@ -1093,6 +1268,17 @@
return ERR_PTR(status);
}
+
+static void ocrdma_flush_rq_db(struct ocrdma_qp *qp)
+{
+ if (qp->db_cache) {
+ u32 val = qp->rq.dbid | (qp->db_cache <<
+ ocrdma_get_num_posted_shift(qp));
+ iowrite32(val, qp->rq_db);
+ qp->db_cache = 0;
+ }
+}
+
int _ocrdma_modify_qp(struct ib_qp *ibqp, struct ib_qp_attr *attr,
int attr_mask)
{
@@ -1104,13 +1290,16 @@
qp = get_ocrdma_qp(ibqp);
dev = qp->dev;
if (attr_mask & IB_QP_STATE)
- status = ocrdma_qp_state_machine(qp, attr->qp_state, &old_qps);
+ status = ocrdma_qp_state_change(qp, attr->qp_state, &old_qps);
/* if new and previous states are same hw doesn't need to
* know about it.
*/
if (status < 0)
return status;
status = ocrdma_mbx_modify_qp(dev, qp, attr, attr_mask, old_qps);
+ if (!status && attr_mask & IB_QP_STATE && attr->qp_state == IB_QPS_RTR)
+ ocrdma_flush_rq_db(qp);
+
return status;
}
@@ -1215,7 +1404,7 @@
qp_attr->cap.max_recv_wr = qp->rq.max_cnt - 1;
qp_attr->cap.max_send_sge = qp->sq.max_sges;
qp_attr->cap.max_recv_sge = qp->rq.max_sges;
- qp_attr->cap.max_inline_data = dev->attr.max_inline_data;
+ qp_attr->cap.max_inline_data = qp->max_inline_data;
qp_init_attr->cap = qp_attr->cap;
memcpy(&qp_attr->ah_attr.grh.dgid, ¶ms.dgid[0],
sizeof(params.dgid));
@@ -1278,23 +1467,17 @@
static int ocrdma_hwq_free_cnt(struct ocrdma_qp_hwq_info *q)
{
- int free_cnt;
- if (q->head >= q->tail)
- free_cnt = (q->max_cnt - q->head) + q->tail;
- else
- free_cnt = q->tail - q->head;
- return free_cnt;
+ return ((q->max_wqe_idx - q->head) + q->tail) % q->max_cnt;
}
static int is_hw_sq_empty(struct ocrdma_qp *qp)
{
- return (qp->sq.tail == qp->sq.head &&
- ocrdma_hwq_free_cnt(&qp->sq) ? 1 : 0);
+ return (qp->sq.tail == qp->sq.head);
}
static int is_hw_rq_empty(struct ocrdma_qp *qp)
{
- return (qp->rq.tail == qp->rq.head) ? 1 : 0;
+ return (qp->rq.tail == qp->rq.head);
}
static void *ocrdma_hwq_head(struct ocrdma_qp_hwq_info *q)
@@ -1360,17 +1543,18 @@
*/
discard_cnt += 1;
cqe->cmn.qpn = 0;
- if (is_cqe_for_sq(cqe))
+ if (is_cqe_for_sq(cqe)) {
ocrdma_hwq_inc_tail(&qp->sq);
- else {
+ } else {
if (qp->srq) {
spin_lock_irqsave(&qp->srq->q_lock, flags);
ocrdma_hwq_inc_tail(&qp->srq->rq);
ocrdma_srq_toggle_bit(qp->srq, cur_getp);
spin_unlock_irqrestore(&qp->srq->q_lock, flags);
- } else
+ } else {
ocrdma_hwq_inc_tail(&qp->rq);
+ }
}
skip_cqe:
cur_getp = (cur_getp + 1) % cq->max_hw_cqe;
@@ -1378,7 +1562,7 @@
spin_unlock_irqrestore(&cq->cq_lock, cq_flags);
}
-static void ocrdma_del_flush_qp(struct ocrdma_qp *qp)
+void ocrdma_del_flush_qp(struct ocrdma_qp *qp)
{
int found = false;
unsigned long flags;
@@ -1444,9 +1628,11 @@
mutex_unlock(&dev->dev_lock);
if (pd->uctx) {
- ocrdma_del_mmap(pd->uctx, (u64) qp->sq.pa, qp->sq.len);
+ ocrdma_del_mmap(pd->uctx, (u64) qp->sq.pa,
+ PAGE_ALIGN(qp->sq.len));
if (!qp->srq)
- ocrdma_del_mmap(pd->uctx, (u64) qp->rq.pa, qp->rq.len);
+ ocrdma_del_mmap(pd->uctx, (u64) qp->rq.pa,
+ PAGE_ALIGN(qp->rq.len));
}
ocrdma_del_flush_qp(qp);
@@ -1457,7 +1643,8 @@
return status;
}
-static int ocrdma_copy_srq_uresp(struct ocrdma_srq *srq, struct ib_udata *udata)
+static int ocrdma_copy_srq_uresp(struct ocrdma_dev *dev, struct ocrdma_srq *srq,
+ struct ib_udata *udata)
{
int status;
struct ocrdma_create_srq_uresp uresp;
@@ -1467,12 +1654,12 @@
uresp.num_rq_pages = 1;
uresp.rq_page_addr[0] = srq->rq.pa;
uresp.rq_page_size = srq->rq.len;
- uresp.db_page_addr = srq->dev->nic_info.unmapped_db +
- (srq->pd->id * srq->dev->nic_info.db_page_size);
- uresp.db_page_size = srq->dev->nic_info.db_page_size;
+ uresp.db_page_addr = dev->nic_info.unmapped_db +
+ (srq->pd->id * dev->nic_info.db_page_size);
+ uresp.db_page_size = dev->nic_info.db_page_size;
uresp.num_rqe_allocated = srq->rq.max_cnt;
- if (srq->dev->nic_info.dev_family == OCRDMA_GEN2_FAMILY) {
- uresp.db_rq_offset = OCRDMA_DB_GEN2_RQ1_OFFSET;
+ if (dev->nic_info.dev_family == OCRDMA_GEN2_FAMILY) {
+ uresp.db_rq_offset = OCRDMA_DB_GEN2_RQ_OFFSET;
uresp.db_shift = 24;
} else {
uresp.db_rq_offset = OCRDMA_DB_RQ_OFFSET;
@@ -1495,7 +1682,7 @@
{
int status = -ENOMEM;
struct ocrdma_pd *pd = get_ocrdma_pd(ibpd);
- struct ocrdma_dev *dev = pd->dev;
+ struct ocrdma_dev *dev = get_ocrdma_dev(ibpd->device);
struct ocrdma_srq *srq;
if (init_attr->attr.max_sge > dev->attr.max_recv_sge)
@@ -1508,10 +1695,9 @@
return ERR_PTR(status);
spin_lock_init(&srq->q_lock);
- srq->dev = dev;
srq->pd = pd;
srq->db = dev->nic_info.db + (pd->id * dev->nic_info.db_page_size);
- status = ocrdma_mbx_create_srq(srq, init_attr, pd);
+ status = ocrdma_mbx_create_srq(dev, srq, init_attr, pd);
if (status)
goto err;
@@ -1538,7 +1724,7 @@
}
if (udata) {
- status = ocrdma_copy_srq_uresp(srq, udata);
+ status = ocrdma_copy_srq_uresp(dev, srq, udata);
if (status)
goto arm_err;
}
@@ -1584,15 +1770,15 @@
{
int status;
struct ocrdma_srq *srq;
- struct ocrdma_dev *dev;
+ struct ocrdma_dev *dev = get_ocrdma_dev(ibsrq->device);
srq = get_ocrdma_srq(ibsrq);
- dev = srq->dev;
status = ocrdma_mbx_destroy_srq(dev, srq);
if (srq->pd->uctx)
- ocrdma_del_mmap(srq->pd->uctx, (u64) srq->rq.pa, srq->rq.len);
+ ocrdma_del_mmap(srq->pd->uctx, (u64) srq->rq.pa,
+ PAGE_ALIGN(srq->rq.len));
kfree(srq->idx_bit_fields);
kfree(srq->rqe_wr_id_tbl);
@@ -1634,23 +1820,43 @@
memset(sge, 0, sizeof(*sge));
}
+static inline uint32_t ocrdma_sglist_len(struct ib_sge *sg_list, int num_sge)
+{
+ uint32_t total_len = 0, i;
+
+ for (i = 0; i < num_sge; i++)
+ total_len += sg_list[i].length;
+ return total_len;
+}
+
+
static int ocrdma_build_inline_sges(struct ocrdma_qp *qp,
struct ocrdma_hdr_wqe *hdr,
struct ocrdma_sge *sge,
struct ib_send_wr *wr, u32 wqe_size)
{
- if (wr->send_flags & IB_SEND_INLINE) {
- if (wr->sg_list[0].length > qp->max_inline_data) {
+ int i;
+ char *dpp_addr;
+
+ if (wr->send_flags & IB_SEND_INLINE && qp->qp_type != IB_QPT_UD) {
+ hdr->total_len = ocrdma_sglist_len(wr->sg_list, wr->num_sge);
+ if (unlikely(hdr->total_len > qp->max_inline_data)) {
pr_err("%s() supported_len=0x%x,\n"
" unspported len req=0x%x\n", __func__,
- qp->max_inline_data, wr->sg_list[0].length);
+ qp->max_inline_data, hdr->total_len);
return -EINVAL;
}
- memcpy(sge,
- (void *)(unsigned long)wr->sg_list[0].addr,
- wr->sg_list[0].length);
- hdr->total_len = wr->sg_list[0].length;
+ dpp_addr = (char *)sge;
+ for (i = 0; i < wr->num_sge; i++) {
+ memcpy(dpp_addr,
+ (void *)(unsigned long)wr->sg_list[i].addr,
+ wr->sg_list[i].length);
+ dpp_addr += wr->sg_list[i].length;
+ }
+
wqe_size += roundup(hdr->total_len, OCRDMA_WQE_ALIGN_BYTES);
+ if (0 == hdr->total_len)
+ wqe_size += sizeof(struct ocrdma_sge);
hdr->cw |= (OCRDMA_TYPE_INLINE << OCRDMA_WQE_TYPE_SHIFT);
} else {
ocrdma_build_sges(hdr, sge, wr->num_sge, wr->sg_list);
@@ -1675,8 +1881,9 @@
ocrdma_build_ud_hdr(qp, hdr, wr);
sge = (struct ocrdma_sge *)(hdr + 2);
wqe_size += sizeof(struct ocrdma_ewqe_ud_hdr);
- } else
+ } else {
sge = (struct ocrdma_sge *)(hdr + 1);
+ }
status = ocrdma_build_inline_sges(qp, hdr, sge, wr, wqe_size);
return status;
@@ -1719,6 +1926,96 @@
ext_rw->len = hdr->total_len;
}
+static void build_frmr_pbes(struct ib_send_wr *wr, struct ocrdma_pbl *pbl_tbl,
+ struct ocrdma_hw_mr *hwmr)
+{
+ int i;
+ u64 buf_addr = 0;
+ int num_pbes;
+ struct ocrdma_pbe *pbe;
+
+ pbe = (struct ocrdma_pbe *)pbl_tbl->va;
+ num_pbes = 0;
+
+ /* go through the OS phy regions & fill hw pbe entries into pbls. */
+ for (i = 0; i < wr->wr.fast_reg.page_list_len; i++) {
+ /* number of pbes can be more for one OS buf, when
+ * buffers are of different sizes.
+ * split the ib_buf to one or more pbes.
+ */
+ buf_addr = wr->wr.fast_reg.page_list->page_list[i];
+ pbe->pa_lo = cpu_to_le32((u32) (buf_addr & PAGE_MASK));
+ pbe->pa_hi = cpu_to_le32((u32) upper_32_bits(buf_addr));
+ num_pbes += 1;
+ pbe++;
+
+ /* if the pbl is full storing the pbes,
+ * move to next pbl.
+ */
+ if (num_pbes == (hwmr->pbl_size/sizeof(u64))) {
+ pbl_tbl++;
+ pbe = (struct ocrdma_pbe *)pbl_tbl->va;
+ }
+ }
+ return;
+}
+
+static int get_encoded_page_size(int pg_sz)
+{
+ /* Max size is 256M 4096 << 16 */
+ int i = 0;
+ for (; i < 17; i++)
+ if (pg_sz == (4096 << i))
+ break;
+ return i;
+}
+
+
+static int ocrdma_build_fr(struct ocrdma_qp *qp, struct ocrdma_hdr_wqe *hdr,
+ struct ib_send_wr *wr)
+{
+ u64 fbo;
+ struct ocrdma_ewqe_fr *fast_reg = (struct ocrdma_ewqe_fr *)(hdr + 1);
+ struct ocrdma_mr *mr;
+ u32 wqe_size = sizeof(*fast_reg) + sizeof(*hdr);
+
+ wqe_size = roundup(wqe_size, OCRDMA_WQE_ALIGN_BYTES);
+
+ if ((wr->wr.fast_reg.page_list_len >
+ qp->dev->attr.max_pages_per_frmr) ||
+ (wr->wr.fast_reg.length > 0xffffffffULL))
+ return -EINVAL;
+
+ hdr->cw |= (OCRDMA_FR_MR << OCRDMA_WQE_OPCODE_SHIFT);
+ hdr->cw |= ((wqe_size / OCRDMA_WQE_STRIDE) << OCRDMA_WQE_SIZE_SHIFT);
+
+ if (wr->wr.fast_reg.page_list_len == 0)
+ BUG();
+ if (wr->wr.fast_reg.access_flags & IB_ACCESS_LOCAL_WRITE)
+ hdr->rsvd_lkey_flags |= OCRDMA_LKEY_FLAG_LOCAL_WR;
+ if (wr->wr.fast_reg.access_flags & IB_ACCESS_REMOTE_WRITE)
+ hdr->rsvd_lkey_flags |= OCRDMA_LKEY_FLAG_REMOTE_WR;
+ if (wr->wr.fast_reg.access_flags & IB_ACCESS_REMOTE_READ)
+ hdr->rsvd_lkey_flags |= OCRDMA_LKEY_FLAG_REMOTE_RD;
+ hdr->lkey = wr->wr.fast_reg.rkey;
+ hdr->total_len = wr->wr.fast_reg.length;
+
+ fbo = wr->wr.fast_reg.iova_start -
+ (wr->wr.fast_reg.page_list->page_list[0] & PAGE_MASK);
+
+ fast_reg->va_hi = upper_32_bits(wr->wr.fast_reg.iova_start);
+ fast_reg->va_lo = (u32) (wr->wr.fast_reg.iova_start & 0xffffffff);
+ fast_reg->fbo_hi = upper_32_bits(fbo);
+ fast_reg->fbo_lo = (u32) fbo & 0xffffffff;
+ fast_reg->num_sges = wr->wr.fast_reg.page_list_len;
+ fast_reg->size_sge =
+ get_encoded_page_size(1 << wr->wr.fast_reg.page_shift);
+ mr = (struct ocrdma_mr *) (unsigned long) qp->dev->stag_arr[(hdr->lkey >> 8) &
+ (OCRDMA_MAX_STAG - 1)];
+ build_frmr_pbes(wr, mr->hwmr.pbl_table, &mr->hwmr);
+ return 0;
+}
+
static void ocrdma_ring_sq_db(struct ocrdma_qp *qp)
{
u32 val = qp->sq.dbid | (1 << 16);
@@ -1750,7 +2047,7 @@
}
hdr = ocrdma_hwq_head(&qp->sq);
hdr->cw = 0;
- if (wr->send_flags & IB_SEND_SIGNALED)
+ if (wr->send_flags & IB_SEND_SIGNALED || qp->signaled)
hdr->cw |= (OCRDMA_FLAG_SIG << OCRDMA_WQE_FLAGS_SHIFT);
if (wr->send_flags & IB_SEND_FENCE)
hdr->cw |=
@@ -1788,10 +2085,14 @@
case IB_WR_LOCAL_INV:
hdr->cw |=
(OCRDMA_LKEY_INV << OCRDMA_WQE_OPCODE_SHIFT);
- hdr->cw |= (sizeof(struct ocrdma_hdr_wqe) /
+ hdr->cw |= ((sizeof(struct ocrdma_hdr_wqe) +
+ sizeof(struct ocrdma_sge)) /
OCRDMA_WQE_STRIDE) << OCRDMA_WQE_SIZE_SHIFT;
hdr->lkey = wr->ex.invalidate_rkey;
break;
+ case IB_WR_FAST_REG_MR:
+ status = ocrdma_build_fr(qp, hdr, wr);
+ break;
default:
status = -EINVAL;
break;
@@ -1800,7 +2101,7 @@
*bad_wr = wr;
break;
}
- if (wr->send_flags & IB_SEND_SIGNALED)
+ if (wr->send_flags & IB_SEND_SIGNALED || qp->signaled)
qp->wqe_wr_id_tbl[qp->sq.head].signaled = 1;
else
qp->wqe_wr_id_tbl[qp->sq.head].signaled = 0;
@@ -1824,7 +2125,10 @@
{
u32 val = qp->rq.dbid | (1 << ocrdma_get_num_posted_shift(qp));
- iowrite32(val, qp->rq_db);
+ if (qp->state != OCRDMA_QPS_INIT)
+ iowrite32(val, qp->rq_db);
+ else
+ qp->db_cache++;
}
static void ocrdma_build_rqe(struct ocrdma_hdr_wqe *rqe, struct ib_recv_wr *wr,
@@ -1958,7 +2262,7 @@
static enum ib_wc_status ocrdma_to_ibwc_err(u16 status)
{
- enum ib_wc_status ibwc_status = IB_WC_GENERAL_ERR;
+ enum ib_wc_status ibwc_status;
switch (status) {
case OCRDMA_CQE_GENERAL_ERR:
@@ -2055,6 +2359,9 @@
case OCRDMA_SEND:
ibwc->opcode = IB_WC_SEND;
break;
+ case OCRDMA_FR_MR:
+ ibwc->opcode = IB_WC_FAST_REG_MR;
+ break;
case OCRDMA_LKEY_INV:
ibwc->opcode = IB_WC_LOCAL_INV;
break;
@@ -2108,7 +2415,7 @@
ibwc->status = ocrdma_to_ibwc_err(status);
ocrdma_flush_qp(qp);
- ocrdma_qp_state_machine(qp, IB_QPS_ERR, NULL);
+ ocrdma_qp_state_change(qp, IB_QPS_ERR, NULL);
/* if wqe/rqe pending for which cqe needs to be returned,
* trigger inflating it.
@@ -2193,7 +2500,8 @@
ocrdma_update_wc(qp, ibwc, tail);
*polled = true;
}
- wqe_idx = le32_to_cpu(cqe->wq.wqeidx) & OCRDMA_CQE_WQEIDX_MASK;
+ wqe_idx = (le32_to_cpu(cqe->wq.wqeidx) &
+ OCRDMA_CQE_WQEIDX_MASK) & qp->sq.max_wqe_idx;
if (tail != wqe_idx)
expand = true; /* Coalesced CQE can't be consumed yet */
@@ -2242,7 +2550,8 @@
u32 wqe_idx;
srq = get_ocrdma_srq(qp->ibqp.srq);
- wqe_idx = le32_to_cpu(cqe->rq.buftag_qpn) >> OCRDMA_CQE_BUFTAG_SHIFT;
+ wqe_idx = (le32_to_cpu(cqe->rq.buftag_qpn) >>
+ OCRDMA_CQE_BUFTAG_SHIFT) & srq->rq.max_wqe_idx;
ibwc->wr_id = srq->rqe_wr_id_tbl[wqe_idx];
spin_lock_irqsave(&srq->q_lock, flags);
ocrdma_srq_toggle_bit(srq, wqe_idx);
@@ -2299,9 +2608,9 @@
ibwc->ex.invalidate_rkey = le32_to_cpu(cqe->rq.lkey_immdt);
ibwc->wc_flags |= IB_WC_WITH_INVALIDATE;
}
- if (qp->ibqp.srq)
+ if (qp->ibqp.srq) {
ocrdma_update_free_srq_cqe(ibwc, cqe, qp);
- else {
+ } else {
ibwc->wr_id = qp->rqe_wr_id_tbl[qp->rq.tail];
ocrdma_hwq_inc_tail(&qp->rq);
}
@@ -2314,13 +2623,14 @@
bool expand = false;
ibwc->wc_flags = 0;
- if (qp->qp_type == IB_QPT_UD || qp->qp_type == IB_QPT_GSI)
+ if (qp->qp_type == IB_QPT_UD || qp->qp_type == IB_QPT_GSI) {
status = (le32_to_cpu(cqe->flags_status_srcqpn) &
OCRDMA_CQE_UD_STATUS_MASK) >>
OCRDMA_CQE_UD_STATUS_SHIFT;
- else
+ } else {
status = (le32_to_cpu(cqe->flags_status_srcqpn) &
OCRDMA_CQE_STATUS_MASK) >> OCRDMA_CQE_STATUS_SHIFT;
+ }
if (status == OCRDMA_CQE_SUCCESS) {
*polled = true;
@@ -2338,9 +2648,10 @@
if (cq->phase_change) {
if (cur_getp == 0)
cq->phase = (~cq->phase & OCRDMA_CQE_VALID);
- } else
+ } else {
/* clear valid bit */
cqe->flags_status_srcqpn = 0;
+ }
}
static int ocrdma_poll_hwcq(struct ocrdma_cq *cq, int num_entries,
@@ -2351,7 +2662,7 @@
bool expand = false;
int polled_hw_cqes = 0;
struct ocrdma_qp *qp = NULL;
- struct ocrdma_dev *dev = cq->dev;
+ struct ocrdma_dev *dev = get_ocrdma_dev(cq->ibcq.device);
struct ocrdma_cqe *cqe;
u16 cur_getp; bool polled = false; bool stop = false;
@@ -2417,8 +2728,9 @@
} else if (!is_hw_rq_empty(qp) && qp->rq_cq == cq) {
ibwc->wr_id = qp->rqe_wr_id_tbl[qp->rq.tail];
ocrdma_hwq_inc_tail(&qp->rq);
- } else
+ } else {
return err_cqes;
+ }
ibwc->byte_len = 0;
ibwc->status = IB_WC_WR_FLUSH_ERR;
ibwc = ibwc + 1;
@@ -2431,14 +2743,11 @@
int ocrdma_poll_cq(struct ib_cq *ibcq, int num_entries, struct ib_wc *wc)
{
int cqes_to_poll = num_entries;
- struct ocrdma_cq *cq = NULL;
- unsigned long flags;
- struct ocrdma_dev *dev;
+ struct ocrdma_cq *cq = get_ocrdma_cq(ibcq);
+ struct ocrdma_dev *dev = get_ocrdma_dev(ibcq->device);
int num_os_cqe = 0, err_cqes = 0;
struct ocrdma_qp *qp;
-
- cq = get_ocrdma_cq(ibcq);
- dev = cq->dev;
+ unsigned long flags;
/* poll cqes from adapter CQ */
spin_lock_irqsave(&cq->cq_lock, flags);
@@ -2469,16 +2778,14 @@
int ocrdma_arm_cq(struct ib_cq *ibcq, enum ib_cq_notify_flags cq_flags)
{
- struct ocrdma_cq *cq;
- unsigned long flags;
- struct ocrdma_dev *dev;
+ struct ocrdma_cq *cq = get_ocrdma_cq(ibcq);
+ struct ocrdma_dev *dev = get_ocrdma_dev(ibcq->device);
u16 cq_id;
u16 cur_getp;
struct ocrdma_cqe *cqe;
+ unsigned long flags;
- cq = get_ocrdma_cq(ibcq);
cq_id = cq->id;
- dev = cq->dev;
spin_lock_irqsave(&cq->cq_lock, flags);
if (cq_flags & IB_CQ_NEXT_COMP || cq_flags & IB_CQ_SOLICITED)
@@ -2500,3 +2807,226 @@
spin_unlock_irqrestore(&cq->cq_lock, flags);
return 0;
}
+
+struct ib_mr *ocrdma_alloc_frmr(struct ib_pd *ibpd, int max_page_list_len)
+{
+ int status;
+ struct ocrdma_mr *mr;
+ struct ocrdma_pd *pd = get_ocrdma_pd(ibpd);
+ struct ocrdma_dev *dev = get_ocrdma_dev(ibpd->device);
+
+ if (max_page_list_len > dev->attr.max_pages_per_frmr)
+ return ERR_PTR(-EINVAL);
+
+ mr = kzalloc(sizeof(*mr), GFP_KERNEL);
+ if (!mr)
+ return ERR_PTR(-ENOMEM);
+
+ status = ocrdma_get_pbl_info(dev, mr, max_page_list_len);
+ if (status)
+ goto pbl_err;
+ mr->hwmr.fr_mr = 1;
+ mr->hwmr.remote_rd = 0;
+ mr->hwmr.remote_wr = 0;
+ mr->hwmr.local_rd = 0;
+ mr->hwmr.local_wr = 0;
+ mr->hwmr.mw_bind = 0;
+ status = ocrdma_build_pbl_tbl(dev, &mr->hwmr);
+ if (status)
+ goto pbl_err;
+ status = ocrdma_reg_mr(dev, &mr->hwmr, pd->id, 0);
+ if (status)
+ goto mbx_err;
+ mr->ibmr.rkey = mr->hwmr.lkey;
+ mr->ibmr.lkey = mr->hwmr.lkey;
+ dev->stag_arr[(mr->hwmr.lkey >> 8) & (OCRDMA_MAX_STAG - 1)] = (unsigned long) mr;
+ return &mr->ibmr;
+mbx_err:
+ ocrdma_free_mr_pbl_tbl(dev, &mr->hwmr);
+pbl_err:
+ kfree(mr);
+ return ERR_PTR(-ENOMEM);
+}
+
+struct ib_fast_reg_page_list *ocrdma_alloc_frmr_page_list(struct ib_device
+ *ibdev,
+ int page_list_len)
+{
+ struct ib_fast_reg_page_list *frmr_list;
+ int size;
+
+ size = sizeof(*frmr_list) + (page_list_len * sizeof(u64));
+ frmr_list = kzalloc(size, GFP_KERNEL);
+ if (!frmr_list)
+ return ERR_PTR(-ENOMEM);
+ frmr_list->page_list = (u64 *)(frmr_list + 1);
+ return frmr_list;
+}
+
+void ocrdma_free_frmr_page_list(struct ib_fast_reg_page_list *page_list)
+{
+ kfree(page_list);
+}
+
+#define MAX_KERNEL_PBE_SIZE 65536
+static inline int count_kernel_pbes(struct ib_phys_buf *buf_list,
+ int buf_cnt, u32 *pbe_size)
+{
+ u64 total_size = 0;
+ u64 buf_size = 0;
+ int i;
+ *pbe_size = roundup(buf_list[0].size, PAGE_SIZE);
+ *pbe_size = roundup_pow_of_two(*pbe_size);
+
+ /* find the smallest PBE size that we can have */
+ for (i = 0; i < buf_cnt; i++) {
+ /* first addr may not be page aligned, so ignore checking */
+ if ((i != 0) && ((buf_list[i].addr & ~PAGE_MASK) ||
+ (buf_list[i].size & ~PAGE_MASK))) {
+ return 0;
+ }
+
+ /* if configured PBE size is greater then the chosen one,
+ * reduce the PBE size.
+ */
+ buf_size = roundup(buf_list[i].size, PAGE_SIZE);
+ /* pbe_size has to be even multiple of 4K 1,2,4,8...*/
+ buf_size = roundup_pow_of_two(buf_size);
+ if (*pbe_size > buf_size)
+ *pbe_size = buf_size;
+
+ total_size += buf_size;
+ }
+ *pbe_size = *pbe_size > MAX_KERNEL_PBE_SIZE ?
+ (MAX_KERNEL_PBE_SIZE) : (*pbe_size);
+
+ /* num_pbes = total_size / (*pbe_size); this is implemented below. */
+
+ return total_size >> ilog2(*pbe_size);
+}
+
+static void build_kernel_pbes(struct ib_phys_buf *buf_list, int ib_buf_cnt,
+ u32 pbe_size, struct ocrdma_pbl *pbl_tbl,
+ struct ocrdma_hw_mr *hwmr)
+{
+ int i;
+ int idx;
+ int pbes_per_buf = 0;
+ u64 buf_addr = 0;
+ int num_pbes;
+ struct ocrdma_pbe *pbe;
+ int total_num_pbes = 0;
+
+ if (!hwmr->num_pbes)
+ return;
+
+ pbe = (struct ocrdma_pbe *)pbl_tbl->va;
+ num_pbes = 0;
+
+ /* go through the OS phy regions & fill hw pbe entries into pbls. */
+ for (i = 0; i < ib_buf_cnt; i++) {
+ buf_addr = buf_list[i].addr;
+ pbes_per_buf =
+ roundup_pow_of_two(roundup(buf_list[i].size, PAGE_SIZE)) /
+ pbe_size;
+ hwmr->len += buf_list[i].size;
+ /* number of pbes can be more for one OS buf, when
+ * buffers are of different sizes.
+ * split the ib_buf to one or more pbes.
+ */
+ for (idx = 0; idx < pbes_per_buf; idx++) {
+ /* we program always page aligned addresses,
+ * first unaligned address is taken care by fbo.
+ */
+ if (i == 0) {
+ /* for non zero fbo, assign the
+ * start of the page.
+ */
+ pbe->pa_lo =
+ cpu_to_le32((u32) (buf_addr & PAGE_MASK));
+ pbe->pa_hi =
+ cpu_to_le32((u32) upper_32_bits(buf_addr));
+ } else {
+ pbe->pa_lo =
+ cpu_to_le32((u32) (buf_addr & 0xffffffff));
+ pbe->pa_hi =
+ cpu_to_le32((u32) upper_32_bits(buf_addr));
+ }
+ buf_addr += pbe_size;
+ num_pbes += 1;
+ total_num_pbes += 1;
+ pbe++;
+
+ if (total_num_pbes == hwmr->num_pbes)
+ goto mr_tbl_done;
+ /* if the pbl is full storing the pbes,
+ * move to next pbl.
+ */
+ if (num_pbes == (hwmr->pbl_size/sizeof(u64))) {
+ pbl_tbl++;
+ pbe = (struct ocrdma_pbe *)pbl_tbl->va;
+ num_pbes = 0;
+ }
+ }
+ }
+mr_tbl_done:
+ return;
+}
+
+struct ib_mr *ocrdma_reg_kernel_mr(struct ib_pd *ibpd,
+ struct ib_phys_buf *buf_list,
+ int buf_cnt, int acc, u64 *iova_start)
+{
+ int status = -ENOMEM;
+ struct ocrdma_mr *mr;
+ struct ocrdma_pd *pd = get_ocrdma_pd(ibpd);
+ struct ocrdma_dev *dev = get_ocrdma_dev(ibpd->device);
+ u32 num_pbes;
+ u32 pbe_size = 0;
+
+ if ((acc & IB_ACCESS_REMOTE_WRITE) && !(acc & IB_ACCESS_LOCAL_WRITE))
+ return ERR_PTR(-EINVAL);
+
+ mr = kzalloc(sizeof(*mr), GFP_KERNEL);
+ if (!mr)
+ return ERR_PTR(status);
+
+ num_pbes = count_kernel_pbes(buf_list, buf_cnt, &pbe_size);
+ if (num_pbes == 0) {
+ status = -EINVAL;
+ goto pbl_err;
+ }
+ status = ocrdma_get_pbl_info(dev, mr, num_pbes);
+ if (status)
+ goto pbl_err;
+
+ mr->hwmr.pbe_size = pbe_size;
+ mr->hwmr.fbo = *iova_start - (buf_list[0].addr & PAGE_MASK);
+ mr->hwmr.va = *iova_start;
+ mr->hwmr.local_rd = 1;
+ mr->hwmr.remote_wr = (acc & IB_ACCESS_REMOTE_WRITE) ? 1 : 0;
+ mr->hwmr.remote_rd = (acc & IB_ACCESS_REMOTE_READ) ? 1 : 0;
+ mr->hwmr.local_wr = (acc & IB_ACCESS_LOCAL_WRITE) ? 1 : 0;
+ mr->hwmr.remote_atomic = (acc & IB_ACCESS_REMOTE_ATOMIC) ? 1 : 0;
+ mr->hwmr.mw_bind = (acc & IB_ACCESS_MW_BIND) ? 1 : 0;
+
+ status = ocrdma_build_pbl_tbl(dev, &mr->hwmr);
+ if (status)
+ goto pbl_err;
+ build_kernel_pbes(buf_list, buf_cnt, pbe_size, mr->hwmr.pbl_table,
+ &mr->hwmr);
+ status = ocrdma_reg_mr(dev, &mr->hwmr, pd->id, acc);
+ if (status)
+ goto mbx_err;
+
+ mr->ibmr.lkey = mr->hwmr.lkey;
+ if (mr->hwmr.remote_wr || mr->hwmr.remote_rd)
+ mr->ibmr.rkey = mr->hwmr.lkey;
+ return &mr->ibmr;
+
+mbx_err:
+ ocrdma_free_mr_pbl_tbl(dev, &mr->hwmr);
+pbl_err:
+ kfree(mr);
+ return ERR_PTR(status);
+}
diff --git a/drivers/infiniband/hw/ocrdma/ocrdma_verbs.h b/drivers/infiniband/hw/ocrdma/ocrdma_verbs.h
index 633f03d..b8f7853 100644
--- a/drivers/infiniband/hw/ocrdma/ocrdma_verbs.h
+++ b/drivers/infiniband/hw/ocrdma/ocrdma_verbs.h
@@ -72,6 +72,7 @@
struct ib_qp_attr *qp_attr,
int qp_attr_mask, struct ib_qp_init_attr *);
int ocrdma_destroy_qp(struct ib_qp *);
+void ocrdma_del_flush_qp(struct ocrdma_qp *qp);
struct ib_srq *ocrdma_create_srq(struct ib_pd *, struct ib_srq_init_attr *,
struct ib_udata *);
@@ -89,5 +90,10 @@
int num_phys_buf, int acc, u64 *iova_start);
struct ib_mr *ocrdma_reg_user_mr(struct ib_pd *, u64 start, u64 length,
u64 virt, int acc, struct ib_udata *);
+struct ib_mr *ocrdma_alloc_frmr(struct ib_pd *pd, int max_page_list_len);
+struct ib_fast_reg_page_list *ocrdma_alloc_frmr_page_list(struct ib_device
+ *ibdev,
+ int page_list_len);
+void ocrdma_free_frmr_page_list(struct ib_fast_reg_page_list *page_list);
#endif /* __OCRDMA_VERBS_H__ */
diff --git a/drivers/infiniband/hw/qib/qib.h b/drivers/infiniband/hw/qib/qib.h
index 4a9af79..1946101 100644
--- a/drivers/infiniband/hw/qib/qib.h
+++ b/drivers/infiniband/hw/qib/qib.h
@@ -89,7 +89,6 @@
extern struct qlogic_ib_stats qib_stats;
extern const struct pci_error_handlers qib_pci_err_handler;
-extern struct pci_driver qib_driver;
#define QIB_CHIP_SWVERSION QIB_CHIP_VERS_MAJ
/*
@@ -576,11 +575,13 @@
/* read/write using lock */
spinlock_t sdma_lock ____cacheline_aligned_in_smp;
struct list_head sdma_activelist;
+ struct list_head sdma_userpending;
u64 sdma_descq_added;
u64 sdma_descq_removed;
u16 sdma_descq_tail;
u16 sdma_descq_head;
u8 sdma_generation;
+ u8 sdma_intrequest;
struct tasklet_struct sdma_sw_clean_up_task
____cacheline_aligned_in_smp;
@@ -1326,6 +1327,8 @@
void qib_teardown_sdma(struct qib_pportdata *);
void __qib_sdma_intr(struct qib_pportdata *);
void qib_sdma_intr(struct qib_pportdata *);
+void qib_user_sdma_send_desc(struct qib_pportdata *dd,
+ struct list_head *pktlist);
int qib_sdma_verbs_send(struct qib_pportdata *, struct qib_sge_state *,
u32, struct qib_verbs_txreq *);
/* ppd->sdma_lock should be locked before calling this. */
diff --git a/drivers/infiniband/hw/qib/qib_common.h b/drivers/infiniband/hw/qib/qib_common.h
index 4f255b7..5670ace 100644
--- a/drivers/infiniband/hw/qib/qib_common.h
+++ b/drivers/infiniband/hw/qib/qib_common.h
@@ -279,7 +279,7 @@
* may not be implemented; the user code must deal with this if it
* cares, or it must abort after initialization reports the difference.
*/
-#define QIB_USER_SWMINOR 12
+#define QIB_USER_SWMINOR 13
#define QIB_USER_SWVERSION ((QIB_USER_SWMAJOR << 16) | QIB_USER_SWMINOR)
@@ -701,7 +701,37 @@
__be32 bth[3];
/* fields below this point are in host byte order */
struct qib_header iph;
+ /* fields below are simplified, but should match PSM */
+ /* some are accessed by driver when packet spliting is needed */
__u8 sub_opcode;
+ __u8 flags;
+ __u16 commidx;
+ __u32 ack_seq_num;
+ __u8 flowid;
+ __u8 hdr_dlen;
+ __u16 mqhdr;
+ __u32 uwords[4];
+};
+
+/* sequence number bits for message */
+union qib_seqnum {
+ struct {
+ __u32 seq:11;
+ __u32 gen:8;
+ __u32 flow:5;
+ };
+ struct {
+ __u32 pkt:16;
+ __u32 msg:8;
+ };
+ __u32 val;
+};
+
+/* qib receiving-dma tid-session-member */
+struct qib_tid_session_member {
+ __u16 tid;
+ __u16 offset;
+ __u16 length;
};
/* IB - LRH header consts */
diff --git a/drivers/infiniband/hw/qib/qib_file_ops.c b/drivers/infiniband/hw/qib/qib_file_ops.c
index b51a514..275f247 100644
--- a/drivers/infiniband/hw/qib/qib_file_ops.c
+++ b/drivers/infiniband/hw/qib/qib_file_ops.c
@@ -1220,7 +1220,7 @@
return user_swminor == 3;
default:
/* >= 4 are compatible (or are expected to be) */
- return user_swminor >= 4;
+ return user_swminor <= QIB_USER_SWMINOR;
}
}
/* make no promises yet for future major versions */
diff --git a/drivers/infiniband/hw/qib/qib_init.c b/drivers/infiniband/hw/qib/qib_init.c
index 36e048e..24e802f 100644
--- a/drivers/infiniband/hw/qib/qib_init.c
+++ b/drivers/infiniband/hw/qib/qib_init.c
@@ -1193,7 +1193,7 @@
MODULE_DEVICE_TABLE(pci, qib_pci_tbl);
-struct pci_driver qib_driver = {
+static struct pci_driver qib_driver = {
.name = QIB_DRV_NAME,
.probe = qib_init_one,
.remove = qib_remove_one,
diff --git a/drivers/infiniband/hw/qib/qib_mad.h b/drivers/infiniband/hw/qib/qib_mad.h
index 57bd3fa..28874f86 100644
--- a/drivers/infiniband/hw/qib/qib_mad.h
+++ b/drivers/infiniband/hw/qib/qib_mad.h
@@ -415,7 +415,6 @@
struct ib_cc_table_entry_shadow entries[CC_TABLE_SHADOW_MAX];
} __packed;
-#endif /* _QIB_MAD_H */
/*
* The PortSamplesControl.CounterMasks field is an array of 3 bit fields
* which specify the N'th counter's capabilities. See ch. 16.1.3.2.
@@ -428,3 +427,5 @@
COUNTER_MASK(1, 2) | \
COUNTER_MASK(1, 3) | \
COUNTER_MASK(1, 4))
+
+#endif /* _QIB_MAD_H */
diff --git a/drivers/infiniband/hw/qib/qib_pcie.c b/drivers/infiniband/hw/qib/qib_pcie.c
index c574ec7..3f14009 100644
--- a/drivers/infiniband/hw/qib/qib_pcie.c
+++ b/drivers/infiniband/hw/qib/qib_pcie.c
@@ -283,12 +283,12 @@
goto bail;
}
- pos = pci_find_capability(dd->pcidev, PCI_CAP_ID_MSIX);
+ pos = dd->pcidev->msix_cap;
if (nent && *nent && pos) {
qib_msix_setup(dd, pos, nent, entry);
ret = 0; /* did it, either MSIx or INTx */
} else {
- pos = pci_find_capability(dd->pcidev, PCI_CAP_ID_MSI);
+ pos = dd->pcidev->msi_cap;
if (pos)
ret = qib_msi_setup(dd, pos);
else
@@ -357,7 +357,7 @@
if (!dd->msi_lo)
goto bail;
- pos = pci_find_capability(dd->pcidev, PCI_CAP_ID_MSI);
+ pos = dd->pcidev->msi_cap;
if (!pos) {
qib_dev_err(dd,
"Can't find MSI capability, can't restore MSI settings\n");
@@ -426,7 +426,7 @@
if (new != cw)
pci_write_config_word(pdev, PCI_COMMAND, new);
- pos = pci_find_capability(pdev, PCI_CAP_ID_MSI);
+ pos = pdev->msi_cap;
if (pos) {
/* then turn off MSI */
pci_read_config_word(pdev, pos + PCI_MSI_FLAGS, &cw);
@@ -434,7 +434,7 @@
if (new != cw)
pci_write_config_word(pdev, pos + PCI_MSI_FLAGS, new);
}
- pos = pci_find_capability(pdev, PCI_CAP_ID_MSIX);
+ pos = pdev->msix_cap;
if (pos) {
/* then turn off MSIx */
pci_read_config_word(pdev, pos + PCI_MSIX_FLAGS, &cw);
diff --git a/drivers/infiniband/hw/qib/qib_sdma.c b/drivers/infiniband/hw/qib/qib_sdma.c
index 9b5322d..c6d6a54 100644
--- a/drivers/infiniband/hw/qib/qib_sdma.c
+++ b/drivers/infiniband/hw/qib/qib_sdma.c
@@ -423,8 +423,11 @@
void __qib_sdma_intr(struct qib_pportdata *ppd)
{
- if (__qib_sdma_running(ppd))
+ if (__qib_sdma_running(ppd)) {
qib_sdma_make_progress(ppd);
+ if (!list_empty(&ppd->sdma_userpending))
+ qib_user_sdma_send_desc(ppd, &ppd->sdma_userpending);
+ }
}
int qib_setup_sdma(struct qib_pportdata *ppd)
@@ -452,6 +455,9 @@
ppd->sdma_descq_removed = 0;
ppd->sdma_descq_added = 0;
+ ppd->sdma_intrequest = 0;
+ INIT_LIST_HEAD(&ppd->sdma_userpending);
+
INIT_LIST_HEAD(&ppd->sdma_activelist);
tasklet_init(&ppd->sdma_sw_clean_up_task, sdma_sw_clean_up_task,
diff --git a/drivers/infiniband/hw/qib/qib_user_sdma.c b/drivers/infiniband/hw/qib/qib_user_sdma.c
index 8244208..d0a0ea0 100644
--- a/drivers/infiniband/hw/qib/qib_user_sdma.c
+++ b/drivers/infiniband/hw/qib/qib_user_sdma.c
@@ -53,20 +53,36 @@
#define QIB_USER_SDMA_DRAIN_TIMEOUT 500
struct qib_user_sdma_pkt {
- u8 naddr; /* dimension of addr (1..3) ... */
+ struct list_head list; /* list element */
+
+ u8 tiddma; /* if this is NEW tid-sdma */
+ u8 largepkt; /* this is large pkt from kmalloc */
+ u16 frag_size; /* frag size used by PSM */
+ u16 index; /* last header index or push index */
+ u16 naddr; /* dimension of addr (1..3) ... */
+ u16 addrlimit; /* addr array size */
+ u16 tidsmidx; /* current tidsm index */
+ u16 tidsmcount; /* tidsm array item count */
+ u16 payload_size; /* payload size so far for header */
+ u32 bytes_togo; /* bytes for processing */
u32 counter; /* sdma pkts queued counter for this entry */
+ struct qib_tid_session_member *tidsm; /* tid session member array */
+ struct qib_user_sdma_queue *pq; /* which pq this pkt belongs to */
u64 added; /* global descq number of entries */
struct {
- u32 offset; /* offset for kvaddr, addr */
- u32 length; /* length in page */
- u8 put_page; /* should we put_page? */
- u8 dma_mapped; /* is page dma_mapped? */
+ u16 offset; /* offset for kvaddr, addr */
+ u16 length; /* length in page */
+ u16 first_desc; /* first desc */
+ u16 last_desc; /* last desc */
+ u16 put_page; /* should we put_page? */
+ u16 dma_mapped; /* is page dma_mapped? */
+ u16 dma_length; /* for dma_unmap_page() */
+ u16 padding;
struct page *page; /* may be NULL (coherent mem) */
void *kvaddr; /* FIXME: only for pio hack */
dma_addr_t addr;
} addr[4]; /* max pages, any more and we coalesce */
- struct list_head list; /* list element */
};
struct qib_user_sdma_queue {
@@ -77,6 +93,12 @@
*/
struct list_head sent;
+ /*
+ * Because above list will be accessed by both process and
+ * signal handler, we need a spinlock for it.
+ */
+ spinlock_t sent_lock ____cacheline_aligned_in_smp;
+
/* headers with expected length are allocated from here... */
char header_cache_name[64];
struct dma_pool *header_cache;
@@ -88,6 +110,12 @@
/* as packets go on the queued queue, they are counted... */
u32 counter;
u32 sent_counter;
+ /* pending packets, not sending yet */
+ u32 num_pending;
+ /* sending packets, not complete yet */
+ u32 num_sending;
+ /* global descq number of entry of last sending packet */
+ u64 added;
/* dma page table */
struct rb_root dma_pages_root;
@@ -107,8 +135,12 @@
pq->counter = 0;
pq->sent_counter = 0;
- INIT_LIST_HEAD(&pq->sent);
+ pq->num_pending = 0;
+ pq->num_sending = 0;
+ pq->added = 0;
+ INIT_LIST_HEAD(&pq->sent);
+ spin_lock_init(&pq->sent_lock);
mutex_init(&pq->lock);
snprintf(pq->pkt_slab_name, sizeof(pq->pkt_slab_name),
@@ -144,34 +176,310 @@
}
static void qib_user_sdma_init_frag(struct qib_user_sdma_pkt *pkt,
- int i, size_t offset, size_t len,
- int put_page, int dma_mapped,
- struct page *page,
- void *kvaddr, dma_addr_t dma_addr)
+ int i, u16 offset, u16 len,
+ u16 first_desc, u16 last_desc,
+ u16 put_page, u16 dma_mapped,
+ struct page *page, void *kvaddr,
+ dma_addr_t dma_addr, u16 dma_length)
{
pkt->addr[i].offset = offset;
pkt->addr[i].length = len;
+ pkt->addr[i].first_desc = first_desc;
+ pkt->addr[i].last_desc = last_desc;
pkt->addr[i].put_page = put_page;
pkt->addr[i].dma_mapped = dma_mapped;
pkt->addr[i].page = page;
pkt->addr[i].kvaddr = kvaddr;
pkt->addr[i].addr = dma_addr;
+ pkt->addr[i].dma_length = dma_length;
}
-static void qib_user_sdma_init_header(struct qib_user_sdma_pkt *pkt,
- u32 counter, size_t offset,
- size_t len, int dma_mapped,
- struct page *page,
- void *kvaddr, dma_addr_t dma_addr)
+static void *qib_user_sdma_alloc_header(struct qib_user_sdma_queue *pq,
+ size_t len, dma_addr_t *dma_addr)
{
- pkt->naddr = 1;
- pkt->counter = counter;
- qib_user_sdma_init_frag(pkt, 0, offset, len, 0, dma_mapped, page,
- kvaddr, dma_addr);
+ void *hdr;
+
+ if (len == QIB_USER_SDMA_EXP_HEADER_LENGTH)
+ hdr = dma_pool_alloc(pq->header_cache, GFP_KERNEL,
+ dma_addr);
+ else
+ hdr = NULL;
+
+ if (!hdr) {
+ hdr = kmalloc(len, GFP_KERNEL);
+ if (!hdr)
+ return NULL;
+
+ *dma_addr = 0;
+ }
+
+ return hdr;
+}
+
+static int qib_user_sdma_page_to_frags(const struct qib_devdata *dd,
+ struct qib_user_sdma_queue *pq,
+ struct qib_user_sdma_pkt *pkt,
+ struct page *page, u16 put,
+ u16 offset, u16 len, void *kvaddr)
+{
+ __le16 *pbc16;
+ void *pbcvaddr;
+ struct qib_message_header *hdr;
+ u16 newlen, pbclen, lastdesc, dma_mapped;
+ u32 vcto;
+ union qib_seqnum seqnum;
+ dma_addr_t pbcdaddr;
+ dma_addr_t dma_addr =
+ dma_map_page(&dd->pcidev->dev,
+ page, offset, len, DMA_TO_DEVICE);
+ int ret = 0;
+
+ if (dma_mapping_error(&dd->pcidev->dev, dma_addr)) {
+ /*
+ * dma mapping error, pkt has not managed
+ * this page yet, return the page here so
+ * the caller can ignore this page.
+ */
+ if (put) {
+ put_page(page);
+ } else {
+ /* coalesce case */
+ kunmap(page);
+ __free_page(page);
+ }
+ ret = -ENOMEM;
+ goto done;
+ }
+ offset = 0;
+ dma_mapped = 1;
+
+
+next_fragment:
+
+ /*
+ * In tid-sdma, the transfer length is restricted by
+ * receiver side current tid page length.
+ */
+ if (pkt->tiddma && len > pkt->tidsm[pkt->tidsmidx].length)
+ newlen = pkt->tidsm[pkt->tidsmidx].length;
+ else
+ newlen = len;
+
+ /*
+ * Then the transfer length is restricted by MTU.
+ * the last descriptor flag is determined by:
+ * 1. the current packet is at frag size length.
+ * 2. the current tid page is done if tid-sdma.
+ * 3. there is no more byte togo if sdma.
+ */
+ lastdesc = 0;
+ if ((pkt->payload_size + newlen) >= pkt->frag_size) {
+ newlen = pkt->frag_size - pkt->payload_size;
+ lastdesc = 1;
+ } else if (pkt->tiddma) {
+ if (newlen == pkt->tidsm[pkt->tidsmidx].length)
+ lastdesc = 1;
+ } else {
+ if (newlen == pkt->bytes_togo)
+ lastdesc = 1;
+ }
+
+ /* fill the next fragment in this page */
+ qib_user_sdma_init_frag(pkt, pkt->naddr, /* index */
+ offset, newlen, /* offset, len */
+ 0, lastdesc, /* first last desc */
+ put, dma_mapped, /* put page, dma mapped */
+ page, kvaddr, /* struct page, virt addr */
+ dma_addr, len); /* dma addr, dma length */
+ pkt->bytes_togo -= newlen;
+ pkt->payload_size += newlen;
+ pkt->naddr++;
+ if (pkt->naddr == pkt->addrlimit) {
+ ret = -EFAULT;
+ goto done;
+ }
+
+ /* If there is no more byte togo. (lastdesc==1) */
+ if (pkt->bytes_togo == 0) {
+ /* The packet is done, header is not dma mapped yet.
+ * it should be from kmalloc */
+ if (!pkt->addr[pkt->index].addr) {
+ pkt->addr[pkt->index].addr =
+ dma_map_single(&dd->pcidev->dev,
+ pkt->addr[pkt->index].kvaddr,
+ pkt->addr[pkt->index].dma_length,
+ DMA_TO_DEVICE);
+ if (dma_mapping_error(&dd->pcidev->dev,
+ pkt->addr[pkt->index].addr)) {
+ ret = -ENOMEM;
+ goto done;
+ }
+ pkt->addr[pkt->index].dma_mapped = 1;
+ }
+
+ goto done;
+ }
+
+ /* If tid-sdma, advance tid info. */
+ if (pkt->tiddma) {
+ pkt->tidsm[pkt->tidsmidx].length -= newlen;
+ if (pkt->tidsm[pkt->tidsmidx].length) {
+ pkt->tidsm[pkt->tidsmidx].offset += newlen;
+ } else {
+ pkt->tidsmidx++;
+ if (pkt->tidsmidx == pkt->tidsmcount) {
+ ret = -EFAULT;
+ goto done;
+ }
+ }
+ }
+
+ /*
+ * If this is NOT the last descriptor. (newlen==len)
+ * the current packet is not done yet, but the current
+ * send side page is done.
+ */
+ if (lastdesc == 0)
+ goto done;
+
+ /*
+ * If running this driver under PSM with message size
+ * fitting into one transfer unit, it is not possible
+ * to pass this line. otherwise, it is a buggggg.
+ */
+
+ /*
+ * Since the current packet is done, and there are more
+ * bytes togo, we need to create a new sdma header, copying
+ * from previous sdma header and modify both.
+ */
+ pbclen = pkt->addr[pkt->index].length;
+ pbcvaddr = qib_user_sdma_alloc_header(pq, pbclen, &pbcdaddr);
+ if (!pbcvaddr) {
+ ret = -ENOMEM;
+ goto done;
+ }
+ /* Copy the previous sdma header to new sdma header */
+ pbc16 = (__le16 *)pkt->addr[pkt->index].kvaddr;
+ memcpy(pbcvaddr, pbc16, pbclen);
+
+ /* Modify the previous sdma header */
+ hdr = (struct qib_message_header *)&pbc16[4];
+
+ /* New pbc length */
+ pbc16[0] = cpu_to_le16(le16_to_cpu(pbc16[0])-(pkt->bytes_togo>>2));
+
+ /* New packet length */
+ hdr->lrh[2] = cpu_to_be16(le16_to_cpu(pbc16[0]));
+
+ if (pkt->tiddma) {
+ /* turn on the header suppression */
+ hdr->iph.pkt_flags =
+ cpu_to_le16(le16_to_cpu(hdr->iph.pkt_flags)|0x2);
+ /* turn off ACK_REQ: 0x04 and EXPECTED_DONE: 0x20 */
+ hdr->flags &= ~(0x04|0x20);
+ } else {
+ /* turn off extra bytes: 20-21 bits */
+ hdr->bth[0] = cpu_to_be32(be32_to_cpu(hdr->bth[0])&0xFFCFFFFF);
+ /* turn off ACK_REQ: 0x04 */
+ hdr->flags &= ~(0x04);
+ }
+
+ /* New kdeth checksum */
+ vcto = le32_to_cpu(hdr->iph.ver_ctxt_tid_offset);
+ hdr->iph.chksum = cpu_to_le16(QIB_LRH_BTH +
+ be16_to_cpu(hdr->lrh[2]) -
+ ((vcto>>16)&0xFFFF) - (vcto&0xFFFF) -
+ le16_to_cpu(hdr->iph.pkt_flags));
+
+ /* The packet is done, header is not dma mapped yet.
+ * it should be from kmalloc */
+ if (!pkt->addr[pkt->index].addr) {
+ pkt->addr[pkt->index].addr =
+ dma_map_single(&dd->pcidev->dev,
+ pkt->addr[pkt->index].kvaddr,
+ pkt->addr[pkt->index].dma_length,
+ DMA_TO_DEVICE);
+ if (dma_mapping_error(&dd->pcidev->dev,
+ pkt->addr[pkt->index].addr)) {
+ ret = -ENOMEM;
+ goto done;
+ }
+ pkt->addr[pkt->index].dma_mapped = 1;
+ }
+
+ /* Modify the new sdma header */
+ pbc16 = (__le16 *)pbcvaddr;
+ hdr = (struct qib_message_header *)&pbc16[4];
+
+ /* New pbc length */
+ pbc16[0] = cpu_to_le16(le16_to_cpu(pbc16[0])-(pkt->payload_size>>2));
+
+ /* New packet length */
+ hdr->lrh[2] = cpu_to_be16(le16_to_cpu(pbc16[0]));
+
+ if (pkt->tiddma) {
+ /* Set new tid and offset for new sdma header */
+ hdr->iph.ver_ctxt_tid_offset = cpu_to_le32(
+ (le32_to_cpu(hdr->iph.ver_ctxt_tid_offset)&0xFF000000) +
+ (pkt->tidsm[pkt->tidsmidx].tid<<QLOGIC_IB_I_TID_SHIFT) +
+ (pkt->tidsm[pkt->tidsmidx].offset>>2));
+ } else {
+ /* Middle protocol new packet offset */
+ hdr->uwords[2] += pkt->payload_size;
+ }
+
+ /* New kdeth checksum */
+ vcto = le32_to_cpu(hdr->iph.ver_ctxt_tid_offset);
+ hdr->iph.chksum = cpu_to_le16(QIB_LRH_BTH +
+ be16_to_cpu(hdr->lrh[2]) -
+ ((vcto>>16)&0xFFFF) - (vcto&0xFFFF) -
+ le16_to_cpu(hdr->iph.pkt_flags));
+
+ /* Next sequence number in new sdma header */
+ seqnum.val = be32_to_cpu(hdr->bth[2]);
+ if (pkt->tiddma)
+ seqnum.seq++;
+ else
+ seqnum.pkt++;
+ hdr->bth[2] = cpu_to_be32(seqnum.val);
+
+ /* Init new sdma header. */
+ qib_user_sdma_init_frag(pkt, pkt->naddr, /* index */
+ 0, pbclen, /* offset, len */
+ 1, 0, /* first last desc */
+ 0, 0, /* put page, dma mapped */
+ NULL, pbcvaddr, /* struct page, virt addr */
+ pbcdaddr, pbclen); /* dma addr, dma length */
+ pkt->index = pkt->naddr;
+ pkt->payload_size = 0;
+ pkt->naddr++;
+ if (pkt->naddr == pkt->addrlimit) {
+ ret = -EFAULT;
+ goto done;
+ }
+
+ /* Prepare for next fragment in this page */
+ if (newlen != len) {
+ if (dma_mapped) {
+ put = 0;
+ dma_mapped = 0;
+ page = NULL;
+ kvaddr = NULL;
+ }
+ len -= newlen;
+ offset += newlen;
+
+ goto next_fragment;
+ }
+
+done:
+ return ret;
}
/* we've too many pages in the iovec, coalesce to a single page */
static int qib_user_sdma_coalesce(const struct qib_devdata *dd,
+ struct qib_user_sdma_queue *pq,
struct qib_user_sdma_pkt *pkt,
const struct iovec *iov,
unsigned long niov)
@@ -182,7 +490,6 @@
char *mpage;
int i;
int len = 0;
- dma_addr_t dma_addr;
if (!page) {
ret = -ENOMEM;
@@ -205,17 +512,8 @@
len += iov[i].iov_len;
}
- dma_addr = dma_map_page(&dd->pcidev->dev, page, 0, len,
- DMA_TO_DEVICE);
- if (dma_mapping_error(&dd->pcidev->dev, dma_addr)) {
- ret = -ENOMEM;
- goto free_unmap;
- }
-
- qib_user_sdma_init_frag(pkt, 1, 0, len, 0, 1, page, mpage_save,
- dma_addr);
- pkt->naddr = 2;
-
+ ret = qib_user_sdma_page_to_frags(dd, pq, pkt,
+ page, 0, 0, len, mpage_save);
goto done;
free_unmap:
@@ -238,16 +536,6 @@
return 1 + ((epage - spage) >> PAGE_SHIFT);
}
-/*
- * Truncate length to page boundary.
- */
-static int qib_user_sdma_page_length(unsigned long addr, unsigned long len)
-{
- const unsigned long offset = addr & ~PAGE_MASK;
-
- return ((offset + len) > PAGE_SIZE) ? (PAGE_SIZE - offset) : len;
-}
-
static void qib_user_sdma_free_pkt_frag(struct device *dev,
struct qib_user_sdma_queue *pq,
struct qib_user_sdma_pkt *pkt,
@@ -256,10 +544,11 @@
const int i = frag;
if (pkt->addr[i].page) {
+ /* only user data has page */
if (pkt->addr[i].dma_mapped)
dma_unmap_page(dev,
pkt->addr[i].addr,
- pkt->addr[i].length,
+ pkt->addr[i].dma_length,
DMA_TO_DEVICE);
if (pkt->addr[i].kvaddr)
@@ -269,55 +558,81 @@
put_page(pkt->addr[i].page);
else
__free_page(pkt->addr[i].page);
- } else if (pkt->addr[i].kvaddr)
- /* free coherent mem from cache... */
- dma_pool_free(pq->header_cache,
+ } else if (pkt->addr[i].kvaddr) {
+ /* for headers */
+ if (pkt->addr[i].dma_mapped) {
+ /* from kmalloc & dma mapped */
+ dma_unmap_single(dev,
+ pkt->addr[i].addr,
+ pkt->addr[i].dma_length,
+ DMA_TO_DEVICE);
+ kfree(pkt->addr[i].kvaddr);
+ } else if (pkt->addr[i].addr) {
+ /* free coherent mem from cache... */
+ dma_pool_free(pq->header_cache,
pkt->addr[i].kvaddr, pkt->addr[i].addr);
+ } else {
+ /* from kmalloc but not dma mapped */
+ kfree(pkt->addr[i].kvaddr);
+ }
+ }
}
/* return number of pages pinned... */
static int qib_user_sdma_pin_pages(const struct qib_devdata *dd,
+ struct qib_user_sdma_queue *pq,
struct qib_user_sdma_pkt *pkt,
unsigned long addr, int tlen, int npages)
{
- struct page *pages[2];
- int j;
- int ret;
+ struct page *pages[8];
+ int i, j;
+ int ret = 0;
- ret = get_user_pages(current, current->mm, addr,
- npages, 0, 1, pages, NULL);
+ while (npages) {
+ if (npages > 8)
+ j = 8;
+ else
+ j = npages;
- if (ret != npages) {
- int i;
-
- for (i = 0; i < ret; i++)
- put_page(pages[i]);
-
- ret = -ENOMEM;
- goto done;
- }
-
- for (j = 0; j < npages; j++) {
- /* map the pages... */
- const int flen = qib_user_sdma_page_length(addr, tlen);
- dma_addr_t dma_addr =
- dma_map_page(&dd->pcidev->dev,
- pages[j], 0, flen, DMA_TO_DEVICE);
- unsigned long fofs = addr & ~PAGE_MASK;
-
- if (dma_mapping_error(&dd->pcidev->dev, dma_addr)) {
+ ret = get_user_pages(current, current->mm, addr,
+ j, 0, 1, pages, NULL);
+ if (ret != j) {
+ i = 0;
+ j = ret;
ret = -ENOMEM;
- goto done;
+ goto free_pages;
}
- qib_user_sdma_init_frag(pkt, pkt->naddr, fofs, flen, 1, 1,
- pages[j], kmap(pages[j]), dma_addr);
+ for (i = 0; i < j; i++) {
+ /* map the pages... */
+ unsigned long fofs = addr & ~PAGE_MASK;
+ int flen = ((fofs + tlen) > PAGE_SIZE) ?
+ (PAGE_SIZE - fofs) : tlen;
- pkt->naddr++;
- addr += flen;
- tlen -= flen;
+ ret = qib_user_sdma_page_to_frags(dd, pq, pkt,
+ pages[i], 1, fofs, flen, NULL);
+ if (ret < 0) {
+ /* current page has beed taken
+ * care of inside above call.
+ */
+ i++;
+ goto free_pages;
+ }
+
+ addr += flen;
+ tlen -= flen;
+ }
+
+ npages -= j;
}
+ goto done;
+
+ /* if error, return all pages not managed by pkt */
+free_pages:
+ while (i < j)
+ put_page(pages[i++]);
+
done:
return ret;
}
@@ -335,7 +650,7 @@
const int npages = qib_user_sdma_num_pages(iov + idx);
const unsigned long addr = (unsigned long) iov[idx].iov_base;
- ret = qib_user_sdma_pin_pages(dd, pkt, addr,
+ ret = qib_user_sdma_pin_pages(dd, pq, pkt, addr,
iov[idx].iov_len, npages);
if (ret < 0)
goto free_pkt;
@@ -344,9 +659,22 @@
goto done;
free_pkt:
- for (idx = 0; idx < pkt->naddr; idx++)
+ /* we need to ignore the first entry here */
+ for (idx = 1; idx < pkt->naddr; idx++)
qib_user_sdma_free_pkt_frag(&dd->pcidev->dev, pq, pkt, idx);
+ /* need to dma unmap the first entry, this is to restore to
+ * the original state so that caller can free the memory in
+ * error condition. Caller does not know if dma mapped or not*/
+ if (pkt->addr[0].dma_mapped) {
+ dma_unmap_single(&dd->pcidev->dev,
+ pkt->addr[0].addr,
+ pkt->addr[0].dma_length,
+ DMA_TO_DEVICE);
+ pkt->addr[0].addr = 0;
+ pkt->addr[0].dma_mapped = 0;
+ }
+
done:
return ret;
}
@@ -359,8 +687,9 @@
{
int ret = 0;
- if (npages >= ARRAY_SIZE(pkt->addr))
- ret = qib_user_sdma_coalesce(dd, pkt, iov, niov);
+ if (pkt->frag_size == pkt->bytes_togo &&
+ npages >= ARRAY_SIZE(pkt->addr))
+ ret = qib_user_sdma_coalesce(dd, pq, pkt, iov, niov);
else
ret = qib_user_sdma_pin_pkt(dd, pq, pkt, iov, niov);
@@ -380,7 +709,10 @@
for (i = 0; i < pkt->naddr; i++)
qib_user_sdma_free_pkt_frag(dev, pq, pkt, i);
- kmem_cache_free(pq->pkt_slab, pkt);
+ if (pkt->largepkt)
+ kfree(pkt);
+ else
+ kmem_cache_free(pq->pkt_slab, pkt);
}
INIT_LIST_HEAD(list);
}
@@ -393,63 +725,48 @@
* as, if there is an error we clean it...
*/
static int qib_user_sdma_queue_pkts(const struct qib_devdata *dd,
+ struct qib_pportdata *ppd,
struct qib_user_sdma_queue *pq,
- struct list_head *list,
const struct iovec *iov,
unsigned long niov,
- int maxpkts)
+ struct list_head *list,
+ int *maxpkts, int *ndesc)
{
unsigned long idx = 0;
int ret = 0;
int npkts = 0;
- struct page *page = NULL;
__le32 *pbc;
dma_addr_t dma_addr;
struct qib_user_sdma_pkt *pkt = NULL;
size_t len;
size_t nw;
u32 counter = pq->counter;
- int dma_mapped = 0;
+ u16 frag_size;
- while (idx < niov && npkts < maxpkts) {
+ while (idx < niov && npkts < *maxpkts) {
const unsigned long addr = (unsigned long) iov[idx].iov_base;
const unsigned long idx_save = idx;
unsigned pktnw;
unsigned pktnwc;
int nfrags = 0;
int npages = 0;
+ int bytes_togo = 0;
+ int tiddma = 0;
int cfur;
- dma_mapped = 0;
len = iov[idx].iov_len;
nw = len >> 2;
- page = NULL;
-
- pkt = kmem_cache_alloc(pq->pkt_slab, GFP_KERNEL);
- if (!pkt) {
- ret = -ENOMEM;
- goto free_list;
- }
if (len < QIB_USER_SDMA_MIN_HEADER_LENGTH ||
len > PAGE_SIZE || len & 3 || addr & 3) {
ret = -EINVAL;
- goto free_pkt;
+ goto free_list;
}
- if (len == QIB_USER_SDMA_EXP_HEADER_LENGTH)
- pbc = dma_pool_alloc(pq->header_cache, GFP_KERNEL,
- &dma_addr);
- else
- pbc = NULL;
-
+ pbc = qib_user_sdma_alloc_header(pq, len, &dma_addr);
if (!pbc) {
- page = alloc_page(GFP_KERNEL);
- if (!page) {
- ret = -ENOMEM;
- goto free_pkt;
- }
- pbc = kmap(page);
+ ret = -ENOMEM;
+ goto free_list;
}
cfur = copy_from_user(pbc, iov[idx].iov_base, len);
@@ -474,8 +791,8 @@
* we can verify that the packet is consistent with the
* iovec lengths.
*/
- pktnw = le32_to_cpu(*pbc) & QIB_PBC_LENGTH_MASK;
- if (pktnw < pktnwc || pktnw > pktnwc + (PAGE_SIZE >> 2)) {
+ pktnw = le32_to_cpu(*pbc) & 0xFFFF;
+ if (pktnw < pktnwc) {
ret = -EINVAL;
goto free_pbc;
}
@@ -486,17 +803,14 @@
const unsigned long faddr =
(unsigned long) iov[idx].iov_base;
- if (slen & 3 || faddr & 3 || !slen ||
- slen > PAGE_SIZE) {
+ if (slen & 3 || faddr & 3 || !slen) {
ret = -EINVAL;
goto free_pbc;
}
- npages++;
- if ((faddr & PAGE_MASK) !=
- ((faddr + slen - 1) & PAGE_MASK))
- npages++;
+ npages += qib_user_sdma_num_pages(&iov[idx]);
+ bytes_togo += slen;
pktnwc += slen >> 2;
idx++;
nfrags++;
@@ -507,48 +821,139 @@
goto free_pbc;
}
- if (page) {
- dma_addr = dma_map_page(&dd->pcidev->dev,
- page, 0, len, DMA_TO_DEVICE);
- if (dma_mapping_error(&dd->pcidev->dev, dma_addr)) {
+ frag_size = ((le32_to_cpu(*pbc))>>16) & 0xFFFF;
+ if (((frag_size ? frag_size : bytes_togo) + len) >
+ ppd->ibmaxlen) {
+ ret = -EINVAL;
+ goto free_pbc;
+ }
+
+ if (frag_size) {
+ int pktsize, tidsmsize, n;
+
+ n = npages*((2*PAGE_SIZE/frag_size)+1);
+ pktsize = sizeof(*pkt) + sizeof(pkt->addr[0])*n;
+
+ /*
+ * Determine if this is tid-sdma or just sdma.
+ */
+ tiddma = (((le32_to_cpu(pbc[7])>>
+ QLOGIC_IB_I_TID_SHIFT)&
+ QLOGIC_IB_I_TID_MASK) !=
+ QLOGIC_IB_I_TID_MASK);
+
+ if (tiddma)
+ tidsmsize = iov[idx].iov_len;
+ else
+ tidsmsize = 0;
+
+ pkt = kmalloc(pktsize+tidsmsize, GFP_KERNEL);
+ if (!pkt) {
ret = -ENOMEM;
goto free_pbc;
}
+ pkt->largepkt = 1;
+ pkt->frag_size = frag_size;
+ pkt->addrlimit = n + ARRAY_SIZE(pkt->addr);
- dma_mapped = 1;
+ if (tiddma) {
+ char *tidsm = (char *)pkt + pktsize;
+ cfur = copy_from_user(tidsm,
+ iov[idx].iov_base, tidsmsize);
+ if (cfur) {
+ ret = -EFAULT;
+ goto free_pkt;
+ }
+ pkt->tidsm =
+ (struct qib_tid_session_member *)tidsm;
+ pkt->tidsmcount = tidsmsize/
+ sizeof(struct qib_tid_session_member);
+ pkt->tidsmidx = 0;
+ idx++;
+ }
+
+ /*
+ * pbc 'fill1' field is borrowed to pass frag size,
+ * we need to clear it after picking frag size, the
+ * hardware requires this field to be zero.
+ */
+ *pbc = cpu_to_le32(le32_to_cpu(*pbc) & 0x0000FFFF);
+ } else {
+ pkt = kmem_cache_alloc(pq->pkt_slab, GFP_KERNEL);
+ if (!pkt) {
+ ret = -ENOMEM;
+ goto free_pbc;
+ }
+ pkt->largepkt = 0;
+ pkt->frag_size = bytes_togo;
+ pkt->addrlimit = ARRAY_SIZE(pkt->addr);
}
+ pkt->bytes_togo = bytes_togo;
+ pkt->payload_size = 0;
+ pkt->counter = counter;
+ pkt->tiddma = tiddma;
- qib_user_sdma_init_header(pkt, counter, 0, len, dma_mapped,
- page, pbc, dma_addr);
+ /* setup the first header */
+ qib_user_sdma_init_frag(pkt, 0, /* index */
+ 0, len, /* offset, len */
+ 1, 0, /* first last desc */
+ 0, 0, /* put page, dma mapped */
+ NULL, pbc, /* struct page, virt addr */
+ dma_addr, len); /* dma addr, dma length */
+ pkt->index = 0;
+ pkt->naddr = 1;
if (nfrags) {
ret = qib_user_sdma_init_payload(dd, pq, pkt,
iov + idx_save + 1,
nfrags, npages);
if (ret < 0)
- goto free_pbc_dma;
+ goto free_pkt;
+ } else {
+ /* since there is no payload, mark the
+ * header as the last desc. */
+ pkt->addr[0].last_desc = 1;
+
+ if (dma_addr == 0) {
+ /*
+ * the header is not dma mapped yet.
+ * it should be from kmalloc.
+ */
+ dma_addr = dma_map_single(&dd->pcidev->dev,
+ pbc, len, DMA_TO_DEVICE);
+ if (dma_mapping_error(&dd->pcidev->dev,
+ dma_addr)) {
+ ret = -ENOMEM;
+ goto free_pkt;
+ }
+ pkt->addr[0].addr = dma_addr;
+ pkt->addr[0].dma_mapped = 1;
+ }
}
counter++;
npkts++;
+ pkt->pq = pq;
+ pkt->index = 0; /* reset index for push on hw */
+ *ndesc += pkt->naddr;
list_add_tail(&pkt->list, list);
}
+ *maxpkts = npkts;
ret = idx;
goto done;
-free_pbc_dma:
- if (dma_mapped)
- dma_unmap_page(&dd->pcidev->dev, dma_addr, len, DMA_TO_DEVICE);
-free_pbc:
- if (page) {
- kunmap(page);
- __free_page(page);
- } else
- dma_pool_free(pq->header_cache, pbc, dma_addr);
free_pkt:
- kmem_cache_free(pq->pkt_slab, pkt);
+ if (pkt->largepkt)
+ kfree(pkt);
+ else
+ kmem_cache_free(pq->pkt_slab, pkt);
+free_pbc:
+ if (dma_addr)
+ dma_pool_free(pq->header_cache, pbc, dma_addr);
+ else
+ kfree(pbc);
free_list:
qib_user_sdma_free_pkt_list(&dd->pcidev->dev, pq, list);
done:
@@ -569,10 +974,20 @@
struct list_head free_list;
struct qib_user_sdma_pkt *pkt;
struct qib_user_sdma_pkt *pkt_prev;
+ unsigned long flags;
int ret = 0;
+ if (!pq->num_sending)
+ return 0;
+
INIT_LIST_HEAD(&free_list);
+ /*
+ * We need this spin lock here because interrupt handler
+ * might modify this list in qib_user_sdma_send_desc(), also
+ * we can not get interrupted, otherwise it is a deadlock.
+ */
+ spin_lock_irqsave(&pq->sent_lock, flags);
list_for_each_entry_safe(pkt, pkt_prev, &pq->sent, list) {
s64 descd = ppd->sdma_descq_removed - pkt->added;
@@ -583,7 +998,9 @@
/* one more packet cleaned */
ret++;
+ pq->num_sending--;
}
+ spin_unlock_irqrestore(&pq->sent_lock, flags);
if (!list_empty(&free_list)) {
u32 counter;
@@ -627,6 +1044,7 @@
struct qib_user_sdma_queue *pq)
{
struct qib_devdata *dd = ppd->dd;
+ unsigned long flags;
int i;
if (!pq)
@@ -634,7 +1052,7 @@
for (i = 0; i < QIB_USER_SDMA_DRAIN_TIMEOUT; i++) {
mutex_lock(&pq->lock);
- if (list_empty(&pq->sent)) {
+ if (!pq->num_pending && !pq->num_sending) {
mutex_unlock(&pq->lock);
break;
}
@@ -644,29 +1062,44 @@
msleep(10);
}
- if (!list_empty(&pq->sent)) {
+ if (pq->num_pending || pq->num_sending) {
+ struct qib_user_sdma_pkt *pkt;
+ struct qib_user_sdma_pkt *pkt_prev;
struct list_head free_list;
+ mutex_lock(&pq->lock);
+ spin_lock_irqsave(&ppd->sdma_lock, flags);
+ /*
+ * Since we hold sdma_lock, it is safe without sent_lock.
+ */
+ if (pq->num_pending) {
+ list_for_each_entry_safe(pkt, pkt_prev,
+ &ppd->sdma_userpending, list) {
+ if (pkt->pq == pq) {
+ list_move_tail(&pkt->list, &pq->sent);
+ pq->num_pending--;
+ pq->num_sending++;
+ }
+ }
+ }
+ spin_unlock_irqrestore(&ppd->sdma_lock, flags);
+
qib_dev_err(dd, "user sdma lists not empty: forcing!\n");
INIT_LIST_HEAD(&free_list);
- mutex_lock(&pq->lock);
list_splice_init(&pq->sent, &free_list);
+ pq->num_sending = 0;
qib_user_sdma_free_pkt_list(&dd->pcidev->dev, pq, &free_list);
mutex_unlock(&pq->lock);
}
}
-static inline __le64 qib_sdma_make_desc0(struct qib_pportdata *ppd,
+static inline __le64 qib_sdma_make_desc0(u8 gen,
u64 addr, u64 dwlen, u64 dwoffset)
{
- u8 tmpgen;
-
- tmpgen = ppd->sdma_generation;
-
return cpu_to_le64(/* SDmaPhyAddr[31:0] */
((addr & 0xfffffffcULL) << 32) |
/* SDmaGeneration[1:0] */
- ((tmpgen & 3ULL) << 30) |
+ ((gen & 3ULL) << 30) |
/* SDmaDwordCount[10:0] */
((dwlen & 0x7ffULL) << 16) |
/* SDmaBufOffset[12:2] */
@@ -692,7 +1125,7 @@
static void qib_user_sdma_send_frag(struct qib_pportdata *ppd,
struct qib_user_sdma_pkt *pkt, int idx,
- unsigned ofs, u16 tail)
+ unsigned ofs, u16 tail, u8 gen)
{
const u64 addr = (u64) pkt->addr[idx].addr +
(u64) pkt->addr[idx].offset;
@@ -702,104 +1135,132 @@
descqp = &ppd->sdma_descq[tail].qw[0];
- descq0 = qib_sdma_make_desc0(ppd, addr, dwlen, ofs);
- if (idx == 0)
+ descq0 = qib_sdma_make_desc0(gen, addr, dwlen, ofs);
+ if (pkt->addr[idx].first_desc)
descq0 = qib_sdma_make_first_desc0(descq0);
- if (idx == pkt->naddr - 1)
+ if (pkt->addr[idx].last_desc) {
descq0 = qib_sdma_make_last_desc0(descq0);
+ if (ppd->sdma_intrequest) {
+ descq0 |= cpu_to_le64(1ULL << 15);
+ ppd->sdma_intrequest = 0;
+ }
+ }
descqp[0] = descq0;
descqp[1] = qib_sdma_make_desc1(addr);
}
-/* pq->lock must be held, get packets on the wire... */
-static int qib_user_sdma_push_pkts(struct qib_pportdata *ppd,
- struct qib_user_sdma_queue *pq,
- struct list_head *pktlist)
+void qib_user_sdma_send_desc(struct qib_pportdata *ppd,
+ struct list_head *pktlist)
{
struct qib_devdata *dd = ppd->dd;
+ u16 nfree, nsent;
+ u16 tail, tail_c;
+ u8 gen, gen_c;
+
+ nfree = qib_sdma_descq_freecnt(ppd);
+ if (!nfree)
+ return;
+
+retry:
+ nsent = 0;
+ tail_c = tail = ppd->sdma_descq_tail;
+ gen_c = gen = ppd->sdma_generation;
+ while (!list_empty(pktlist)) {
+ struct qib_user_sdma_pkt *pkt =
+ list_entry(pktlist->next, struct qib_user_sdma_pkt,
+ list);
+ int i, j, c = 0;
+ unsigned ofs = 0;
+ u16 dtail = tail;
+
+ for (i = pkt->index; i < pkt->naddr && nfree; i++) {
+ qib_user_sdma_send_frag(ppd, pkt, i, ofs, tail, gen);
+ ofs += pkt->addr[i].length >> 2;
+
+ if (++tail == ppd->sdma_descq_cnt) {
+ tail = 0;
+ ++gen;
+ ppd->sdma_intrequest = 1;
+ } else if (tail == (ppd->sdma_descq_cnt>>1)) {
+ ppd->sdma_intrequest = 1;
+ }
+ nfree--;
+ if (pkt->addr[i].last_desc == 0)
+ continue;
+
+ /*
+ * If the packet is >= 2KB mtu equivalent, we
+ * have to use the large buffers, and have to
+ * mark each descriptor as part of a large
+ * buffer packet.
+ */
+ if (ofs > dd->piosize2kmax_dwords) {
+ for (j = pkt->index; j <= i; j++) {
+ ppd->sdma_descq[dtail].qw[0] |=
+ cpu_to_le64(1ULL << 14);
+ if (++dtail == ppd->sdma_descq_cnt)
+ dtail = 0;
+ }
+ }
+ c += i + 1 - pkt->index;
+ pkt->index = i + 1; /* index for next first */
+ tail_c = dtail = tail;
+ gen_c = gen;
+ ofs = 0; /* reset for next packet */
+ }
+
+ ppd->sdma_descq_added += c;
+ nsent += c;
+ if (pkt->index == pkt->naddr) {
+ pkt->added = ppd->sdma_descq_added;
+ pkt->pq->added = pkt->added;
+ pkt->pq->num_pending--;
+ spin_lock(&pkt->pq->sent_lock);
+ pkt->pq->num_sending++;
+ list_move_tail(&pkt->list, &pkt->pq->sent);
+ spin_unlock(&pkt->pq->sent_lock);
+ }
+ if (!nfree || (nsent<<2) > ppd->sdma_descq_cnt)
+ break;
+ }
+
+ /* advance the tail on the chip if necessary */
+ if (ppd->sdma_descq_tail != tail_c) {
+ ppd->sdma_generation = gen_c;
+ dd->f_sdma_update_tail(ppd, tail_c);
+ }
+
+ if (nfree && !list_empty(pktlist))
+ goto retry;
+
+ return;
+}
+
+/* pq->lock must be held, get packets on the wire... */
+static int qib_user_sdma_push_pkts(struct qib_pportdata *ppd,
+ struct qib_user_sdma_queue *pq,
+ struct list_head *pktlist, int count)
+{
int ret = 0;
unsigned long flags;
- u16 tail;
- u8 generation;
- u64 descq_added;
-
- if (list_empty(pktlist))
- return 0;
if (unlikely(!(ppd->lflags & QIBL_LINKACTIVE)))
return -ECOMM;
spin_lock_irqsave(&ppd->sdma_lock, flags);
- /* keep a copy for restoring purposes in case of problems */
- generation = ppd->sdma_generation;
- descq_added = ppd->sdma_descq_added;
-
if (unlikely(!__qib_sdma_running(ppd))) {
ret = -ECOMM;
goto unlock;
}
- tail = ppd->sdma_descq_tail;
- while (!list_empty(pktlist)) {
- struct qib_user_sdma_pkt *pkt =
- list_entry(pktlist->next, struct qib_user_sdma_pkt,
- list);
- int i;
- unsigned ofs = 0;
- u16 dtail = tail;
-
- if (pkt->naddr > qib_sdma_descq_freecnt(ppd))
- goto unlock_check_tail;
-
- for (i = 0; i < pkt->naddr; i++) {
- qib_user_sdma_send_frag(ppd, pkt, i, ofs, tail);
- ofs += pkt->addr[i].length >> 2;
-
- if (++tail == ppd->sdma_descq_cnt) {
- tail = 0;
- ++ppd->sdma_generation;
- }
- }
-
- if ((ofs << 2) > ppd->ibmaxlen) {
- ret = -EMSGSIZE;
- goto unlock;
- }
-
- /*
- * If the packet is >= 2KB mtu equivalent, we have to use
- * the large buffers, and have to mark each descriptor as
- * part of a large buffer packet.
- */
- if (ofs > dd->piosize2kmax_dwords) {
- for (i = 0; i < pkt->naddr; i++) {
- ppd->sdma_descq[dtail].qw[0] |=
- cpu_to_le64(1ULL << 14);
- if (++dtail == ppd->sdma_descq_cnt)
- dtail = 0;
- }
- }
-
- ppd->sdma_descq_added += pkt->naddr;
- pkt->added = ppd->sdma_descq_added;
- list_move_tail(&pkt->list, &pq->sent);
- ret++;
- }
-
-unlock_check_tail:
- /* advance the tail on the chip if necessary */
- if (ppd->sdma_descq_tail != tail)
- dd->f_sdma_update_tail(ppd, tail);
+ pq->num_pending += count;
+ list_splice_tail_init(pktlist, &ppd->sdma_userpending);
+ qib_user_sdma_send_desc(ppd, &ppd->sdma_userpending);
unlock:
- if (unlikely(ret < 0)) {
- ppd->sdma_generation = generation;
- ppd->sdma_descq_added = descq_added;
- }
spin_unlock_irqrestore(&ppd->sdma_lock, flags);
-
return ret;
}
@@ -822,19 +1283,23 @@
if (!qib_sdma_running(ppd))
goto done_unlock;
- if (ppd->sdma_descq_added != ppd->sdma_descq_removed) {
+ /* if I have packets not complete yet */
+ if (pq->added > ppd->sdma_descq_removed)
qib_user_sdma_hwqueue_clean(ppd);
+ /* if I have complete packets to be freed */
+ if (pq->num_sending)
qib_user_sdma_queue_clean(ppd, pq);
- }
while (dim) {
- const int mxp = 8;
+ int mxp = 8;
+ int ndesc = 0;
down_write(¤t->mm->mmap_sem);
- ret = qib_user_sdma_queue_pkts(dd, pq, &list, iov, dim, mxp);
+ ret = qib_user_sdma_queue_pkts(dd, ppd, pq,
+ iov, dim, &list, &mxp, &ndesc);
up_write(¤t->mm->mmap_sem);
- if (ret <= 0)
+ if (ret < 0)
goto done_unlock;
else {
dim -= ret;
@@ -844,24 +1309,20 @@
/* force packets onto the sdma hw queue... */
if (!list_empty(&list)) {
/*
- * Lazily clean hw queue. the 4 is a guess of about
- * how many sdma descriptors a packet will take (it
- * doesn't have to be perfect).
+ * Lazily clean hw queue.
*/
- if (qib_sdma_descq_freecnt(ppd) < ret * 4) {
+ if (qib_sdma_descq_freecnt(ppd) < ndesc) {
qib_user_sdma_hwqueue_clean(ppd);
- qib_user_sdma_queue_clean(ppd, pq);
+ if (pq->num_sending)
+ qib_user_sdma_queue_clean(ppd, pq);
}
- ret = qib_user_sdma_push_pkts(ppd, pq, &list);
+ ret = qib_user_sdma_push_pkts(ppd, pq, &list, mxp);
if (ret < 0)
goto done_unlock;
else {
- npkts += ret;
- pq->counter += ret;
-
- if (!list_empty(&list))
- goto done_unlock;
+ npkts += mxp;
+ pq->counter += mxp;
}
}
}
diff --git a/drivers/infiniband/ulp/ipoib/ipoib_cm.c b/drivers/infiniband/ulp/ipoib/ipoib_cm.c
index 3eceb61..7a31754 100644
--- a/drivers/infiniband/ulp/ipoib/ipoib_cm.c
+++ b/drivers/infiniband/ulp/ipoib/ipoib_cm.c
@@ -817,7 +817,6 @@
if (neigh) {
neigh->cm = NULL;
- list_del(&neigh->list);
ipoib_neigh_free(neigh);
tx->neigh = NULL;
@@ -1234,7 +1233,6 @@
if (neigh) {
neigh->cm = NULL;
- list_del(&neigh->list);
ipoib_neigh_free(neigh);
tx->neigh = NULL;
@@ -1325,7 +1323,6 @@
neigh = p->neigh;
if (neigh) {
neigh->cm = NULL;
- list_del(&neigh->list);
ipoib_neigh_free(neigh);
}
list_del(&p->list);
diff --git a/drivers/infiniband/ulp/ipoib/ipoib_main.c b/drivers/infiniband/ulp/ipoib/ipoib_main.c
index c6f71a8..82cec1a 100644
--- a/drivers/infiniband/ulp/ipoib/ipoib_main.c
+++ b/drivers/infiniband/ulp/ipoib/ipoib_main.c
@@ -493,7 +493,6 @@
path,
neigh));
if (!ipoib_cm_get(neigh)) {
- list_del(&neigh->list);
ipoib_neigh_free(neigh);
continue;
}
@@ -618,7 +617,6 @@
if (!ipoib_cm_get(neigh))
ipoib_cm_set(neigh, ipoib_cm_create_tx(dev, path, neigh));
if (!ipoib_cm_get(neigh)) {
- list_del(&neigh->list);
ipoib_neigh_free(neigh);
goto err_drop;
}
@@ -639,7 +637,7 @@
neigh->ah = NULL;
if (!path->query && path_rec_start(dev, path))
- goto err_list;
+ goto err_path;
__skb_queue_tail(&neigh->queue, skb);
}
@@ -648,9 +646,6 @@
ipoib_neigh_put(neigh);
return;
-err_list:
- list_del(&neigh->list);
-
err_path:
ipoib_neigh_free(neigh);
err_drop:
@@ -1098,6 +1093,8 @@
rcu_assign_pointer(*np,
rcu_dereference_protected(neigh->hnext,
lockdep_is_held(&priv->lock)));
+ /* remove from parent list */
+ list_del(&neigh->list);
call_rcu(&neigh->rcu, ipoib_neigh_reclaim);
return;
} else {
diff --git a/drivers/infiniband/ulp/iser/iscsi_iser.c b/drivers/infiniband/ulp/iser/iscsi_iser.c
index 2e84ef8..705de7b 100644
--- a/drivers/infiniband/ulp/iser/iscsi_iser.c
+++ b/drivers/infiniband/ulp/iser/iscsi_iser.c
@@ -347,6 +347,7 @@
{
struct iscsi_conn *conn = cls_conn->dd_data;
struct iscsi_iser_conn *iser_conn;
+ struct iscsi_session *session;
struct iser_conn *ib_conn;
struct iscsi_endpoint *ep;
int error;
@@ -365,7 +366,8 @@
}
ib_conn = ep->dd_data;
- if (iser_alloc_rx_descriptors(ib_conn))
+ session = conn->session;
+ if (iser_alloc_rx_descriptors(ib_conn, session))
return -ENOMEM;
/* binds the iSER connection retrieved from the previously
@@ -419,12 +421,13 @@
struct iscsi_cls_session *cls_session;
struct iscsi_session *session;
struct Scsi_Host *shost;
- struct iser_conn *ib_conn;
+ struct iser_conn *ib_conn = NULL;
shost = iscsi_host_alloc(&iscsi_iser_sht, 0, 0);
if (!shost)
return NULL;
shost->transportt = iscsi_iser_scsi_transport;
+ shost->cmd_per_lun = qdepth;
shost->max_lun = iscsi_max_lun;
shost->max_id = 0;
shost->max_channel = 0;
@@ -441,12 +444,14 @@
ep ? ib_conn->device->ib_device->dma_device : NULL))
goto free_host;
- /*
- * we do not support setting can_queue cmd_per_lun from userspace yet
- * because we preallocate so many resources
- */
+ if (cmds_max > ISER_DEF_XMIT_CMDS_MAX) {
+ iser_info("cmds_max changed from %u to %u\n",
+ cmds_max, ISER_DEF_XMIT_CMDS_MAX);
+ cmds_max = ISER_DEF_XMIT_CMDS_MAX;
+ }
+
cls_session = iscsi_session_setup(&iscsi_iser_transport, shost,
- ISCSI_DEF_XMIT_CMDS_MAX, 0,
+ cmds_max, 0,
sizeof(struct iscsi_iser_task),
initial_cmdsn, 0);
if (!cls_session)
diff --git a/drivers/infiniband/ulp/iser/iscsi_iser.h b/drivers/infiniband/ulp/iser/iscsi_iser.h
index 4f069c0..6791402 100644
--- a/drivers/infiniband/ulp/iser/iscsi_iser.h
+++ b/drivers/infiniband/ulp/iser/iscsi_iser.h
@@ -78,14 +78,14 @@
#define iser_warn(fmt, arg...) \
do { \
- if (iser_debug_level > 1) \
+ if (iser_debug_level > 0) \
pr_warn(PFX "%s:" fmt, \
__func__ , ## arg); \
} while (0)
#define iser_info(fmt, arg...) \
do { \
- if (iser_debug_level > 0) \
+ if (iser_debug_level > 1) \
pr_info(PFX "%s:" fmt, \
__func__ , ## arg); \
} while (0)
@@ -102,7 +102,13 @@
/* support up to 512KB in one RDMA */
#define ISCSI_ISER_SG_TABLESIZE (0x80000 >> SHIFT_4K)
-#define ISER_DEF_CMD_PER_LUN ISCSI_DEF_XMIT_CMDS_MAX
+#define ISER_DEF_XMIT_CMDS_DEFAULT 512
+#if ISCSI_DEF_XMIT_CMDS_MAX > ISER_DEF_XMIT_CMDS_DEFAULT
+ #define ISER_DEF_XMIT_CMDS_MAX ISCSI_DEF_XMIT_CMDS_MAX
+#else
+ #define ISER_DEF_XMIT_CMDS_MAX ISER_DEF_XMIT_CMDS_DEFAULT
+#endif
+#define ISER_DEF_CMD_PER_LUN ISER_DEF_XMIT_CMDS_MAX
/* QP settings */
/* Maximal bounds on received asynchronous PDUs */
@@ -111,9 +117,9 @@
#define ISER_MAX_TX_MISC_PDUS 6 /* NOOP_OUT(2), TEXT(1), *
* SCSI_TMFUNC(2), LOGOUT(1) */
-#define ISER_QP_MAX_RECV_DTOS (ISCSI_DEF_XMIT_CMDS_MAX)
+#define ISER_QP_MAX_RECV_DTOS (ISER_DEF_XMIT_CMDS_MAX)
-#define ISER_MIN_POSTED_RX (ISCSI_DEF_XMIT_CMDS_MAX >> 2)
+#define ISER_MIN_POSTED_RX (ISER_DEF_XMIT_CMDS_MAX >> 2)
/* the max TX (send) WR supported by the iSER QP is defined by *
* max_send_wr = T * (1 + D) + C ; D is how many inflight dataouts we expect *
@@ -123,7 +129,7 @@
#define ISER_INFLIGHT_DATAOUTS 8
-#define ISER_QP_MAX_REQ_DTOS (ISCSI_DEF_XMIT_CMDS_MAX * \
+#define ISER_QP_MAX_REQ_DTOS (ISER_DEF_XMIT_CMDS_MAX * \
(1 + ISER_INFLIGHT_DATAOUTS) + \
ISER_MAX_TX_MISC_PDUS + \
ISER_MAX_RX_MISC_PDUS)
@@ -205,7 +211,7 @@
u64 va;
u64 len;
void *mem_h;
- int is_fmr;
+ int is_mr;
};
struct iser_regd_buf {
@@ -246,6 +252,9 @@
#define ISER_MAX_CQ 4
+struct iser_conn;
+struct iscsi_iser_task;
+
struct iser_device {
struct ib_device *ib_device;
struct ib_pd *pd;
@@ -259,6 +268,22 @@
int cq_active_qps[ISER_MAX_CQ];
int cqs_used;
struct iser_cq_desc *cq_desc;
+ int (*iser_alloc_rdma_reg_res)(struct iser_conn *ib_conn,
+ unsigned cmds_max);
+ void (*iser_free_rdma_reg_res)(struct iser_conn *ib_conn);
+ int (*iser_reg_rdma_mem)(struct iscsi_iser_task *iser_task,
+ enum iser_data_dir cmd_dir);
+ void (*iser_unreg_rdma_mem)(struct iscsi_iser_task *iser_task,
+ enum iser_data_dir cmd_dir);
+};
+
+struct fast_reg_descriptor {
+ struct list_head list;
+ /* For fast registration - FRWR */
+ struct ib_mr *data_mr;
+ struct ib_fast_reg_page_list *data_frpl;
+ /* Valid for fast registration flag */
+ bool valid;
};
struct iser_conn {
@@ -270,13 +295,13 @@
struct iser_device *device; /* device context */
struct rdma_cm_id *cma_id; /* CMA ID */
struct ib_qp *qp; /* QP */
- struct ib_fmr_pool *fmr_pool; /* pool of IB FMRs */
wait_queue_head_t wait; /* waitq for conn/disconn */
+ unsigned qp_max_recv_dtos; /* num of rx buffers */
+ unsigned qp_max_recv_dtos_mask; /* above minus 1 */
+ unsigned min_posted_rx; /* qp_max_recv_dtos >> 2 */
int post_recv_buf_count; /* posted rx count */
atomic_t post_send_buf_count; /* posted tx count */
char name[ISER_OBJECT_NAME_SIZE];
- struct iser_page_vec *page_vec; /* represents SG to fmr maps*
- * maps serialized as tx is*/
struct list_head conn_list; /* entry in ig conn list */
char *login_buf;
@@ -285,6 +310,17 @@
unsigned int rx_desc_head;
struct iser_rx_desc *rx_descs;
struct ib_recv_wr rx_wr[ISER_MIN_POSTED_RX];
+ union {
+ struct {
+ struct ib_fmr_pool *pool; /* pool of IB FMRs */
+ struct iser_page_vec *page_vec; /* represents SG to fmr maps*
+ * maps serialized as tx is*/
+ } fmr;
+ struct {
+ struct list_head pool;
+ int pool_size;
+ } frwr;
+ } fastreg;
};
struct iscsi_iser_conn {
@@ -368,8 +404,10 @@
void iser_finalize_rdma_unaligned_sg(struct iscsi_iser_task *task,
enum iser_data_dir cmd_dir);
-int iser_reg_rdma_mem(struct iscsi_iser_task *task,
- enum iser_data_dir cmd_dir);
+int iser_reg_rdma_mem_fmr(struct iscsi_iser_task *task,
+ enum iser_data_dir cmd_dir);
+int iser_reg_rdma_mem_frwr(struct iscsi_iser_task *task,
+ enum iser_data_dir cmd_dir);
int iser_connect(struct iser_conn *ib_conn,
struct sockaddr_in *src_addr,
@@ -380,7 +418,10 @@
struct iser_page_vec *page_vec,
struct iser_mem_reg *mem_reg);
-void iser_unreg_mem(struct iser_mem_reg *mem_reg);
+void iser_unreg_mem_fmr(struct iscsi_iser_task *iser_task,
+ enum iser_data_dir cmd_dir);
+void iser_unreg_mem_frwr(struct iscsi_iser_task *iser_task,
+ enum iser_data_dir cmd_dir);
int iser_post_recvl(struct iser_conn *ib_conn);
int iser_post_recvm(struct iser_conn *ib_conn, int count);
@@ -394,5 +435,9 @@
void iser_dma_unmap_task_data(struct iscsi_iser_task *iser_task);
int iser_initialize_task_headers(struct iscsi_task *task,
struct iser_tx_desc *tx_desc);
-int iser_alloc_rx_descriptors(struct iser_conn *ib_conn);
+int iser_alloc_rx_descriptors(struct iser_conn *ib_conn, struct iscsi_session *session);
+int iser_create_fmr_pool(struct iser_conn *ib_conn, unsigned cmds_max);
+void iser_free_fmr_pool(struct iser_conn *ib_conn);
+int iser_create_frwr_pool(struct iser_conn *ib_conn, unsigned cmds_max);
+void iser_free_frwr_pool(struct iser_conn *ib_conn);
#endif
diff --git a/drivers/infiniband/ulp/iser/iser_initiator.c b/drivers/infiniband/ulp/iser/iser_initiator.c
index b6d81a8..5f01da9 100644
--- a/drivers/infiniband/ulp/iser/iser_initiator.c
+++ b/drivers/infiniband/ulp/iser/iser_initiator.c
@@ -49,6 +49,7 @@
{
struct iscsi_iser_task *iser_task = task->dd_data;
+ struct iser_device *device = iser_task->iser_conn->ib_conn->device;
struct iser_regd_buf *regd_buf;
int err;
struct iser_hdr *hdr = &iser_task->desc.iser_header;
@@ -69,7 +70,7 @@
return -EINVAL;
}
- err = iser_reg_rdma_mem(iser_task,ISER_DIR_IN);
+ err = device->iser_reg_rdma_mem(iser_task, ISER_DIR_IN);
if (err) {
iser_err("Failed to set up Data-IN RDMA\n");
return err;
@@ -98,6 +99,7 @@
unsigned int edtl)
{
struct iscsi_iser_task *iser_task = task->dd_data;
+ struct iser_device *device = iser_task->iser_conn->ib_conn->device;
struct iser_regd_buf *regd_buf;
int err;
struct iser_hdr *hdr = &iser_task->desc.iser_header;
@@ -119,7 +121,7 @@
return -EINVAL;
}
- err = iser_reg_rdma_mem(iser_task,ISER_DIR_OUT);
+ err = device->iser_reg_rdma_mem(iser_task, ISER_DIR_OUT);
if (err != 0) {
iser_err("Failed to register write cmd RDMA mem\n");
return err;
@@ -170,8 +172,78 @@
}
}
+static void iser_free_login_buf(struct iser_conn *ib_conn)
+{
+ if (!ib_conn->login_buf)
+ return;
-int iser_alloc_rx_descriptors(struct iser_conn *ib_conn)
+ if (ib_conn->login_req_dma)
+ ib_dma_unmap_single(ib_conn->device->ib_device,
+ ib_conn->login_req_dma,
+ ISCSI_DEF_MAX_RECV_SEG_LEN, DMA_TO_DEVICE);
+
+ if (ib_conn->login_resp_dma)
+ ib_dma_unmap_single(ib_conn->device->ib_device,
+ ib_conn->login_resp_dma,
+ ISER_RX_LOGIN_SIZE, DMA_FROM_DEVICE);
+
+ kfree(ib_conn->login_buf);
+
+ /* make sure we never redo any unmapping */
+ ib_conn->login_req_dma = 0;
+ ib_conn->login_resp_dma = 0;
+ ib_conn->login_buf = NULL;
+}
+
+static int iser_alloc_login_buf(struct iser_conn *ib_conn)
+{
+ struct iser_device *device;
+ int req_err, resp_err;
+
+ BUG_ON(ib_conn->device == NULL);
+
+ device = ib_conn->device;
+
+ ib_conn->login_buf = kmalloc(ISCSI_DEF_MAX_RECV_SEG_LEN +
+ ISER_RX_LOGIN_SIZE, GFP_KERNEL);
+ if (!ib_conn->login_buf)
+ goto out_err;
+
+ ib_conn->login_req_buf = ib_conn->login_buf;
+ ib_conn->login_resp_buf = ib_conn->login_buf +
+ ISCSI_DEF_MAX_RECV_SEG_LEN;
+
+ ib_conn->login_req_dma = ib_dma_map_single(ib_conn->device->ib_device,
+ (void *)ib_conn->login_req_buf,
+ ISCSI_DEF_MAX_RECV_SEG_LEN, DMA_TO_DEVICE);
+
+ ib_conn->login_resp_dma = ib_dma_map_single(ib_conn->device->ib_device,
+ (void *)ib_conn->login_resp_buf,
+ ISER_RX_LOGIN_SIZE, DMA_FROM_DEVICE);
+
+ req_err = ib_dma_mapping_error(device->ib_device,
+ ib_conn->login_req_dma);
+ resp_err = ib_dma_mapping_error(device->ib_device,
+ ib_conn->login_resp_dma);
+
+ if (req_err || resp_err) {
+ if (req_err)
+ ib_conn->login_req_dma = 0;
+ if (resp_err)
+ ib_conn->login_resp_dma = 0;
+ goto free_login_buf;
+ }
+ return 0;
+
+free_login_buf:
+ iser_free_login_buf(ib_conn);
+
+out_err:
+ iser_err("unable to alloc or map login buf\n");
+ return -ENOMEM;
+}
+
+int iser_alloc_rx_descriptors(struct iser_conn *ib_conn, struct iscsi_session *session)
{
int i, j;
u64 dma_addr;
@@ -179,14 +251,24 @@
struct ib_sge *rx_sg;
struct iser_device *device = ib_conn->device;
- ib_conn->rx_descs = kmalloc(ISER_QP_MAX_RECV_DTOS *
+ ib_conn->qp_max_recv_dtos = session->cmds_max;
+ ib_conn->qp_max_recv_dtos_mask = session->cmds_max - 1; /* cmds_max is 2^N */
+ ib_conn->min_posted_rx = ib_conn->qp_max_recv_dtos >> 2;
+
+ if (device->iser_alloc_rdma_reg_res(ib_conn, session->scsi_cmds_max))
+ goto create_rdma_reg_res_failed;
+
+ if (iser_alloc_login_buf(ib_conn))
+ goto alloc_login_buf_fail;
+
+ ib_conn->rx_descs = kmalloc(session->cmds_max *
sizeof(struct iser_rx_desc), GFP_KERNEL);
if (!ib_conn->rx_descs)
goto rx_desc_alloc_fail;
rx_desc = ib_conn->rx_descs;
- for (i = 0; i < ISER_QP_MAX_RECV_DTOS; i++, rx_desc++) {
+ for (i = 0; i < ib_conn->qp_max_recv_dtos; i++, rx_desc++) {
dma_addr = ib_dma_map_single(device->ib_device, (void *)rx_desc,
ISER_RX_PAYLOAD_SIZE, DMA_FROM_DEVICE);
if (ib_dma_mapping_error(device->ib_device, dma_addr))
@@ -207,10 +289,14 @@
rx_desc = ib_conn->rx_descs;
for (j = 0; j < i; j++, rx_desc++)
ib_dma_unmap_single(device->ib_device, rx_desc->dma_addr,
- ISER_RX_PAYLOAD_SIZE, DMA_FROM_DEVICE);
+ ISER_RX_PAYLOAD_SIZE, DMA_FROM_DEVICE);
kfree(ib_conn->rx_descs);
ib_conn->rx_descs = NULL;
rx_desc_alloc_fail:
+ iser_free_login_buf(ib_conn);
+alloc_login_buf_fail:
+ device->iser_free_rdma_reg_res(ib_conn);
+create_rdma_reg_res_failed:
iser_err("failed allocating rx descriptors / data buffers\n");
return -ENOMEM;
}
@@ -222,13 +308,21 @@
struct iser_device *device = ib_conn->device;
if (!ib_conn->rx_descs)
- return;
+ goto free_login_buf;
+
+ if (device->iser_free_rdma_reg_res)
+ device->iser_free_rdma_reg_res(ib_conn);
rx_desc = ib_conn->rx_descs;
- for (i = 0; i < ISER_QP_MAX_RECV_DTOS; i++, rx_desc++)
+ for (i = 0; i < ib_conn->qp_max_recv_dtos; i++, rx_desc++)
ib_dma_unmap_single(device->ib_device, rx_desc->dma_addr,
- ISER_RX_PAYLOAD_SIZE, DMA_FROM_DEVICE);
+ ISER_RX_PAYLOAD_SIZE, DMA_FROM_DEVICE);
kfree(ib_conn->rx_descs);
+ /* make sure we never redo any unmapping */
+ ib_conn->rx_descs = NULL;
+
+free_login_buf:
+ iser_free_login_buf(ib_conn);
}
static int iser_post_rx_bufs(struct iscsi_conn *conn, struct iscsi_hdr *req)
@@ -248,9 +342,10 @@
WARN_ON(iser_conn->ib_conn->post_recv_buf_count != 1);
WARN_ON(atomic_read(&iser_conn->ib_conn->post_send_buf_count) != 0);
- iser_dbg("Initially post: %d\n", ISER_MIN_POSTED_RX);
+ iser_dbg("Initially post: %d\n", iser_conn->ib_conn->min_posted_rx);
/* Initial post receive buffers */
- if (iser_post_recvm(iser_conn->ib_conn, ISER_MIN_POSTED_RX))
+ if (iser_post_recvm(iser_conn->ib_conn,
+ iser_conn->ib_conn->min_posted_rx))
return -ENOMEM;
return 0;
@@ -487,9 +582,9 @@
return;
outstanding = ib_conn->post_recv_buf_count;
- if (outstanding + ISER_MIN_POSTED_RX <= ISER_QP_MAX_RECV_DTOS) {
- count = min(ISER_QP_MAX_RECV_DTOS - outstanding,
- ISER_MIN_POSTED_RX);
+ if (outstanding + ib_conn->min_posted_rx <= ib_conn->qp_max_recv_dtos) {
+ count = min(ib_conn->qp_max_recv_dtos - outstanding,
+ ib_conn->min_posted_rx);
err = iser_post_recvm(ib_conn, count);
if (err)
iser_err("posting %d rx bufs err %d\n", count, err);
@@ -538,8 +633,8 @@
void iser_task_rdma_finalize(struct iscsi_iser_task *iser_task)
{
+ struct iser_device *device = iser_task->iser_conn->ib_conn->device;
int is_rdma_aligned = 1;
- struct iser_regd_buf *regd;
/* if we were reading, copy back to unaligned sglist,
* anyway dma_unmap and free the copy
@@ -553,17 +648,11 @@
iser_finalize_rdma_unaligned_sg(iser_task, ISER_DIR_OUT);
}
- if (iser_task->dir[ISER_DIR_IN]) {
- regd = &iser_task->rdma_regd[ISER_DIR_IN];
- if (regd->reg.is_fmr)
- iser_unreg_mem(®d->reg);
- }
+ if (iser_task->dir[ISER_DIR_IN])
+ device->iser_unreg_rdma_mem(iser_task, ISER_DIR_IN);
- if (iser_task->dir[ISER_DIR_OUT]) {
- regd = &iser_task->rdma_regd[ISER_DIR_OUT];
- if (regd->reg.is_fmr)
- iser_unreg_mem(®d->reg);
- }
+ if (iser_task->dir[ISER_DIR_OUT])
+ device->iser_unreg_rdma_mem(iser_task, ISER_DIR_OUT);
/* if the data was unaligned, it was already unmapped and then copied */
if (is_rdma_aligned)
diff --git a/drivers/infiniband/ulp/iser/iser_memory.c b/drivers/infiniband/ulp/iser/iser_memory.c
index 7827baf..1ce0c97 100644
--- a/drivers/infiniband/ulp/iser/iser_memory.c
+++ b/drivers/infiniband/ulp/iser/iser_memory.c
@@ -170,8 +170,8 @@
*/
static int iser_sg_to_page_vec(struct iser_data_buf *data,
- struct iser_page_vec *page_vec,
- struct ib_device *ibdev)
+ struct ib_device *ibdev, u64 *pages,
+ int *offset, int *data_size)
{
struct scatterlist *sg, *sgl = (struct scatterlist *)data->buf;
u64 start_addr, end_addr, page, chunk_start = 0;
@@ -180,7 +180,7 @@
int i, new_chunk, cur_page, last_ent = data->dma_nents - 1;
/* compute the offset of first element */
- page_vec->offset = (u64) sgl[0].offset & ~MASK_4K;
+ *offset = (u64) sgl[0].offset & ~MASK_4K;
new_chunk = 1;
cur_page = 0;
@@ -204,13 +204,14 @@
which might be unaligned */
page = chunk_start & MASK_4K;
do {
- page_vec->pages[cur_page++] = page;
+ pages[cur_page++] = page;
page += SIZE_4K;
} while (page < end_addr);
}
- page_vec->data_size = total_sz;
- iser_dbg("page_vec->data_size:%d cur_page %d\n", page_vec->data_size,cur_page);
+ *data_size = total_sz;
+ iser_dbg("page_vec->data_size:%d cur_page %d\n",
+ *data_size, cur_page);
return cur_page;
}
@@ -267,11 +268,8 @@
struct scatterlist *sg;
int i;
- if (iser_debug_level == 0)
- return;
-
for_each_sg(sgl, sg, data->dma_nents, i)
- iser_warn("sg[%d] dma_addr:0x%lX page:0x%p "
+ iser_dbg("sg[%d] dma_addr:0x%lX page:0x%p "
"off:0x%x sz:0x%x dma_len:0x%x\n",
i, (unsigned long)ib_sg_dma_address(ibdev, sg),
sg_page(sg), sg->offset,
@@ -298,8 +296,10 @@
page_vec->offset = 0;
iser_dbg("Translating sg sz: %d\n", data->dma_nents);
- page_vec_len = iser_sg_to_page_vec(data, page_vec, ibdev);
- iser_dbg("sg len %d page_vec_len %d\n", data->dma_nents,page_vec_len);
+ page_vec_len = iser_sg_to_page_vec(data, ibdev, page_vec->pages,
+ &page_vec->offset,
+ &page_vec->data_size);
+ iser_dbg("sg len %d page_vec_len %d\n", data->dma_nents, page_vec_len);
page_vec->length = page_vec_len;
@@ -347,16 +347,41 @@
}
}
+static int fall_to_bounce_buf(struct iscsi_iser_task *iser_task,
+ struct ib_device *ibdev,
+ enum iser_data_dir cmd_dir,
+ int aligned_len)
+{
+ struct iscsi_conn *iscsi_conn = iser_task->iser_conn->iscsi_conn;
+ struct iser_data_buf *mem = &iser_task->data[cmd_dir];
+
+ iscsi_conn->fmr_unalign_cnt++;
+ iser_warn("rdma alignment violation (%d/%d aligned) or FMR not supported\n",
+ aligned_len, mem->size);
+
+ if (iser_debug_level > 0)
+ iser_data_buf_dump(mem, ibdev);
+
+ /* unmap the command data before accessing it */
+ iser_dma_unmap_task_data(iser_task);
+
+ /* allocate copy buf, if we are writing, copy the */
+ /* unaligned scatterlist, dma map the copy */
+ if (iser_start_rdma_unaligned_sg(iser_task, cmd_dir) != 0)
+ return -ENOMEM;
+
+ return 0;
+}
+
/**
- * iser_reg_rdma_mem - Registers memory intended for RDMA,
- * obtaining rkey and va
+ * iser_reg_rdma_mem_fmr - Registers memory intended for RDMA,
+ * using FMR (if possible) obtaining rkey and va
*
* returns 0 on success, errno code on failure
*/
-int iser_reg_rdma_mem(struct iscsi_iser_task *iser_task,
- enum iser_data_dir cmd_dir)
+int iser_reg_rdma_mem_fmr(struct iscsi_iser_task *iser_task,
+ enum iser_data_dir cmd_dir)
{
- struct iscsi_conn *iscsi_conn = iser_task->iser_conn->iscsi_conn;
struct iser_conn *ib_conn = iser_task->iser_conn->ib_conn;
struct iser_device *device = ib_conn->device;
struct ib_device *ibdev = device->ib_device;
@@ -370,20 +395,13 @@
regd_buf = &iser_task->rdma_regd[cmd_dir];
aligned_len = iser_data_buf_aligned_len(mem, ibdev);
- if (aligned_len != mem->dma_nents ||
- (!ib_conn->fmr_pool && mem->dma_nents > 1)) {
- iscsi_conn->fmr_unalign_cnt++;
- iser_warn("rdma alignment violation (%d/%d aligned) or FMR not supported\n",
- aligned_len, mem->size);
- iser_data_buf_dump(mem, ibdev);
-
- /* unmap the command data before accessing it */
- iser_dma_unmap_task_data(iser_task);
-
- /* allocate copy buf, if we are writing, copy the */
- /* unaligned scatterlist, dma map the copy */
- if (iser_start_rdma_unaligned_sg(iser_task, cmd_dir) != 0)
- return -ENOMEM;
+ if (aligned_len != mem->dma_nents) {
+ err = fall_to_bounce_buf(iser_task, ibdev,
+ cmd_dir, aligned_len);
+ if (err) {
+ iser_err("failed to allocate bounce buffer\n");
+ return err;
+ }
mem = &iser_task->data_copy[cmd_dir];
}
@@ -395,7 +413,7 @@
regd_buf->reg.rkey = device->mr->rkey;
regd_buf->reg.len = ib_sg_dma_len(ibdev, &sg[0]);
regd_buf->reg.va = ib_sg_dma_address(ibdev, &sg[0]);
- regd_buf->reg.is_fmr = 0;
+ regd_buf->reg.is_mr = 0;
iser_dbg("PHYSICAL Mem.register: lkey: 0x%08X rkey: 0x%08X "
"va: 0x%08lX sz: %ld]\n",
@@ -404,22 +422,159 @@
(unsigned long)regd_buf->reg.va,
(unsigned long)regd_buf->reg.len);
} else { /* use FMR for multiple dma entries */
- iser_page_vec_build(mem, ib_conn->page_vec, ibdev);
- err = iser_reg_page_vec(ib_conn, ib_conn->page_vec, ®d_buf->reg);
+ iser_page_vec_build(mem, ib_conn->fastreg.fmr.page_vec, ibdev);
+ err = iser_reg_page_vec(ib_conn, ib_conn->fastreg.fmr.page_vec,
+ ®d_buf->reg);
if (err && err != -EAGAIN) {
iser_data_buf_dump(mem, ibdev);
iser_err("mem->dma_nents = %d (dlength = 0x%x)\n",
mem->dma_nents,
ntoh24(iser_task->desc.iscsi_header.dlength));
iser_err("page_vec: data_size = 0x%x, length = %d, offset = 0x%x\n",
- ib_conn->page_vec->data_size, ib_conn->page_vec->length,
- ib_conn->page_vec->offset);
- for (i=0 ; i<ib_conn->page_vec->length ; i++)
+ ib_conn->fastreg.fmr.page_vec->data_size,
+ ib_conn->fastreg.fmr.page_vec->length,
+ ib_conn->fastreg.fmr.page_vec->offset);
+ for (i = 0; i < ib_conn->fastreg.fmr.page_vec->length; i++)
iser_err("page_vec[%d] = 0x%llx\n", i,
- (unsigned long long) ib_conn->page_vec->pages[i]);
+ (unsigned long long) ib_conn->fastreg.fmr.page_vec->pages[i]);
}
if (err)
return err;
}
return 0;
}
+
+static int iser_fast_reg_mr(struct fast_reg_descriptor *desc,
+ struct iser_conn *ib_conn,
+ struct iser_regd_buf *regd_buf,
+ u32 offset, unsigned int data_size,
+ unsigned int page_list_len)
+{
+ struct ib_send_wr fastreg_wr, inv_wr;
+ struct ib_send_wr *bad_wr, *wr = NULL;
+ u8 key;
+ int ret;
+
+ if (!desc->valid) {
+ memset(&inv_wr, 0, sizeof(inv_wr));
+ inv_wr.opcode = IB_WR_LOCAL_INV;
+ inv_wr.send_flags = IB_SEND_SIGNALED;
+ inv_wr.ex.invalidate_rkey = desc->data_mr->rkey;
+ wr = &inv_wr;
+ /* Bump the key */
+ key = (u8)(desc->data_mr->rkey & 0x000000FF);
+ ib_update_fast_reg_key(desc->data_mr, ++key);
+ }
+
+ /* Prepare FASTREG WR */
+ memset(&fastreg_wr, 0, sizeof(fastreg_wr));
+ fastreg_wr.opcode = IB_WR_FAST_REG_MR;
+ fastreg_wr.send_flags = IB_SEND_SIGNALED;
+ fastreg_wr.wr.fast_reg.iova_start = desc->data_frpl->page_list[0] + offset;
+ fastreg_wr.wr.fast_reg.page_list = desc->data_frpl;
+ fastreg_wr.wr.fast_reg.page_list_len = page_list_len;
+ fastreg_wr.wr.fast_reg.page_shift = SHIFT_4K;
+ fastreg_wr.wr.fast_reg.length = data_size;
+ fastreg_wr.wr.fast_reg.rkey = desc->data_mr->rkey;
+ fastreg_wr.wr.fast_reg.access_flags = (IB_ACCESS_LOCAL_WRITE |
+ IB_ACCESS_REMOTE_WRITE |
+ IB_ACCESS_REMOTE_READ);
+
+ if (!wr) {
+ wr = &fastreg_wr;
+ atomic_inc(&ib_conn->post_send_buf_count);
+ } else {
+ wr->next = &fastreg_wr;
+ atomic_add(2, &ib_conn->post_send_buf_count);
+ }
+
+ ret = ib_post_send(ib_conn->qp, wr, &bad_wr);
+ if (ret) {
+ if (bad_wr->next)
+ atomic_sub(2, &ib_conn->post_send_buf_count);
+ else
+ atomic_dec(&ib_conn->post_send_buf_count);
+ iser_err("fast registration failed, ret:%d\n", ret);
+ return ret;
+ }
+ desc->valid = false;
+
+ regd_buf->reg.mem_h = desc;
+ regd_buf->reg.lkey = desc->data_mr->lkey;
+ regd_buf->reg.rkey = desc->data_mr->rkey;
+ regd_buf->reg.va = desc->data_frpl->page_list[0] + offset;
+ regd_buf->reg.len = data_size;
+ regd_buf->reg.is_mr = 1;
+
+ return ret;
+}
+
+/**
+ * iser_reg_rdma_mem_frwr - Registers memory intended for RDMA,
+ * using Fast Registration WR (if possible) obtaining rkey and va
+ *
+ * returns 0 on success, errno code on failure
+ */
+int iser_reg_rdma_mem_frwr(struct iscsi_iser_task *iser_task,
+ enum iser_data_dir cmd_dir)
+{
+ struct iser_conn *ib_conn = iser_task->iser_conn->ib_conn;
+ struct iser_device *device = ib_conn->device;
+ struct ib_device *ibdev = device->ib_device;
+ struct iser_data_buf *mem = &iser_task->data[cmd_dir];
+ struct iser_regd_buf *regd_buf = &iser_task->rdma_regd[cmd_dir];
+ struct fast_reg_descriptor *desc;
+ unsigned int data_size, page_list_len;
+ int err, aligned_len;
+ unsigned long flags;
+ u32 offset;
+
+ aligned_len = iser_data_buf_aligned_len(mem, ibdev);
+ if (aligned_len != mem->dma_nents) {
+ err = fall_to_bounce_buf(iser_task, ibdev,
+ cmd_dir, aligned_len);
+ if (err) {
+ iser_err("failed to allocate bounce buffer\n");
+ return err;
+ }
+ mem = &iser_task->data_copy[cmd_dir];
+ }
+
+ /* if there a single dma entry, dma mr suffices */
+ if (mem->dma_nents == 1) {
+ struct scatterlist *sg = (struct scatterlist *)mem->buf;
+
+ regd_buf->reg.lkey = device->mr->lkey;
+ regd_buf->reg.rkey = device->mr->rkey;
+ regd_buf->reg.len = ib_sg_dma_len(ibdev, &sg[0]);
+ regd_buf->reg.va = ib_sg_dma_address(ibdev, &sg[0]);
+ regd_buf->reg.is_mr = 0;
+ } else {
+ spin_lock_irqsave(&ib_conn->lock, flags);
+ desc = list_first_entry(&ib_conn->fastreg.frwr.pool,
+ struct fast_reg_descriptor, list);
+ list_del(&desc->list);
+ spin_unlock_irqrestore(&ib_conn->lock, flags);
+ page_list_len = iser_sg_to_page_vec(mem, device->ib_device,
+ desc->data_frpl->page_list,
+ &offset, &data_size);
+
+ if (page_list_len * SIZE_4K < data_size) {
+ iser_err("fast reg page_list too short to hold this SG\n");
+ err = -EINVAL;
+ goto err_reg;
+ }
+
+ err = iser_fast_reg_mr(desc, ib_conn, regd_buf,
+ offset, data_size, page_list_len);
+ if (err)
+ goto err_reg;
+ }
+
+ return 0;
+err_reg:
+ spin_lock_irqsave(&ib_conn->lock, flags);
+ list_add_tail(&desc->list, &ib_conn->fastreg.frwr.pool);
+ spin_unlock_irqrestore(&ib_conn->lock, flags);
+ return err;
+}
diff --git a/drivers/infiniband/ulp/iser/iser_verbs.c b/drivers/infiniband/ulp/iser/iser_verbs.c
index 2c4941d..afe9567 100644
--- a/drivers/infiniband/ulp/iser/iser_verbs.c
+++ b/drivers/infiniband/ulp/iser/iser_verbs.c
@@ -73,6 +73,36 @@
{
int i, j;
struct iser_cq_desc *cq_desc;
+ struct ib_device_attr *dev_attr;
+
+ dev_attr = kmalloc(sizeof(*dev_attr), GFP_KERNEL);
+ if (!dev_attr)
+ return -ENOMEM;
+
+ if (ib_query_device(device->ib_device, dev_attr)) {
+ pr_warn("Query device failed for %s\n", device->ib_device->name);
+ goto dev_attr_err;
+ }
+
+ /* Assign function handles - based on FMR support */
+ if (device->ib_device->alloc_fmr && device->ib_device->dealloc_fmr &&
+ device->ib_device->map_phys_fmr && device->ib_device->unmap_fmr) {
+ iser_info("FMR supported, using FMR for registration\n");
+ device->iser_alloc_rdma_reg_res = iser_create_fmr_pool;
+ device->iser_free_rdma_reg_res = iser_free_fmr_pool;
+ device->iser_reg_rdma_mem = iser_reg_rdma_mem_fmr;
+ device->iser_unreg_rdma_mem = iser_unreg_mem_fmr;
+ } else
+ if (dev_attr->device_cap_flags & IB_DEVICE_MEM_MGT_EXTENSIONS) {
+ iser_info("FRWR supported, using FRWR for registration\n");
+ device->iser_alloc_rdma_reg_res = iser_create_frwr_pool;
+ device->iser_free_rdma_reg_res = iser_free_frwr_pool;
+ device->iser_reg_rdma_mem = iser_reg_rdma_mem_frwr;
+ device->iser_unreg_rdma_mem = iser_unreg_mem_frwr;
+ } else {
+ iser_err("IB device does not support FMRs nor FRWRs, can't register memory\n");
+ goto dev_attr_err;
+ }
device->cqs_used = min(ISER_MAX_CQ, device->ib_device->num_comp_vectors);
iser_info("using %d CQs, device %s supports %d vectors\n",
@@ -128,6 +158,7 @@
if (ib_register_event_handler(&device->event_handler))
goto handler_err;
+ kfree(dev_attr);
return 0;
handler_err:
@@ -147,6 +178,8 @@
kfree(device->cq_desc);
cq_desc_err:
iser_err("failed to allocate an IB resource\n");
+dev_attr_err:
+ kfree(dev_attr);
return -1;
}
@@ -178,56 +211,23 @@
}
/**
- * iser_create_ib_conn_res - Creates FMR pool and Queue-Pair (QP)
+ * iser_create_fmr_pool - Creates FMR pool and page_vector
*
- * returns 0 on success, -1 on failure
+ * returns 0 on success, or errno code on failure
*/
-static int iser_create_ib_conn_res(struct iser_conn *ib_conn)
+int iser_create_fmr_pool(struct iser_conn *ib_conn, unsigned cmds_max)
{
- struct iser_device *device;
- struct ib_qp_init_attr init_attr;
- int req_err, resp_err, ret = -ENOMEM;
+ struct iser_device *device = ib_conn->device;
struct ib_fmr_pool_param params;
- int index, min_index = 0;
+ int ret = -ENOMEM;
- BUG_ON(ib_conn->device == NULL);
+ ib_conn->fastreg.fmr.page_vec = kmalloc(sizeof(struct iser_page_vec) +
+ (sizeof(u64)*(ISCSI_ISER_SG_TABLESIZE + 1)),
+ GFP_KERNEL);
+ if (!ib_conn->fastreg.fmr.page_vec)
+ return ret;
- device = ib_conn->device;
-
- ib_conn->login_buf = kmalloc(ISCSI_DEF_MAX_RECV_SEG_LEN +
- ISER_RX_LOGIN_SIZE, GFP_KERNEL);
- if (!ib_conn->login_buf)
- goto out_err;
-
- ib_conn->login_req_buf = ib_conn->login_buf;
- ib_conn->login_resp_buf = ib_conn->login_buf + ISCSI_DEF_MAX_RECV_SEG_LEN;
-
- ib_conn->login_req_dma = ib_dma_map_single(ib_conn->device->ib_device,
- (void *)ib_conn->login_req_buf,
- ISCSI_DEF_MAX_RECV_SEG_LEN, DMA_TO_DEVICE);
-
- ib_conn->login_resp_dma = ib_dma_map_single(ib_conn->device->ib_device,
- (void *)ib_conn->login_resp_buf,
- ISER_RX_LOGIN_SIZE, DMA_FROM_DEVICE);
-
- req_err = ib_dma_mapping_error(device->ib_device, ib_conn->login_req_dma);
- resp_err = ib_dma_mapping_error(device->ib_device, ib_conn->login_resp_dma);
-
- if (req_err || resp_err) {
- if (req_err)
- ib_conn->login_req_dma = 0;
- if (resp_err)
- ib_conn->login_resp_dma = 0;
- goto out_err;
- }
-
- ib_conn->page_vec = kmalloc(sizeof(struct iser_page_vec) +
- (sizeof(u64) * (ISCSI_ISER_SG_TABLESIZE +1)),
- GFP_KERNEL);
- if (!ib_conn->page_vec)
- goto out_err;
-
- ib_conn->page_vec->pages = (u64 *) (ib_conn->page_vec + 1);
+ ib_conn->fastreg.fmr.page_vec->pages = (u64 *)(ib_conn->fastreg.fmr.page_vec + 1);
params.page_shift = SHIFT_4K;
/* when the first/last SG element are not start/end *
@@ -235,24 +235,143 @@
params.max_pages_per_fmr = ISCSI_ISER_SG_TABLESIZE + 1;
/* make the pool size twice the max number of SCSI commands *
* the ML is expected to queue, watermark for unmap at 50% */
- params.pool_size = ISCSI_DEF_XMIT_CMDS_MAX * 2;
- params.dirty_watermark = ISCSI_DEF_XMIT_CMDS_MAX;
+ params.pool_size = cmds_max * 2;
+ params.dirty_watermark = cmds_max;
params.cache = 0;
params.flush_function = NULL;
params.access = (IB_ACCESS_LOCAL_WRITE |
IB_ACCESS_REMOTE_WRITE |
IB_ACCESS_REMOTE_READ);
- ib_conn->fmr_pool = ib_create_fmr_pool(device->pd, ¶ms);
- ret = PTR_ERR(ib_conn->fmr_pool);
- if (IS_ERR(ib_conn->fmr_pool) && ret != -ENOSYS) {
- ib_conn->fmr_pool = NULL;
- goto out_err;
- } else if (ret == -ENOSYS) {
- ib_conn->fmr_pool = NULL;
+ ib_conn->fastreg.fmr.pool = ib_create_fmr_pool(device->pd, ¶ms);
+ if (!IS_ERR(ib_conn->fastreg.fmr.pool))
+ return 0;
+
+ /* no FMR => no need for page_vec */
+ kfree(ib_conn->fastreg.fmr.page_vec);
+ ib_conn->fastreg.fmr.page_vec = NULL;
+
+ ret = PTR_ERR(ib_conn->fastreg.fmr.pool);
+ ib_conn->fastreg.fmr.pool = NULL;
+ if (ret != -ENOSYS) {
+ iser_err("FMR allocation failed, err %d\n", ret);
+ return ret;
+ } else {
iser_warn("FMRs are not supported, using unaligned mode\n");
- ret = 0;
+ return 0;
}
+}
+
+/**
+ * iser_free_fmr_pool - releases the FMR pool and page vec
+ */
+void iser_free_fmr_pool(struct iser_conn *ib_conn)
+{
+ iser_info("freeing conn %p fmr pool %p\n",
+ ib_conn, ib_conn->fastreg.fmr.pool);
+
+ if (ib_conn->fastreg.fmr.pool != NULL)
+ ib_destroy_fmr_pool(ib_conn->fastreg.fmr.pool);
+
+ ib_conn->fastreg.fmr.pool = NULL;
+
+ kfree(ib_conn->fastreg.fmr.page_vec);
+ ib_conn->fastreg.fmr.page_vec = NULL;
+}
+
+/**
+ * iser_create_frwr_pool - Creates pool of fast_reg descriptors
+ * for fast registration work requests.
+ * returns 0 on success, or errno code on failure
+ */
+int iser_create_frwr_pool(struct iser_conn *ib_conn, unsigned cmds_max)
+{
+ struct iser_device *device = ib_conn->device;
+ struct fast_reg_descriptor *desc;
+ int i, ret;
+
+ INIT_LIST_HEAD(&ib_conn->fastreg.frwr.pool);
+ ib_conn->fastreg.frwr.pool_size = 0;
+ for (i = 0; i < cmds_max; i++) {
+ desc = kmalloc(sizeof(*desc), GFP_KERNEL);
+ if (!desc) {
+ iser_err("Failed to allocate a new fast_reg descriptor\n");
+ ret = -ENOMEM;
+ goto err;
+ }
+
+ desc->data_frpl = ib_alloc_fast_reg_page_list(device->ib_device,
+ ISCSI_ISER_SG_TABLESIZE + 1);
+ if (IS_ERR(desc->data_frpl)) {
+ ret = PTR_ERR(desc->data_frpl);
+ iser_err("Failed to allocate ib_fast_reg_page_list err=%d\n", ret);
+ goto fast_reg_page_failure;
+ }
+
+ desc->data_mr = ib_alloc_fast_reg_mr(device->pd,
+ ISCSI_ISER_SG_TABLESIZE + 1);
+ if (IS_ERR(desc->data_mr)) {
+ ret = PTR_ERR(desc->data_mr);
+ iser_err("Failed to allocate ib_fast_reg_mr err=%d\n", ret);
+ goto fast_reg_mr_failure;
+ }
+ desc->valid = true;
+ list_add_tail(&desc->list, &ib_conn->fastreg.frwr.pool);
+ ib_conn->fastreg.frwr.pool_size++;
+ }
+
+ return 0;
+
+fast_reg_mr_failure:
+ ib_free_fast_reg_page_list(desc->data_frpl);
+fast_reg_page_failure:
+ kfree(desc);
+err:
+ iser_free_frwr_pool(ib_conn);
+ return ret;
+}
+
+/**
+ * iser_free_frwr_pool - releases the pool of fast_reg descriptors
+ */
+void iser_free_frwr_pool(struct iser_conn *ib_conn)
+{
+ struct fast_reg_descriptor *desc, *tmp;
+ int i = 0;
+
+ if (list_empty(&ib_conn->fastreg.frwr.pool))
+ return;
+
+ iser_info("freeing conn %p frwr pool\n", ib_conn);
+
+ list_for_each_entry_safe(desc, tmp, &ib_conn->fastreg.frwr.pool, list) {
+ list_del(&desc->list);
+ ib_free_fast_reg_page_list(desc->data_frpl);
+ ib_dereg_mr(desc->data_mr);
+ kfree(desc);
+ ++i;
+ }
+
+ if (i < ib_conn->fastreg.frwr.pool_size)
+ iser_warn("pool still has %d regions registered\n",
+ ib_conn->fastreg.frwr.pool_size - i);
+}
+
+/**
+ * iser_create_ib_conn_res - Queue-Pair (QP)
+ *
+ * returns 0 on success, -1 on failure
+ */
+static int iser_create_ib_conn_res(struct iser_conn *ib_conn)
+{
+ struct iser_device *device;
+ struct ib_qp_init_attr init_attr;
+ int ret = -ENOMEM;
+ int index, min_index = 0;
+
+ BUG_ON(ib_conn->device == NULL);
+
+ device = ib_conn->device;
memset(&init_attr, 0, sizeof init_attr);
@@ -282,9 +401,9 @@
goto out_err;
ib_conn->qp = ib_conn->cma_id->qp;
- iser_info("setting conn %p cma_id %p: fmr_pool %p qp %p\n",
+ iser_info("setting conn %p cma_id %p qp %p\n",
ib_conn, ib_conn->cma_id,
- ib_conn->fmr_pool, ib_conn->cma_id->qp);
+ ib_conn->cma_id->qp);
return ret;
out_err:
@@ -293,7 +412,7 @@
}
/**
- * releases the FMR pool and QP objects, returns 0 on success,
+ * releases the QP objects, returns 0 on success,
* -1 on failure
*/
static int iser_free_ib_conn_res(struct iser_conn *ib_conn)
@@ -301,13 +420,11 @@
int cq_index;
BUG_ON(ib_conn == NULL);
- iser_info("freeing conn %p cma_id %p fmr pool %p qp %p\n",
+ iser_info("freeing conn %p cma_id %p qp %p\n",
ib_conn, ib_conn->cma_id,
- ib_conn->fmr_pool, ib_conn->qp);
+ ib_conn->qp);
/* qp is created only once both addr & route are resolved */
- if (ib_conn->fmr_pool != NULL)
- ib_destroy_fmr_pool(ib_conn->fmr_pool);
if (ib_conn->qp != NULL) {
cq_index = ((struct iser_cq_desc *)ib_conn->qp->recv_cq->cq_context)->cq_index;
@@ -316,21 +433,7 @@
rdma_destroy_qp(ib_conn->cma_id);
}
- ib_conn->fmr_pool = NULL;
ib_conn->qp = NULL;
- kfree(ib_conn->page_vec);
-
- if (ib_conn->login_buf) {
- if (ib_conn->login_req_dma)
- ib_dma_unmap_single(ib_conn->device->ib_device,
- ib_conn->login_req_dma,
- ISCSI_DEF_MAX_RECV_SEG_LEN, DMA_TO_DEVICE);
- if (ib_conn->login_resp_dma)
- ib_dma_unmap_single(ib_conn->device->ib_device,
- ib_conn->login_resp_dma,
- ISER_RX_LOGIN_SIZE, DMA_FROM_DEVICE);
- kfree(ib_conn->login_buf);
- }
return 0;
}
@@ -694,7 +797,7 @@
page_list = page_vec->pages;
io_addr = page_list[0];
- mem = ib_fmr_pool_map_phys(ib_conn->fmr_pool,
+ mem = ib_fmr_pool_map_phys(ib_conn->fastreg.fmr.pool,
page_list,
page_vec->length,
io_addr);
@@ -709,7 +812,7 @@
mem_reg->rkey = mem->fmr->rkey;
mem_reg->len = page_vec->length * SIZE_4K;
mem_reg->va = io_addr;
- mem_reg->is_fmr = 1;
+ mem_reg->is_mr = 1;
mem_reg->mem_h = (void *)mem;
mem_reg->va += page_vec->offset;
@@ -727,12 +830,18 @@
}
/**
- * Unregister (previosuly registered) memory.
+ * Unregister (previosuly registered using FMR) memory.
+ * If memory is non-FMR does nothing.
*/
-void iser_unreg_mem(struct iser_mem_reg *reg)
+void iser_unreg_mem_fmr(struct iscsi_iser_task *iser_task,
+ enum iser_data_dir cmd_dir)
{
+ struct iser_mem_reg *reg = &iser_task->rdma_regd[cmd_dir].reg;
int ret;
+ if (!reg->is_mr)
+ return;
+
iser_dbg("PHYSICAL Mem.Unregister mem_h %p\n",reg->mem_h);
ret = ib_fmr_pool_unmap((struct ib_pool_fmr *)reg->mem_h);
@@ -742,6 +851,23 @@
reg->mem_h = NULL;
}
+void iser_unreg_mem_frwr(struct iscsi_iser_task *iser_task,
+ enum iser_data_dir cmd_dir)
+{
+ struct iser_mem_reg *reg = &iser_task->rdma_regd[cmd_dir].reg;
+ struct iser_conn *ib_conn = iser_task->iser_conn->ib_conn;
+ struct fast_reg_descriptor *desc = reg->mem_h;
+
+ if (!reg->is_mr)
+ return;
+
+ reg->mem_h = NULL;
+ reg->is_mr = 0;
+ spin_lock_bh(&ib_conn->lock);
+ list_add_tail(&desc->list, &ib_conn->fastreg.frwr.pool);
+ spin_unlock_bh(&ib_conn->lock);
+}
+
int iser_post_recvl(struct iser_conn *ib_conn)
{
struct ib_recv_wr rx_wr, *rx_wr_failed;
@@ -779,7 +905,7 @@
rx_wr->sg_list = &rx_desc->rx_sg;
rx_wr->num_sge = 1;
rx_wr->next = rx_wr + 1;
- my_rx_head = (my_rx_head + 1) & (ISER_QP_MAX_RECV_DTOS - 1);
+ my_rx_head = (my_rx_head + 1) & ib_conn->qp_max_recv_dtos_mask;
}
rx_wr--;
@@ -863,7 +989,11 @@
if (wc.status == IB_WC_SUCCESS) {
if (wc.opcode == IB_WC_SEND)
iser_snd_completion(tx_desc, ib_conn);
- else
+ else if (wc.opcode == IB_WC_LOCAL_INV ||
+ wc.opcode == IB_WC_FAST_REG_MR) {
+ atomic_dec(&ib_conn->post_send_buf_count);
+ continue;
+ } else
iser_err("expected opcode %d got %d\n",
IB_WC_SEND, wc.opcode);
} else {
diff --git a/drivers/net/ethernet/mellanox/mlx4/resource_tracker.c b/drivers/net/ethernet/mellanox/mlx4/resource_tracker.c
index f984a89..dd68763 100644
--- a/drivers/net/ethernet/mellanox/mlx4/resource_tracker.c
+++ b/drivers/net/ethernet/mellanox/mlx4/resource_tracker.c
@@ -1909,7 +1909,8 @@
int log_rq_stride = qpc->rq_size_stride & 7;
int srq = (be32_to_cpu(qpc->srqn) >> 24) & 1;
int rss = (be32_to_cpu(qpc->flags) >> 13) & 1;
- int xrc = (be32_to_cpu(qpc->local_qpn) >> 23) & 1;
+ u32 ts = (be32_to_cpu(qpc->flags) >> 16) & 0xff;
+ int xrc = (ts == MLX4_QP_ST_XRC) ? 1 : 0;
int sq_size;
int rq_size;
int total_pages;
diff --git a/include/linux/mlx4/device.h b/include/linux/mlx4/device.h
index 52c23a8..d73423c 100644
--- a/include/linux/mlx4/device.h
+++ b/include/linux/mlx4/device.h
@@ -1052,11 +1052,6 @@
};
};
-/* translating DMFS verbs sniffer rule to the FW API would need two reg IDs */
-struct mlx4_flow_handle {
- u64 reg_id[2];
-};
-
int mlx4_flow_steer_promisc_add(struct mlx4_dev *dev, u8 port, u32 qpn,
enum mlx4_net_trans_promisc_mode mode);
int mlx4_flow_steer_promisc_remove(struct mlx4_dev *dev, u8 port,
diff --git a/include/rdma/ib_verbs.h b/include/rdma/ib_verbs.h
index 645c3ce..e393171 100644
--- a/include/rdma/ib_verbs.h
+++ b/include/rdma/ib_verbs.h
@@ -116,7 +116,8 @@
IB_DEVICE_MEM_MGT_EXTENSIONS = (1<<21),
IB_DEVICE_BLOCK_MULTICAST_LOOPBACK = (1<<22),
IB_DEVICE_MEM_WINDOW_TYPE_2A = (1<<23),
- IB_DEVICE_MEM_WINDOW_TYPE_2B = (1<<24)
+ IB_DEVICE_MEM_WINDOW_TYPE_2B = (1<<24),
+ IB_DEVICE_MANAGED_FLOW_STEERING = (1<<29)
};
enum ib_atomic_cap {
@@ -635,6 +636,12 @@
IB_QP_CREATE_RESERVED_END = 1 << 31,
};
+
+/*
+ * Note: users may not call ib_close_qp or ib_destroy_qp from the event_handler
+ * callback to destroy the passed in QP.
+ */
+
struct ib_qp_init_attr {
void (*event_handler)(struct ib_event *, void *);
void *qp_context;
@@ -953,6 +960,7 @@
struct list_head srq_list;
struct list_head ah_list;
struct list_head xrcd_list;
+ struct list_head rule_list;
int closing;
};
@@ -1033,7 +1041,8 @@
struct ib_srq *srq;
struct ib_xrcd *xrcd; /* XRC TGT QPs only */
struct list_head xrcd_list;
- atomic_t usecnt; /* count times opened, mcast attaches */
+ /* count times opened, mcast attaches, flow attaches */
+ atomic_t usecnt;
struct list_head open_list;
struct ib_qp *real_qp;
struct ib_uobject *uobject;
@@ -1068,6 +1077,112 @@
u32 rkey;
};
+/* Supported steering options */
+enum ib_flow_attr_type {
+ /* steering according to rule specifications */
+ IB_FLOW_ATTR_NORMAL = 0x0,
+ /* default unicast and multicast rule -
+ * receive all Eth traffic which isn't steered to any QP
+ */
+ IB_FLOW_ATTR_ALL_DEFAULT = 0x1,
+ /* default multicast rule -
+ * receive all Eth multicast traffic which isn't steered to any QP
+ */
+ IB_FLOW_ATTR_MC_DEFAULT = 0x2,
+ /* sniffer rule - receive all port traffic */
+ IB_FLOW_ATTR_SNIFFER = 0x3
+};
+
+/* Supported steering header types */
+enum ib_flow_spec_type {
+ /* L2 headers*/
+ IB_FLOW_SPEC_ETH = 0x20,
+ /* L3 header*/
+ IB_FLOW_SPEC_IPV4 = 0x30,
+ /* L4 headers*/
+ IB_FLOW_SPEC_TCP = 0x40,
+ IB_FLOW_SPEC_UDP = 0x41
+};
+
+#define IB_FLOW_SPEC_SUPPORT_LAYERS 4
+
+/* Flow steering rule priority is set according to it's domain.
+ * Lower domain value means higher priority.
+ */
+enum ib_flow_domain {
+ IB_FLOW_DOMAIN_USER,
+ IB_FLOW_DOMAIN_ETHTOOL,
+ IB_FLOW_DOMAIN_RFS,
+ IB_FLOW_DOMAIN_NIC,
+ IB_FLOW_DOMAIN_NUM /* Must be last */
+};
+
+struct ib_flow_eth_filter {
+ u8 dst_mac[6];
+ u8 src_mac[6];
+ __be16 ether_type;
+ __be16 vlan_tag;
+};
+
+struct ib_flow_spec_eth {
+ enum ib_flow_spec_type type;
+ u16 size;
+ struct ib_flow_eth_filter val;
+ struct ib_flow_eth_filter mask;
+};
+
+struct ib_flow_ipv4_filter {
+ __be32 src_ip;
+ __be32 dst_ip;
+};
+
+struct ib_flow_spec_ipv4 {
+ enum ib_flow_spec_type type;
+ u16 size;
+ struct ib_flow_ipv4_filter val;
+ struct ib_flow_ipv4_filter mask;
+};
+
+struct ib_flow_tcp_udp_filter {
+ __be16 dst_port;
+ __be16 src_port;
+};
+
+struct ib_flow_spec_tcp_udp {
+ enum ib_flow_spec_type type;
+ u16 size;
+ struct ib_flow_tcp_udp_filter val;
+ struct ib_flow_tcp_udp_filter mask;
+};
+
+union ib_flow_spec {
+ struct {
+ enum ib_flow_spec_type type;
+ u16 size;
+ };
+ struct ib_flow_spec_eth eth;
+ struct ib_flow_spec_ipv4 ipv4;
+ struct ib_flow_spec_tcp_udp tcp_udp;
+};
+
+struct ib_flow_attr {
+ enum ib_flow_attr_type type;
+ u16 size;
+ u16 priority;
+ u32 flags;
+ u8 num_of_specs;
+ u8 port;
+ /* Following are the optional layers according to user request
+ * struct ib_flow_spec_xxx
+ * struct ib_flow_spec_yyy
+ */
+};
+
+struct ib_flow {
+ struct ib_qp *qp;
+ struct ib_uobject *uobject;
+};
+
struct ib_mad;
struct ib_grh;
@@ -1300,6 +1415,11 @@
struct ib_ucontext *ucontext,
struct ib_udata *udata);
int (*dealloc_xrcd)(struct ib_xrcd *xrcd);
+ struct ib_flow * (*create_flow)(struct ib_qp *qp,
+ struct ib_flow_attr
+ *flow_attr,
+ int domain);
+ int (*destroy_flow)(struct ib_flow *flow_id);
struct ib_dma_mapping_ops *dma_ops;
@@ -2260,4 +2380,8 @@
*/
int ib_dealloc_xrcd(struct ib_xrcd *xrcd);
+struct ib_flow *ib_create_flow(struct ib_qp *qp,
+ struct ib_flow_attr *flow_attr, int domain);
+int ib_destroy_flow(struct ib_flow *flow_id);
+
#endif /* IB_VERBS_H */
diff --git a/include/uapi/rdma/ib_user_verbs.h b/include/uapi/rdma/ib_user_verbs.h
index 805711e..0b233c5 100644
--- a/include/uapi/rdma/ib_user_verbs.h
+++ b/include/uapi/rdma/ib_user_verbs.h
@@ -43,6 +43,7 @@
* compatibility are made.
*/
#define IB_USER_VERBS_ABI_VERSION 6
+#define IB_USER_VERBS_CMD_THRESHOLD 50
enum {
IB_USER_VERBS_CMD_GET_CONTEXT,
@@ -85,7 +86,9 @@
IB_USER_VERBS_CMD_OPEN_XRCD,
IB_USER_VERBS_CMD_CLOSE_XRCD,
IB_USER_VERBS_CMD_CREATE_XSRQ,
- IB_USER_VERBS_CMD_OPEN_QP
+ IB_USER_VERBS_CMD_OPEN_QP,
+ IB_USER_VERBS_CMD_CREATE_FLOW = IB_USER_VERBS_CMD_THRESHOLD,
+ IB_USER_VERBS_CMD_DESTROY_FLOW
};
/*
@@ -123,6 +126,15 @@
__u16 out_words;
};
+struct ib_uverbs_cmd_hdr_ex {
+ __u32 command;
+ __u16 in_words;
+ __u16 out_words;
+ __u16 provider_in_words;
+ __u16 provider_out_words;
+ __u32 cmd_hdr_reserved;
+};
+
struct ib_uverbs_get_context {
__u64 response;
__u64 driver_data[0];
@@ -684,6 +696,91 @@
__u64 driver_data[0];
};
+struct ib_kern_eth_filter {
+ __u8 dst_mac[6];
+ __u8 src_mac[6];
+ __be16 ether_type;
+ __be16 vlan_tag;
+};
+
+struct ib_kern_spec_eth {
+ __u32 type;
+ __u16 size;
+ __u16 reserved;
+ struct ib_kern_eth_filter val;
+ struct ib_kern_eth_filter mask;
+};
+
+struct ib_kern_ipv4_filter {
+ __be32 src_ip;
+ __be32 dst_ip;
+};
+
+struct ib_kern_spec_ipv4 {
+ __u32 type;
+ __u16 size;
+ __u16 reserved;
+ struct ib_kern_ipv4_filter val;
+ struct ib_kern_ipv4_filter mask;
+};
+
+struct ib_kern_tcp_udp_filter {
+ __be16 dst_port;
+ __be16 src_port;
+};
+
+struct ib_kern_spec_tcp_udp {
+ __u32 type;
+ __u16 size;
+ __u16 reserved;
+ struct ib_kern_tcp_udp_filter val;
+ struct ib_kern_tcp_udp_filter mask;
+};
+
+struct ib_kern_spec {
+ union {
+ struct {
+ __u32 type;
+ __u16 size;
+ __u16 reserved;
+ };
+ struct ib_kern_spec_eth eth;
+ struct ib_kern_spec_ipv4 ipv4;
+ struct ib_kern_spec_tcp_udp tcp_udp;
+ };
+};
+
+struct ib_kern_flow_attr {
+ __u32 type;
+ __u16 size;
+ __u16 priority;
+ __u8 num_of_specs;
+ __u8 reserved[2];
+ __u8 port;
+ __u32 flags;
+ /* Following are the optional layers according to user request
+ * struct ib_flow_spec_xxx
+ * struct ib_flow_spec_yyy
+ */
+};
+
+struct ib_uverbs_create_flow {
+ __u32 comp_mask;
+ __u64 response;
+ __u32 qp_handle;
+ struct ib_kern_flow_attr flow_attr;
+};
+
+struct ib_uverbs_create_flow_resp {
+ __u32 comp_mask;
+ __u32 flow_handle;
+};
+
+struct ib_uverbs_destroy_flow {
+ __u32 comp_mask;
+ __u32 flow_handle;
+};
+
struct ib_uverbs_create_srq {
__u64 response;
__u64 user_handle;