mlx4: MAD_IFC paravirtualization
The MAD_IFC firmware command fulfills two functions.
First, it is used in the QP0/QP1 MAD-handling flow to obtain
information from the FW (for answering queries), and for setting
variables in the HCA (MAD SET packets).
For this, MAD_IFC should provide the FW (physical) view of the data.
This is the view that OpenSM needs. We call this the "network view".
In the second case, MAD_IFC is used by various verbs to obtain data
regarding the local HCA (e.g., ib_query_device()). We call this the
"host view".
This data needs to be paravirtualized.
MAD_IFC therefore needs a wrapper function, and also needs another
flag indicating whether it should provide the network view (when it is
called by ib_process_mad in special-qp packet handling), or the host
view (when it is called while implementing a verb).
There are currently 2 flag parameters in mlx4_MAD_IFC already:
ignore_bkey and ignore_mkey. These two parameters are replaced by a
single "mad_ifc_flags" parameter, with different bits set for each
flag. A third flag is added: "network-view/host-view".
Signed-off-by: Jack Morgenstein <jackm@dev.mellanox.co.il>
Signed-off-by: Roland Dreier <roland@purestorage.com>
diff --git a/drivers/infiniband/hw/mlx4/mad.c b/drivers/infiniband/hw/mlx4/mad.c
index 8dfbf69..ba25806 100644
--- a/drivers/infiniband/hw/mlx4/mad.c
+++ b/drivers/infiniband/hw/mlx4/mad.c
@@ -75,7 +75,7 @@
struct ib_mad mad;
} __packed;
-int mlx4_MAD_IFC(struct mlx4_ib_dev *dev, int ignore_mkey, int ignore_bkey,
+int mlx4_MAD_IFC(struct mlx4_ib_dev *dev, int mad_ifc_flags,
int port, struct ib_wc *in_wc, struct ib_grh *in_grh,
void *in_mad, void *response_mad)
{
@@ -102,10 +102,13 @@
* Key check traps can't be generated unless we have in_wc to
* tell us where to send the trap.
*/
- if (ignore_mkey || !in_wc)
+ if ((mad_ifc_flags & MLX4_MAD_IFC_IGNORE_MKEY) || !in_wc)
op_modifier |= 0x1;
- if (ignore_bkey || !in_wc)
+ if ((mad_ifc_flags & MLX4_MAD_IFC_IGNORE_BKEY) || !in_wc)
op_modifier |= 0x2;
+ if (mlx4_is_mfunc(dev->dev) &&
+ (mad_ifc_flags & MLX4_MAD_IFC_NET_VIEW || in_wc))
+ op_modifier |= 0x8;
if (in_wc) {
struct {
@@ -138,10 +141,10 @@
in_modifier |= in_wc->slid << 16;
}
- err = mlx4_cmd_box(dev->dev, inmailbox->dma, outmailbox->dma,
- in_modifier, op_modifier,
+ err = mlx4_cmd_box(dev->dev, inmailbox->dma, outmailbox->dma, in_modifier,
+ mlx4_is_master(dev->dev) ? (op_modifier & ~0x8) : op_modifier,
MLX4_CMD_MAD_IFC, MLX4_CMD_TIME_CLASS_C,
- MLX4_CMD_NATIVE);
+ (op_modifier & 0x8) ? MLX4_CMD_NATIVE : MLX4_CMD_WRAPPED);
if (!err)
memcpy(response_mad, outmailbox->buf, 256);
@@ -614,8 +617,9 @@
prev_lid = pattr.lid;
err = mlx4_MAD_IFC(to_mdev(ibdev),
- mad_flags & IB_MAD_IGNORE_MKEY,
- mad_flags & IB_MAD_IGNORE_BKEY,
+ (mad_flags & IB_MAD_IGNORE_MKEY ? MLX4_MAD_IFC_IGNORE_MKEY : 0) |
+ (mad_flags & IB_MAD_IGNORE_BKEY ? MLX4_MAD_IFC_IGNORE_BKEY : 0) |
+ MLX4_MAD_IFC_NET_VIEW,
port_num, in_wc, in_grh, in_mad, out_mad);
if (err)
return IB_MAD_RESULT_FAILURE;
diff --git a/drivers/infiniband/hw/mlx4/main.c b/drivers/infiniband/hw/mlx4/main.c
index 8e10ec2..45a6cc0 100644
--- a/drivers/infiniband/hw/mlx4/main.c
+++ b/drivers/infiniband/hw/mlx4/main.c
@@ -98,7 +98,8 @@
init_query_mad(in_mad);
in_mad->attr_id = IB_SMP_ATTR_NODE_INFO;
- err = mlx4_MAD_IFC(to_mdev(ibdev), 1, 1, 1, NULL, NULL, in_mad, out_mad);
+ err = mlx4_MAD_IFC(to_mdev(ibdev), MLX4_MAD_IFC_IGNORE_KEYS,
+ 1, NULL, NULL, in_mad, out_mad);
if (err)
goto out;
@@ -182,11 +183,12 @@
}
static int ib_link_query_port(struct ib_device *ibdev, u8 port,
- struct ib_port_attr *props)
+ struct ib_port_attr *props, int netw_view)
{
struct ib_smp *in_mad = NULL;
struct ib_smp *out_mad = NULL;
int ext_active_speed;
+ int mad_ifc_flags = MLX4_MAD_IFC_IGNORE_KEYS;
int err = -ENOMEM;
in_mad = kzalloc(sizeof *in_mad, GFP_KERNEL);
@@ -198,7 +200,10 @@
in_mad->attr_id = IB_SMP_ATTR_PORT_INFO;
in_mad->attr_mod = cpu_to_be32(port);
- err = mlx4_MAD_IFC(to_mdev(ibdev), 1, 1, port, NULL, NULL,
+ if (mlx4_is_mfunc(to_mdev(ibdev)->dev) && netw_view)
+ mad_ifc_flags |= MLX4_MAD_IFC_NET_VIEW;
+
+ err = mlx4_MAD_IFC(to_mdev(ibdev), mad_ifc_flags, port, NULL, NULL,
in_mad, out_mad);
if (err)
goto out;
@@ -211,7 +216,10 @@
props->state = out_mad->data[32] & 0xf;
props->phys_state = out_mad->data[33] >> 4;
props->port_cap_flags = be32_to_cpup((__be32 *) (out_mad->data + 20));
- props->gid_tbl_len = to_mdev(ibdev)->dev->caps.gid_table_len[port];
+ if (netw_view)
+ props->gid_tbl_len = out_mad->data[50];
+ else
+ props->gid_tbl_len = to_mdev(ibdev)->dev->caps.gid_table_len[port];
props->max_msg_sz = to_mdev(ibdev)->dev->caps.max_msg_sz;
props->pkey_tbl_len = to_mdev(ibdev)->dev->caps.pkey_table_len[port];
props->bad_pkey_cntr = be16_to_cpup((__be16 *) (out_mad->data + 46));
@@ -244,7 +252,7 @@
in_mad->attr_id = MLX4_ATTR_EXTENDED_PORT_INFO;
in_mad->attr_mod = cpu_to_be32(port);
- err = mlx4_MAD_IFC(to_mdev(ibdev), 1, 1, port,
+ err = mlx4_MAD_IFC(to_mdev(ibdev), mad_ifc_flags, port,
NULL, NULL, in_mad, out_mad);
if (err)
goto out;
@@ -270,7 +278,7 @@
}
static int eth_link_query_port(struct ib_device *ibdev, u8 port,
- struct ib_port_attr *props)
+ struct ib_port_attr *props, int netw_view)
{
struct mlx4_ib_dev *mdev = to_mdev(ibdev);
@@ -320,20 +328,27 @@
return err;
}
-static int mlx4_ib_query_port(struct ib_device *ibdev, u8 port,
- struct ib_port_attr *props)
+int __mlx4_ib_query_port(struct ib_device *ibdev, u8 port,
+ struct ib_port_attr *props, int netw_view)
{
int err;
memset(props, 0, sizeof *props);
err = mlx4_ib_port_link_layer(ibdev, port) == IB_LINK_LAYER_INFINIBAND ?
- ib_link_query_port(ibdev, port, props) :
- eth_link_query_port(ibdev, port, props);
+ ib_link_query_port(ibdev, port, props, netw_view) :
+ eth_link_query_port(ibdev, port, props, netw_view);
return err;
}
+static int mlx4_ib_query_port(struct ib_device *ibdev, u8 port,
+ struct ib_port_attr *props)
+{
+ /* returns host view */
+ return __mlx4_ib_query_port(ibdev, port, props, 0);
+}
+
static int __mlx4_ib_query_gid(struct ib_device *ibdev, u8 port, int index,
union ib_gid *gid)
{
@@ -350,7 +365,8 @@
in_mad->attr_id = IB_SMP_ATTR_PORT_INFO;
in_mad->attr_mod = cpu_to_be32(port);
- err = mlx4_MAD_IFC(to_mdev(ibdev), 1, 1, port, NULL, NULL, in_mad, out_mad);
+ err = mlx4_MAD_IFC(to_mdev(ibdev), MLX4_MAD_IFC_IGNORE_KEYS, port,
+ NULL, NULL, in_mad, out_mad);
if (err)
goto out;
@@ -360,7 +376,8 @@
in_mad->attr_id = IB_SMP_ATTR_GUID_INFO;
in_mad->attr_mod = cpu_to_be32(index / 8);
- err = mlx4_MAD_IFC(to_mdev(ibdev), 1, 1, port, NULL, NULL, in_mad, out_mad);
+ err = mlx4_MAD_IFC(to_mdev(ibdev), MLX4_MAD_IFC_IGNORE_KEYS, port,
+ NULL, NULL, in_mad, out_mad);
if (err)
goto out;
@@ -391,11 +408,12 @@
return iboe_query_gid(ibdev, port, index, gid);
}
-static int mlx4_ib_query_pkey(struct ib_device *ibdev, u8 port, u16 index,
- u16 *pkey)
+int __mlx4_ib_query_pkey(struct ib_device *ibdev, u8 port, u16 index,
+ u16 *pkey, int netw_view)
{
struct ib_smp *in_mad = NULL;
struct ib_smp *out_mad = NULL;
+ int mad_ifc_flags = MLX4_MAD_IFC_IGNORE_KEYS;
int err = -ENOMEM;
in_mad = kzalloc(sizeof *in_mad, GFP_KERNEL);
@@ -407,7 +425,11 @@
in_mad->attr_id = IB_SMP_ATTR_PKEY_TABLE;
in_mad->attr_mod = cpu_to_be32(index / 32);
- err = mlx4_MAD_IFC(to_mdev(ibdev), 1, 1, port, NULL, NULL, in_mad, out_mad);
+ if (mlx4_is_mfunc(to_mdev(ibdev)->dev) && netw_view)
+ mad_ifc_flags |= MLX4_MAD_IFC_NET_VIEW;
+
+ err = mlx4_MAD_IFC(to_mdev(ibdev), mad_ifc_flags, port, NULL, NULL,
+ in_mad, out_mad);
if (err)
goto out;
@@ -419,6 +441,11 @@
return err;
}
+static int mlx4_ib_query_pkey(struct ib_device *ibdev, u8 port, u16 index, u16 *pkey)
+{
+ return __mlx4_ib_query_pkey(ibdev, port, index, pkey, 0);
+}
+
static int mlx4_ib_modify_device(struct ib_device *ibdev, int mask,
struct ib_device_modify *props)
{
@@ -849,6 +876,7 @@
{
struct ib_smp *in_mad = NULL;
struct ib_smp *out_mad = NULL;
+ int mad_ifc_flags = MLX4_MAD_IFC_IGNORE_KEYS;
int err = -ENOMEM;
in_mad = kzalloc(sizeof *in_mad, GFP_KERNEL);
@@ -858,8 +886,10 @@
init_query_mad(in_mad);
in_mad->attr_id = IB_SMP_ATTR_NODE_DESC;
+ if (mlx4_is_master(dev->dev))
+ mad_ifc_flags |= MLX4_MAD_IFC_NET_VIEW;
- err = mlx4_MAD_IFC(dev, 1, 1, 1, NULL, NULL, in_mad, out_mad);
+ err = mlx4_MAD_IFC(dev, mad_ifc_flags, 1, NULL, NULL, in_mad, out_mad);
if (err)
goto out;
@@ -867,7 +897,7 @@
in_mad->attr_id = IB_SMP_ATTR_NODE_INFO;
- err = mlx4_MAD_IFC(dev, 1, 1, 1, NULL, NULL, in_mad, out_mad);
+ err = mlx4_MAD_IFC(dev, mad_ifc_flags, 1, NULL, NULL, in_mad, out_mad);
if (err)
goto out;
diff --git a/drivers/infiniband/hw/mlx4/mlx4_ib.h b/drivers/infiniband/hw/mlx4/mlx4_ib.h
index 137941d..ac71d56 100644
--- a/drivers/infiniband/hw/mlx4/mlx4_ib.h
+++ b/drivers/infiniband/hw/mlx4/mlx4_ib.h
@@ -176,6 +176,14 @@
MLX4_IB_QPT_PROXY_SMI | MLX4_IB_QPT_PROXY_GSI | MLX4_IB_QPT_TUN_SMI_OWNER | \
MLX4_IB_QPT_TUN_SMI | MLX4_IB_QPT_TUN_GSI)
+enum mlx4_ib_mad_ifc_flags {
+ MLX4_MAD_IFC_IGNORE_MKEY = 1,
+ MLX4_MAD_IFC_IGNORE_BKEY = 2,
+ MLX4_MAD_IFC_IGNORE_KEYS = (MLX4_MAD_IFC_IGNORE_MKEY |
+ MLX4_MAD_IFC_IGNORE_BKEY),
+ MLX4_MAD_IFC_NET_VIEW = 4,
+};
+
enum {
MLX4_NUM_TUNNEL_BUFS = 256,
};
@@ -512,7 +520,7 @@
int mlx4_ib_post_recv(struct ib_qp *ibqp, struct ib_recv_wr *wr,
struct ib_recv_wr **bad_wr);
-int mlx4_MAD_IFC(struct mlx4_ib_dev *dev, int ignore_mkey, int ignore_bkey,
+int mlx4_MAD_IFC(struct mlx4_ib_dev *dev, int mad_ifc_flags,
int port, struct ib_wc *in_wc, struct ib_grh *in_grh,
void *in_mad, void *response_mad);
int mlx4_ib_process_mad(struct ib_device *ibdev, int mad_flags, u8 port_num,
@@ -527,6 +535,10 @@
u64 iova);
int mlx4_ib_unmap_fmr(struct list_head *fmr_list);
int mlx4_ib_fmr_dealloc(struct ib_fmr *fmr);
+int __mlx4_ib_query_port(struct ib_device *ibdev, u8 port,
+ struct ib_port_attr *props, int netw_view);
+int __mlx4_ib_query_pkey(struct ib_device *ibdev, u8 port, u16 index,
+ u16 *pkey, int netw_view);
int mlx4_ib_resolve_grh(struct mlx4_ib_dev *dev, const struct ib_ah_attr *ah_attr,
u8 *mac, int *is_mcast, u8 port);
diff --git a/drivers/net/ethernet/mellanox/mlx4/cmd.c b/drivers/net/ethernet/mellanox/mlx4/cmd.c
index 662a3c5..a13d8a6 100644
--- a/drivers/net/ethernet/mellanox/mlx4/cmd.c
+++ b/drivers/net/ethernet/mellanox/mlx4/cmd.c
@@ -40,6 +40,7 @@
#include <linux/mlx4/cmd.h>
#include <linux/semaphore.h>
+#include <rdma/ib_smi.h>
#include <asm/io.h>
@@ -627,6 +628,149 @@
MLX4_CMD_TIME_CLASS_A, MLX4_CMD_NATIVE);
}
+static int query_pkey_block(struct mlx4_dev *dev, u8 port, u16 index, u16 *pkey,
+ struct mlx4_cmd_mailbox *inbox,
+ struct mlx4_cmd_mailbox *outbox)
+{
+ struct ib_smp *in_mad = (struct ib_smp *)(inbox->buf);
+ struct ib_smp *out_mad = (struct ib_smp *)(outbox->buf);
+ int err;
+ int i;
+
+ if (index & 0x1f)
+ return -EINVAL;
+
+ in_mad->attr_mod = cpu_to_be32(index / 32);
+
+ err = mlx4_cmd_box(dev, inbox->dma, outbox->dma, port, 3,
+ MLX4_CMD_MAD_IFC, MLX4_CMD_TIME_CLASS_C,
+ MLX4_CMD_NATIVE);
+ if (err)
+ return err;
+
+ for (i = 0; i < 32; ++i)
+ pkey[i] = be16_to_cpu(((__be16 *) out_mad->data)[i]);
+
+ return err;
+}
+
+static int get_full_pkey_table(struct mlx4_dev *dev, u8 port, u16 *table,
+ struct mlx4_cmd_mailbox *inbox,
+ struct mlx4_cmd_mailbox *outbox)
+{
+ int i;
+ int err;
+
+ for (i = 0; i < dev->caps.pkey_table_len[port]; i += 32) {
+ err = query_pkey_block(dev, port, i, table + i, inbox, outbox);
+ if (err)
+ return err;
+ }
+
+ return 0;
+}
+#define PORT_CAPABILITY_LOCATION_IN_SMP 20
+#define PORT_STATE_OFFSET 32
+
+static enum ib_port_state vf_port_state(struct mlx4_dev *dev, int port, int vf)
+{
+ /* will be modified when add alias_guid feature */
+ return IB_PORT_DOWN;
+}
+
+static int mlx4_MAD_IFC_wrapper(struct mlx4_dev *dev, int slave,
+ struct mlx4_vhcr *vhcr,
+ struct mlx4_cmd_mailbox *inbox,
+ struct mlx4_cmd_mailbox *outbox,
+ struct mlx4_cmd_info *cmd)
+{
+ struct ib_smp *smp = inbox->buf;
+ u32 index;
+ u8 port;
+ u16 *table;
+ int err;
+ int vidx, pidx;
+ struct mlx4_priv *priv = mlx4_priv(dev);
+ struct ib_smp *outsmp = outbox->buf;
+ __be16 *outtab = (__be16 *)(outsmp->data);
+ __be32 slave_cap_mask;
+ port = vhcr->in_modifier;
+
+ if (smp->base_version == 1 &&
+ smp->mgmt_class == IB_MGMT_CLASS_SUBN_LID_ROUTED &&
+ smp->class_version == 1) {
+ if (smp->method == IB_MGMT_METHOD_GET) {
+ if (smp->attr_id == IB_SMP_ATTR_PKEY_TABLE) {
+ index = be32_to_cpu(smp->attr_mod);
+ if (port < 1 || port > dev->caps.num_ports)
+ return -EINVAL;
+ table = kcalloc(dev->caps.pkey_table_len[port], sizeof *table, GFP_KERNEL);
+ if (!table)
+ return -ENOMEM;
+ /* need to get the full pkey table because the paravirtualized
+ * pkeys may be scattered among several pkey blocks.
+ */
+ err = get_full_pkey_table(dev, port, table, inbox, outbox);
+ if (!err) {
+ for (vidx = index * 32; vidx < (index + 1) * 32; ++vidx) {
+ pidx = priv->virt2phys_pkey[slave][port - 1][vidx];
+ outtab[vidx % 32] = cpu_to_be16(table[pidx]);
+ }
+ }
+ kfree(table);
+ return err;
+ }
+ if (smp->attr_id == IB_SMP_ATTR_PORT_INFO) {
+ /*get the slave specific caps:*/
+ /*do the command */
+ err = mlx4_cmd_box(dev, inbox->dma, outbox->dma,
+ vhcr->in_modifier, vhcr->op_modifier,
+ vhcr->op, MLX4_CMD_TIME_CLASS_C, MLX4_CMD_NATIVE);
+ /* modify the response for slaves */
+ if (!err && slave != mlx4_master_func_num(dev)) {
+ u8 *state = outsmp->data + PORT_STATE_OFFSET;
+
+ *state = (*state & 0xf0) | vf_port_state(dev, port, slave);
+ slave_cap_mask = priv->mfunc.master.slave_state[slave].ib_cap_mask[port];
+ memcpy(outsmp->data + PORT_CAPABILITY_LOCATION_IN_SMP, &slave_cap_mask, 4);
+ }
+ return err;
+ }
+ if (smp->attr_id == IB_SMP_ATTR_GUID_INFO) {
+ /* compute slave's gid block */
+ smp->attr_mod = cpu_to_be32(slave / 8);
+ /* execute cmd */
+ err = mlx4_cmd_box(dev, inbox->dma, outbox->dma,
+ vhcr->in_modifier, vhcr->op_modifier,
+ vhcr->op, MLX4_CMD_TIME_CLASS_C, MLX4_CMD_NATIVE);
+ if (!err) {
+ /* if needed, move slave gid to index 0 */
+ if (slave % 8)
+ memcpy(outsmp->data,
+ outsmp->data + (slave % 8) * 8, 8);
+ /* delete all other gids */
+ memset(outsmp->data + 8, 0, 56);
+ }
+ return err;
+ }
+ }
+ }
+ if (slave != mlx4_master_func_num(dev) &&
+ ((smp->mgmt_class == IB_MGMT_CLASS_SUBN_DIRECTED_ROUTE) ||
+ (smp->mgmt_class == IB_MGMT_CLASS_SUBN_LID_ROUTED &&
+ smp->method == IB_MGMT_METHOD_SET))) {
+ mlx4_err(dev, "slave %d is trying to execute a Subnet MGMT MAD, "
+ "class 0x%x, method 0x%x for attr 0x%x. Rejecting\n",
+ slave, smp->method, smp->mgmt_class,
+ be16_to_cpu(smp->attr_id));
+ return -EPERM;
+ }
+ /*default:*/
+ return mlx4_cmd_box(dev, inbox->dma, outbox->dma,
+ vhcr->in_modifier, vhcr->op_modifier,
+ vhcr->op, MLX4_CMD_TIME_CLASS_C, MLX4_CMD_NATIVE);
+}
+
int mlx4_DMA_wrapper(struct mlx4_dev *dev, int slave,
struct mlx4_vhcr *vhcr,
struct mlx4_cmd_mailbox *inbox,
@@ -1061,6 +1205,24 @@
.wrapper = mlx4_GEN_QP_wrapper
},
{
+ .opcode = MLX4_CMD_CONF_SPECIAL_QP,
+ .has_inbox = false,
+ .has_outbox = false,
+ .out_is_imm = false,
+ .encode_slave_id = false,
+ .verify = NULL, /* XXX verify: only demux can do this */
+ .wrapper = NULL
+ },
+ {
+ .opcode = MLX4_CMD_MAD_IFC,
+ .has_inbox = true,
+ .has_outbox = true,
+ .out_is_imm = false,
+ .encode_slave_id = false,
+ .verify = NULL,
+ .wrapper = mlx4_MAD_IFC_wrapper
+ },
+ {
.opcode = MLX4_CMD_QUERY_IF_STAT,
.has_inbox = false,
.has_outbox = true,