IB/mlx4: Add port aggregation support

Register the IB interface with the mlx4 core driver as supporting port
aggregation (MLX4_INTFF_BONDING), and check for port aggregation mode
when the 'add' function is called.

In this mode, only a single physical port is exposed to the upper
layers (the RoCE/IB core stack and ULPs).
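
As a rough sketch (not part of this patch; the helper name below is
hypothetical), the gating the core is expected to apply when adding an
interface to a bonded device amounts to:

	/* Hypothetical helper, for illustration only: an interface may be
	 * added while the device is bonded only if it declared bonding
	 * support via MLX4_INTFF_BONDING.
	 */
	static bool mlx4_intf_can_add(struct mlx4_dev *dev,
				      const struct mlx4_interface *intf)
	{
		return !mlx4_is_bonded(dev) ||
		       (intf->flags & MLX4_INTFF_BONDING);
	}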

Signed-off-by: Moni Shoua <monis@mellanox.com>
Signed-off-by: Or Gerlitz <ogerlitz@mellanox.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
diff --git a/drivers/infiniband/hw/mlx4/main.c b/drivers/infiniband/hw/mlx4/main.c
index 9db258f..ed21ae6 100644
--- a/drivers/infiniband/hw/mlx4/main.c
+++ b/drivers/infiniband/hw/mlx4/main.c
@@ -351,6 +351,7 @@
 	enum ib_mtu tmp;
 	struct mlx4_cmd_mailbox *mailbox;
 	int err = 0;
+	int is_bonded = mlx4_is_bonded(mdev->dev);
 
 	mailbox = mlx4_alloc_cmd_mailbox(mdev->dev);
 	if (IS_ERR(mailbox))
@@ -374,8 +375,13 @@
 	props->state		= IB_PORT_DOWN;
 	props->phys_state	= state_to_phys_state(props->state);
 	props->active_mtu	= IB_MTU_256;
+	if (is_bonded)
+		rtnl_lock(); /* required to get upper dev */
 	spin_lock_bh(&iboe->lock);
 	ndev = iboe->netdevs[port - 1];
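+	/* in bonded mode, port attributes are taken from the bond master */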
+	if (ndev && is_bonded)
+		ndev = netdev_master_upper_dev_get(ndev);
 	if (!ndev)
 		goto out_unlock;
 
@@ -387,6 +392,8 @@
 	props->phys_state	= state_to_phys_state(props->state);
 out_unlock:
 	spin_unlock_bh(&iboe->lock);
+	if (is_bonded)
+		rtnl_unlock();
 out:
 	mlx4_free_cmd_mailbox(mdev->dev, mailbox);
 	return err;
@@ -1440,6 +1447,7 @@
 	union ib_gid *gids;
 	int err;
 	struct mlx4_dev	*dev = gw->dev->dev;
+	int is_bonded = mlx4_is_bonded(dev);
 
 	if (!gw->dev->ib_active)
 		return;
@@ -1459,7 +1467,10 @@
 	if (err)
 		pr_warn("set port command failed\n");
 	else
-		mlx4_ib_dispatch_event(gw->dev, gw->port, IB_EVENT_GID_CHANGE);
+		if ((gw->port == 1) || !is_bonded)
+			mlx4_ib_dispatch_event(gw->dev,
+					       is_bonded ? 1 : gw->port,
+					       IB_EVENT_GID_CHANGE);
 
 	mlx4_free_cmd_mailbox(dev, mailbox);
 	kfree(gw);
@@ -1875,7 +1886,8 @@
 				 * don't want the bond IP based gids in the table since
 				 * flows that select port by gid may get the down port.
 				*/
-				if (port_state == IB_PORT_DOWN) {
+				if (port_state == IB_PORT_DOWN &&
+				    !mlx4_is_bonded(ibdev->dev)) {
 					reset_gid_table(ibdev, port);
 					mlx4_ib_set_default_gid(ibdev,
 								curr_netdev,
@@ -2047,6 +2059,7 @@
 	int err;
 	struct mlx4_ib_iboe *iboe;
 	int ib_num_ports = 0;
+	int num_req_counters;
 
 	pr_info_once("%s", mlx4_ib_version);
 
@@ -2086,7 +2099,8 @@
 	ibdev->ib_dev.node_type		= RDMA_NODE_IB_CA;
 	ibdev->ib_dev.local_dma_lkey	= dev->caps.reserved_lkey;
 	ibdev->num_ports		= num_ports;
-	ibdev->ib_dev.phys_port_cnt     = ibdev->num_ports;
+	ibdev->ib_dev.phys_port_cnt     = mlx4_is_bonded(dev) ?
+						1 : ibdev->num_ports;
 	ibdev->ib_dev.num_comp_vectors	= dev->caps.num_comp_vectors;
 	ibdev->ib_dev.dma_device	= &dev->persist->pdev->dev;
 
@@ -2207,7 +2221,8 @@
 	if (init_node_data(ibdev))
 		goto err_map;
 
-	for (i = 0; i < ibdev->num_ports; ++i) {
+	num_req_counters = mlx4_is_bonded(dev) ? 1 : ibdev->num_ports;
+	for (i = 0; i < num_req_counters; ++i) {
 		mutex_init(&ibdev->qp1_proxy_lock[i]);
 		if (mlx4_ib_port_link_layer(&ibdev->ib_dev, i + 1) ==
 						IB_LINK_LAYER_ETHERNET) {
@@ -2218,6 +2233,12 @@
 			ibdev->counters[i] = -1;
 		}
 	}
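+	/* when bonded, all ports share the counter index allocated for
+	 * port 1
+	 */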
+	if (mlx4_is_bonded(dev))
+		for (i = 1; i < ibdev->num_ports; ++i)
+			ibdev->counters[i] = ibdev->counters[0];
 
 	mlx4_foreach_port(i, dev, MLX4_PORT_TYPE_IB)
 		ib_num_ports++;
@@ -2538,6 +2557,45 @@
 	return;
 }
 
+static void handle_bonded_port_state_event(struct work_struct *work)
+{
+	struct ib_event_work *ew =
+		container_of(work, struct ib_event_work, work);
+	struct mlx4_ib_dev *ibdev = ew->ib_dev;
+	enum ib_port_state bonded_port_state = IB_PORT_NOP;
+	int i;
+	struct ib_event ibev;
+
+	kfree(ew);
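+	/* The bond is reported as a single IB port (port 1): ACTIVE when
+	 * at least one slave netdev is running with link up, DOWN
+	 * otherwise.
+	 */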
+	spin_lock_bh(&ibdev->iboe.lock);
+	for (i = 0; i < MLX4_MAX_PORTS; ++i) {
+		struct net_device *curr_netdev = ibdev->iboe.netdevs[i];
+		enum ib_port_state curr_port_state;
+
+		if (!curr_netdev)
+			continue;
+
+		curr_port_state =
+			(netif_running(curr_netdev) &&
+			 netif_carrier_ok(curr_netdev)) ?
+			IB_PORT_ACTIVE : IB_PORT_DOWN;
+
+		bonded_port_state = (bonded_port_state != IB_PORT_ACTIVE) ?
+			curr_port_state : IB_PORT_ACTIVE;
+	}
+	spin_unlock_bh(&ibdev->iboe.lock);
+
+	ibev.device = &ibdev->ib_dev;
+	ibev.element.port_num = 1;
+	ibev.event = (bonded_port_state == IB_PORT_ACTIVE) ?
+		IB_EVENT_PORT_ACTIVE : IB_EVENT_PORT_ERR;
+
+	ib_dispatch_event(&ibev);
+}
+
 static void mlx4_ib_event(struct mlx4_dev *dev, void *ibdev_ptr,
 			  enum mlx4_dev_event event, unsigned long param)
 {
@@ -2547,6 +2598,22 @@
 	struct ib_event_work *ew;
 	int p = 0;
 
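+	/* In bonded mode, a slave's PORT_UP/PORT_DOWN event translates to
+	 * the state of the single logical port; compute and dispatch it
+	 * from a workqueue.
+	 */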
+	if (mlx4_is_bonded(dev) &&
+	    ((event == MLX4_DEV_EVENT_PORT_UP) ||
+	    (event == MLX4_DEV_EVENT_PORT_DOWN))) {
+		ew = kmalloc(sizeof(*ew), GFP_ATOMIC);
+		if (!ew)
+			return;
+		INIT_WORK(&ew->work, handle_bonded_port_state_event);
+		ew->ib_dev = ibdev;
+		queue_work(wq, &ew->work);
+		return;
+	}
+
 	if (event == MLX4_DEV_EVENT_PORT_MGMT_CHANGE)
 		eqe = (struct mlx4_eqe *)param;
 	else
@@ -2607,7 +2670,7 @@
 	}
 
 	ibev.device	      = ibdev_ptr;
-	ibev.element.port_num = (u8) p;
+	ibev.element.port_num = mlx4_is_bonded(ibdev->dev) ? 1 : (u8)p;
 
 	ib_dispatch_event(&ibev);
 }
@@ -2616,7 +2679,8 @@
 	.add		= mlx4_ib_add,
 	.remove		= mlx4_ib_remove,
 	.event		= mlx4_ib_event,
-	.protocol	= MLX4_PROT_IB_IPV6
+	.protocol	= MLX4_PROT_IB_IPV6,
+	.flags		= MLX4_INTFF_BONDING
 };
 
 static int __init mlx4_ib_init(void)