net/mlx5_core: Use private health thread for each device

Use a single threaded work queue for each device in the system instead of
using one thread for any device. This is required so we can concurrently
process system error handling for all the devices that need that.

Signed-off-by: Eli Cohen <eli@mellanox.com>
Signed-off-by: Or Gerlitz <ogerlitz@mellanox.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/health.c b/drivers/net/ethernet/mellanox/mlx5/core/health.c
index 8770968..9b81e1c 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/health.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/health.c
@@ -57,31 +57,16 @@
 	MLX5_HEALTH_SYNDR_HIGH_TEMP		= 0x10
 };
 
-static DEFINE_SPINLOCK(health_lock);
-static LIST_HEAD(health_list);
-static struct work_struct health_work;
-
 static void health_care(struct work_struct *work)
 {
-	struct mlx5_core_health *health, *n;
+	struct mlx5_core_health *health;
 	struct mlx5_core_dev *dev;
 	struct mlx5_priv *priv;
-	LIST_HEAD(tlist);
 
-	spin_lock_irq(&health_lock);
-	list_splice_init(&health_list, &tlist);
-
-	spin_unlock_irq(&health_lock);
-
-	list_for_each_entry_safe(health, n, &tlist, list) {
-		priv = container_of(health, struct mlx5_priv, health);
-		dev = container_of(priv, struct mlx5_core_dev, priv);
-		mlx5_core_warn(dev, "handling bad device here\n");
-		/* nothing yet */
-		spin_lock_irq(&health_lock);
-		list_del_init(&health->list);
-		spin_unlock_irq(&health_lock);
-	}
+	health = container_of(work, struct mlx5_core_health, work);
+	priv = container_of(health, struct mlx5_priv, health);
+	dev = container_of(priv, struct mlx5_core_dev, priv);
+	mlx5_core_warn(dev, "handling bad device here\n");
 }
 
 static const char *hsynd_str(u8 synd)
@@ -168,11 +153,7 @@
 	if (health->miss_counter == MAX_MISSES) {
 		mlx5_core_err(dev, "device's health compromised\n");
 		print_health_info(dev);
-		spin_lock_irq(&health_lock);
-		list_add_tail(&health->list, &health_list);
-		spin_unlock_irq(&health_lock);
-
-		queue_work(mlx5_core_wq, &health_work);
+		queue_work(health->wq, &health->work);
 	} else {
 		get_random_bytes(&next, sizeof(next));
 		next %= HZ;
@@ -185,7 +166,6 @@
 {
 	struct mlx5_core_health *health = &dev->priv.health;
 
-	INIT_LIST_HEAD(&health->list);
 	init_timer(&health->timer);
 	health->health = &dev->iseg->health;
 	health->health_counter = &dev->iseg->health_counter;
@@ -201,18 +181,33 @@
 	struct mlx5_core_health *health = &dev->priv.health;
 
 	del_timer_sync(&health->timer);
-
-	spin_lock_irq(&health_lock);
-	if (!list_empty(&health->list))
-		list_del_init(&health->list);
-	spin_unlock_irq(&health_lock);
 }
 
-void mlx5_health_cleanup(void)
+void mlx5_health_cleanup(struct mlx5_core_dev *dev)
 {
+	struct mlx5_core_health *health = &dev->priv.health;
+
+	destroy_workqueue(health->wq);
 }
 
-void  __init mlx5_health_init(void)
+int mlx5_health_init(struct mlx5_core_dev *dev)
 {
-	INIT_WORK(&health_work, health_care);
+	struct mlx5_core_health *health;
+	char *name;
+
+	health = &dev->priv.health;
+	name = kmalloc(64, GFP_KERNEL);
+	if (!name)
+		return -ENOMEM;
+
+	strcpy(name, "mlx5_health");
+	strcat(name, dev_name(&dev->pdev->dev));
+	health->wq = create_singlethread_workqueue(name);
+	kfree(name);
+	if (!health->wq)
+		return -ENOMEM;
+
+	INIT_WORK(&health->work, health_care);
+
+	return 0;
 }