drm/amdkfd: Add multiple kgd support

The current code can only support one kgd instance. We have to
support multiple kgd instances in one system. i.e two amdgpu or two
radeon or one amdgpu + one radeon or more than two kgd instances.

Signed-off-by: Xihan Zhang <xihan.zhang@amd.com>
Signed-off-by: Oded Gabbay <oded.gabbay@amd.com>
diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_chardev.c b/drivers/gpu/drm/amd/amdkfd/kfd_chardev.c
index 50fc8ba..19a4fba 100644
--- a/drivers/gpu/drm/amd/amdkfd/kfd_chardev.c
+++ b/drivers/gpu/drm/amd/amdkfd/kfd_chardev.c
@@ -442,7 +442,8 @@
 		return -EINVAL;
 
 	/* Reading GPU clock counter from KGD */
-	args->gpu_clock_counter = kfd2kgd->get_gpu_clock_counter(dev->kgd);
+	args->gpu_clock_counter =
+		dev->kfd2kgd->get_gpu_clock_counter(dev->kgd);
 
 	/* No access to rdtsc. Using raw monotonic time */
 	getrawmonotonic64(&time);
diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_device.c b/drivers/gpu/drm/amd/amdkfd/kfd_device.c
index 5bc32c2..ca7f2d3 100644
--- a/drivers/gpu/drm/amd/amdkfd/kfd_device.c
+++ b/drivers/gpu/drm/amd/amdkfd/kfd_device.c
@@ -94,7 +94,8 @@
 	return NULL;
 }
 
-struct kfd_dev *kgd2kfd_probe(struct kgd_dev *kgd, struct pci_dev *pdev)
+struct kfd_dev *kgd2kfd_probe(struct kgd_dev *kgd,
+	struct pci_dev *pdev, const struct kfd2kgd_calls *f2g)
 {
 	struct kfd_dev *kfd;
 
@@ -112,6 +113,11 @@
 	kfd->device_info = device_info;
 	kfd->pdev = pdev;
 	kfd->init_complete = false;
+	kfd->kfd2kgd = f2g;
+
+	mutex_init(&kfd->doorbell_mutex);
+	memset(&kfd->doorbell_available_index, 0,
+		sizeof(kfd->doorbell_available_index));
 
 	return kfd;
 }
@@ -200,8 +206,9 @@
 	/* add another 512KB for all other allocations on gart (HPD, fences) */
 	size += 512 * 1024;
 
-	if (kfd2kgd->init_gtt_mem_allocation(kfd->kgd, size, &kfd->gtt_mem,
-			&kfd->gtt_start_gpu_addr, &kfd->gtt_start_cpu_ptr)) {
+	if (kfd->kfd2kgd->init_gtt_mem_allocation(
+			kfd->kgd, size, &kfd->gtt_mem,
+			&kfd->gtt_start_gpu_addr, &kfd->gtt_start_cpu_ptr)){
 		dev_err(kfd_device,
 			"Could not allocate %d bytes for device (%x:%x)\n",
 			size, kfd->pdev->vendor, kfd->pdev->device);
@@ -270,7 +277,7 @@
 kfd_topology_add_device_error:
 	kfd_gtt_sa_fini(kfd);
 kfd_gtt_sa_init_error:
-	kfd2kgd->free_gtt_mem(kfd->kgd, kfd->gtt_mem);
+	kfd->kfd2kgd->free_gtt_mem(kfd->kgd, kfd->gtt_mem);
 	dev_err(kfd_device,
 		"device (%x:%x) NOT added due to errors\n",
 		kfd->pdev->vendor, kfd->pdev->device);
@@ -285,7 +292,7 @@
 		amd_iommu_free_device(kfd->pdev);
 		kfd_topology_remove_device(kfd);
 		kfd_gtt_sa_fini(kfd);
-		kfd2kgd->free_gtt_mem(kfd->kgd, kfd->gtt_mem);
+		kfd->kfd2kgd->free_gtt_mem(kfd->kgd, kfd->gtt_mem);
 	}
 
 	kfree(kfd);
diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.c b/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.c
index be68d58..d717430 100644
--- a/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.c
+++ b/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.c
@@ -82,7 +82,8 @@
 void program_sh_mem_settings(struct device_queue_manager *dqm,
 					struct qcm_process_device *qpd)
 {
-	return kfd2kgd->program_sh_mem_settings(dqm->dev->kgd, qpd->vmid,
+	return dqm->dev->kfd2kgd->program_sh_mem_settings(
+						dqm->dev->kgd, qpd->vmid,
 						qpd->sh_mem_config,
 						qpd->sh_mem_ape1_base,
 						qpd->sh_mem_ape1_limit,
@@ -457,9 +458,12 @@
 {
 	uint32_t pasid_mapping;
 
-	pasid_mapping = (pasid == 0) ? 0 : (uint32_t)pasid |
-						ATC_VMID_PASID_MAPPING_VALID;
-	return kfd2kgd->set_pasid_vmid_mapping(dqm->dev->kgd, pasid_mapping,
+	pasid_mapping = (pasid == 0) ? 0 :
+		(uint32_t)pasid |
+		ATC_VMID_PASID_MAPPING_VALID;
+
+	return dqm->dev->kfd2kgd->set_pasid_vmid_mapping(
+						dqm->dev->kgd, pasid_mapping,
 						vmid);
 }
 
@@ -511,7 +515,7 @@
 		pipe_hpd_addr = dqm->pipelines_addr + i * CIK_HPD_EOP_BYTES;
 		pr_debug("kfd: pipeline address %llX\n", pipe_hpd_addr);
 		/* = log2(bytes/4)-1 */
-		kfd2kgd->init_pipeline(dqm->dev->kgd, inx,
+		dqm->dev->kfd2kgd->init_pipeline(dqm->dev->kgd, inx,
 				CIK_HPD_EOP_BYTES_LOG2 - 3, pipe_hpd_addr);
 	}
 
diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_doorbell.c b/drivers/gpu/drm/amd/amdkfd/kfd_doorbell.c
index 1a9b355..17e56dc 100644
--- a/drivers/gpu/drm/amd/amdkfd/kfd_doorbell.c
+++ b/drivers/gpu/drm/amd/amdkfd/kfd_doorbell.c
@@ -32,9 +32,6 @@
  * and that's assures that any user process won't get access to the
  * kernel doorbells page
  */
-static DEFINE_MUTEX(doorbell_mutex);
-static unsigned long doorbell_available_index[
-	DIV_ROUND_UP(KFD_MAX_NUM_OF_QUEUES_PER_PROCESS, BITS_PER_LONG)] = { 0 };
 
 #define KERNEL_DOORBELL_PASID 1
 #define KFD_SIZE_OF_DOORBELL_IN_BYTES 4
@@ -170,12 +167,12 @@
 
 	BUG_ON(!kfd || !doorbell_off);
 
-	mutex_lock(&doorbell_mutex);
-	inx = find_first_zero_bit(doorbell_available_index,
+	mutex_lock(&kfd->doorbell_mutex);
+	inx = find_first_zero_bit(kfd->doorbell_available_index,
 					KFD_MAX_NUM_OF_QUEUES_PER_PROCESS);
 
-	__set_bit(inx, doorbell_available_index);
-	mutex_unlock(&doorbell_mutex);
+	__set_bit(inx, kfd->doorbell_available_index);
+	mutex_unlock(&kfd->doorbell_mutex);
 
 	if (inx >= KFD_MAX_NUM_OF_QUEUES_PER_PROCESS)
 		return NULL;
@@ -203,9 +200,9 @@
 
 	inx = (unsigned int)(db_addr - kfd->doorbell_kernel_ptr);
 
-	mutex_lock(&doorbell_mutex);
-	__clear_bit(inx, doorbell_available_index);
-	mutex_unlock(&doorbell_mutex);
+	mutex_lock(&kfd->doorbell_mutex);
+	__clear_bit(inx, kfd->doorbell_available_index);
+	mutex_unlock(&kfd->doorbell_mutex);
 }
 
 inline void write_kernel_doorbell(u32 __iomem *db, u32 value)
diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_module.c b/drivers/gpu/drm/amd/amdkfd/kfd_module.c
index 3f34ae1..4e0a68f 100644
--- a/drivers/gpu/drm/amd/amdkfd/kfd_module.c
+++ b/drivers/gpu/drm/amd/amdkfd/kfd_module.c
@@ -34,7 +34,6 @@
 #define KFD_DRIVER_MINOR	7
 #define KFD_DRIVER_PATCHLEVEL	1
 
-const struct kfd2kgd_calls *kfd2kgd;
 static const struct kgd2kfd_calls kgd2kfd = {
 	.exit		= kgd2kfd_exit,
 	.probe		= kgd2kfd_probe,
@@ -55,9 +54,7 @@
 MODULE_PARM_DESC(max_num_of_queues_per_device,
 	"Maximum number of supported queues per device (1 = Minimum, 4096 = default)");
 
-bool kgd2kfd_init(unsigned interface_version,
-		  const struct kfd2kgd_calls *f2g,
-		  const struct kgd2kfd_calls **g2f)
+bool kgd2kfd_init(unsigned interface_version, const struct kgd2kfd_calls **g2f)
 {
 	/*
 	 * Only one interface version is supported,
@@ -66,11 +63,6 @@
 	if (interface_version != KFD_INTERFACE_VERSION)
 		return false;
 
-	/* Protection against multiple amd kgd loads */
-	if (kfd2kgd)
-		return true;
-
-	kfd2kgd = f2g;
 	*g2f = &kgd2kfd;
 
 	return true;
@@ -85,8 +77,6 @@
 {
 	int err;
 
-	kfd2kgd = NULL;
-
 	/* Verify module parameters */
 	if ((sched_policy < KFD_SCHED_POLICY_HWS) ||
 		(sched_policy > KFD_SCHED_POLICY_NO_HWS)) {
diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_mqd_manager_cik.c b/drivers/gpu/drm/amd/amdkfd/kfd_mqd_manager_cik.c
index a09e18a..4349794 100644
--- a/drivers/gpu/drm/amd/amdkfd/kfd_mqd_manager_cik.c
+++ b/drivers/gpu/drm/amd/amdkfd/kfd_mqd_manager_cik.c
@@ -151,14 +151,15 @@
 static int load_mqd(struct mqd_manager *mm, void *mqd, uint32_t pipe_id,
 			uint32_t queue_id, uint32_t __user *wptr)
 {
-	return kfd2kgd->hqd_load(mm->dev->kgd, mqd, pipe_id, queue_id, wptr);
+	return mm->dev->kfd2kgd->hqd_load
+		(mm->dev->kgd, mqd, pipe_id, queue_id, wptr);
 }
 
 static int load_mqd_sdma(struct mqd_manager *mm, void *mqd,
 			uint32_t pipe_id, uint32_t queue_id,
 			uint32_t __user *wptr)
 {
-	return kfd2kgd->hqd_sdma_load(mm->dev->kgd, mqd);
+	return mm->dev->kfd2kgd->hqd_sdma_load(mm->dev->kgd, mqd);
 }
 
 static int update_mqd(struct mqd_manager *mm, void *mqd,
@@ -245,7 +246,7 @@
 			unsigned int timeout, uint32_t pipe_id,
 			uint32_t queue_id)
 {
-	return kfd2kgd->hqd_destroy(mm->dev->kgd, type, timeout,
+	return mm->dev->kfd2kgd->hqd_destroy(mm->dev->kgd, type, timeout,
 					pipe_id, queue_id);
 }
 
@@ -258,7 +259,7 @@
 				unsigned int timeout, uint32_t pipe_id,
 				uint32_t queue_id)
 {
-	return kfd2kgd->hqd_sdma_destroy(mm->dev->kgd, mqd, timeout);
+	return mm->dev->kfd2kgd->hqd_sdma_destroy(mm->dev->kgd, mqd, timeout);
 }
 
 static bool is_occupied(struct mqd_manager *mm, void *mqd,
@@ -266,7 +267,7 @@
 			uint32_t queue_id)
 {
 
-	return kfd2kgd->hqd_is_occupied(mm->dev->kgd, queue_address,
+	return mm->dev->kfd2kgd->hqd_is_occupied(mm->dev->kgd, queue_address,
 					pipe_id, queue_id);
 
 }
@@ -275,7 +276,7 @@
 			uint64_t queue_address,	uint32_t pipe_id,
 			uint32_t queue_id)
 {
-	return kfd2kgd->hqd_sdma_is_occupied(mm->dev->kgd, mqd);
+	return mm->dev->kfd2kgd->hqd_sdma_is_occupied(mm->dev->kgd, mqd);
 }
 
 /*
diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_priv.h b/drivers/gpu/drm/amd/amdkfd/kfd_priv.h
index b7bd7af..f21fcce 100644
--- a/drivers/gpu/drm/amd/amdkfd/kfd_priv.h
+++ b/drivers/gpu/drm/amd/amdkfd/kfd_priv.h
@@ -148,6 +148,11 @@
 
 	struct kgd2kfd_shared_resources shared_resources;
 
+	const struct kfd2kgd_calls *kfd2kgd;
+	struct mutex doorbell_mutex;
+	unsigned long doorbell_available_index[DIV_ROUND_UP(
+		KFD_MAX_NUM_OF_QUEUES_PER_PROCESS, BITS_PER_LONG)];
+
 	void *gtt_mem;
 	uint64_t gtt_start_gpu_addr;
 	void *gtt_start_cpu_ptr;
@@ -164,13 +169,12 @@
 
 /* KGD2KFD callbacks */
 void kgd2kfd_exit(void);
-struct kfd_dev *kgd2kfd_probe(struct kgd_dev *kgd, struct pci_dev *pdev);
+struct kfd_dev *kgd2kfd_probe(struct kgd_dev *kgd,
+			struct pci_dev *pdev, const struct kfd2kgd_calls *f2g);
 bool kgd2kfd_device_init(struct kfd_dev *kfd,
-			 const struct kgd2kfd_shared_resources *gpu_resources);
+			const struct kgd2kfd_shared_resources *gpu_resources);
 void kgd2kfd_device_exit(struct kfd_dev *kfd);
 
-extern const struct kfd2kgd_calls *kfd2kgd;
-
 enum kfd_mempool {
 	KFD_MEMPOOL_SYSTEM_CACHEABLE = 1,
 	KFD_MEMPOOL_SYSTEM_WRITECOMBINE = 2,
diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_topology.c b/drivers/gpu/drm/amd/amdkfd/kfd_topology.c
index 4983993..661c660 100644
--- a/drivers/gpu/drm/amd/amdkfd/kfd_topology.c
+++ b/drivers/gpu/drm/amd/amdkfd/kfd_topology.c
@@ -726,13 +726,14 @@
 		}
 
 		sysfs_show_32bit_prop(buffer, "max_engine_clk_fcompute",
-				kfd2kgd->get_max_engine_clock_in_mhz(
+			dev->gpu->kfd2kgd->get_max_engine_clock_in_mhz(
 					dev->gpu->kgd));
 		sysfs_show_64bit_prop(buffer, "local_mem_size",
-				kfd2kgd->get_vmem_size(dev->gpu->kgd));
+			dev->gpu->kfd2kgd->get_vmem_size(
+					dev->gpu->kgd));
 
 		sysfs_show_32bit_prop(buffer, "fw_version",
-				kfd2kgd->get_fw_version(
+			dev->gpu->kfd2kgd->get_fw_version(
 						dev->gpu->kgd,
 						KGD_ENGINE_MEC1));
 	}
@@ -1099,8 +1100,9 @@
 	buf[2] = gpu->pdev->subsystem_device;
 	buf[3] = gpu->pdev->device;
 	buf[4] = gpu->pdev->bus->number;
-	buf[5] = (uint32_t)(kfd2kgd->get_vmem_size(gpu->kgd) & 0xffffffff);
-	buf[6] = (uint32_t)(kfd2kgd->get_vmem_size(gpu->kgd) >> 32);
+	buf[5] = (uint32_t)(gpu->kfd2kgd->get_vmem_size(gpu->kgd)
+			& 0xffffffff);
+	buf[6] = (uint32_t)(gpu->kfd2kgd->get_vmem_size(gpu->kgd) >> 32);
 
 	for (i = 0, hashout = 0; i < 7; i++)
 		hashout ^= hash_32(buf[i], KFD_GPU_ID_HASH_WIDTH);
diff --git a/drivers/gpu/drm/amd/include/kgd_kfd_interface.h b/drivers/gpu/drm/amd/include/kgd_kfd_interface.h
index 239bc16..dabd944 100644
--- a/drivers/gpu/drm/amd/include/kgd_kfd_interface.h
+++ b/drivers/gpu/drm/amd/include/kgd_kfd_interface.h
@@ -77,37 +77,6 @@
 };
 
 /**
- * struct kgd2kfd_calls
- *
- * @exit: Notifies amdkfd that kgd module is unloaded
- *
- * @probe: Notifies amdkfd about a probe done on a device in the kgd driver.
- *
- * @device_init: Initialize the newly probed device (if it is a device that
- * amdkfd supports)
- *
- * @device_exit: Notifies amdkfd about a removal of a kgd device
- *
- * @suspend: Notifies amdkfd about a suspend action done to a kgd device
- *
- * @resume: Notifies amdkfd about a resume action done to a kgd device
- *
- * This structure contains function callback pointers so the kgd driver
- * will notify to the amdkfd about certain status changes.
- *
- */
-struct kgd2kfd_calls {
-	void (*exit)(void);
-	struct kfd_dev* (*probe)(struct kgd_dev *kgd, struct pci_dev *pdev);
-	bool (*device_init)(struct kfd_dev *kfd,
-			const struct kgd2kfd_shared_resources *gpu_resources);
-	void (*device_exit)(struct kfd_dev *kfd);
-	void (*interrupt)(struct kfd_dev *kfd, const void *ih_ring_entry);
-	void (*suspend)(struct kfd_dev *kfd);
-	int (*resume)(struct kfd_dev *kfd);
-};
-
-/**
  * struct kfd2kgd_calls
  *
  * @init_gtt_mem_allocation: Allocate a buffer on the gart aperture.
@@ -196,8 +165,39 @@
 				enum kgd_engine_type type);
 };
 
+/**
+ * struct kgd2kfd_calls
+ *
+ * @exit: Notifies amdkfd that kgd module is unloaded
+ *
+ * @probe: Notifies amdkfd about a probe done on a device in the kgd driver.
+ *
+ * @device_init: Initialize the newly probed device (if it is a device that
+ * amdkfd supports)
+ *
+ * @device_exit: Notifies amdkfd about a removal of a kgd device
+ *
+ * @suspend: Notifies amdkfd about a suspend action done to a kgd device
+ *
+ * @resume: Notifies amdkfd about a resume action done to a kgd device
+ *
+ * This structure contains function callback pointers so the kgd driver
+ * will notify to the amdkfd about certain status changes.
+ *
+ */
+struct kgd2kfd_calls {
+	void (*exit)(void);
+	struct kfd_dev* (*probe)(struct kgd_dev *kgd, struct pci_dev *pdev,
+		const struct kfd2kgd_calls *f2g);
+	bool (*device_init)(struct kfd_dev *kfd,
+			const struct kgd2kfd_shared_resources *gpu_resources);
+	void (*device_exit)(struct kfd_dev *kfd);
+	void (*interrupt)(struct kfd_dev *kfd, const void *ih_ring_entry);
+	void (*suspend)(struct kfd_dev *kfd);
+	int (*resume)(struct kfd_dev *kfd);
+};
+
 bool kgd2kfd_init(unsigned interface_version,
-		const struct kfd2kgd_calls *f2g,
 		const struct kgd2kfd_calls **g2f);
 
 #endif	/* KGD_KFD_INTERFACE_H_INCLUDED */
diff --git a/drivers/gpu/drm/radeon/radeon_kfd.c b/drivers/gpu/drm/radeon/radeon_kfd.c
index 061eaa9..4cdcaf8 100644
--- a/drivers/gpu/drm/radeon/radeon_kfd.c
+++ b/drivers/gpu/drm/radeon/radeon_kfd.c
@@ -103,15 +103,14 @@
 bool radeon_kfd_init(void)
 {
 #if defined(CONFIG_HSA_AMD_MODULE)
-	bool (*kgd2kfd_init_p)(unsigned, const struct kfd2kgd_calls*,
-				const struct kgd2kfd_calls**);
+	bool (*kgd2kfd_init_p)(unsigned, const struct kgd2kfd_calls**);
 
 	kgd2kfd_init_p = symbol_request(kgd2kfd_init);
 
 	if (kgd2kfd_init_p == NULL)
 		return false;
 
-	if (!kgd2kfd_init_p(KFD_INTERFACE_VERSION, &kfd2kgd, &kgd2kfd)) {
+	if (!kgd2kfd_init_p(KFD_INTERFACE_VERSION, &kgd2kfd)) {
 		symbol_put(kgd2kfd_init);
 		kgd2kfd = NULL;
 
@@ -120,7 +119,7 @@
 
 	return true;
 #elif defined(CONFIG_HSA_AMD)
-	if (!kgd2kfd_init(KFD_INTERFACE_VERSION, &kfd2kgd, &kgd2kfd)) {
+	if (!kgd2kfd_init(KFD_INTERFACE_VERSION, &kgd2kfd)) {
 		kgd2kfd = NULL;
 
 		return false;
@@ -143,7 +142,8 @@
 void radeon_kfd_device_probe(struct radeon_device *rdev)
 {
 	if (kgd2kfd)
-		rdev->kfd = kgd2kfd->probe((struct kgd_dev *)rdev, rdev->pdev);
+		rdev->kfd = kgd2kfd->probe((struct kgd_dev *)rdev,
+			rdev->pdev, &kfd2kgd);
 }
 
 void radeon_kfd_device_init(struct radeon_device *rdev)