@@ -24,6 +24,7 @@
#include <linux/bsearch.h>
#include <linux/pci.h>
#include <linux/slab.h>
+#include <linux/sched.h>
#include "kfd_priv.h"
#include "kfd_device_queue_manager.h"
@@ -40,6 +41,11 @@ struct kfd_deviceid {
const struct kfd_device_info *device_info;
};
+struct kfd_device_init_work {
+ struct work_struct kfd_work;
+ struct kfd_dev *dev;
+};
+
/* Please keep this sorted by increasing device id. */
static const struct kfd_deviceid supported_devices[] = {
{ 0x1304, &kaveri_device_info }, /* Kaveri */
@@ -99,6 +105,8 @@ struct kfd_dev *kgd2kfd_probe(struct kgd_dev *kgd, struct pci_dev *pdev)
kfd->pdev = pdev;
kfd->init_complete = false;
+ kfd->kfd_dev_wq = create_workqueue("kfd_dev_wq");
+
return kfd;
}
@@ -161,13 +169,10 @@ static void iommu_pasid_shutdown_callback(struct pci_dev *pdev, int pasid)
kfd_unbind_process_from_device(dev, pasid);
}
-bool kgd2kfd_device_init(struct kfd_dev *kfd,
- const struct kgd2kfd_shared_resources *gpu_resources)
+static bool kfd_kgd_device_init(struct kfd_dev *kfd)
{
unsigned int size;
- kfd->shared_resources = *gpu_resources;
-
/* calculate max size of mqds needed for queues */
size = max_num_of_processes *
max_num_of_queues_per_process *
@@ -249,6 +254,63 @@ out:
return kfd->init_complete;
}
+static void kfd_device_wq_init_device(struct work_struct *work)
+{
+	struct kfd_device_init_work *my_work;
+	struct kfd_dev *kfd;
+
+	/* Recover the container; idiomatic and safe even if kfd_work moves. */
+	my_work = container_of(work, struct kfd_device_init_work, kfd_work);
+	kfd = my_work->dev;
+
+	/*
+	 * As long as amdkfd or amd_iommu_v2 are not initialized, we
+	 * yield the processor
+	 */
+	while ((!amdkfd_is_init_completed()) ||
+		(!amd_iommu_v2_is_init_completed()))
+		schedule();
+
+	kfree(my_work);	/* balance the kmalloc in kgd2kfd_device_init() */
+	kfd_kgd_device_init(kfd);
+}
+
+bool kgd2kfd_device_init(struct kfd_dev *kfd,
+		const struct kgd2kfd_shared_resources *gpu_resources)
+{
+	struct kfd_device_init_work *work;
+
+	kfd->shared_resources = *gpu_resources;
+
+	/*
+	 * When amd_iommu_v2, amdkfd and radeon are compiled inside the kernel,
+	 * there is no mechanism to enforce order of loading between the
+	 * drivers. Therefore, we need to use an explicit form of
+	 * synchronization to know when amdkfd and amd_iommu_v2 have finished
+	 * their initialization routines.
+	 */
+	if ((!amdkfd_is_init_completed()) ||
+		(!amd_iommu_v2_is_init_completed())) {
+		BUG_ON(!kfd->kfd_dev_wq);
+
+		/* We are in the driver's probe path (process context, may
+		 * sleep), so GFP_KERNEL is appropriate; no need to drain
+		 * the atomic reserves. kmalloc() needs no cast in C. */
+		work = kmalloc(sizeof(*work), GFP_KERNEL);
+
+		if (!work) {
+			pr_err("kfd: no memory for device work queue\n");
+			return false;
+		}
+
+		INIT_WORK(&work->kfd_work, kfd_device_wq_init_device);
+		work->dev = kfd;
+		queue_work(kfd->kfd_dev_wq, &work->kfd_work);
+		return true;
+	}
+
+	return kfd_kgd_device_init(kfd);
+}
+
void kgd2kfd_device_exit(struct kfd_dev *kfd)
{
if (kfd->init_complete) {
@@ -258,6 +320,8 @@ void kgd2kfd_device_exit(struct kfd_dev *kfd)
kfd_topology_remove_device(kfd);
}
+ flush_workqueue(kfd->kfd_dev_wq);
+ destroy_workqueue(kfd->kfd_dev_wq);
kfree(kfd);
}
@@ -151,6 +151,8 @@ struct kfd_dev {
* from the HW ring into a SW ring.
*/
bool interrupts_active;
+
+ struct workqueue_struct *kfd_dev_wq;
};
int amdkfd_is_init_completed(void);
When amd_iommu_v2, amdkfd and radeon are all compiled inside the kernel image (not as modules), radeon probes the existing GPU before amdkfd and amd_iommu_v2 are even loaded. When radeon encounters an AMD GPU, it will pass that information to amdkfd. However, that call will fail and will cause a kernel BUG. We could poll in radeon on when amdkfd and amd_iommu_v2 have been loaded, but that would stall radeon. Therefore, this patch moves the amdkfd part of GPU initialization to a workqueue. When radeon calls amdkfd to perform GPU related initialization, it will check if both amdkfd and amd_iommu_v2 have been loaded. If so, which is the situation when the three drivers are compiled as modules, it will call the relevant amdkfd function directly. If not, it will queue the initialization work on the workqueue. The work function will schedule itself until both amdkfd and amd_iommu_v2 have been loaded. Then, it will call the relevant amdkfd function. The workqueue is defined per kfd_dev structure (per GPU). Signed-off-by: Oded Gabbay <oded.gabbay@amd.com> --- drivers/gpu/drm/amd/amdkfd/kfd_device.c | 72 +++++++++++++++++++++++++++++++-- drivers/gpu/drm/amd/amdkfd/kfd_priv.h | 2 + 2 files changed, 70 insertions(+), 4 deletions(-)