diff mbox series

[v4,07/14] drm/amdgpu: Register IOMMU topology notifier per device.

Message ID 1611003683-3534-8-git-send-email-andrey.grodzovsky@amd.com (mailing list archive)
State New, archived
Headers show
Series RFC Support hot device unplug in amdgpu | expand

Commit Message

Andrey Grodzovsky Jan. 18, 2021, 9:01 p.m. UTC
Handle all DMA IOMMU group related dependencies before the
group is removed.

Signed-off-by: Andrey Grodzovsky <andrey.grodzovsky@amd.com>
---
 drivers/gpu/drm/amd/amdgpu/amdgpu.h        |  5 ++++
 drivers/gpu/drm/amd/amdgpu/amdgpu_device.c | 46 ++++++++++++++++++++++++++++++
 drivers/gpu/drm/amd/amdgpu/amdgpu_gart.c   |  2 +-
 drivers/gpu/drm/amd/amdgpu/amdgpu_gart.h   |  1 +
 drivers/gpu/drm/amd/amdgpu/amdgpu_object.c | 10 +++++++
 drivers/gpu/drm/amd/amdgpu/amdgpu_object.h |  2 ++
 6 files changed, 65 insertions(+), 1 deletion(-)

Comments

Alex Deucher Jan. 18, 2021, 9:52 p.m. UTC | #1
On Mon, Jan 18, 2021 at 4:02 PM Andrey Grodzovsky
<andrey.grodzovsky@amd.com> wrote:
>
> Handle all DMA IOMMU gropup related dependencies before the

gropup -> group

Alex

> group is removed.
>
> Signed-off-by: Andrey Grodzovsky <andrey.grodzovsky@amd.com>
> ---
>  drivers/gpu/drm/amd/amdgpu/amdgpu.h        |  5 ++++
>  drivers/gpu/drm/amd/amdgpu/amdgpu_device.c | 46 ++++++++++++++++++++++++++++++
>  drivers/gpu/drm/amd/amdgpu/amdgpu_gart.c   |  2 +-
>  drivers/gpu/drm/amd/amdgpu/amdgpu_gart.h   |  1 +
>  drivers/gpu/drm/amd/amdgpu/amdgpu_object.c | 10 +++++++
>  drivers/gpu/drm/amd/amdgpu/amdgpu_object.h |  2 ++
>  6 files changed, 65 insertions(+), 1 deletion(-)
>
> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu.h b/drivers/gpu/drm/amd/amdgpu/amdgpu.h
> index 478a7d8..2953420 100644
> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu.h
> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu.h
> @@ -51,6 +51,7 @@
>  #include <linux/dma-fence.h>
>  #include <linux/pci.h>
>  #include <linux/aer.h>
> +#include <linux/notifier.h>
>
>  #include <drm/ttm/ttm_bo_api.h>
>  #include <drm/ttm/ttm_bo_driver.h>
> @@ -1041,6 +1042,10 @@ struct amdgpu_device {
>
>         bool                            in_pci_err_recovery;
>         struct pci_saved_state          *pci_state;
> +
> +       struct notifier_block           nb;
> +       struct blocking_notifier_head   notifier;
> +       struct list_head                device_bo_list;
>  };
>
>  static inline struct amdgpu_device *drm_to_adev(struct drm_device *ddev)
> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c
> index 45e23e3..e99f4f1 100644
> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c
> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c
> @@ -70,6 +70,8 @@
>  #include <drm/task_barrier.h>
>  #include <linux/pm_runtime.h>
>
> +#include <linux/iommu.h>
> +
>  MODULE_FIRMWARE("amdgpu/vega10_gpu_info.bin");
>  MODULE_FIRMWARE("amdgpu/vega12_gpu_info.bin");
>  MODULE_FIRMWARE("amdgpu/raven_gpu_info.bin");
> @@ -3200,6 +3202,39 @@ static const struct attribute *amdgpu_dev_attributes[] = {
>  };
>
>
> +static int amdgpu_iommu_group_notifier(struct notifier_block *nb,
> +                                    unsigned long action, void *data)
> +{
> +       struct amdgpu_device *adev = container_of(nb, struct amdgpu_device, nb);
> +       struct amdgpu_bo *bo = NULL;
> +
> +       /*
> +        * Following is a set of IOMMU group dependencies taken care of before
> +        * device's IOMMU group is removed
> +        */
> +       if (action == IOMMU_GROUP_NOTIFY_DEL_DEVICE) {
> +
> +               spin_lock(&ttm_bo_glob.lru_lock);
> +               list_for_each_entry(bo, &adev->device_bo_list, bo) {
> +                       if (bo->tbo.ttm)
> +                               ttm_tt_unpopulate(bo->tbo.bdev, bo->tbo.ttm);
> +               }
> +               spin_unlock(&ttm_bo_glob.lru_lock);
> +
> +               if (adev->irq.ih.use_bus_addr)
> +                       amdgpu_ih_ring_fini(adev, &adev->irq.ih);
> +               if (adev->irq.ih1.use_bus_addr)
> +                       amdgpu_ih_ring_fini(adev, &adev->irq.ih1);
> +               if (adev->irq.ih2.use_bus_addr)
> +                       amdgpu_ih_ring_fini(adev, &adev->irq.ih2);
> +
> +               amdgpu_gart_dummy_page_fini(adev);
> +       }
> +
> +       return NOTIFY_OK;
> +}
> +
> +
>  /**
>   * amdgpu_device_init - initialize the driver
>   *
> @@ -3304,6 +3339,8 @@ int amdgpu_device_init(struct amdgpu_device *adev,
>
>         INIT_WORK(&adev->xgmi_reset_work, amdgpu_device_xgmi_reset_func);
>
> +       INIT_LIST_HEAD(&adev->device_bo_list);
> +
>         adev->gfx.gfx_off_req_count = 1;
>         adev->pm.ac_power = power_supply_is_system_supplied() > 0;
>
> @@ -3575,6 +3612,15 @@ int amdgpu_device_init(struct amdgpu_device *adev,
>         if (amdgpu_device_cache_pci_state(adev->pdev))
>                 pci_restore_state(pdev);
>
> +       BLOCKING_INIT_NOTIFIER_HEAD(&adev->notifier);
> +       adev->nb.notifier_call = amdgpu_iommu_group_notifier;
> +
> +       if (adev->dev->iommu_group) {
> +               r = iommu_group_register_notifier(adev->dev->iommu_group, &adev->nb);
> +               if (r)
> +                       goto failed;
> +       }
> +
>         return 0;
>
>  failed:
> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_gart.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_gart.c
> index 0db9330..486ad6d 100644
> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_gart.c
> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_gart.c
> @@ -92,7 +92,7 @@ static int amdgpu_gart_dummy_page_init(struct amdgpu_device *adev)
>   *
>   * Frees the dummy page used by the driver (all asics).
>   */
> -static void amdgpu_gart_dummy_page_fini(struct amdgpu_device *adev)
> +void amdgpu_gart_dummy_page_fini(struct amdgpu_device *adev)
>  {
>         if (!adev->dummy_page_addr)
>                 return;
> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_gart.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_gart.h
> index afa2e28..5678d9c 100644
> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_gart.h
> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_gart.h
> @@ -61,6 +61,7 @@ int amdgpu_gart_table_vram_pin(struct amdgpu_device *adev);
>  void amdgpu_gart_table_vram_unpin(struct amdgpu_device *adev);
>  int amdgpu_gart_init(struct amdgpu_device *adev);
>  void amdgpu_gart_fini(struct amdgpu_device *adev);
> +void amdgpu_gart_dummy_page_fini(struct amdgpu_device *adev);
>  int amdgpu_gart_unbind(struct amdgpu_device *adev, uint64_t offset,
>                        int pages);
>  int amdgpu_gart_map(struct amdgpu_device *adev, uint64_t offset,
> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_object.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_object.c
> index 6cc9919..4a1de69 100644
> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_object.c
> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_object.c
> @@ -94,6 +94,10 @@ static void amdgpu_bo_destroy(struct ttm_buffer_object *tbo)
>         }
>         amdgpu_bo_unref(&bo->parent);
>
> +       spin_lock(&ttm_bo_glob.lru_lock);
> +       list_del(&bo->bo);
> +       spin_unlock(&ttm_bo_glob.lru_lock);
> +
>         kfree(bo->metadata);
>         kfree(bo);
>  }
> @@ -613,6 +617,12 @@ static int amdgpu_bo_do_create(struct amdgpu_device *adev,
>         if (bp->type == ttm_bo_type_device)
>                 bo->flags &= ~AMDGPU_GEM_CREATE_CPU_ACCESS_REQUIRED;
>
> +       INIT_LIST_HEAD(&bo->bo);
> +
> +       spin_lock(&ttm_bo_glob.lru_lock);
> +       list_add_tail(&bo->bo, &adev->device_bo_list);
> +       spin_unlock(&ttm_bo_glob.lru_lock);
> +
>         return 0;
>
>  fail_unreserve:
> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_object.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_object.h
> index 9ac3756..5ae8555 100644
> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_object.h
> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_object.h
> @@ -110,6 +110,8 @@ struct amdgpu_bo {
>         struct list_head                shadow_list;
>
>         struct kgd_mem                  *kfd_bo;
> +
> +       struct list_head                bo;
>  };
>
>  static inline struct amdgpu_bo *ttm_to_amdgpu_bo(struct ttm_buffer_object *tbo)
> --
> 2.7.4
>
> _______________________________________________
> dri-devel mailing list
> dri-devel@lists.freedesktop.org
> https://lists.freedesktop.org/mailman/listinfo/dri-devel
Christian König Jan. 19, 2021, 8:48 a.m. UTC | #2
Am 18.01.21 um 22:01 schrieb Andrey Grodzovsky:
> Handle all DMA IOMMU gropup related dependencies before the
> group is removed.
>
> Signed-off-by: Andrey Grodzovsky <andrey.grodzovsky@amd.com>
> ---
>   drivers/gpu/drm/amd/amdgpu/amdgpu.h        |  5 ++++
>   drivers/gpu/drm/amd/amdgpu/amdgpu_device.c | 46 ++++++++++++++++++++++++++++++
>   drivers/gpu/drm/amd/amdgpu/amdgpu_gart.c   |  2 +-
>   drivers/gpu/drm/amd/amdgpu/amdgpu_gart.h   |  1 +
>   drivers/gpu/drm/amd/amdgpu/amdgpu_object.c | 10 +++++++
>   drivers/gpu/drm/amd/amdgpu/amdgpu_object.h |  2 ++
>   6 files changed, 65 insertions(+), 1 deletion(-)
>
> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu.h b/drivers/gpu/drm/amd/amdgpu/amdgpu.h
> index 478a7d8..2953420 100644
> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu.h
> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu.h
> @@ -51,6 +51,7 @@
>   #include <linux/dma-fence.h>
>   #include <linux/pci.h>
>   #include <linux/aer.h>
> +#include <linux/notifier.h>
>   
>   #include <drm/ttm/ttm_bo_api.h>
>   #include <drm/ttm/ttm_bo_driver.h>
> @@ -1041,6 +1042,10 @@ struct amdgpu_device {
>   
>   	bool                            in_pci_err_recovery;
>   	struct pci_saved_state          *pci_state;
> +
> +	struct notifier_block		nb;
> +	struct blocking_notifier_head	notifier;
> +	struct list_head		device_bo_list;
>   };
>   
>   static inline struct amdgpu_device *drm_to_adev(struct drm_device *ddev)
> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c
> index 45e23e3..e99f4f1 100644
> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c
> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c
> @@ -70,6 +70,8 @@
>   #include <drm/task_barrier.h>
>   #include <linux/pm_runtime.h>
>   
> +#include <linux/iommu.h>
> +
>   MODULE_FIRMWARE("amdgpu/vega10_gpu_info.bin");
>   MODULE_FIRMWARE("amdgpu/vega12_gpu_info.bin");
>   MODULE_FIRMWARE("amdgpu/raven_gpu_info.bin");
> @@ -3200,6 +3202,39 @@ static const struct attribute *amdgpu_dev_attributes[] = {
>   };
>   
>   
> +static int amdgpu_iommu_group_notifier(struct notifier_block *nb,
> +				     unsigned long action, void *data)
> +{
> +	struct amdgpu_device *adev = container_of(nb, struct amdgpu_device, nb);
> +	struct amdgpu_bo *bo = NULL;
> +
> +	/*
> +	 * Following is a set of IOMMU group dependencies taken care of before
> +	 * device's IOMMU group is removed
> +	 */
> +	if (action == IOMMU_GROUP_NOTIFY_DEL_DEVICE) {
> +
> +		spin_lock(&ttm_bo_glob.lru_lock);
> +		list_for_each_entry(bo, &adev->device_bo_list, bo) {
> +			if (bo->tbo.ttm)
> +				ttm_tt_unpopulate(bo->tbo.bdev, bo->tbo.ttm);
> +		}
> +		spin_unlock(&ttm_bo_glob.lru_lock);

That approach won't work. ttm_tt_unpopulate() might sleep on an IOMMU lock.

You need to use a mutex here or even better make sure you can access the 
device_bo_list without a lock in this moment.

Christian.

> +
> +		if (adev->irq.ih.use_bus_addr)
> +			amdgpu_ih_ring_fini(adev, &adev->irq.ih);
> +		if (adev->irq.ih1.use_bus_addr)
> +			amdgpu_ih_ring_fini(adev, &adev->irq.ih1);
> +		if (adev->irq.ih2.use_bus_addr)
> +			amdgpu_ih_ring_fini(adev, &adev->irq.ih2);
> +
> +		amdgpu_gart_dummy_page_fini(adev);
> +	}
> +
> +	return NOTIFY_OK;
> +}
> +
> +
>   /**
>    * amdgpu_device_init - initialize the driver
>    *
> @@ -3304,6 +3339,8 @@ int amdgpu_device_init(struct amdgpu_device *adev,
>   
>   	INIT_WORK(&adev->xgmi_reset_work, amdgpu_device_xgmi_reset_func);
>   
> +	INIT_LIST_HEAD(&adev->device_bo_list);
> +
>   	adev->gfx.gfx_off_req_count = 1;
>   	adev->pm.ac_power = power_supply_is_system_supplied() > 0;
>   
> @@ -3575,6 +3612,15 @@ int amdgpu_device_init(struct amdgpu_device *adev,
>   	if (amdgpu_device_cache_pci_state(adev->pdev))
>   		pci_restore_state(pdev);
>   
> +	BLOCKING_INIT_NOTIFIER_HEAD(&adev->notifier);
> +	adev->nb.notifier_call = amdgpu_iommu_group_notifier;
> +
> +	if (adev->dev->iommu_group) {
> +		r = iommu_group_register_notifier(adev->dev->iommu_group, &adev->nb);
> +		if (r)
> +			goto failed;
> +	}
> +
>   	return 0;
>   
>   failed:
> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_gart.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_gart.c
> index 0db9330..486ad6d 100644
> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_gart.c
> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_gart.c
> @@ -92,7 +92,7 @@ static int amdgpu_gart_dummy_page_init(struct amdgpu_device *adev)
>    *
>    * Frees the dummy page used by the driver (all asics).
>    */
> -static void amdgpu_gart_dummy_page_fini(struct amdgpu_device *adev)
> +void amdgpu_gart_dummy_page_fini(struct amdgpu_device *adev)
>   {
>   	if (!adev->dummy_page_addr)
>   		return;
> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_gart.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_gart.h
> index afa2e28..5678d9c 100644
> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_gart.h
> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_gart.h
> @@ -61,6 +61,7 @@ int amdgpu_gart_table_vram_pin(struct amdgpu_device *adev);
>   void amdgpu_gart_table_vram_unpin(struct amdgpu_device *adev);
>   int amdgpu_gart_init(struct amdgpu_device *adev);
>   void amdgpu_gart_fini(struct amdgpu_device *adev);
> +void amdgpu_gart_dummy_page_fini(struct amdgpu_device *adev);
>   int amdgpu_gart_unbind(struct amdgpu_device *adev, uint64_t offset,
>   		       int pages);
>   int amdgpu_gart_map(struct amdgpu_device *adev, uint64_t offset,
> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_object.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_object.c
> index 6cc9919..4a1de69 100644
> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_object.c
> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_object.c
> @@ -94,6 +94,10 @@ static void amdgpu_bo_destroy(struct ttm_buffer_object *tbo)
>   	}
>   	amdgpu_bo_unref(&bo->parent);
>   
> +	spin_lock(&ttm_bo_glob.lru_lock);
> +	list_del(&bo->bo);
> +	spin_unlock(&ttm_bo_glob.lru_lock);
> +
>   	kfree(bo->metadata);
>   	kfree(bo);
>   }
> @@ -613,6 +617,12 @@ static int amdgpu_bo_do_create(struct amdgpu_device *adev,
>   	if (bp->type == ttm_bo_type_device)
>   		bo->flags &= ~AMDGPU_GEM_CREATE_CPU_ACCESS_REQUIRED;
>   
> +	INIT_LIST_HEAD(&bo->bo);
> +
> +	spin_lock(&ttm_bo_glob.lru_lock);
> +	list_add_tail(&bo->bo, &adev->device_bo_list);
> +	spin_unlock(&ttm_bo_glob.lru_lock);
> +
>   	return 0;
>   
>   fail_unreserve:
> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_object.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_object.h
> index 9ac3756..5ae8555 100644
> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_object.h
> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_object.h
> @@ -110,6 +110,8 @@ struct amdgpu_bo {
>   	struct list_head		shadow_list;
>   
>   	struct kgd_mem                  *kfd_bo;
> +
> +	struct list_head		bo;
>   };
>   
>   static inline struct amdgpu_bo *ttm_to_amdgpu_bo(struct ttm_buffer_object *tbo)
Daniel Vetter Jan. 19, 2021, 1:45 p.m. UTC | #3
On Tue, Jan 19, 2021 at 09:48:03AM +0100, Christian König wrote:
> Am 18.01.21 um 22:01 schrieb Andrey Grodzovsky:
> > Handle all DMA IOMMU gropup related dependencies before the
> > group is removed.
> > 
> > Signed-off-by: Andrey Grodzovsky <andrey.grodzovsky@amd.com>
> > ---
> >   drivers/gpu/drm/amd/amdgpu/amdgpu.h        |  5 ++++
> >   drivers/gpu/drm/amd/amdgpu/amdgpu_device.c | 46 ++++++++++++++++++++++++++++++
> >   drivers/gpu/drm/amd/amdgpu/amdgpu_gart.c   |  2 +-
> >   drivers/gpu/drm/amd/amdgpu/amdgpu_gart.h   |  1 +
> >   drivers/gpu/drm/amd/amdgpu/amdgpu_object.c | 10 +++++++
> >   drivers/gpu/drm/amd/amdgpu/amdgpu_object.h |  2 ++
> >   6 files changed, 65 insertions(+), 1 deletion(-)
> > 
> > diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu.h b/drivers/gpu/drm/amd/amdgpu/amdgpu.h
> > index 478a7d8..2953420 100644
> > --- a/drivers/gpu/drm/amd/amdgpu/amdgpu.h
> > +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu.h
> > @@ -51,6 +51,7 @@
> >   #include <linux/dma-fence.h>
> >   #include <linux/pci.h>
> >   #include <linux/aer.h>
> > +#include <linux/notifier.h>
> >   #include <drm/ttm/ttm_bo_api.h>
> >   #include <drm/ttm/ttm_bo_driver.h>
> > @@ -1041,6 +1042,10 @@ struct amdgpu_device {
> >   	bool                            in_pci_err_recovery;
> >   	struct pci_saved_state          *pci_state;
> > +
> > +	struct notifier_block		nb;
> > +	struct blocking_notifier_head	notifier;
> > +	struct list_head		device_bo_list;
> >   };
> >   static inline struct amdgpu_device *drm_to_adev(struct drm_device *ddev)
> > diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c
> > index 45e23e3..e99f4f1 100644
> > --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c
> > +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c
> > @@ -70,6 +70,8 @@
> >   #include <drm/task_barrier.h>
> >   #include <linux/pm_runtime.h>
> > +#include <linux/iommu.h>
> > +
> >   MODULE_FIRMWARE("amdgpu/vega10_gpu_info.bin");
> >   MODULE_FIRMWARE("amdgpu/vega12_gpu_info.bin");
> >   MODULE_FIRMWARE("amdgpu/raven_gpu_info.bin");
> > @@ -3200,6 +3202,39 @@ static const struct attribute *amdgpu_dev_attributes[] = {
> >   };
> > +static int amdgpu_iommu_group_notifier(struct notifier_block *nb,
> > +				     unsigned long action, void *data)
> > +{
> > +	struct amdgpu_device *adev = container_of(nb, struct amdgpu_device, nb);
> > +	struct amdgpu_bo *bo = NULL;
> > +
> > +	/*
> > +	 * Following is a set of IOMMU group dependencies taken care of before
> > +	 * device's IOMMU group is removed
> > +	 */
> > +	if (action == IOMMU_GROUP_NOTIFY_DEL_DEVICE) {
> > +
> > +		spin_lock(&ttm_bo_glob.lru_lock);
> > +		list_for_each_entry(bo, &adev->device_bo_list, bo) {
> > +			if (bo->tbo.ttm)
> > +				ttm_tt_unpopulate(bo->tbo.bdev, bo->tbo.ttm);
> > +		}
> > +		spin_unlock(&ttm_bo_glob.lru_lock);
> 
> That approach won't work. ttm_tt_unpopulate() might sleep on an IOMMU lock.
> 
> You need to use a mutex here or even better make sure you can access the
> device_bo_list without a lock in this moment.

I'd also be worried about the notifier mutex getting really badly in the
way.

Plus I'm worried why we even need this, it sounds a bit like papering over
the iommu subsystem. Assuming we clean up all our iommu mappings in our
device hotunplug/unload code, why do we still need to have an additional
iommu notifier on top, with all kinds of additional headaches? The iommu
shouldn't clean up before the devices in its group have cleaned up.

I think we need more info here on what the exact problem is first.
-Daniel

> 
> Christian.
> 
> > +
> > +		if (adev->irq.ih.use_bus_addr)
> > +			amdgpu_ih_ring_fini(adev, &adev->irq.ih);
> > +		if (adev->irq.ih1.use_bus_addr)
> > +			amdgpu_ih_ring_fini(adev, &adev->irq.ih1);
> > +		if (adev->irq.ih2.use_bus_addr)
> > +			amdgpu_ih_ring_fini(adev, &adev->irq.ih2);
> > +
> > +		amdgpu_gart_dummy_page_fini(adev);
> > +	}
> > +
> > +	return NOTIFY_OK;
> > +}
> > +
> > +
> >   /**
> >    * amdgpu_device_init - initialize the driver
> >    *
> > @@ -3304,6 +3339,8 @@ int amdgpu_device_init(struct amdgpu_device *adev,
> >   	INIT_WORK(&adev->xgmi_reset_work, amdgpu_device_xgmi_reset_func);
> > +	INIT_LIST_HEAD(&adev->device_bo_list);
> > +
> >   	adev->gfx.gfx_off_req_count = 1;
> >   	adev->pm.ac_power = power_supply_is_system_supplied() > 0;
> > @@ -3575,6 +3612,15 @@ int amdgpu_device_init(struct amdgpu_device *adev,
> >   	if (amdgpu_device_cache_pci_state(adev->pdev))
> >   		pci_restore_state(pdev);
> > +	BLOCKING_INIT_NOTIFIER_HEAD(&adev->notifier);
> > +	adev->nb.notifier_call = amdgpu_iommu_group_notifier;
> > +
> > +	if (adev->dev->iommu_group) {
> > +		r = iommu_group_register_notifier(adev->dev->iommu_group, &adev->nb);
> > +		if (r)
> > +			goto failed;
> > +	}
> > +
> >   	return 0;
> >   failed:
> > diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_gart.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_gart.c
> > index 0db9330..486ad6d 100644
> > --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_gart.c
> > +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_gart.c
> > @@ -92,7 +92,7 @@ static int amdgpu_gart_dummy_page_init(struct amdgpu_device *adev)
> >    *
> >    * Frees the dummy page used by the driver (all asics).
> >    */
> > -static void amdgpu_gart_dummy_page_fini(struct amdgpu_device *adev)
> > +void amdgpu_gart_dummy_page_fini(struct amdgpu_device *adev)
> >   {
> >   	if (!adev->dummy_page_addr)
> >   		return;
> > diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_gart.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_gart.h
> > index afa2e28..5678d9c 100644
> > --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_gart.h
> > +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_gart.h
> > @@ -61,6 +61,7 @@ int amdgpu_gart_table_vram_pin(struct amdgpu_device *adev);
> >   void amdgpu_gart_table_vram_unpin(struct amdgpu_device *adev);
> >   int amdgpu_gart_init(struct amdgpu_device *adev);
> >   void amdgpu_gart_fini(struct amdgpu_device *adev);
> > +void amdgpu_gart_dummy_page_fini(struct amdgpu_device *adev);
> >   int amdgpu_gart_unbind(struct amdgpu_device *adev, uint64_t offset,
> >   		       int pages);
> >   int amdgpu_gart_map(struct amdgpu_device *adev, uint64_t offset,
> > diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_object.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_object.c
> > index 6cc9919..4a1de69 100644
> > --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_object.c
> > +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_object.c
> > @@ -94,6 +94,10 @@ static void amdgpu_bo_destroy(struct ttm_buffer_object *tbo)
> >   	}
> >   	amdgpu_bo_unref(&bo->parent);
> > +	spin_lock(&ttm_bo_glob.lru_lock);
> > +	list_del(&bo->bo);
> > +	spin_unlock(&ttm_bo_glob.lru_lock);
> > +
> >   	kfree(bo->metadata);
> >   	kfree(bo);
> >   }
> > @@ -613,6 +617,12 @@ static int amdgpu_bo_do_create(struct amdgpu_device *adev,
> >   	if (bp->type == ttm_bo_type_device)
> >   		bo->flags &= ~AMDGPU_GEM_CREATE_CPU_ACCESS_REQUIRED;
> > +	INIT_LIST_HEAD(&bo->bo);
> > +
> > +	spin_lock(&ttm_bo_glob.lru_lock);
> > +	list_add_tail(&bo->bo, &adev->device_bo_list);
> > +	spin_unlock(&ttm_bo_glob.lru_lock);
> > +
> >   	return 0;
> >   fail_unreserve:
> > diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_object.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_object.h
> > index 9ac3756..5ae8555 100644
> > --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_object.h
> > +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_object.h
> > @@ -110,6 +110,8 @@ struct amdgpu_bo {
> >   	struct list_head		shadow_list;
> >   	struct kgd_mem                  *kfd_bo;
> > +
> > +	struct list_head		bo;
> >   };
> >   static inline struct amdgpu_bo *ttm_to_amdgpu_bo(struct ttm_buffer_object *tbo)
>
Andrey Grodzovsky Jan. 19, 2021, 9:21 p.m. UTC | #4
On 1/19/21 8:45 AM, Daniel Vetter wrote:
> On Tue, Jan 19, 2021 at 09:48:03AM +0100, Christian König wrote:
>> Am 18.01.21 um 22:01 schrieb Andrey Grodzovsky:
>>> Handle all DMA IOMMU gropup related dependencies before the
>>> group is removed.
>>>
>>> Signed-off-by: Andrey Grodzovsky <andrey.grodzovsky@amd.com>
>>> ---
>>>    drivers/gpu/drm/amd/amdgpu/amdgpu.h        |  5 ++++
>>>    drivers/gpu/drm/amd/amdgpu/amdgpu_device.c | 46 ++++++++++++++++++++++++++++++
>>>    drivers/gpu/drm/amd/amdgpu/amdgpu_gart.c   |  2 +-
>>>    drivers/gpu/drm/amd/amdgpu/amdgpu_gart.h   |  1 +
>>>    drivers/gpu/drm/amd/amdgpu/amdgpu_object.c | 10 +++++++
>>>    drivers/gpu/drm/amd/amdgpu/amdgpu_object.h |  2 ++
>>>    6 files changed, 65 insertions(+), 1 deletion(-)
>>>
>>> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu.h b/drivers/gpu/drm/amd/amdgpu/amdgpu.h
>>> index 478a7d8..2953420 100644
>>> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu.h
>>> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu.h
>>> @@ -51,6 +51,7 @@
>>>    #include <linux/dma-fence.h>
>>>    #include <linux/pci.h>
>>>    #include <linux/aer.h>
>>> +#include <linux/notifier.h>
>>>    #include <drm/ttm/ttm_bo_api.h>
>>>    #include <drm/ttm/ttm_bo_driver.h>
>>> @@ -1041,6 +1042,10 @@ struct amdgpu_device {
>>>    	bool                            in_pci_err_recovery;
>>>    	struct pci_saved_state          *pci_state;
>>> +
>>> +	struct notifier_block		nb;
>>> +	struct blocking_notifier_head	notifier;
>>> +	struct list_head		device_bo_list;
>>>    };
>>>    static inline struct amdgpu_device *drm_to_adev(struct drm_device *ddev)
>>> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c
>>> index 45e23e3..e99f4f1 100644
>>> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c
>>> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c
>>> @@ -70,6 +70,8 @@
>>>    #include <drm/task_barrier.h>
>>>    #include <linux/pm_runtime.h>
>>> +#include <linux/iommu.h>
>>> +
>>>    MODULE_FIRMWARE("amdgpu/vega10_gpu_info.bin");
>>>    MODULE_FIRMWARE("amdgpu/vega12_gpu_info.bin");
>>>    MODULE_FIRMWARE("amdgpu/raven_gpu_info.bin");
>>> @@ -3200,6 +3202,39 @@ static const struct attribute *amdgpu_dev_attributes[] = {
>>>    };
>>> +static int amdgpu_iommu_group_notifier(struct notifier_block *nb,
>>> +				     unsigned long action, void *data)
>>> +{
>>> +	struct amdgpu_device *adev = container_of(nb, struct amdgpu_device, nb);
>>> +	struct amdgpu_bo *bo = NULL;
>>> +
>>> +	/*
>>> +	 * Following is a set of IOMMU group dependencies taken care of before
>>> +	 * device's IOMMU group is removed
>>> +	 */
>>> +	if (action == IOMMU_GROUP_NOTIFY_DEL_DEVICE) {
>>> +
>>> +		spin_lock(&ttm_bo_glob.lru_lock);
>>> +		list_for_each_entry(bo, &adev->device_bo_list, bo) {
>>> +			if (bo->tbo.ttm)
>>> +				ttm_tt_unpopulate(bo->tbo.bdev, bo->tbo.ttm);
>>> +		}
>>> +		spin_unlock(&ttm_bo_glob.lru_lock);
>> That approach won't work. ttm_tt_unpopulate() might sleep on an IOMMU lock.
>>
>> You need to use a mutex here or even better make sure you can access the
>> device_bo_list without a lock in this moment.
> I'd also be worried about the notifier mutex getting really badly in the
> way.
>
> Plus I'm worried why we even need this, it sounds a bit like papering over
> the iommu subsystem. Assuming we clean up all our iommu mappings in our
> device hotunplug/unload code, why do we still need to have an additional
> iommu notifier on top, with all kinds of additional headaches? The iommu
> shouldn't clean up before the devices in its group have cleaned up.
>
> I think we need more info here on what the exact problem is first.
> -Daniel


Originally I experienced the crash below on an IOMMU-enabled device; it happens
after the device is removed from the PCI topology,
while shutting down the user client holding the last reference to the drm device file (X
in my case).
The crash happens because, by the time I get to this point, the struct device->iommu_group
pointer is already NULL,
since the IOMMU group for the device is unset during PCI removal. So
this contradicts what you said above —
that the IOMMU shouldn't clean up before the devices in its group have cleaned up.
So instead of guessing where the right place is to put all the IOMMU-related
cleanups, it makes sense
to get a notification from the IOMMU subsystem in the form of the
IOMMU_GROUP_NOTIFY_DEL_DEVICE event
and do all the relevant cleanups there.

Andrey


[  123.810074 <   28.126960>] BUG: kernel NULL pointer dereference, address: 
00000000000000c8
[  123.810080 <    0.000006>] #PF: supervisor read access in kernel mode
[  123.810082 <    0.000002>] #PF: error_code(0x0000) - not-present page
[  123.810085 <    0.000003>] PGD 0 P4D 0
[  123.810089 <    0.000004>] Oops: 0000 [#1] SMP NOPTI
[  123.810094 <    0.000005>] CPU: 5 PID: 1418 Comm: Xorg:shlo4 Tainted: 
G           O      5.9.0-rc2-dev+ #59
[  123.810096 <    0.000002>] Hardware name: System manufacturer System Product 
Name/PRIME X470-PRO, BIOS 4406 02/28/2019
[  123.810105 <    0.000009>] *RIP: 0010:iommu_get_dma_domain*+0x10/0x20
[  123.810108 <    0.000003>] Code: b0 48 c7 87 98 00 00 00 00 00 00 00 31 c0 c3 
b8 f4 ff ff ff eb a6 0f 1f 40 00 0f 1f 44 00 00 48 8b 87 d0 02 00 00 55 48 89 e5 
<48> 8b 80 c8 00 00 00 5d c3 0f 1f 80 00 00 00 00 0f 1f 44 00 00 48
[  123.810111 <    0.000003>] RSP: 0018:ffffa2e201f7f980 EFLAGS: 00010246
[  123.810114 <    0.000003>] RAX: 0000000000000000 RBX: 0000000000001000 RCX: 
0000000000000000
[  123.810116 <    0.000002>] RDX: 0000000000001000 RSI: 00000000bf5cb000 RDI: 
ffff93c259dc60b0
[  123.810117 <    0.000001>] RBP: ffffa2e201f7f980 R08: 0000000000000000 R09: 
0000000000000000
[  123.810119 <    0.000002>] R10: ffffa2e201f7faf0 R11: 0000000000000001 R12: 
00000000bf5cb000
[  123.810121 <    0.000002>] R13: 0000000000001000 R14: ffff93c24cef9c50 R15: 
ffff93c256c05688
[  123.810124 <    0.000003>] FS:  00007f5e5e8d3700(0000) 
GS:ffff93c25e940000(0000) knlGS:0000000000000000
[  123.810126 <    0.000002>] CS:  0010 DS: 0000 ES: 0000 CR0: 0000000080050033
[  123.810128 <    0.000002>] CR2: 00000000000000c8 CR3: 000000027fe0a000 CR4: 
00000000003506e0
[  123.810130 <    0.000002>] Call Trace:
[  123.810136 <    0.000006>]  __iommu_dma_unmap+0x2e/0x100
[  123.810141 <    0.000005>]  ? kfree+0x389/0x3a0
[  123.810144 <    0.000003>]  iommu_dma_unmap_page+0xe/0x10
[  123.810149 <    0.000005>] dma_unmap_page_attrs+0x4d/0xf0
[  123.810159 <    0.000010>]  ? ttm_bo_del_from_lru+0x8e/0xb0 [ttm]
[  123.810165 <    0.000006>] ttm_unmap_and_unpopulate_pages+0x8e/0xc0 [ttm]
[  123.810252 <    0.000087>] amdgpu_ttm_tt_unpopulate+0xaa/0xd0 [amdgpu]
[  123.810258 <    0.000006>]  ttm_tt_unpopulate+0x59/0x70 [ttm]
[  123.810264 <    0.000006>]  ttm_tt_destroy+0x6a/0x70 [ttm]
[  123.810270 <    0.000006>] ttm_bo_cleanup_memtype_use+0x36/0xa0 [ttm]
[  123.810276 <    0.000006>]  ttm_bo_put+0x1e7/0x400 [ttm]
[  123.810358 <    0.000082>]  amdgpu_bo_unref+0x1e/0x30 [amdgpu]
[  123.810440 <    0.000082>] amdgpu_gem_object_free+0x37/0x50 [amdgpu]
[  123.810459 <    0.000019>]  drm_gem_object_free+0x35/0x40 [drm]
[  123.810476 <    0.000017>] drm_gem_object_handle_put_unlocked+0x9d/0xd0 [drm]
[  123.810494 <    0.000018>] drm_gem_object_release_handle+0x74/0x90 [drm]
[  123.810511 <    0.000017>]  ? drm_gem_object_handle_put_unlocked+0xd0/0xd0 [drm]
[  123.810516 <    0.000005>]  idr_for_each+0x4d/0xd0
[  123.810534 <    0.000018>]  drm_gem_release+0x20/0x30 [drm]
[  123.810550 <    0.000016>]  drm_file_free+0x251/0x2a0 [drm]
[  123.810567 <    0.000017>] drm_close_helper.isra.14+0x61/0x70 [drm]
[  123.810583 <    0.000016>]  drm_release+0x6a/0xe0 [drm]
[  123.810588 <    0.000005>]  __fput+0xa2/0x250
[  123.810592 <    0.000004>]  ____fput+0xe/0x10
[  123.810595 <    0.000003>]  task_work_run+0x6c/0xa0
[  123.810600 <    0.000005>]  do_exit+0x376/0xb60
[  123.810604 <    0.000004>]  do_group_exit+0x43/0xa0
[  123.810608 <    0.000004>]  get_signal+0x18b/0x8e0
[  123.810612 <    0.000004>]  ? do_futex+0x595/0xc20
[  123.810617 <    0.000005>]  arch_do_signal+0x34/0x880
[  123.810620 <    0.000003>]  ? check_preempt_curr+0x50/0x60
[  123.810623 <    0.000003>]  ? ttwu_do_wakeup+0x1e/0x160
[  123.810626 <    0.000003>]  ? ttwu_do_activate+0x61/0x70
[  123.810630 <    0.000004>] exit_to_user_mode_prepare+0x124/0x1b0
[  123.810635 <    0.000005>] syscall_exit_to_user_mode+0x31/0x170
[  123.810639 <    0.000004>]  do_syscall_64+0x43/0x80


Andrey


>
>> Christian.
>>
>>> +
>>> +		if (adev->irq.ih.use_bus_addr)
>>> +			amdgpu_ih_ring_fini(adev, &adev->irq.ih);
>>> +		if (adev->irq.ih1.use_bus_addr)
>>> +			amdgpu_ih_ring_fini(adev, &adev->irq.ih1);
>>> +		if (adev->irq.ih2.use_bus_addr)
>>> +			amdgpu_ih_ring_fini(adev, &adev->irq.ih2);
>>> +
>>> +		amdgpu_gart_dummy_page_fini(adev);
>>> +	}
>>> +
>>> +	return NOTIFY_OK;
>>> +}
>>> +
>>> +
>>>    /**
>>>     * amdgpu_device_init - initialize the driver
>>>     *
>>> @@ -3304,6 +3339,8 @@ int amdgpu_device_init(struct amdgpu_device *adev,
>>>    	INIT_WORK(&adev->xgmi_reset_work, amdgpu_device_xgmi_reset_func);
>>> +	INIT_LIST_HEAD(&adev->device_bo_list);
>>> +
>>>    	adev->gfx.gfx_off_req_count = 1;
>>>    	adev->pm.ac_power = power_supply_is_system_supplied() > 0;
>>> @@ -3575,6 +3612,15 @@ int amdgpu_device_init(struct amdgpu_device *adev,
>>>    	if (amdgpu_device_cache_pci_state(adev->pdev))
>>>    		pci_restore_state(pdev);
>>> +	BLOCKING_INIT_NOTIFIER_HEAD(&adev->notifier);
>>> +	adev->nb.notifier_call = amdgpu_iommu_group_notifier;
>>> +
>>> +	if (adev->dev->iommu_group) {
>>> +		r = iommu_group_register_notifier(adev->dev->iommu_group, &adev->nb);
>>> +		if (r)
>>> +			goto failed;
>>> +	}
>>> +
>>>    	return 0;
>>>    failed:
>>> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_gart.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_gart.c
>>> index 0db9330..486ad6d 100644
>>> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_gart.c
>>> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_gart.c
>>> @@ -92,7 +92,7 @@ static int amdgpu_gart_dummy_page_init(struct amdgpu_device *adev)
>>>     *
>>>     * Frees the dummy page used by the driver (all asics).
>>>     */
>>> -static void amdgpu_gart_dummy_page_fini(struct amdgpu_device *adev)
>>> +void amdgpu_gart_dummy_page_fini(struct amdgpu_device *adev)
>>>    {
>>>    	if (!adev->dummy_page_addr)
>>>    		return;
>>> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_gart.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_gart.h
>>> index afa2e28..5678d9c 100644
>>> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_gart.h
>>> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_gart.h
>>> @@ -61,6 +61,7 @@ int amdgpu_gart_table_vram_pin(struct amdgpu_device *adev);
>>>    void amdgpu_gart_table_vram_unpin(struct amdgpu_device *adev);
>>>    int amdgpu_gart_init(struct amdgpu_device *adev);
>>>    void amdgpu_gart_fini(struct amdgpu_device *adev);
>>> +void amdgpu_gart_dummy_page_fini(struct amdgpu_device *adev);
>>>    int amdgpu_gart_unbind(struct amdgpu_device *adev, uint64_t offset,
>>>    		       int pages);
>>>    int amdgpu_gart_map(struct amdgpu_device *adev, uint64_t offset,
>>> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_object.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_object.c
>>> index 6cc9919..4a1de69 100644
>>> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_object.c
>>> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_object.c
>>> @@ -94,6 +94,10 @@ static void amdgpu_bo_destroy(struct ttm_buffer_object *tbo)
>>>    	}
>>>    	amdgpu_bo_unref(&bo->parent);
>>> +	spin_lock(&ttm_bo_glob.lru_lock);
>>> +	list_del(&bo->bo);
>>> +	spin_unlock(&ttm_bo_glob.lru_lock);
>>> +
>>>    	kfree(bo->metadata);
>>>    	kfree(bo);
>>>    }
>>> @@ -613,6 +617,12 @@ static int amdgpu_bo_do_create(struct amdgpu_device *adev,
>>>    	if (bp->type == ttm_bo_type_device)
>>>    		bo->flags &= ~AMDGPU_GEM_CREATE_CPU_ACCESS_REQUIRED;
>>> +	INIT_LIST_HEAD(&bo->bo);
>>> +
>>> +	spin_lock(&ttm_bo_glob.lru_lock);
>>> +	list_add_tail(&bo->bo, &adev->device_bo_list);
>>> +	spin_unlock(&ttm_bo_glob.lru_lock);
>>> +
>>>    	return 0;
>>>    fail_unreserve:
>>> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_object.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_object.h
>>> index 9ac3756..5ae8555 100644
>>> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_object.h
>>> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_object.h
>>> @@ -110,6 +110,8 @@ struct amdgpu_bo {
>>>    	struct list_head		shadow_list;
>>>    	struct kgd_mem                  *kfd_bo;
>>> +
>>> +	struct list_head		bo;
>>>    };
>>>    static inline struct amdgpu_bo *ttm_to_amdgpu_bo(struct ttm_buffer_object *tbo)
Daniel Vetter Jan. 19, 2021, 10:01 p.m. UTC | #5
On Tue, Jan 19, 2021 at 10:22 PM Andrey Grodzovsky
<Andrey.Grodzovsky@amd.com> wrote:
>
>
> On 1/19/21 8:45 AM, Daniel Vetter wrote:
>
> On Tue, Jan 19, 2021 at 09:48:03AM +0100, Christian König wrote:
>
> Am 18.01.21 um 22:01 schrieb Andrey Grodzovsky:
>
> Handle all DMA IOMMU group related dependencies before the
> group is removed.
>
> Signed-off-by: Andrey Grodzovsky <andrey.grodzovsky@amd.com>
> ---
>   drivers/gpu/drm/amd/amdgpu/amdgpu.h        |  5 ++++
>   drivers/gpu/drm/amd/amdgpu/amdgpu_device.c | 46 ++++++++++++++++++++++++++++++
>   drivers/gpu/drm/amd/amdgpu/amdgpu_gart.c   |  2 +-
>   drivers/gpu/drm/amd/amdgpu/amdgpu_gart.h   |  1 +
>   drivers/gpu/drm/amd/amdgpu/amdgpu_object.c | 10 +++++++
>   drivers/gpu/drm/amd/amdgpu/amdgpu_object.h |  2 ++
>   6 files changed, 65 insertions(+), 1 deletion(-)
>
> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu.h b/drivers/gpu/drm/amd/amdgpu/amdgpu.h
> index 478a7d8..2953420 100644
> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu.h
> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu.h
> @@ -51,6 +51,7 @@
>   #include <linux/dma-fence.h>
>   #include <linux/pci.h>
>   #include <linux/aer.h>
> +#include <linux/notifier.h>
>   #include <drm/ttm/ttm_bo_api.h>
>   #include <drm/ttm/ttm_bo_driver.h>
> @@ -1041,6 +1042,10 @@ struct amdgpu_device {
>   bool                            in_pci_err_recovery;
>   struct pci_saved_state          *pci_state;
> +
> + struct notifier_block nb;
> + struct blocking_notifier_head notifier;
> + struct list_head device_bo_list;
>   };
>   static inline struct amdgpu_device *drm_to_adev(struct drm_device *ddev)
> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c
> index 45e23e3..e99f4f1 100644
> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c
> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c
> @@ -70,6 +70,8 @@
>   #include <drm/task_barrier.h>
>   #include <linux/pm_runtime.h>
> +#include <linux/iommu.h>
> +
>   MODULE_FIRMWARE("amdgpu/vega10_gpu_info.bin");
>   MODULE_FIRMWARE("amdgpu/vega12_gpu_info.bin");
>   MODULE_FIRMWARE("amdgpu/raven_gpu_info.bin");
> @@ -3200,6 +3202,39 @@ static const struct attribute *amdgpu_dev_attributes[] = {
>   };
> +static int amdgpu_iommu_group_notifier(struct notifier_block *nb,
> +     unsigned long action, void *data)
> +{
> + struct amdgpu_device *adev = container_of(nb, struct amdgpu_device, nb);
> + struct amdgpu_bo *bo = NULL;
> +
> + /*
> + * Following is a set of IOMMU group dependencies taken care of before
> + * device's IOMMU group is removed
> + */
> + if (action == IOMMU_GROUP_NOTIFY_DEL_DEVICE) {
> +
> + spin_lock(&ttm_bo_glob.lru_lock);
> + list_for_each_entry(bo, &adev->device_bo_list, bo) {
> + if (bo->tbo.ttm)
> + ttm_tt_unpopulate(bo->tbo.bdev, bo->tbo.ttm);
> + }
> + spin_unlock(&ttm_bo_glob.lru_lock);
>
> That approach won't work. ttm_tt_unpopulate() might sleep on an IOMMU lock.
>
> You need to use a mutex here or even better make sure you can access the
> device_bo_list without a lock in this moment.
>
> I'd also be worried about the notifier mutex getting really badly in the
> way.
>
> Plus I'm worried why we even need this, it sounds a bit like papering over
> the iommu subsystem. Assuming we clean up all our iommu mappings in our
> device hotunplug/unload code, why do we still need to have an additional
> iommu notifier on top, with all kinds of additional headaches? The iommu
> shouldn't clean up before the devices in its group have cleaned up.
>
> I think we need more info here on what the exact problem is first.
> -Daniel
>
>
> Originally I experienced the  crash bellow on IOMMU enabled device, it happens post device removal from PCI topology -
> during shutting down of user client holding last reference to drm device file (X in my case).
> The crash is because by the time I get to this point struct device->iommu_group pointer is NULL
> already since the IOMMU group for the device is unset during PCI removal. So this contradicts what you said above
> that the iommu shouldn't clean up before the devices in its group have cleaned up.
> So instead of guessing when is the right place to place all IOMMU related cleanups it makes sense
> to get notification from IOMMU subsystem in the form of event IOMMU_GROUP_NOTIFY_DEL_DEVICE
> and use that place to do all the relevant cleanups.

Yeah that goes boom, but you shouldn't need this special iommu cleanup
handler. Making sure that all the dma-api mappings are gone needs to
be done as part of the device hotunplug, you can't delay that to the
last drm_device cleanup.

So I most of the patch here with pulling that out (should be outright
removed from the final release code even) is good, just not yet how
you call that new code. Probably these bits (aside from walking all
buffers and unpopulating the tt) should be done from the early_free
callback you're adding.

Also what I just realized: For normal unload you need to make sure the
hw is actually stopped first, before we unmap buffers. Otherwise
driver unload will likely result in wedged hw, probably not what you
want for debugging.
-Daniel

> Andrey
>
>
> [  123.810074 <   28.126960>] BUG: kernel NULL pointer dereference, address: 00000000000000c8
> [  123.810080 <    0.000006>] #PF: supervisor read access in kernel mode
> [  123.810082 <    0.000002>] #PF: error_code(0x0000) - not-present page
> [  123.810085 <    0.000003>] PGD 0 P4D 0
> [  123.810089 <    0.000004>] Oops: 0000 [#1] SMP NOPTI
> [  123.810094 <    0.000005>] CPU: 5 PID: 1418 Comm: Xorg:shlo4 Tainted: G           O      5.9.0-rc2-dev+ #59
> [  123.810096 <    0.000002>] Hardware name: System manufacturer System Product Name/PRIME X470-PRO, BIOS 4406 02/28/2019
> [  123.810105 <    0.000009>] RIP: 0010:iommu_get_dma_domain+0x10/0x20
> [  123.810108 <    0.000003>] Code: b0 48 c7 87 98 00 00 00 00 00 00 00 31 c0 c3 b8 f4 ff ff ff eb a6 0f 1f 40 00 0f 1f 44 00 00 48 8b 87 d0 02 00 00 55 48 89 e5 <48> 8b 80 c8 00 00 00 5d c3 0f 1f 80 00 00 00 00 0f 1f 44 00 00 48
> [  123.810111 <    0.000003>] RSP: 0018:ffffa2e201f7f980 EFLAGS: 00010246
> [  123.810114 <    0.000003>] RAX: 0000000000000000 RBX: 0000000000001000 RCX: 0000000000000000
> [  123.810116 <    0.000002>] RDX: 0000000000001000 RSI: 00000000bf5cb000 RDI: ffff93c259dc60b0
> [  123.810117 <    0.000001>] RBP: ffffa2e201f7f980 R08: 0000000000000000 R09: 0000000000000000
> [  123.810119 <    0.000002>] R10: ffffa2e201f7faf0 R11: 0000000000000001 R12: 00000000bf5cb000
> [  123.810121 <    0.000002>] R13: 0000000000001000 R14: ffff93c24cef9c50 R15: ffff93c256c05688
> [  123.810124 <    0.000003>] FS:  00007f5e5e8d3700(0000) GS:ffff93c25e940000(0000) knlGS:0000000000000000
> [  123.810126 <    0.000002>] CS:  0010 DS: 0000 ES: 0000 CR0: 0000000080050033
> [  123.810128 <    0.000002>] CR2: 00000000000000c8 CR3: 000000027fe0a000 CR4: 00000000003506e0
> [  123.810130 <    0.000002>] Call Trace:
> [  123.810136 <    0.000006>]  __iommu_dma_unmap+0x2e/0x100
> [  123.810141 <    0.000005>]  ? kfree+0x389/0x3a0
> [  123.810144 <    0.000003>]  iommu_dma_unmap_page+0xe/0x10
> [  123.810149 <    0.000005>] dma_unmap_page_attrs+0x4d/0xf0
> [  123.810159 <    0.000010>]  ? ttm_bo_del_from_lru+0x8e/0xb0 [ttm]
> [  123.810165 <    0.000006>] ttm_unmap_and_unpopulate_pages+0x8e/0xc0 [ttm]
> [  123.810252 <    0.000087>] amdgpu_ttm_tt_unpopulate+0xaa/0xd0 [amdgpu]
> [  123.810258 <    0.000006>]  ttm_tt_unpopulate+0x59/0x70 [ttm]
> [  123.810264 <    0.000006>]  ttm_tt_destroy+0x6a/0x70 [ttm]
> [  123.810270 <    0.000006>] ttm_bo_cleanup_memtype_use+0x36/0xa0 [ttm]
> [  123.810276 <    0.000006>]  ttm_bo_put+0x1e7/0x400 [ttm]
> [  123.810358 <    0.000082>]  amdgpu_bo_unref+0x1e/0x30 [amdgpu]
> [  123.810440 <    0.000082>] amdgpu_gem_object_free+0x37/0x50 [amdgpu]
> [  123.810459 <    0.000019>]  drm_gem_object_free+0x35/0x40 [drm]
> [  123.810476 <    0.000017>] drm_gem_object_handle_put_unlocked+0x9d/0xd0 [drm]
> [  123.810494 <    0.000018>] drm_gem_object_release_handle+0x74/0x90 [drm]
> [  123.810511 <    0.000017>]  ? drm_gem_object_handle_put_unlocked+0xd0/0xd0 [drm]
> [  123.810516 <    0.000005>]  idr_for_each+0x4d/0xd0
> [  123.810534 <    0.000018>]  drm_gem_release+0x20/0x30 [drm]
> [  123.810550 <    0.000016>]  drm_file_free+0x251/0x2a0 [drm]
> [  123.810567 <    0.000017>] drm_close_helper.isra.14+0x61/0x70 [drm]
> [  123.810583 <    0.000016>]  drm_release+0x6a/0xe0 [drm]
> [  123.810588 <    0.000005>]  __fput+0xa2/0x250
> [  123.810592 <    0.000004>]  ____fput+0xe/0x10
> [  123.810595 <    0.000003>]  task_work_run+0x6c/0xa0
> [  123.810600 <    0.000005>]  do_exit+0x376/0xb60
> [  123.810604 <    0.000004>]  do_group_exit+0x43/0xa0
> [  123.810608 <    0.000004>]  get_signal+0x18b/0x8e0
> [  123.810612 <    0.000004>]  ? do_futex+0x595/0xc20
> [  123.810617 <    0.000005>]  arch_do_signal+0x34/0x880
> [  123.810620 <    0.000003>]  ? check_preempt_curr+0x50/0x60
> [  123.810623 <    0.000003>]  ? ttwu_do_wakeup+0x1e/0x160
> [  123.810626 <    0.000003>]  ? ttwu_do_activate+0x61/0x70
> [  123.810630 <    0.000004>] exit_to_user_mode_prepare+0x124/0x1b0
> [  123.810635 <    0.000005>] syscall_exit_to_user_mode+0x31/0x170
> [  123.810639 <    0.000004>]  do_syscall_64+0x43/0x80
>
>
> Andrey
>
>
>
> Christian.
>
> +
> + if (adev->irq.ih.use_bus_addr)
> + amdgpu_ih_ring_fini(adev, &adev->irq.ih);
> + if (adev->irq.ih1.use_bus_addr)
> + amdgpu_ih_ring_fini(adev, &adev->irq.ih1);
> + if (adev->irq.ih2.use_bus_addr)
> + amdgpu_ih_ring_fini(adev, &adev->irq.ih2);
> +
> + amdgpu_gart_dummy_page_fini(adev);
> + }
> +
> + return NOTIFY_OK;
> +}
> +
> +
>   /**
>    * amdgpu_device_init - initialize the driver
>    *
> @@ -3304,6 +3339,8 @@ int amdgpu_device_init(struct amdgpu_device *adev,
>   INIT_WORK(&adev->xgmi_reset_work, amdgpu_device_xgmi_reset_func);
> + INIT_LIST_HEAD(&adev->device_bo_list);
> +
>   adev->gfx.gfx_off_req_count = 1;
>   adev->pm.ac_power = power_supply_is_system_supplied() > 0;
> @@ -3575,6 +3612,15 @@ int amdgpu_device_init(struct amdgpu_device *adev,
>   if (amdgpu_device_cache_pci_state(adev->pdev))
>   pci_restore_state(pdev);
> + BLOCKING_INIT_NOTIFIER_HEAD(&adev->notifier);
> + adev->nb.notifier_call = amdgpu_iommu_group_notifier;
> +
> + if (adev->dev->iommu_group) {
> + r = iommu_group_register_notifier(adev->dev->iommu_group, &adev->nb);
> + if (r)
> + goto failed;
> + }
> +
>   return 0;
>   failed:
> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_gart.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_gart.c
> index 0db9330..486ad6d 100644
> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_gart.c
> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_gart.c
> @@ -92,7 +92,7 @@ static int amdgpu_gart_dummy_page_init(struct amdgpu_device *adev)
>    *
>    * Frees the dummy page used by the driver (all asics).
>    */
> -static void amdgpu_gart_dummy_page_fini(struct amdgpu_device *adev)
> +void amdgpu_gart_dummy_page_fini(struct amdgpu_device *adev)
>   {
>   if (!adev->dummy_page_addr)
>   return;
> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_gart.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_gart.h
> index afa2e28..5678d9c 100644
> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_gart.h
> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_gart.h
> @@ -61,6 +61,7 @@ int amdgpu_gart_table_vram_pin(struct amdgpu_device *adev);
>   void amdgpu_gart_table_vram_unpin(struct amdgpu_device *adev);
>   int amdgpu_gart_init(struct amdgpu_device *adev);
>   void amdgpu_gart_fini(struct amdgpu_device *adev);
> +void amdgpu_gart_dummy_page_fini(struct amdgpu_device *adev);
>   int amdgpu_gart_unbind(struct amdgpu_device *adev, uint64_t offset,
>         int pages);
>   int amdgpu_gart_map(struct amdgpu_device *adev, uint64_t offset,
> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_object.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_object.c
> index 6cc9919..4a1de69 100644
> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_object.c
> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_object.c
> @@ -94,6 +94,10 @@ static void amdgpu_bo_destroy(struct ttm_buffer_object *tbo)
>   }
>   amdgpu_bo_unref(&bo->parent);
> + spin_lock(&ttm_bo_glob.lru_lock);
> + list_del(&bo->bo);
> + spin_unlock(&ttm_bo_glob.lru_lock);
> +
>   kfree(bo->metadata);
>   kfree(bo);
>   }
> @@ -613,6 +617,12 @@ static int amdgpu_bo_do_create(struct amdgpu_device *adev,
>   if (bp->type == ttm_bo_type_device)
>   bo->flags &= ~AMDGPU_GEM_CREATE_CPU_ACCESS_REQUIRED;
> + INIT_LIST_HEAD(&bo->bo);
> +
> + spin_lock(&ttm_bo_glob.lru_lock);
> + list_add_tail(&bo->bo, &adev->device_bo_list);
> + spin_unlock(&ttm_bo_glob.lru_lock);
> +
>   return 0;
>   fail_unreserve:
> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_object.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_object.h
> index 9ac3756..5ae8555 100644
> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_object.h
> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_object.h
> @@ -110,6 +110,8 @@ struct amdgpu_bo {
>   struct list_head shadow_list;
>   struct kgd_mem                  *kfd_bo;
> +
> + struct list_head bo;
>   };
>   static inline struct amdgpu_bo *ttm_to_amdgpu_bo(struct ttm_buffer_object *tbo)
Andrey Grodzovsky Jan. 20, 2021, 4:21 a.m. UTC | #6
On 1/19/21 5:01 PM, Daniel Vetter wrote:
> On Tue, Jan 19, 2021 at 10:22 PM Andrey Grodzovsky
> <Andrey.Grodzovsky@amd.com> wrote:
>>
>> On 1/19/21 8:45 AM, Daniel Vetter wrote:
>>
>> On Tue, Jan 19, 2021 at 09:48:03AM +0100, Christian König wrote:
>>
>> Am 18.01.21 um 22:01 schrieb Andrey Grodzovsky:
>>
>> Handle all DMA IOMMU group related dependencies before the
>> group is removed.
>>
>> Signed-off-by: Andrey Grodzovsky <andrey.grodzovsky@amd.com>
>> ---
>>    drivers/gpu/drm/amd/amdgpu/amdgpu.h        |  5 ++++
>>    drivers/gpu/drm/amd/amdgpu/amdgpu_device.c | 46 ++++++++++++++++++++++++++++++
>>    drivers/gpu/drm/amd/amdgpu/amdgpu_gart.c   |  2 +-
>>    drivers/gpu/drm/amd/amdgpu/amdgpu_gart.h   |  1 +
>>    drivers/gpu/drm/amd/amdgpu/amdgpu_object.c | 10 +++++++
>>    drivers/gpu/drm/amd/amdgpu/amdgpu_object.h |  2 ++
>>    6 files changed, 65 insertions(+), 1 deletion(-)
>>
>> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu.h b/drivers/gpu/drm/amd/amdgpu/amdgpu.h
>> index 478a7d8..2953420 100644
>> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu.h
>> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu.h
>> @@ -51,6 +51,7 @@
>>    #include <linux/dma-fence.h>
>>    #include <linux/pci.h>
>>    #include <linux/aer.h>
>> +#include <linux/notifier.h>
>>    #include <drm/ttm/ttm_bo_api.h>
>>    #include <drm/ttm/ttm_bo_driver.h>
>> @@ -1041,6 +1042,10 @@ struct amdgpu_device {
>>    bool                            in_pci_err_recovery;
>>    struct pci_saved_state          *pci_state;
>> +
>> + struct notifier_block nb;
>> + struct blocking_notifier_head notifier;
>> + struct list_head device_bo_list;
>>    };
>>    static inline struct amdgpu_device *drm_to_adev(struct drm_device *ddev)
>> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c
>> index 45e23e3..e99f4f1 100644
>> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c
>> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c
>> @@ -70,6 +70,8 @@
>>    #include <drm/task_barrier.h>
>>    #include <linux/pm_runtime.h>
>> +#include <linux/iommu.h>
>> +
>>    MODULE_FIRMWARE("amdgpu/vega10_gpu_info.bin");
>>    MODULE_FIRMWARE("amdgpu/vega12_gpu_info.bin");
>>    MODULE_FIRMWARE("amdgpu/raven_gpu_info.bin");
>> @@ -3200,6 +3202,39 @@ static const struct attribute *amdgpu_dev_attributes[] = {
>>    };
>> +static int amdgpu_iommu_group_notifier(struct notifier_block *nb,
>> +     unsigned long action, void *data)
>> +{
>> + struct amdgpu_device *adev = container_of(nb, struct amdgpu_device, nb);
>> + struct amdgpu_bo *bo = NULL;
>> +
>> + /*
>> + * Following is a set of IOMMU group dependencies taken care of before
>> + * device's IOMMU group is removed
>> + */
>> + if (action == IOMMU_GROUP_NOTIFY_DEL_DEVICE) {
>> +
>> + spin_lock(&ttm_bo_glob.lru_lock);
>> + list_for_each_entry(bo, &adev->device_bo_list, bo) {
>> + if (bo->tbo.ttm)
>> + ttm_tt_unpopulate(bo->tbo.bdev, bo->tbo.ttm);
>> + }
>> + spin_unlock(&ttm_bo_glob.lru_lock);
>>
>> That approach won't work. ttm_tt_unpopulate() might sleep on an IOMMU lock.
>>
>> You need to use a mutex here or even better make sure you can access the
>> device_bo_list without a lock in this moment.
>>
>> I'd also be worried about the notifier mutex getting really badly in the
>> way.
>>
>> Plus I'm worried why we even need this, it sounds a bit like papering over
>> the iommu subsystem. Assuming we clean up all our iommu mappings in our
>> device hotunplug/unload code, why do we still need to have an additional
>> iommu notifier on top, with all kinds of additional headaches? The iommu
>> shouldn't clean up before the devices in its group have cleaned up.
>>
>> I think we need more info here on what the exact problem is first.
>> -Daniel
>>
>>
>> Originally I experienced the  crash bellow on IOMMU enabled device, it happens post device removal from PCI topology -
>> during shutting down of user client holding last reference to drm device file (X in my case).
>> The crash is because by the time I get to this point struct device->iommu_group pointer is NULL
>> already since the IOMMU group for the device is unset during PCI removal. So this contradicts what you said above
>> that the iommu shouldn't clean up before the devices in its group have cleaned up.
>> So instead of guessing when is the right place to place all IOMMU related cleanups it makes sense
>> to get notification from IOMMU subsystem in the form of event IOMMU_GROUP_NOTIFY_DEL_DEVICE
>> and use that place to do all the relevant cleanups.
> Yeah that goes boom, but you shouldn't need this special iommu cleanup
> handler. Making sure that all the dma-api mappings are gone needs to
> be done as part of the device hotunplug, you can't delay that to the
> last drm_device cleanup.
>
> So I most of the patch here with pulling that out (should be outright
> removed from the final release code even) is good, just not yet how
> you call that new code. Probably these bits (aside from walking all
> buffers and unpopulating the tt) should be done from the early_free
> callback you're adding.
>
> Also what I just realized: For normal unload you need to make sure the
> hw is actually stopped first, before we unmap buffers. Otherwise
> driver unload will likely result in wedged hw, probably not what you
> want for debugging.
> -Daniel

Since device removal from the IOMMU group — and this hook in particular —
takes place before the call to amdgpu_pci_remove, it essentially means
that for the IOMMU use case the entire amdgpu_device_fini_hw function
should be called here to stop the HW, instead of from amdgpu_pci_remove.

Looking at this from another perspective, AFAIK on each new device probing
either due to PCI bus rescan or driver reload we are resetting the ASIC before doing
any init operations (assuming we successfully gained MMIO access) and so maybe
your concern is not an issue ?

Andrey


>
>> Andrey
>>
>>
>> [  123.810074 <   28.126960>] BUG: kernel NULL pointer dereference, address: 00000000000000c8
>> [  123.810080 <    0.000006>] #PF: supervisor read access in kernel mode
>> [  123.810082 <    0.000002>] #PF: error_code(0x0000) - not-present page
>> [  123.810085 <    0.000003>] PGD 0 P4D 0
>> [  123.810089 <    0.000004>] Oops: 0000 [#1] SMP NOPTI
>> [  123.810094 <    0.000005>] CPU: 5 PID: 1418 Comm: Xorg:shlo4 Tainted: G           O      5.9.0-rc2-dev+ #59
>> [  123.810096 <    0.000002>] Hardware name: System manufacturer System Product Name/PRIME X470-PRO, BIOS 4406 02/28/2019
>> [  123.810105 <    0.000009>] RIP: 0010:iommu_get_dma_domain+0x10/0x20
>> [  123.810108 <    0.000003>] Code: b0 48 c7 87 98 00 00 00 00 00 00 00 31 c0 c3 b8 f4 ff ff ff eb a6 0f 1f 40 00 0f 1f 44 00 00 48 8b 87 d0 02 00 00 55 48 89 e5 <48> 8b 80 c8 00 00 00 5d c3 0f 1f 80 00 00 00 00 0f 1f 44 00 00 48
>> [  123.810111 <    0.000003>] RSP: 0018:ffffa2e201f7f980 EFLAGS: 00010246
>> [  123.810114 <    0.000003>] RAX: 0000000000000000 RBX: 0000000000001000 RCX: 0000000000000000
>> [  123.810116 <    0.000002>] RDX: 0000000000001000 RSI: 00000000bf5cb000 RDI: ffff93c259dc60b0
>> [  123.810117 <    0.000001>] RBP: ffffa2e201f7f980 R08: 0000000000000000 R09: 0000000000000000
>> [  123.810119 <    0.000002>] R10: ffffa2e201f7faf0 R11: 0000000000000001 R12: 00000000bf5cb000
>> [  123.810121 <    0.000002>] R13: 0000000000001000 R14: ffff93c24cef9c50 R15: ffff93c256c05688
>> [  123.810124 <    0.000003>] FS:  00007f5e5e8d3700(0000) GS:ffff93c25e940000(0000) knlGS:0000000000000000
>> [  123.810126 <    0.000002>] CS:  0010 DS: 0000 ES: 0000 CR0: 0000000080050033
>> [  123.810128 <    0.000002>] CR2: 00000000000000c8 CR3: 000000027fe0a000 CR4: 00000000003506e0
>> [  123.810130 <    0.000002>] Call Trace:
>> [  123.810136 <    0.000006>]  __iommu_dma_unmap+0x2e/0x100
>> [  123.810141 <    0.000005>]  ? kfree+0x389/0x3a0
>> [  123.810144 <    0.000003>]  iommu_dma_unmap_page+0xe/0x10
>> [  123.810149 <    0.000005>] dma_unmap_page_attrs+0x4d/0xf0
>> [  123.810159 <    0.000010>]  ? ttm_bo_del_from_lru+0x8e/0xb0 [ttm]
>> [  123.810165 <    0.000006>] ttm_unmap_and_unpopulate_pages+0x8e/0xc0 [ttm]
>> [  123.810252 <    0.000087>] amdgpu_ttm_tt_unpopulate+0xaa/0xd0 [amdgpu]
>> [  123.810258 <    0.000006>]  ttm_tt_unpopulate+0x59/0x70 [ttm]
>> [  123.810264 <    0.000006>]  ttm_tt_destroy+0x6a/0x70 [ttm]
>> [  123.810270 <    0.000006>] ttm_bo_cleanup_memtype_use+0x36/0xa0 [ttm]
>> [  123.810276 <    0.000006>]  ttm_bo_put+0x1e7/0x400 [ttm]
>> [  123.810358 <    0.000082>]  amdgpu_bo_unref+0x1e/0x30 [amdgpu]
>> [  123.810440 <    0.000082>] amdgpu_gem_object_free+0x37/0x50 [amdgpu]
>> [  123.810459 <    0.000019>]  drm_gem_object_free+0x35/0x40 [drm]
>> [  123.810476 <    0.000017>] drm_gem_object_handle_put_unlocked+0x9d/0xd0 [drm]
>> [  123.810494 <    0.000018>] drm_gem_object_release_handle+0x74/0x90 [drm]
>> [  123.810511 <    0.000017>]  ? drm_gem_object_handle_put_unlocked+0xd0/0xd0 [drm]
>> [  123.810516 <    0.000005>]  idr_for_each+0x4d/0xd0
>> [  123.810534 <    0.000018>]  drm_gem_release+0x20/0x30 [drm]
>> [  123.810550 <    0.000016>]  drm_file_free+0x251/0x2a0 [drm]
>> [  123.810567 <    0.000017>] drm_close_helper.isra.14+0x61/0x70 [drm]
>> [  123.810583 <    0.000016>]  drm_release+0x6a/0xe0 [drm]
>> [  123.810588 <    0.000005>]  __fput+0xa2/0x250
>> [  123.810592 <    0.000004>]  ____fput+0xe/0x10
>> [  123.810595 <    0.000003>]  task_work_run+0x6c/0xa0
>> [  123.810600 <    0.000005>]  do_exit+0x376/0xb60
>> [  123.810604 <    0.000004>]  do_group_exit+0x43/0xa0
>> [  123.810608 <    0.000004>]  get_signal+0x18b/0x8e0
>> [  123.810612 <    0.000004>]  ? do_futex+0x595/0xc20
>> [  123.810617 <    0.000005>]  arch_do_signal+0x34/0x880
>> [  123.810620 <    0.000003>]  ? check_preempt_curr+0x50/0x60
>> [  123.810623 <    0.000003>]  ? ttwu_do_wakeup+0x1e/0x160
>> [  123.810626 <    0.000003>]  ? ttwu_do_activate+0x61/0x70
>> [  123.810630 <    0.000004>] exit_to_user_mode_prepare+0x124/0x1b0
>> [  123.810635 <    0.000005>] syscall_exit_to_user_mode+0x31/0x170
>> [  123.810639 <    0.000004>]  do_syscall_64+0x43/0x80
>>
>>
>> Andrey
>>
>>
>>
>> Christian.
>>
>> +
>> + if (adev->irq.ih.use_bus_addr)
>> + amdgpu_ih_ring_fini(adev, &adev->irq.ih);
>> + if (adev->irq.ih1.use_bus_addr)
>> + amdgpu_ih_ring_fini(adev, &adev->irq.ih1);
>> + if (adev->irq.ih2.use_bus_addr)
>> + amdgpu_ih_ring_fini(adev, &adev->irq.ih2);
>> +
>> + amdgpu_gart_dummy_page_fini(adev);
>> + }
>> +
>> + return NOTIFY_OK;
>> +}
>> +
>> +
>>    /**
>>     * amdgpu_device_init - initialize the driver
>>     *
>> @@ -3304,6 +3339,8 @@ int amdgpu_device_init(struct amdgpu_device *adev,
>>    INIT_WORK(&adev->xgmi_reset_work, amdgpu_device_xgmi_reset_func);
>> + INIT_LIST_HEAD(&adev->device_bo_list);
>> +
>>    adev->gfx.gfx_off_req_count = 1;
>>    adev->pm.ac_power = power_supply_is_system_supplied() > 0;
>> @@ -3575,6 +3612,15 @@ int amdgpu_device_init(struct amdgpu_device *adev,
>>    if (amdgpu_device_cache_pci_state(adev->pdev))
>>    pci_restore_state(pdev);
>> + BLOCKING_INIT_NOTIFIER_HEAD(&adev->notifier);
>> + adev->nb.notifier_call = amdgpu_iommu_group_notifier;
>> +
>> + if (adev->dev->iommu_group) {
>> + r = iommu_group_register_notifier(adev->dev->iommu_group, &adev->nb);
>> + if (r)
>> + goto failed;
>> + }
>> +
>>    return 0;
>>    failed:
>> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_gart.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_gart.c
>> index 0db9330..486ad6d 100644
>> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_gart.c
>> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_gart.c
>> @@ -92,7 +92,7 @@ static int amdgpu_gart_dummy_page_init(struct amdgpu_device *adev)
>>     *
>>     * Frees the dummy page used by the driver (all asics).
>>     */
>> -static void amdgpu_gart_dummy_page_fini(struct amdgpu_device *adev)
>> +void amdgpu_gart_dummy_page_fini(struct amdgpu_device *adev)
>>    {
>>    if (!adev->dummy_page_addr)
>>    return;
>> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_gart.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_gart.h
>> index afa2e28..5678d9c 100644
>> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_gart.h
>> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_gart.h
>> @@ -61,6 +61,7 @@ int amdgpu_gart_table_vram_pin(struct amdgpu_device *adev);
>>    void amdgpu_gart_table_vram_unpin(struct amdgpu_device *adev);
>>    int amdgpu_gart_init(struct amdgpu_device *adev);
>>    void amdgpu_gart_fini(struct amdgpu_device *adev);
>> +void amdgpu_gart_dummy_page_fini(struct amdgpu_device *adev);
>>    int amdgpu_gart_unbind(struct amdgpu_device *adev, uint64_t offset,
>>          int pages);
>>    int amdgpu_gart_map(struct amdgpu_device *adev, uint64_t offset,
>> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_object.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_object.c
>> index 6cc9919..4a1de69 100644
>> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_object.c
>> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_object.c
>> @@ -94,6 +94,10 @@ static void amdgpu_bo_destroy(struct ttm_buffer_object *tbo)
>>    }
>>    amdgpu_bo_unref(&bo->parent);
>> + spin_lock(&ttm_bo_glob.lru_lock);
>> + list_del(&bo->bo);
>> + spin_unlock(&ttm_bo_glob.lru_lock);
>> +
>>    kfree(bo->metadata);
>>    kfree(bo);
>>    }
>> @@ -613,6 +617,12 @@ static int amdgpu_bo_do_create(struct amdgpu_device *adev,
>>    if (bp->type == ttm_bo_type_device)
>>    bo->flags &= ~AMDGPU_GEM_CREATE_CPU_ACCESS_REQUIRED;
>> + INIT_LIST_HEAD(&bo->bo);
>> +
>> + spin_lock(&ttm_bo_glob.lru_lock);
>> + list_add_tail(&bo->bo, &adev->device_bo_list);
>> + spin_unlock(&ttm_bo_glob.lru_lock);
>> +
>>    return 0;
>>    fail_unreserve:
>> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_object.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_object.h
>> index 9ac3756..5ae8555 100644
>> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_object.h
>> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_object.h
>> @@ -110,6 +110,8 @@ struct amdgpu_bo {
>>    struct list_head shadow_list;
>>    struct kgd_mem                  *kfd_bo;
>> +
>> + struct list_head bo;
>>    };
>>    static inline struct amdgpu_bo *ttm_to_amdgpu_bo(struct ttm_buffer_object *tbo)
>
>
Andrey Grodzovsky Jan. 20, 2021, 5:01 a.m. UTC | #7
On 1/19/21 3:48 AM, Christian König wrote:
> Am 18.01.21 um 22:01 schrieb Andrey Grodzovsky:
>> Handle all DMA IOMMU group related dependencies before the
>> group is removed.
>>
>> Signed-off-by: Andrey Grodzovsky <andrey.grodzovsky@amd.com>
>> ---
>>   drivers/gpu/drm/amd/amdgpu/amdgpu.h        |  5 ++++
>>   drivers/gpu/drm/amd/amdgpu/amdgpu_device.c | 46 ++++++++++++++++++++++++++++++
>>   drivers/gpu/drm/amd/amdgpu/amdgpu_gart.c   |  2 +-
>>   drivers/gpu/drm/amd/amdgpu/amdgpu_gart.h   |  1 +
>>   drivers/gpu/drm/amd/amdgpu/amdgpu_object.c | 10 +++++++
>>   drivers/gpu/drm/amd/amdgpu/amdgpu_object.h |  2 ++
>>   6 files changed, 65 insertions(+), 1 deletion(-)
>>
>> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu.h 
>> b/drivers/gpu/drm/amd/amdgpu/amdgpu.h
>> index 478a7d8..2953420 100644
>> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu.h
>> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu.h
>> @@ -51,6 +51,7 @@
>>   #include <linux/dma-fence.h>
>>   #include <linux/pci.h>
>>   #include <linux/aer.h>
>> +#include <linux/notifier.h>
>>     #include <drm/ttm/ttm_bo_api.h>
>>   #include <drm/ttm/ttm_bo_driver.h>
>> @@ -1041,6 +1042,10 @@ struct amdgpu_device {
>>         bool                            in_pci_err_recovery;
>>       struct pci_saved_state          *pci_state;
>> +
>> +    struct notifier_block        nb;
>> +    struct blocking_notifier_head    notifier;
>> +    struct list_head        device_bo_list;
>>   };
>>     static inline struct amdgpu_device *drm_to_adev(struct drm_device *ddev)
>> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c 
>> b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c
>> index 45e23e3..e99f4f1 100644
>> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c
>> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c
>> @@ -70,6 +70,8 @@
>>   #include <drm/task_barrier.h>
>>   #include <linux/pm_runtime.h>
>>   +#include <linux/iommu.h>
>> +
>>   MODULE_FIRMWARE("amdgpu/vega10_gpu_info.bin");
>>   MODULE_FIRMWARE("amdgpu/vega12_gpu_info.bin");
>>   MODULE_FIRMWARE("amdgpu/raven_gpu_info.bin");
>> @@ -3200,6 +3202,39 @@ static const struct attribute *amdgpu_dev_attributes[] 
>> = {
>>   };
>>     +static int amdgpu_iommu_group_notifier(struct notifier_block *nb,
>> +                     unsigned long action, void *data)
>> +{
>> +    struct amdgpu_device *adev = container_of(nb, struct amdgpu_device, nb);
>> +    struct amdgpu_bo *bo = NULL;
>> +
>> +    /*
>> +     * Following is a set of IOMMU group dependencies taken care of before
>> +     * device's IOMMU group is removed
>> +     */
>> +    if (action == IOMMU_GROUP_NOTIFY_DEL_DEVICE) {
>> +
>> +        spin_lock(&ttm_bo_glob.lru_lock);
>> +        list_for_each_entry(bo, &adev->device_bo_list, bo) {
>> +            if (bo->tbo.ttm)
>> +                ttm_tt_unpopulate(bo->tbo.bdev, bo->tbo.ttm);
>> +        }
>> +        spin_unlock(&ttm_bo_glob.lru_lock);
>
> That approach won't work. ttm_tt_unpopulate() might sleep on an IOMMU lock.
>
> You need to use a mutex here or even better make sure you can access the 
> device_bo_list without a lock in this moment.
>
> Christian.


I can think of switching to an RCU list? Otherwise, elements are added
on BO create and deleted on BO destroy; how can I prevent any of those from
happening while in this section besides a mutex? Make a copy of the list and run over it
instead?

Andrey


>
>> +
>> +        if (adev->irq.ih.use_bus_addr)
>> +            amdgpu_ih_ring_fini(adev, &adev->irq.ih);
>> +        if (adev->irq.ih1.use_bus_addr)
>> +            amdgpu_ih_ring_fini(adev, &adev->irq.ih1);
>> +        if (adev->irq.ih2.use_bus_addr)
>> +            amdgpu_ih_ring_fini(adev, &adev->irq.ih2);
>> +
>> +        amdgpu_gart_dummy_page_fini(adev);
>> +    }
>> +
>> +    return NOTIFY_OK;
>> +}
>> +
>> +
>>   /**
>>    * amdgpu_device_init - initialize the driver
>>    *
>> @@ -3304,6 +3339,8 @@ int amdgpu_device_init(struct amdgpu_device *adev,
>>         INIT_WORK(&adev->xgmi_reset_work, amdgpu_device_xgmi_reset_func);
>>   +    INIT_LIST_HEAD(&adev->device_bo_list);
>> +
>>       adev->gfx.gfx_off_req_count = 1;
>>       adev->pm.ac_power = power_supply_is_system_supplied() > 0;
>>   @@ -3575,6 +3612,15 @@ int amdgpu_device_init(struct amdgpu_device *adev,
>>       if (amdgpu_device_cache_pci_state(adev->pdev))
>>           pci_restore_state(pdev);
>>   +    BLOCKING_INIT_NOTIFIER_HEAD(&adev->notifier);
>> +    adev->nb.notifier_call = amdgpu_iommu_group_notifier;
>> +
>> +    if (adev->dev->iommu_group) {
>> +        r = iommu_group_register_notifier(adev->dev->iommu_group, &adev->nb);
>> +        if (r)
>> +            goto failed;
>> +    }
>> +
>>       return 0;
>>     failed:
>> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_gart.c 
>> b/drivers/gpu/drm/amd/amdgpu/amdgpu_gart.c
>> index 0db9330..486ad6d 100644
>> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_gart.c
>> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_gart.c
>> @@ -92,7 +92,7 @@ static int amdgpu_gart_dummy_page_init(struct amdgpu_device 
>> *adev)
>>    *
>>    * Frees the dummy page used by the driver (all asics).
>>    */
>> -static void amdgpu_gart_dummy_page_fini(struct amdgpu_device *adev)
>> +void amdgpu_gart_dummy_page_fini(struct amdgpu_device *adev)
>>   {
>>       if (!adev->dummy_page_addr)
>>           return;
>> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_gart.h 
>> b/drivers/gpu/drm/amd/amdgpu/amdgpu_gart.h
>> index afa2e28..5678d9c 100644
>> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_gart.h
>> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_gart.h
>> @@ -61,6 +61,7 @@ int amdgpu_gart_table_vram_pin(struct amdgpu_device *adev);
>>   void amdgpu_gart_table_vram_unpin(struct amdgpu_device *adev);
>>   int amdgpu_gart_init(struct amdgpu_device *adev);
>>   void amdgpu_gart_fini(struct amdgpu_device *adev);
>> +void amdgpu_gart_dummy_page_fini(struct amdgpu_device *adev);
>>   int amdgpu_gart_unbind(struct amdgpu_device *adev, uint64_t offset,
>>                  int pages);
>>   int amdgpu_gart_map(struct amdgpu_device *adev, uint64_t offset,
>> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_object.c 
>> b/drivers/gpu/drm/amd/amdgpu/amdgpu_object.c
>> index 6cc9919..4a1de69 100644
>> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_object.c
>> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_object.c
>> @@ -94,6 +94,10 @@ static void amdgpu_bo_destroy(struct ttm_buffer_object *tbo)
>>       }
>>       amdgpu_bo_unref(&bo->parent);
>>   +    spin_lock(&ttm_bo_glob.lru_lock);
>> +    list_del(&bo->bo);
>> +    spin_unlock(&ttm_bo_glob.lru_lock);
>> +
>>       kfree(bo->metadata);
>>       kfree(bo);
>>   }
>> @@ -613,6 +617,12 @@ static int amdgpu_bo_do_create(struct amdgpu_device *adev,
>>       if (bp->type == ttm_bo_type_device)
>>           bo->flags &= ~AMDGPU_GEM_CREATE_CPU_ACCESS_REQUIRED;
>>   +    INIT_LIST_HEAD(&bo->bo);
>> +
>> +    spin_lock(&ttm_bo_glob.lru_lock);
>> +    list_add_tail(&bo->bo, &adev->device_bo_list);
>> +    spin_unlock(&ttm_bo_glob.lru_lock);
>> +
>>       return 0;
>>     fail_unreserve:
>> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_object.h 
>> b/drivers/gpu/drm/amd/amdgpu/amdgpu_object.h
>> index 9ac3756..5ae8555 100644
>> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_object.h
>> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_object.h
>> @@ -110,6 +110,8 @@ struct amdgpu_bo {
>>       struct list_head        shadow_list;
>>         struct kgd_mem                  *kfd_bo;
>> +
>> +    struct list_head        bo;
>>   };
>>     static inline struct amdgpu_bo *ttm_to_amdgpu_bo(struct ttm_buffer_object 
>> *tbo)
>
> _______________________________________________
> amd-gfx mailing list
> amd-gfx@lists.freedesktop.org
> https://nam11.safelinks.protection.outlook.com/?url=https%3A%2F%2Flists.freedesktop.org%2Fmailman%2Flistinfo%2Famd-gfx&amp;data=04%7C01%7Candrey.grodzovsky%40amd.com%7C0c703eb6e73744962d3b08d8bc56f303%7C3dd8961fe4884e608e11a82d994e183d%7C0%7C0%7C637466428923905672%7CUnknown%7CTWFpbGZsb3d8eyJWIjoiMC4wLjAwMDAiLCJQIjoiV2luMzIiLCJBTiI6Ik1haWwiLCJXVCI6Mn0%3D%7C1000&amp;sdata=2Tkz4EMOEwFLQJUOk1ixd28c2ad1HqjBVIDO%2FX0OgqM%3D&amp;reserved=0 
>
Daniel Vetter Jan. 20, 2021, 8:38 a.m. UTC | #8
On Wed, Jan 20, 2021 at 5:21 AM Andrey Grodzovsky
<Andrey.Grodzovsky@amd.com> wrote:
>
>
> On 1/19/21 5:01 PM, Daniel Vetter wrote:
> > On Tue, Jan 19, 2021 at 10:22 PM Andrey Grodzovsky
> > <Andrey.Grodzovsky@amd.com> wrote:
> >>
> >> On 1/19/21 8:45 AM, Daniel Vetter wrote:
> >>
> >> On Tue, Jan 19, 2021 at 09:48:03AM +0100, Christian König wrote:
> >>
> >> Am 18.01.21 um 22:01 schrieb Andrey Grodzovsky:
> >>
> >> Handle all DMA IOMMU gropup related dependencies before the
> >> group is removed.
> >>
> >> Signed-off-by: Andrey Grodzovsky <andrey.grodzovsky@amd.com>
> >> ---
> >>    drivers/gpu/drm/amd/amdgpu/amdgpu.h        |  5 ++++
> >>    drivers/gpu/drm/amd/amdgpu/amdgpu_device.c | 46 ++++++++++++++++++++++++++++++
> >>    drivers/gpu/drm/amd/amdgpu/amdgpu_gart.c   |  2 +-
> >>    drivers/gpu/drm/amd/amdgpu/amdgpu_gart.h   |  1 +
> >>    drivers/gpu/drm/amd/amdgpu/amdgpu_object.c | 10 +++++++
> >>    drivers/gpu/drm/amd/amdgpu/amdgpu_object.h |  2 ++
> >>    6 files changed, 65 insertions(+), 1 deletion(-)
> >>
> >> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu.h b/drivers/gpu/drm/amd/amdgpu/amdgpu.h
> >> index 478a7d8..2953420 100644
> >> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu.h
> >> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu.h
> >> @@ -51,6 +51,7 @@
> >>    #include <linux/dma-fence.h>
> >>    #include <linux/pci.h>
> >>    #include <linux/aer.h>
> >> +#include <linux/notifier.h>
> >>    #include <drm/ttm/ttm_bo_api.h>
> >>    #include <drm/ttm/ttm_bo_driver.h>
> >> @@ -1041,6 +1042,10 @@ struct amdgpu_device {
> >>    bool                            in_pci_err_recovery;
> >>    struct pci_saved_state          *pci_state;
> >> +
> >> + struct notifier_block nb;
> >> + struct blocking_notifier_head notifier;
> >> + struct list_head device_bo_list;
> >>    };
> >>    static inline struct amdgpu_device *drm_to_adev(struct drm_device *ddev)
> >> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c
> >> index 45e23e3..e99f4f1 100644
> >> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c
> >> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c
> >> @@ -70,6 +70,8 @@
> >>    #include <drm/task_barrier.h>
> >>    #include <linux/pm_runtime.h>
> >> +#include <linux/iommu.h>
> >> +
> >>    MODULE_FIRMWARE("amdgpu/vega10_gpu_info.bin");
> >>    MODULE_FIRMWARE("amdgpu/vega12_gpu_info.bin");
> >>    MODULE_FIRMWARE("amdgpu/raven_gpu_info.bin");
> >> @@ -3200,6 +3202,39 @@ static const struct attribute *amdgpu_dev_attributes[] = {
> >>    };
> >> +static int amdgpu_iommu_group_notifier(struct notifier_block *nb,
> >> +     unsigned long action, void *data)
> >> +{
> >> + struct amdgpu_device *adev = container_of(nb, struct amdgpu_device, nb);
> >> + struct amdgpu_bo *bo = NULL;
> >> +
> >> + /*
> >> + * Following is a set of IOMMU group dependencies taken care of before
> >> + * device's IOMMU group is removed
> >> + */
> >> + if (action == IOMMU_GROUP_NOTIFY_DEL_DEVICE) {
> >> +
> >> + spin_lock(&ttm_bo_glob.lru_lock);
> >> + list_for_each_entry(bo, &adev->device_bo_list, bo) {
> >> + if (bo->tbo.ttm)
> >> + ttm_tt_unpopulate(bo->tbo.bdev, bo->tbo.ttm);
> >> + }
> >> + spin_unlock(&ttm_bo_glob.lru_lock);
> >>
> >> That approach won't work. ttm_tt_unpopulate() might sleep on an IOMMU lock.
> >>
> >> You need to use a mutex here or even better make sure you can access the
> >> device_bo_list without a lock in this moment.
> >>
> >> I'd also be worried about the notifier mutex getting really badly in the
> >> way.
> >>
> >> Plus I'm worried why we even need this, it sounds a bit like papering over
> >> the iommu subsystem. Assuming we clean up all our iommu mappings in our
> >> device hotunplug/unload code, why do we still need to have an additional
> >> iommu notifier on top, with all kinds of additional headaches? The iommu
> >> shouldn't clean up before the devices in its group have cleaned up.
> >>
> >> I think we need more info here on what the exact problem is first.
> >> -Daniel
> >>
> >>
> >> Originally I experienced the  crash bellow on IOMMU enabled device, it happens post device removal from PCI topology -
> >> during shutting down of user client holding last reference to drm device file (X in my case).
> >> The crash is because by the time I get to this point struct device->iommu_group pointer is NULL
> >> already since the IOMMU group for the device is unset during PCI removal. So this contradicts what you said above
> >> that the iommu shouldn't clean up before the devices in its group have cleaned up.
> >> So instead of guessing when is the right place to place all IOMMU related cleanups it makes sense
> >> to get notification from IOMMU subsystem in the form of event IOMMU_GROUP_NOTIFY_DEL_DEVICE
> >> and use that place to do all the relevant cleanups.
> > Yeah that goes boom, but you shouldn't need this special iommu cleanup
> > handler. Making sure that all the dma-api mappings are gone needs to
> > be done as part of the device hotunplug, you can't delay that to the
> > last drm_device cleanup.
> >
> > So I most of the patch here with pulling that out (should be outright
> > removed from the final release code even) is good, just not yet how
> > you call that new code. Probably these bits (aside from walking all
> > buffers and unpopulating the tt) should be done from the early_free
> > callback you're adding.
> >
> > Also what I just realized: For normal unload you need to make sure the
> > hw is actually stopped first, before we unmap buffers. Otherwise
> > driver unload will likely result in wedged hw, probably not what you
> > want for debugging.
> > -Daniel
>
> Since device removal from the IOMMU group and this hook in particular
> take place before the call to amdgpu_pci_remove, essentially it means
> that for the IOMMU use case the entire amdgpu_device_fini_hw function
> should be called here to stop the HW instead of from amdgpu_pci_remove.

The crash you showed was on final drm_close, which should happen after
device removal, so that's clearly buggy. If the iommu subsystem
removes stuff before the driver could clean up already, then I think
that's an iommu bug or dma-api bug. Just plain using dma_map/unmap and
friends really shouldn't need notifier hacks like you're implementing
here. Can you pls show me a backtrace where dma_unmap_sg blows up when
it's put into the pci_driver remove callback?

> Looking at this from another perspective, AFAIK on each new device probing
> either due to PCI bus rescan or driver reload we are resetting the ASIC before doing
> any init operations (assuming we successfully gained MMIO access) and so maybe
> your concern is not an issue ?

Reset on probe is too late. The problem is that if you just remove the
driver, your device is doing dma at that moment. And you kinda have to
stop that before you free the mappings/memory. Of course when the
device is actually hotunplugged, then dma is guaranteed to have
stopped already. I'm not sure whether disabling the pci device is
enough to make sure no more dma happens, could be that's enough.
-Daniel

> Andrey
>
>
> >
> >> Andrey
> >>
> >>
> >> [  123.810074 <   28.126960>] BUG: kernel NULL pointer dereference, address: 00000000000000c8
> >> [  123.810080 <    0.000006>] #PF: supervisor read access in kernel mode
> >> [  123.810082 <    0.000002>] #PF: error_code(0x0000) - not-present page
> >> [  123.810085 <    0.000003>] PGD 0 P4D 0
> >> [  123.810089 <    0.000004>] Oops: 0000 [#1] SMP NOPTI
> >> [  123.810094 <    0.000005>] CPU: 5 PID: 1418 Comm: Xorg:shlo4 Tainted: G           O      5.9.0-rc2-dev+ #59
> >> [  123.810096 <    0.000002>] Hardware name: System manufacturer System Product Name/PRIME X470-PRO, BIOS 4406 02/28/2019
> >> [  123.810105 <    0.000009>] RIP: 0010:iommu_get_dma_domain+0x10/0x20
> >> [  123.810108 <    0.000003>] Code: b0 48 c7 87 98 00 00 00 00 00 00 00 31 c0 c3 b8 f4 ff ff ff eb a6 0f 1f 40 00 0f 1f 44 00 00 48 8b 87 d0 02 00 00 55 48 89 e5 <48> 8b 80 c8 00 00 00 5d c3 0f 1f 80 00 00 00 00 0f 1f 44 00 00 48
> >> [  123.810111 <    0.000003>] RSP: 0018:ffffa2e201f7f980 EFLAGS: 00010246
> >> [  123.810114 <    0.000003>] RAX: 0000000000000000 RBX: 0000000000001000 RCX: 0000000000000000
> >> [  123.810116 <    0.000002>] RDX: 0000000000001000 RSI: 00000000bf5cb000 RDI: ffff93c259dc60b0
> >> [  123.810117 <    0.000001>] RBP: ffffa2e201f7f980 R08: 0000000000000000 R09: 0000000000000000
> >> [  123.810119 <    0.000002>] R10: ffffa2e201f7faf0 R11: 0000000000000001 R12: 00000000bf5cb000
> >> [  123.810121 <    0.000002>] R13: 0000000000001000 R14: ffff93c24cef9c50 R15: ffff93c256c05688
> >> [  123.810124 <    0.000003>] FS:  00007f5e5e8d3700(0000) GS:ffff93c25e940000(0000) knlGS:0000000000000000
> >> [  123.810126 <    0.000002>] CS:  0010 DS: 0000 ES: 0000 CR0: 0000000080050033
> >> [  123.810128 <    0.000002>] CR2: 00000000000000c8 CR3: 000000027fe0a000 CR4: 00000000003506e0
> >> [  123.810130 <    0.000002>] Call Trace:
> >> [  123.810136 <    0.000006>]  __iommu_dma_unmap+0x2e/0x100
> >> [  123.810141 <    0.000005>]  ? kfree+0x389/0x3a0
> >> [  123.810144 <    0.000003>]  iommu_dma_unmap_page+0xe/0x10
> >> [  123.810149 <    0.000005>] dma_unmap_page_attrs+0x4d/0xf0
> >> [  123.810159 <    0.000010>]  ? ttm_bo_del_from_lru+0x8e/0xb0 [ttm]
> >> [  123.810165 <    0.000006>] ttm_unmap_and_unpopulate_pages+0x8e/0xc0 [ttm]
> >> [  123.810252 <    0.000087>] amdgpu_ttm_tt_unpopulate+0xaa/0xd0 [amdgpu]
> >> [  123.810258 <    0.000006>]  ttm_tt_unpopulate+0x59/0x70 [ttm]
> >> [  123.810264 <    0.000006>]  ttm_tt_destroy+0x6a/0x70 [ttm]
> >> [  123.810270 <    0.000006>] ttm_bo_cleanup_memtype_use+0x36/0xa0 [ttm]
> >> [  123.810276 <    0.000006>]  ttm_bo_put+0x1e7/0x400 [ttm]
> >> [  123.810358 <    0.000082>]  amdgpu_bo_unref+0x1e/0x30 [amdgpu]
> >> [  123.810440 <    0.000082>] amdgpu_gem_object_free+0x37/0x50 [amdgpu]
> >> [  123.810459 <    0.000019>]  drm_gem_object_free+0x35/0x40 [drm]
> >> [  123.810476 <    0.000017>] drm_gem_object_handle_put_unlocked+0x9d/0xd0 [drm]
> >> [  123.810494 <    0.000018>] drm_gem_object_release_handle+0x74/0x90 [drm]
> >> [  123.810511 <    0.000017>]  ? drm_gem_object_handle_put_unlocked+0xd0/0xd0 [drm]
> >> [  123.810516 <    0.000005>]  idr_for_each+0x4d/0xd0
> >> [  123.810534 <    0.000018>]  drm_gem_release+0x20/0x30 [drm]
> >> [  123.810550 <    0.000016>]  drm_file_free+0x251/0x2a0 [drm]
> >> [  123.810567 <    0.000017>] drm_close_helper.isra.14+0x61/0x70 [drm]
> >> [  123.810583 <    0.000016>]  drm_release+0x6a/0xe0 [drm]
> >> [  123.810588 <    0.000005>]  __fput+0xa2/0x250
> >> [  123.810592 <    0.000004>]  ____fput+0xe/0x10
> >> [  123.810595 <    0.000003>]  task_work_run+0x6c/0xa0
> >> [  123.810600 <    0.000005>]  do_exit+0x376/0xb60
> >> [  123.810604 <    0.000004>]  do_group_exit+0x43/0xa0
> >> [  123.810608 <    0.000004>]  get_signal+0x18b/0x8e0
> >> [  123.810612 <    0.000004>]  ? do_futex+0x595/0xc20
> >> [  123.810617 <    0.000005>]  arch_do_signal+0x34/0x880
> >> [  123.810620 <    0.000003>]  ? check_preempt_curr+0x50/0x60
> >> [  123.810623 <    0.000003>]  ? ttwu_do_wakeup+0x1e/0x160
> >> [  123.810626 <    0.000003>]  ? ttwu_do_activate+0x61/0x70
> >> [  123.810630 <    0.000004>] exit_to_user_mode_prepare+0x124/0x1b0
> >> [  123.810635 <    0.000005>] syscall_exit_to_user_mode+0x31/0x170
> >> [  123.810639 <    0.000004>]  do_syscall_64+0x43/0x80
> >>
> >>
> >> Andrey
> >>
> >>
> >>
> >> Christian.
> >>
> >> +
> >> + if (adev->irq.ih.use_bus_addr)
> >> + amdgpu_ih_ring_fini(adev, &adev->irq.ih);
> >> + if (adev->irq.ih1.use_bus_addr)
> >> + amdgpu_ih_ring_fini(adev, &adev->irq.ih1);
> >> + if (adev->irq.ih2.use_bus_addr)
> >> + amdgpu_ih_ring_fini(adev, &adev->irq.ih2);
> >> +
> >> + amdgpu_gart_dummy_page_fini(adev);
> >> + }
> >> +
> >> + return NOTIFY_OK;
> >> +}
> >> +
> >> +
> >>    /**
> >>     * amdgpu_device_init - initialize the driver
> >>     *
> >> @@ -3304,6 +3339,8 @@ int amdgpu_device_init(struct amdgpu_device *adev,
> >>    INIT_WORK(&adev->xgmi_reset_work, amdgpu_device_xgmi_reset_func);
> >> + INIT_LIST_HEAD(&adev->device_bo_list);
> >> +
> >>    adev->gfx.gfx_off_req_count = 1;
> >>    adev->pm.ac_power = power_supply_is_system_supplied() > 0;
> >> @@ -3575,6 +3612,15 @@ int amdgpu_device_init(struct amdgpu_device *adev,
> >>    if (amdgpu_device_cache_pci_state(adev->pdev))
> >>    pci_restore_state(pdev);
> >> + BLOCKING_INIT_NOTIFIER_HEAD(&adev->notifier);
> >> + adev->nb.notifier_call = amdgpu_iommu_group_notifier;
> >> +
> >> + if (adev->dev->iommu_group) {
> >> + r = iommu_group_register_notifier(adev->dev->iommu_group, &adev->nb);
> >> + if (r)
> >> + goto failed;
> >> + }
> >> +
> >>    return 0;
> >>    failed:
> >> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_gart.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_gart.c
> >> index 0db9330..486ad6d 100644
> >> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_gart.c
> >> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_gart.c
> >> @@ -92,7 +92,7 @@ static int amdgpu_gart_dummy_page_init(struct amdgpu_device *adev)
> >>     *
> >>     * Frees the dummy page used by the driver (all asics).
> >>     */
> >> -static void amdgpu_gart_dummy_page_fini(struct amdgpu_device *adev)
> >> +void amdgpu_gart_dummy_page_fini(struct amdgpu_device *adev)
> >>    {
> >>    if (!adev->dummy_page_addr)
> >>    return;
> >> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_gart.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_gart.h
> >> index afa2e28..5678d9c 100644
> >> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_gart.h
> >> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_gart.h
> >> @@ -61,6 +61,7 @@ int amdgpu_gart_table_vram_pin(struct amdgpu_device *adev);
> >>    void amdgpu_gart_table_vram_unpin(struct amdgpu_device *adev);
> >>    int amdgpu_gart_init(struct amdgpu_device *adev);
> >>    void amdgpu_gart_fini(struct amdgpu_device *adev);
> >> +void amdgpu_gart_dummy_page_fini(struct amdgpu_device *adev);
> >>    int amdgpu_gart_unbind(struct amdgpu_device *adev, uint64_t offset,
> >>          int pages);
> >>    int amdgpu_gart_map(struct amdgpu_device *adev, uint64_t offset,
> >> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_object.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_object.c
> >> index 6cc9919..4a1de69 100644
> >> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_object.c
> >> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_object.c
> >> @@ -94,6 +94,10 @@ static void amdgpu_bo_destroy(struct ttm_buffer_object *tbo)
> >>    }
> >>    amdgpu_bo_unref(&bo->parent);
> >> + spin_lock(&ttm_bo_glob.lru_lock);
> >> + list_del(&bo->bo);
> >> + spin_unlock(&ttm_bo_glob.lru_lock);
> >> +
> >>    kfree(bo->metadata);
> >>    kfree(bo);
> >>    }
> >> @@ -613,6 +617,12 @@ static int amdgpu_bo_do_create(struct amdgpu_device *adev,
> >>    if (bp->type == ttm_bo_type_device)
> >>    bo->flags &= ~AMDGPU_GEM_CREATE_CPU_ACCESS_REQUIRED;
> >> + INIT_LIST_HEAD(&bo->bo);
> >> +
> >> + spin_lock(&ttm_bo_glob.lru_lock);
> >> + list_add_tail(&bo->bo, &adev->device_bo_list);
> >> + spin_unlock(&ttm_bo_glob.lru_lock);
> >> +
> >>    return 0;
> >>    fail_unreserve:
> >> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_object.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_object.h
> >> index 9ac3756..5ae8555 100644
> >> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_object.h
> >> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_object.h
> >> @@ -110,6 +110,8 @@ struct amdgpu_bo {
> >>    struct list_head shadow_list;
> >>    struct kgd_mem                  *kfd_bo;
> >> +
> >> + struct list_head bo;
> >>    };
> >>    static inline struct amdgpu_bo *ttm_to_amdgpu_bo(struct ttm_buffer_object *tbo)
> >
> >
Andrey Grodzovsky Jan. 20, 2021, 7:38 p.m. UTC | #9
Ping

Andrey

On 1/20/21 12:01 AM, Andrey Grodzovsky wrote:
>
> On 1/19/21 3:48 AM, Christian König wrote:
>> Am 18.01.21 um 22:01 schrieb Andrey Grodzovsky:
>>> Handle all DMA IOMMU gropup related dependencies before the
>>> group is removed.
>>>
>>> Signed-off-by: Andrey Grodzovsky <andrey.grodzovsky@amd.com>
>>> ---
>>>   drivers/gpu/drm/amd/amdgpu/amdgpu.h        |  5 ++++
>>>   drivers/gpu/drm/amd/amdgpu/amdgpu_device.c | 46 
>>> ++++++++++++++++++++++++++++++
>>>   drivers/gpu/drm/amd/amdgpu/amdgpu_gart.c   |  2 +-
>>>   drivers/gpu/drm/amd/amdgpu/amdgpu_gart.h   |  1 +
>>>   drivers/gpu/drm/amd/amdgpu/amdgpu_object.c | 10 +++++++
>>>   drivers/gpu/drm/amd/amdgpu/amdgpu_object.h |  2 ++
>>>   6 files changed, 65 insertions(+), 1 deletion(-)
>>>
>>> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu.h 
>>> b/drivers/gpu/drm/amd/amdgpu/amdgpu.h
>>> index 478a7d8..2953420 100644
>>> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu.h
>>> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu.h
>>> @@ -51,6 +51,7 @@
>>>   #include <linux/dma-fence.h>
>>>   #include <linux/pci.h>
>>>   #include <linux/aer.h>
>>> +#include <linux/notifier.h>
>>>     #include <drm/ttm/ttm_bo_api.h>
>>>   #include <drm/ttm/ttm_bo_driver.h>
>>> @@ -1041,6 +1042,10 @@ struct amdgpu_device {
>>>         bool                            in_pci_err_recovery;
>>>       struct pci_saved_state          *pci_state;
>>> +
>>> +    struct notifier_block        nb;
>>> +    struct blocking_notifier_head    notifier;
>>> +    struct list_head        device_bo_list;
>>>   };
>>>     static inline struct amdgpu_device *drm_to_adev(struct drm_device *ddev)
>>> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c 
>>> b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c
>>> index 45e23e3..e99f4f1 100644
>>> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c
>>> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c
>>> @@ -70,6 +70,8 @@
>>>   #include <drm/task_barrier.h>
>>>   #include <linux/pm_runtime.h>
>>>   +#include <linux/iommu.h>
>>> +
>>>   MODULE_FIRMWARE("amdgpu/vega10_gpu_info.bin");
>>>   MODULE_FIRMWARE("amdgpu/vega12_gpu_info.bin");
>>>   MODULE_FIRMWARE("amdgpu/raven_gpu_info.bin");
>>> @@ -3200,6 +3202,39 @@ static const struct attribute 
>>> *amdgpu_dev_attributes[] = {
>>>   };
>>>     +static int amdgpu_iommu_group_notifier(struct notifier_block *nb,
>>> +                     unsigned long action, void *data)
>>> +{
>>> +    struct amdgpu_device *adev = container_of(nb, struct amdgpu_device, nb);
>>> +    struct amdgpu_bo *bo = NULL;
>>> +
>>> +    /*
>>> +     * Following is a set of IOMMU group dependencies taken care of before
>>> +     * device's IOMMU group is removed
>>> +     */
>>> +    if (action == IOMMU_GROUP_NOTIFY_DEL_DEVICE) {
>>> +
>>> +        spin_lock(&ttm_bo_glob.lru_lock);
>>> +        list_for_each_entry(bo, &adev->device_bo_list, bo) {
>>> +            if (bo->tbo.ttm)
>>> +                ttm_tt_unpopulate(bo->tbo.bdev, bo->tbo.ttm);
>>> +        }
>>> +        spin_unlock(&ttm_bo_glob.lru_lock);
>>
>> That approach won't work. ttm_tt_unpopulate() might sleep on an IOMMU lock.
>>
>> You need to use a mutex here or even better make sure you can access the 
>> device_bo_list without a lock in this moment.
>>
>> Christian.
>
>
> I can think of switching to an RCU list? Otherwise, elements are added
> on BO create and deleted on BO destroy; how can I prevent any of those from
> happening while in this section besides a mutex? Make a copy of the list and run over
> it instead?
>
> Andrey
>
>
>>
>>> +
>>> +        if (adev->irq.ih.use_bus_addr)
>>> +            amdgpu_ih_ring_fini(adev, &adev->irq.ih);
>>> +        if (adev->irq.ih1.use_bus_addr)
>>> +            amdgpu_ih_ring_fini(adev, &adev->irq.ih1);
>>> +        if (adev->irq.ih2.use_bus_addr)
>>> +            amdgpu_ih_ring_fini(adev, &adev->irq.ih2);
>>> +
>>> +        amdgpu_gart_dummy_page_fini(adev);
>>> +    }
>>> +
>>> +    return NOTIFY_OK;
>>> +}
>>> +
>>> +
>>>   /**
>>>    * amdgpu_device_init - initialize the driver
>>>    *
>>> @@ -3304,6 +3339,8 @@ int amdgpu_device_init(struct amdgpu_device *adev,
>>>         INIT_WORK(&adev->xgmi_reset_work, amdgpu_device_xgmi_reset_func);
>>>   +    INIT_LIST_HEAD(&adev->device_bo_list);
>>> +
>>>       adev->gfx.gfx_off_req_count = 1;
>>>       adev->pm.ac_power = power_supply_is_system_supplied() > 0;
>>>   @@ -3575,6 +3612,15 @@ int amdgpu_device_init(struct amdgpu_device *adev,
>>>       if (amdgpu_device_cache_pci_state(adev->pdev))
>>>           pci_restore_state(pdev);
>>>   +    BLOCKING_INIT_NOTIFIER_HEAD(&adev->notifier);
>>> +    adev->nb.notifier_call = amdgpu_iommu_group_notifier;
>>> +
>>> +    if (adev->dev->iommu_group) {
>>> +        r = iommu_group_register_notifier(adev->dev->iommu_group, &adev->nb);
>>> +        if (r)
>>> +            goto failed;
>>> +    }
>>> +
>>>       return 0;
>>>     failed:
>>> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_gart.c 
>>> b/drivers/gpu/drm/amd/amdgpu/amdgpu_gart.c
>>> index 0db9330..486ad6d 100644
>>> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_gart.c
>>> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_gart.c
>>> @@ -92,7 +92,7 @@ static int amdgpu_gart_dummy_page_init(struct 
>>> amdgpu_device *adev)
>>>    *
>>>    * Frees the dummy page used by the driver (all asics).
>>>    */
>>> -static void amdgpu_gart_dummy_page_fini(struct amdgpu_device *adev)
>>> +void amdgpu_gart_dummy_page_fini(struct amdgpu_device *adev)
>>>   {
>>>       if (!adev->dummy_page_addr)
>>>           return;
>>> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_gart.h 
>>> b/drivers/gpu/drm/amd/amdgpu/amdgpu_gart.h
>>> index afa2e28..5678d9c 100644
>>> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_gart.h
>>> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_gart.h
>>> @@ -61,6 +61,7 @@ int amdgpu_gart_table_vram_pin(struct amdgpu_device *adev);
>>>   void amdgpu_gart_table_vram_unpin(struct amdgpu_device *adev);
>>>   int amdgpu_gart_init(struct amdgpu_device *adev);
>>>   void amdgpu_gart_fini(struct amdgpu_device *adev);
>>> +void amdgpu_gart_dummy_page_fini(struct amdgpu_device *adev);
>>>   int amdgpu_gart_unbind(struct amdgpu_device *adev, uint64_t offset,
>>>                  int pages);
>>>   int amdgpu_gart_map(struct amdgpu_device *adev, uint64_t offset,
>>> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_object.c 
>>> b/drivers/gpu/drm/amd/amdgpu/amdgpu_object.c
>>> index 6cc9919..4a1de69 100644
>>> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_object.c
>>> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_object.c
>>> @@ -94,6 +94,10 @@ static void amdgpu_bo_destroy(struct ttm_buffer_object *tbo)
>>>       }
>>>       amdgpu_bo_unref(&bo->parent);
>>>   +    spin_lock(&ttm_bo_glob.lru_lock);
>>> +    list_del(&bo->bo);
>>> +    spin_unlock(&ttm_bo_glob.lru_lock);
>>> +
>>>       kfree(bo->metadata);
>>>       kfree(bo);
>>>   }
>>> @@ -613,6 +617,12 @@ static int amdgpu_bo_do_create(struct amdgpu_device *adev,
>>>       if (bp->type == ttm_bo_type_device)
>>>           bo->flags &= ~AMDGPU_GEM_CREATE_CPU_ACCESS_REQUIRED;
>>>   +    INIT_LIST_HEAD(&bo->bo);
>>> +
>>> +    spin_lock(&ttm_bo_glob.lru_lock);
>>> +    list_add_tail(&bo->bo, &adev->device_bo_list);
>>> +    spin_unlock(&ttm_bo_glob.lru_lock);
>>> +
>>>       return 0;
>>>     fail_unreserve:
>>> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_object.h 
>>> b/drivers/gpu/drm/amd/amdgpu/amdgpu_object.h
>>> index 9ac3756..5ae8555 100644
>>> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_object.h
>>> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_object.h
>>> @@ -110,6 +110,8 @@ struct amdgpu_bo {
>>>       struct list_head        shadow_list;
>>>         struct kgd_mem                  *kfd_bo;
>>> +
>>> +    struct list_head        bo;
>>>   };
>>>     static inline struct amdgpu_bo *ttm_to_amdgpu_bo(struct 
>>> ttm_buffer_object *tbo)
>>
>> _______________________________________________
>> amd-gfx mailing list
>> amd-gfx@lists.freedesktop.org
>> https://nam11.safelinks.protection.outlook.com/?url=https%3A%2F%2Flists.freedesktop.org%2Fmailman%2Flistinfo%2Famd-gfx&amp;data=04%7C01%7Candrey.grodzovsky%40amd.com%7C0c703eb6e73744962d3b08d8bc56f303%7C3dd8961fe4884e608e11a82d994e183d%7C0%7C0%7C637466428923905672%7CUnknown%7CTWFpbGZsb3d8eyJWIjoiMC4wLjAwMDAiLCJQIjoiV2luMzIiLCJBTiI6Ik1haWwiLCJXVCI6Mn0%3D%7C1000&amp;sdata=2Tkz4EMOEwFLQJUOk1ixd28c2ad1HqjBVIDO%2FX0OgqM%3D&amp;reserved=0 
>>
Christian König Jan. 21, 2021, 10:42 a.m. UTC | #10
Am 20.01.21 um 20:38 schrieb Andrey Grodzovsky:
> Ping
>
> Andrey
>
> On 1/20/21 12:01 AM, Andrey Grodzovsky wrote:
>>
>> On 1/19/21 3:48 AM, Christian König wrote:
>>> Am 18.01.21 um 22:01 schrieb Andrey Grodzovsky:
>>>> Handle all DMA IOMMU gropup related dependencies before the
>>>> group is removed.
>>>>
>>>> Signed-off-by: Andrey Grodzovsky <andrey.grodzovsky@amd.com>
>>>> ---
>>>>   drivers/gpu/drm/amd/amdgpu/amdgpu.h        |  5 ++++
>>>>   drivers/gpu/drm/amd/amdgpu/amdgpu_device.c | 46 
>>>> ++++++++++++++++++++++++++++++
>>>>   drivers/gpu/drm/amd/amdgpu/amdgpu_gart.c   |  2 +-
>>>>   drivers/gpu/drm/amd/amdgpu/amdgpu_gart.h   |  1 +
>>>>   drivers/gpu/drm/amd/amdgpu/amdgpu_object.c | 10 +++++++
>>>>   drivers/gpu/drm/amd/amdgpu/amdgpu_object.h |  2 ++
>>>>   6 files changed, 65 insertions(+), 1 deletion(-)
>>>>
>>>> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu.h 
>>>> b/drivers/gpu/drm/amd/amdgpu/amdgpu.h
>>>> index 478a7d8..2953420 100644
>>>> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu.h
>>>> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu.h
>>>> @@ -51,6 +51,7 @@
>>>>   #include <linux/dma-fence.h>
>>>>   #include <linux/pci.h>
>>>>   #include <linux/aer.h>
>>>> +#include <linux/notifier.h>
>>>>     #include <drm/ttm/ttm_bo_api.h>
>>>>   #include <drm/ttm/ttm_bo_driver.h>
>>>> @@ -1041,6 +1042,10 @@ struct amdgpu_device {
>>>>         bool                            in_pci_err_recovery;
>>>>       struct pci_saved_state          *pci_state;
>>>> +
>>>> +    struct notifier_block        nb;
>>>> +    struct blocking_notifier_head    notifier;
>>>> +    struct list_head        device_bo_list;
>>>>   };
>>>>     static inline struct amdgpu_device *drm_to_adev(struct 
>>>> drm_device *ddev)
>>>> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c 
>>>> b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c
>>>> index 45e23e3..e99f4f1 100644
>>>> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c
>>>> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c
>>>> @@ -70,6 +70,8 @@
>>>>   #include <drm/task_barrier.h>
>>>>   #include <linux/pm_runtime.h>
>>>>   +#include <linux/iommu.h>
>>>> +
>>>>   MODULE_FIRMWARE("amdgpu/vega10_gpu_info.bin");
>>>>   MODULE_FIRMWARE("amdgpu/vega12_gpu_info.bin");
>>>>   MODULE_FIRMWARE("amdgpu/raven_gpu_info.bin");
>>>> @@ -3200,6 +3202,39 @@ static const struct attribute 
>>>> *amdgpu_dev_attributes[] = {
>>>>   };
>>>>     +static int amdgpu_iommu_group_notifier(struct notifier_block *nb,
>>>> +                     unsigned long action, void *data)
>>>> +{
>>>> +    struct amdgpu_device *adev = container_of(nb, struct 
>>>> amdgpu_device, nb);
>>>> +    struct amdgpu_bo *bo = NULL;
>>>> +
>>>> +    /*
>>>> +     * Following is a set of IOMMU group dependencies taken care 
>>>> of before
>>>> +     * device's IOMMU group is removed
>>>> +     */
>>>> +    if (action == IOMMU_GROUP_NOTIFY_DEL_DEVICE) {
>>>> +
>>>> +        spin_lock(&ttm_bo_glob.lru_lock);
>>>> +        list_for_each_entry(bo, &adev->device_bo_list, bo) {
>>>> +            if (bo->tbo.ttm)
>>>> +                ttm_tt_unpopulate(bo->tbo.bdev, bo->tbo.ttm);
>>>> +        }
>>>> +        spin_unlock(&ttm_bo_glob.lru_lock);
>>>
>>> That approach won't work. ttm_tt_unpopulate() might sleep on an 
>>> IOMMU lock.
>>>
>>> You need to use a mutex here or even better make sure you can access 
>>> the device_bo_list without a lock in this moment.
>>>
>>> Christian.
>>
>>
>> I can think of switching to RCU list ? Otherwise, elements are added
>> on BO create and deleted on BO destroy, how can i prevent any of 
>> those from
>> happening while in this section besides mutex ? Make a copy list and 
>> run over it instead ?

RCU won't work since the BO is not RCU protected.

What you can try something like this:

spin_lock(&ttm_bo_glob.lru_lock);
while (!list_empty(&adev->device_bo_list)) {
     bo = list_first_entry(&adev->device_bo_list, struct amdgpu_bo, bo);
     list_del(&bo->bo);
     spin_unlock(&ttm_bo_glob.lru_lock);
     ttm_tt_unpopulate(bo->tbo.bdev, bo->tbo.ttm);
     spin_lock(&ttm_bo_glob.lru_lock);
}
spin_unlock(&ttm_bo_glob.lru_lock);

Regards,
Christian.

>>
>> Andrey
>>
>>
>>>
>>>> +
>>>> +        if (adev->irq.ih.use_bus_addr)
>>>> +            amdgpu_ih_ring_fini(adev, &adev->irq.ih);
>>>> +        if (adev->irq.ih1.use_bus_addr)
>>>> +            amdgpu_ih_ring_fini(adev, &adev->irq.ih1);
>>>> +        if (adev->irq.ih2.use_bus_addr)
>>>> +            amdgpu_ih_ring_fini(adev, &adev->irq.ih2);
>>>> +
>>>> +        amdgpu_gart_dummy_page_fini(adev);
>>>> +    }
>>>> +
>>>> +    return NOTIFY_OK;
>>>> +}
>>>> +
>>>> +
>>>>   /**
>>>>    * amdgpu_device_init - initialize the driver
>>>>    *
>>>> @@ -3304,6 +3339,8 @@ int amdgpu_device_init(struct amdgpu_device 
>>>> *adev,
>>>>         INIT_WORK(&adev->xgmi_reset_work, 
>>>> amdgpu_device_xgmi_reset_func);
>>>>   +    INIT_LIST_HEAD(&adev->device_bo_list);
>>>> +
>>>>       adev->gfx.gfx_off_req_count = 1;
>>>>       adev->pm.ac_power = power_supply_is_system_supplied() > 0;
>>>>   @@ -3575,6 +3612,15 @@ int amdgpu_device_init(struct 
>>>> amdgpu_device *adev,
>>>>       if (amdgpu_device_cache_pci_state(adev->pdev))
>>>>           pci_restore_state(pdev);
>>>>   +    BLOCKING_INIT_NOTIFIER_HEAD(&adev->notifier);
>>>> +    adev->nb.notifier_call = amdgpu_iommu_group_notifier;
>>>> +
>>>> +    if (adev->dev->iommu_group) {
>>>> +        r = iommu_group_register_notifier(adev->dev->iommu_group, 
>>>> &adev->nb);
>>>> +        if (r)
>>>> +            goto failed;
>>>> +    }
>>>> +
>>>>       return 0;
>>>>     failed:
>>>> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_gart.c 
>>>> b/drivers/gpu/drm/amd/amdgpu/amdgpu_gart.c
>>>> index 0db9330..486ad6d 100644
>>>> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_gart.c
>>>> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_gart.c
>>>> @@ -92,7 +92,7 @@ static int amdgpu_gart_dummy_page_init(struct 
>>>> amdgpu_device *adev)
>>>>    *
>>>>    * Frees the dummy page used by the driver (all asics).
>>>>    */
>>>> -static void amdgpu_gart_dummy_page_fini(struct amdgpu_device *adev)
>>>> +void amdgpu_gart_dummy_page_fini(struct amdgpu_device *adev)
>>>>   {
>>>>       if (!adev->dummy_page_addr)
>>>>           return;
>>>> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_gart.h 
>>>> b/drivers/gpu/drm/amd/amdgpu/amdgpu_gart.h
>>>> index afa2e28..5678d9c 100644
>>>> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_gart.h
>>>> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_gart.h
>>>> @@ -61,6 +61,7 @@ int amdgpu_gart_table_vram_pin(struct 
>>>> amdgpu_device *adev);
>>>>   void amdgpu_gart_table_vram_unpin(struct amdgpu_device *adev);
>>>>   int amdgpu_gart_init(struct amdgpu_device *adev);
>>>>   void amdgpu_gart_fini(struct amdgpu_device *adev);
>>>> +void amdgpu_gart_dummy_page_fini(struct amdgpu_device *adev);
>>>>   int amdgpu_gart_unbind(struct amdgpu_device *adev, uint64_t offset,
>>>>                  int pages);
>>>>   int amdgpu_gart_map(struct amdgpu_device *adev, uint64_t offset,
>>>> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_object.c 
>>>> b/drivers/gpu/drm/amd/amdgpu/amdgpu_object.c
>>>> index 6cc9919..4a1de69 100644
>>>> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_object.c
>>>> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_object.c
>>>> @@ -94,6 +94,10 @@ static void amdgpu_bo_destroy(struct 
>>>> ttm_buffer_object *tbo)
>>>>       }
>>>>       amdgpu_bo_unref(&bo->parent);
>>>>   +    spin_lock(&ttm_bo_glob.lru_lock);
>>>> +    list_del(&bo->bo);
>>>> +    spin_unlock(&ttm_bo_glob.lru_lock);
>>>> +
>>>>       kfree(bo->metadata);
>>>>       kfree(bo);
>>>>   }
>>>> @@ -613,6 +617,12 @@ static int amdgpu_bo_do_create(struct 
>>>> amdgpu_device *adev,
>>>>       if (bp->type == ttm_bo_type_device)
>>>>           bo->flags &= ~AMDGPU_GEM_CREATE_CPU_ACCESS_REQUIRED;
>>>>   +    INIT_LIST_HEAD(&bo->bo);
>>>> +
>>>> +    spin_lock(&ttm_bo_glob.lru_lock);
>>>> +    list_add_tail(&bo->bo, &adev->device_bo_list);
>>>> +    spin_unlock(&ttm_bo_glob.lru_lock);
>>>> +
>>>>       return 0;
>>>>     fail_unreserve:
>>>> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_object.h 
>>>> b/drivers/gpu/drm/amd/amdgpu/amdgpu_object.h
>>>> index 9ac3756..5ae8555 100644
>>>> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_object.h
>>>> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_object.h
>>>> @@ -110,6 +110,8 @@ struct amdgpu_bo {
>>>>       struct list_head        shadow_list;
>>>>         struct kgd_mem                  *kfd_bo;
>>>> +
>>>> +    struct list_head        bo;
>>>>   };
>>>>     static inline struct amdgpu_bo *ttm_to_amdgpu_bo(struct 
>>>> ttm_buffer_object *tbo)
>>>
>>> _______________________________________________
>>> amd-gfx mailing list
>>> amd-gfx@lists.freedesktop.org
>>> https://nam11.safelinks.protection.outlook.com/?url=https%3A%2F%2Flists.freedesktop.org%2Fmailman%2Flistinfo%2Famd-gfx&amp;data=04%7C01%7Candrey.grodzovsky%40amd.com%7C0c703eb6e73744962d3b08d8bc56f303%7C3dd8961fe4884e608e11a82d994e183d%7C0%7C0%7C637466428923905672%7CUnknown%7CTWFpbGZsb3d8eyJWIjoiMC4wLjAwMDAiLCJQIjoiV2luMzIiLCJBTiI6Ik1haWwiLCJXVCI6Mn0%3D%7C1000&amp;sdata=2Tkz4EMOEwFLQJUOk1ixd28c2ad1HqjBVIDO%2FX0OgqM%3D&amp;reserved=0 
>>>
Daniel Vetter Jan. 21, 2021, 10:48 a.m. UTC | #11
On Wed, Jan 20, 2021 at 8:16 PM Andrey Grodzovsky
<Andrey.Grodzovsky@amd.com> wrote:
>
>
> On 1/20/21 3:38 AM, Daniel Vetter wrote:
> > On Wed, Jan 20, 2021 at 5:21 AM Andrey Grodzovsky
> > <Andrey.Grodzovsky@amd.com> wrote:
> >>
> >> On 1/19/21 5:01 PM, Daniel Vetter wrote:
> >>> On Tue, Jan 19, 2021 at 10:22 PM Andrey Grodzovsky
> >>> <Andrey.Grodzovsky@amd.com> wrote:
> >>>> On 1/19/21 8:45 AM, Daniel Vetter wrote:
> >>>>
> >>>> On Tue, Jan 19, 2021 at 09:48:03AM +0100, Christian König wrote:
> >>>>
> >>>> Am 18.01.21 um 22:01 schrieb Andrey Grodzovsky:
> >>>>
> >>>> Handle all DMA IOMMU group related dependencies before the
> >>>> group is removed.
> >>>>
> >>>> Signed-off-by: Andrey Grodzovsky <andrey.grodzovsky@amd.com>
> >>>> ---
> >>>>     drivers/gpu/drm/amd/amdgpu/amdgpu.h        |  5 ++++
> >>>>     drivers/gpu/drm/amd/amdgpu/amdgpu_device.c | 46 ++++++++++++++++++++++++++++++
> >>>>     drivers/gpu/drm/amd/amdgpu/amdgpu_gart.c   |  2 +-
> >>>>     drivers/gpu/drm/amd/amdgpu/amdgpu_gart.h   |  1 +
> >>>>     drivers/gpu/drm/amd/amdgpu/amdgpu_object.c | 10 +++++++
> >>>>     drivers/gpu/drm/amd/amdgpu/amdgpu_object.h |  2 ++
> >>>>     6 files changed, 65 insertions(+), 1 deletion(-)
> >>>>
> >>>> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu.h b/drivers/gpu/drm/amd/amdgpu/amdgpu.h
> >>>> index 478a7d8..2953420 100644
> >>>> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu.h
> >>>> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu.h
> >>>> @@ -51,6 +51,7 @@
> >>>>     #include <linux/dma-fence.h>
> >>>>     #include <linux/pci.h>
> >>>>     #include <linux/aer.h>
> >>>> +#include <linux/notifier.h>
> >>>>     #include <drm/ttm/ttm_bo_api.h>
> >>>>     #include <drm/ttm/ttm_bo_driver.h>
> >>>> @@ -1041,6 +1042,10 @@ struct amdgpu_device {
> >>>>     bool                            in_pci_err_recovery;
> >>>>     struct pci_saved_state          *pci_state;
> >>>> +
> >>>> + struct notifier_block nb;
> >>>> + struct blocking_notifier_head notifier;
> >>>> + struct list_head device_bo_list;
> >>>>     };
> >>>>     static inline struct amdgpu_device *drm_to_adev(struct drm_device *ddev)
> >>>> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c
> >>>> index 45e23e3..e99f4f1 100644
> >>>> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c
> >>>> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c
> >>>> @@ -70,6 +70,8 @@
> >>>>     #include <drm/task_barrier.h>
> >>>>     #include <linux/pm_runtime.h>
> >>>> +#include <linux/iommu.h>
> >>>> +
> >>>>     MODULE_FIRMWARE("amdgpu/vega10_gpu_info.bin");
> >>>>     MODULE_FIRMWARE("amdgpu/vega12_gpu_info.bin");
> >>>>     MODULE_FIRMWARE("amdgpu/raven_gpu_info.bin");
> >>>> @@ -3200,6 +3202,39 @@ static const struct attribute *amdgpu_dev_attributes[] = {
> >>>>     };
> >>>> +static int amdgpu_iommu_group_notifier(struct notifier_block *nb,
> >>>> +     unsigned long action, void *data)
> >>>> +{
> >>>> + struct amdgpu_device *adev = container_of(nb, struct amdgpu_device, nb);
> >>>> + struct amdgpu_bo *bo = NULL;
> >>>> +
> >>>> + /*
> >>>> + * Following is a set of IOMMU group dependencies taken care of before
> >>>> + * device's IOMMU group is removed
> >>>> + */
> >>>> + if (action == IOMMU_GROUP_NOTIFY_DEL_DEVICE) {
> >>>> +
> >>>> + spin_lock(&ttm_bo_glob.lru_lock);
> >>>> + list_for_each_entry(bo, &adev->device_bo_list, bo) {
> >>>> + if (bo->tbo.ttm)
> >>>> + ttm_tt_unpopulate(bo->tbo.bdev, bo->tbo.ttm);
> >>>> + }
> >>>> + spin_unlock(&ttm_bo_glob.lru_lock);
> >>>>
> >>>> That approach won't work. ttm_tt_unpopulate() might sleep on an IOMMU lock.
> >>>>
> >>>> You need to use a mutex here or even better make sure you can access the
> >>>> device_bo_list without a lock in this moment.
> >>>>
> >>>> I'd also be worried about the notifier mutex getting really badly in the
> >>>> way.
> >>>>
> >>>> Plus I'm worried why we even need this, it sounds a bit like papering over
> >>>> the iommu subsystem. Assuming we clean up all our iommu mappings in our
> >>>> device hotunplug/unload code, why do we still need to have an additional
> >>>> iommu notifier on top, with all kinds of additional headaches? The iommu
> >>>> shouldn't clean up before the devices in its group have cleaned up.
> >>>>
> >>>> I think we need more info here on what the exact problem is first.
> >>>> -Daniel
> >>>>
> >>>>
> >>>> Originally I experienced the crash below on an IOMMU-enabled device; it happens post device removal from PCI topology -
> >>>> during shutting down of user client holding last reference to drm device file (X in my case).
> >>>> The crash is because by the time I get to this point struct device->iommu_group pointer is NULL
> >>>> already since the IOMMU group for the device is unset during PCI removal. So this contradicts what you said above
> >>>> that the iommu shouldn't clean up before the devices in its group have cleaned up.
> >>>> So instead of guessing when is the right place to place all IOMMU related cleanups it makes sense
> >>>> to get notification from IOMMU subsystem in the form of event IOMMU_GROUP_NOTIFY_DEL_DEVICE
> >>>> and use that place to do all the relevant cleanups.
> >>> Yeah that goes boom, but you shouldn't need this special iommu cleanup
> >>> handler. Making sure that all the dma-api mappings are gone needs to
> >>> be done as part of the device hotunplug, you can't delay that to the
> >>> last drm_device cleanup.
> >>>
> >>> So I most of the patch here with pulling that out (should be outright
> >>> removed from the final release code even) is good, just not yet how
> >>> you call that new code. Probably these bits (aside from walking all
> >>> buffers and unpopulating the tt) should be done from the early_free
> >>> callback you're adding.
> >>>
> >>> Also what I just realized: For normal unload you need to make sure the
> >>> hw is actually stopped first, before we unmap buffers. Otherwise
> >>> driver unload will likely result in wedged hw, probably not what you
> >>> want for debugging.
> >>> -Daniel
> >> Since device removal from IOMMU group and this hook in particular
> >> takes place before call to amdgpu_pci_remove essentially it means
> >> that for IOMMU use case the entire amdgpu_device_fini_hw function
> >> should be called here to stop the HW, instead of from amdgpu_pci_remove.
> > The crash you showed was on final drm_close, which should happen after
> > device removal, so that's clearly buggy. If the iommu subsystem
> > removes stuff before the driver could clean up already, then I think
> > that's an iommu bug or dma-api bug. Just plain using dma_map/unmap and
> > friends really shouldn't need notifier hacks like you're implementing
> > here. Can you pls show me a backtrace where dma_unmap_sg blows up when
> > it's put into the pci_driver remove callback?
>
>
> It's not blowing up, and this has the same effect as using the notifier, because the setting
> of the device->iommu_group pointer to NULL takes place in the same call stack, but
> after amdgpu_pci_remove is called (see pci_stop_and_remove_bus_device).
> But I think that using a notifier callback is better than just sticking
> the cleanup code in amdgpu_pci_remove, because this is IOMMU-specific cleanup
> and is also coupled
> in the code to the place where device->iommu_group is unset.

Notifiers are a locking pain, plus dma_unmap_* is really just plain
normal driver cleanup work. If you want neat&automatic cleanup, look
at devm_ family of functions. There's imo really no reason to have
this notifier, and only reasons against it.

I think intermediately the cleanest solution is to put each cleanup
into a corresponding hw_fini callback (early_free is maybe a bit
ambiguous in what exactly it means).
-Daniel

> Andrey
>
>
> >
> >> Looking at this from another perspective, AFAIK on each new device probing
> >> either due to PCI bus rescan or driver reload we are resetting the ASIC before doing
> >> any init operations (assuming we successfully gained MMIO access) and so maybe
> >> your concern is not an issue ?
> > Reset on probe is too late. The problem is that if you just remove the
> > driver, your device is doing dma at that moment. And you kinda have to
> > stop that before you free the mappings/memory. Of course when the
> > device is actually hotunplugged, then dma is guaranteed to have
> > stopped already. I'm not sure whether disabling the pci device is
> > enough to make sure no more dma happens, could be that's enough.
> > -Daniel
> >
> >> Andrey
> >>
> >>
> >>>> Andrey
> >>>>
> >>>>
> >>>> [  123.810074 <   28.126960>] BUG: kernel NULL pointer dereference, address: 00000000000000c8
> >>>> [  123.810080 <    0.000006>] #PF: supervisor read access in kernel mode
> >>>> [  123.810082 <    0.000002>] #PF: error_code(0x0000) - not-present page
> >>>> [  123.810085 <    0.000003>] PGD 0 P4D 0
> >>>> [  123.810089 <    0.000004>] Oops: 0000 [#1] SMP NOPTI
> >>>> [  123.810094 <    0.000005>] CPU: 5 PID: 1418 Comm: Xorg:shlo4 Tainted: G           O      5.9.0-rc2-dev+ #59
> >>>> [  123.810096 <    0.000002>] Hardware name: System manufacturer System Product Name/PRIME X470-PRO, BIOS 4406 02/28/2019
> >>>> [  123.810105 <    0.000009>] RIP: 0010:iommu_get_dma_domain+0x10/0x20
> >>>> [  123.810108 <    0.000003>] Code: b0 48 c7 87 98 00 00 00 00 00 00 00 31 c0 c3 b8 f4 ff ff ff eb a6 0f 1f 40 00 0f 1f 44 00 00 48 8b 87 d0 02 00 00 55 48 89 e5 <48> 8b 80 c8 00 00 00 5d c3 0f 1f 80 00 00 00 00 0f 1f 44 00 00 48
> >>>> [  123.810111 <    0.000003>] RSP: 0018:ffffa2e201f7f980 EFLAGS: 00010246
> >>>> [  123.810114 <    0.000003>] RAX: 0000000000000000 RBX: 0000000000001000 RCX: 0000000000000000
> >>>> [  123.810116 <    0.000002>] RDX: 0000000000001000 RSI: 00000000bf5cb000 RDI: ffff93c259dc60b0
> >>>> [  123.810117 <    0.000001>] RBP: ffffa2e201f7f980 R08: 0000000000000000 R09: 0000000000000000
> >>>> [  123.810119 <    0.000002>] R10: ffffa2e201f7faf0 R11: 0000000000000001 R12: 00000000bf5cb000
> >>>> [  123.810121 <    0.000002>] R13: 0000000000001000 R14: ffff93c24cef9c50 R15: ffff93c256c05688
> >>>> [  123.810124 <    0.000003>] FS:  00007f5e5e8d3700(0000) GS:ffff93c25e940000(0000) knlGS:0000000000000000
> >>>> [  123.810126 <    0.000002>] CS:  0010 DS: 0000 ES: 0000 CR0: 0000000080050033
> >>>> [  123.810128 <    0.000002>] CR2: 00000000000000c8 CR3: 000000027fe0a000 CR4: 00000000003506e0
> >>>> [  123.810130 <    0.000002>] Call Trace:
> >>>> [  123.810136 <    0.000006>]  __iommu_dma_unmap+0x2e/0x100
> >>>> [  123.810141 <    0.000005>]  ? kfree+0x389/0x3a0
> >>>> [  123.810144 <    0.000003>]  iommu_dma_unmap_page+0xe/0x10
> >>>> [  123.810149 <    0.000005>] dma_unmap_page_attrs+0x4d/0xf0
> >>>> [  123.810159 <    0.000010>]  ? ttm_bo_del_from_lru+0x8e/0xb0 [ttm]
> >>>> [  123.810165 <    0.000006>] ttm_unmap_and_unpopulate_pages+0x8e/0xc0 [ttm]
> >>>> [  123.810252 <    0.000087>] amdgpu_ttm_tt_unpopulate+0xaa/0xd0 [amdgpu]
> >>>> [  123.810258 <    0.000006>]  ttm_tt_unpopulate+0x59/0x70 [ttm]
> >>>> [  123.810264 <    0.000006>]  ttm_tt_destroy+0x6a/0x70 [ttm]
> >>>> [  123.810270 <    0.000006>] ttm_bo_cleanup_memtype_use+0x36/0xa0 [ttm]
> >>>> [  123.810276 <    0.000006>]  ttm_bo_put+0x1e7/0x400 [ttm]
> >>>> [  123.810358 <    0.000082>]  amdgpu_bo_unref+0x1e/0x30 [amdgpu]
> >>>> [  123.810440 <    0.000082>] amdgpu_gem_object_free+0x37/0x50 [amdgpu]
> >>>> [  123.810459 <    0.000019>]  drm_gem_object_free+0x35/0x40 [drm]
> >>>> [  123.810476 <    0.000017>] drm_gem_object_handle_put_unlocked+0x9d/0xd0 [drm]
> >>>> [  123.810494 <    0.000018>] drm_gem_object_release_handle+0x74/0x90 [drm]
> >>>> [  123.810511 <    0.000017>]  ? drm_gem_object_handle_put_unlocked+0xd0/0xd0 [drm]
> >>>> [  123.810516 <    0.000005>]  idr_for_each+0x4d/0xd0
> >>>> [  123.810534 <    0.000018>]  drm_gem_release+0x20/0x30 [drm]
> >>>> [  123.810550 <    0.000016>]  drm_file_free+0x251/0x2a0 [drm]
> >>>> [  123.810567 <    0.000017>] drm_close_helper.isra.14+0x61/0x70 [drm]
> >>>> [  123.810583 <    0.000016>]  drm_release+0x6a/0xe0 [drm]
> >>>> [  123.810588 <    0.000005>]  __fput+0xa2/0x250
> >>>> [  123.810592 <    0.000004>]  ____fput+0xe/0x10
> >>>> [  123.810595 <    0.000003>]  task_work_run+0x6c/0xa0
> >>>> [  123.810600 <    0.000005>]  do_exit+0x376/0xb60
> >>>> [  123.810604 <    0.000004>]  do_group_exit+0x43/0xa0
> >>>> [  123.810608 <    0.000004>]  get_signal+0x18b/0x8e0
> >>>> [  123.810612 <    0.000004>]  ? do_futex+0x595/0xc20
> >>>> [  123.810617 <    0.000005>]  arch_do_signal+0x34/0x880
> >>>> [  123.810620 <    0.000003>]  ? check_preempt_curr+0x50/0x60
> >>>> [  123.810623 <    0.000003>]  ? ttwu_do_wakeup+0x1e/0x160
> >>>> [  123.810626 <    0.000003>]  ? ttwu_do_activate+0x61/0x70
> >>>> [  123.810630 <    0.000004>] exit_to_user_mode_prepare+0x124/0x1b0
> >>>> [  123.810635 <    0.000005>] syscall_exit_to_user_mode+0x31/0x170
> >>>> [  123.810639 <    0.000004>]  do_syscall_64+0x43/0x80
> >>>>
> >>>>
> >>>> Andrey
> >>>>
> >>>>
> >>>>
> >>>> Christian.
> >>>>
> >>>> +
> >>>> + if (adev->irq.ih.use_bus_addr)
> >>>> + amdgpu_ih_ring_fini(adev, &adev->irq.ih);
> >>>> + if (adev->irq.ih1.use_bus_addr)
> >>>> + amdgpu_ih_ring_fini(adev, &adev->irq.ih1);
> >>>> + if (adev->irq.ih2.use_bus_addr)
> >>>> + amdgpu_ih_ring_fini(adev, &adev->irq.ih2);
> >>>> +
> >>>> + amdgpu_gart_dummy_page_fini(adev);
> >>>> + }
> >>>> +
> >>>> + return NOTIFY_OK;
> >>>> +}
> >>>> +
> >>>> +
> >>>>     /**
> >>>>      * amdgpu_device_init - initialize the driver
> >>>>      *
> >>>> @@ -3304,6 +3339,8 @@ int amdgpu_device_init(struct amdgpu_device *adev,
> >>>>     INIT_WORK(&adev->xgmi_reset_work, amdgpu_device_xgmi_reset_func);
> >>>> + INIT_LIST_HEAD(&adev->device_bo_list);
> >>>> +
> >>>>     adev->gfx.gfx_off_req_count = 1;
> >>>>     adev->pm.ac_power = power_supply_is_system_supplied() > 0;
> >>>> @@ -3575,6 +3612,15 @@ int amdgpu_device_init(struct amdgpu_device *adev,
> >>>>     if (amdgpu_device_cache_pci_state(adev->pdev))
> >>>>     pci_restore_state(pdev);
> >>>> + BLOCKING_INIT_NOTIFIER_HEAD(&adev->notifier);
> >>>> + adev->nb.notifier_call = amdgpu_iommu_group_notifier;
> >>>> +
> >>>> + if (adev->dev->iommu_group) {
> >>>> + r = iommu_group_register_notifier(adev->dev->iommu_group, &adev->nb);
> >>>> + if (r)
> >>>> + goto failed;
> >>>> + }
> >>>> +
> >>>>     return 0;
> >>>>     failed:
> >>>> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_gart.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_gart.c
> >>>> index 0db9330..486ad6d 100644
> >>>> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_gart.c
> >>>> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_gart.c
> >>>> @@ -92,7 +92,7 @@ static int amdgpu_gart_dummy_page_init(struct amdgpu_device *adev)
> >>>>      *
> >>>>      * Frees the dummy page used by the driver (all asics).
> >>>>      */
> >>>> -static void amdgpu_gart_dummy_page_fini(struct amdgpu_device *adev)
> >>>> +void amdgpu_gart_dummy_page_fini(struct amdgpu_device *adev)
> >>>>     {
> >>>>     if (!adev->dummy_page_addr)
> >>>>     return;
> >>>> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_gart.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_gart.h
> >>>> index afa2e28..5678d9c 100644
> >>>> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_gart.h
> >>>> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_gart.h
> >>>> @@ -61,6 +61,7 @@ int amdgpu_gart_table_vram_pin(struct amdgpu_device *adev);
> >>>>     void amdgpu_gart_table_vram_unpin(struct amdgpu_device *adev);
> >>>>     int amdgpu_gart_init(struct amdgpu_device *adev);
> >>>>     void amdgpu_gart_fini(struct amdgpu_device *adev);
> >>>> +void amdgpu_gart_dummy_page_fini(struct amdgpu_device *adev);
> >>>>     int amdgpu_gart_unbind(struct amdgpu_device *adev, uint64_t offset,
> >>>>           int pages);
> >>>>     int amdgpu_gart_map(struct amdgpu_device *adev, uint64_t offset,
> >>>> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_object.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_object.c
> >>>> index 6cc9919..4a1de69 100644
> >>>> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_object.c
> >>>> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_object.c
> >>>> @@ -94,6 +94,10 @@ static void amdgpu_bo_destroy(struct ttm_buffer_object *tbo)
> >>>>     }
> >>>>     amdgpu_bo_unref(&bo->parent);
> >>>> + spin_lock(&ttm_bo_glob.lru_lock);
> >>>> + list_del(&bo->bo);
> >>>> + spin_unlock(&ttm_bo_glob.lru_lock);
> >>>> +
> >>>>     kfree(bo->metadata);
> >>>>     kfree(bo);
> >>>>     }
> >>>> @@ -613,6 +617,12 @@ static int amdgpu_bo_do_create(struct amdgpu_device *adev,
> >>>>     if (bp->type == ttm_bo_type_device)
> >>>>     bo->flags &= ~AMDGPU_GEM_CREATE_CPU_ACCESS_REQUIRED;
> >>>> + INIT_LIST_HEAD(&bo->bo);
> >>>> +
> >>>> + spin_lock(&ttm_bo_glob.lru_lock);
> >>>> + list_add_tail(&bo->bo, &adev->device_bo_list);
> >>>> + spin_unlock(&ttm_bo_glob.lru_lock);
> >>>> +
> >>>>     return 0;
> >>>>     fail_unreserve:
> >>>> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_object.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_object.h
> >>>> index 9ac3756..5ae8555 100644
> >>>> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_object.h
> >>>> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_object.h
> >>>> @@ -110,6 +110,8 @@ struct amdgpu_bo {
> >>>>     struct list_head shadow_list;
> >>>>     struct kgd_mem                  *kfd_bo;
> >>>> +
> >>>> + struct list_head bo;
> >>>>     };
> >>>>     static inline struct amdgpu_bo *ttm_to_amdgpu_bo(struct ttm_buffer_object *tbo)
> >>>
> >
> >
diff mbox series

Patch

diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu.h b/drivers/gpu/drm/amd/amdgpu/amdgpu.h
index 478a7d8..2953420 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu.h
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu.h
@@ -51,6 +51,7 @@ 
 #include <linux/dma-fence.h>
 #include <linux/pci.h>
 #include <linux/aer.h>
+#include <linux/notifier.h>
 
 #include <drm/ttm/ttm_bo_api.h>
 #include <drm/ttm/ttm_bo_driver.h>
@@ -1041,6 +1042,10 @@  struct amdgpu_device {
 
 	bool                            in_pci_err_recovery;
 	struct pci_saved_state          *pci_state;
+
+	struct notifier_block		nb;
+	struct blocking_notifier_head	notifier;
+	struct list_head		device_bo_list;
 };
 
 static inline struct amdgpu_device *drm_to_adev(struct drm_device *ddev)
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c
index 45e23e3..e99f4f1 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c
@@ -70,6 +70,8 @@ 
 #include <drm/task_barrier.h>
 #include <linux/pm_runtime.h>
 
+#include <linux/iommu.h>
+
 MODULE_FIRMWARE("amdgpu/vega10_gpu_info.bin");
 MODULE_FIRMWARE("amdgpu/vega12_gpu_info.bin");
 MODULE_FIRMWARE("amdgpu/raven_gpu_info.bin");
@@ -3200,6 +3202,39 @@  static const struct attribute *amdgpu_dev_attributes[] = {
 };
 
 
+static int amdgpu_iommu_group_notifier(struct notifier_block *nb,
+				     unsigned long action, void *data)
+{
+	struct amdgpu_device *adev = container_of(nb, struct amdgpu_device, nb);
+	struct amdgpu_bo *bo = NULL;
+
+	/*
+	 * Following is a set of IOMMU group dependencies taken care of before
+	 * device's IOMMU group is removed
+	 */
+	if (action == IOMMU_GROUP_NOTIFY_DEL_DEVICE) {
+
+		spin_lock(&ttm_bo_glob.lru_lock);
+		list_for_each_entry(bo, &adev->device_bo_list, bo) {
+			if (bo->tbo.ttm)
+				ttm_tt_unpopulate(bo->tbo.bdev, bo->tbo.ttm);
+		}
+		spin_unlock(&ttm_bo_glob.lru_lock);
+
+		if (adev->irq.ih.use_bus_addr)
+			amdgpu_ih_ring_fini(adev, &adev->irq.ih);
+		if (adev->irq.ih1.use_bus_addr)
+			amdgpu_ih_ring_fini(adev, &adev->irq.ih1);
+		if (adev->irq.ih2.use_bus_addr)
+			amdgpu_ih_ring_fini(adev, &adev->irq.ih2);
+
+		amdgpu_gart_dummy_page_fini(adev);
+	}
+
+	return NOTIFY_OK;
+}
+
+
 /**
  * amdgpu_device_init - initialize the driver
  *
@@ -3304,6 +3339,8 @@  int amdgpu_device_init(struct amdgpu_device *adev,
 
 	INIT_WORK(&adev->xgmi_reset_work, amdgpu_device_xgmi_reset_func);
 
+	INIT_LIST_HEAD(&adev->device_bo_list);
+
 	adev->gfx.gfx_off_req_count = 1;
 	adev->pm.ac_power = power_supply_is_system_supplied() > 0;
 
@@ -3575,6 +3612,15 @@  int amdgpu_device_init(struct amdgpu_device *adev,
 	if (amdgpu_device_cache_pci_state(adev->pdev))
 		pci_restore_state(pdev);
 
+	BLOCKING_INIT_NOTIFIER_HEAD(&adev->notifier);
+	adev->nb.notifier_call = amdgpu_iommu_group_notifier;
+
+	if (adev->dev->iommu_group) {
+		r = iommu_group_register_notifier(adev->dev->iommu_group, &adev->nb);
+		if (r)
+			goto failed;
+	}
+
 	return 0;
 
 failed:
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_gart.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_gart.c
index 0db9330..486ad6d 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_gart.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_gart.c
@@ -92,7 +92,7 @@  static int amdgpu_gart_dummy_page_init(struct amdgpu_device *adev)
  *
  * Frees the dummy page used by the driver (all asics).
  */
-static void amdgpu_gart_dummy_page_fini(struct amdgpu_device *adev)
+void amdgpu_gart_dummy_page_fini(struct amdgpu_device *adev)
 {
 	if (!adev->dummy_page_addr)
 		return;
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_gart.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_gart.h
index afa2e28..5678d9c 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_gart.h
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_gart.h
@@ -61,6 +61,7 @@  int amdgpu_gart_table_vram_pin(struct amdgpu_device *adev);
 void amdgpu_gart_table_vram_unpin(struct amdgpu_device *adev);
 int amdgpu_gart_init(struct amdgpu_device *adev);
 void amdgpu_gart_fini(struct amdgpu_device *adev);
+void amdgpu_gart_dummy_page_fini(struct amdgpu_device *adev);
 int amdgpu_gart_unbind(struct amdgpu_device *adev, uint64_t offset,
 		       int pages);
 int amdgpu_gart_map(struct amdgpu_device *adev, uint64_t offset,
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_object.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_object.c
index 6cc9919..4a1de69 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_object.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_object.c
@@ -94,6 +94,10 @@  static void amdgpu_bo_destroy(struct ttm_buffer_object *tbo)
 	}
 	amdgpu_bo_unref(&bo->parent);
 
+	spin_lock(&ttm_bo_glob.lru_lock);
+	list_del(&bo->bo);
+	spin_unlock(&ttm_bo_glob.lru_lock);
+
 	kfree(bo->metadata);
 	kfree(bo);
 }
@@ -613,6 +617,12 @@  static int amdgpu_bo_do_create(struct amdgpu_device *adev,
 	if (bp->type == ttm_bo_type_device)
 		bo->flags &= ~AMDGPU_GEM_CREATE_CPU_ACCESS_REQUIRED;
 
+	INIT_LIST_HEAD(&bo->bo);
+
+	spin_lock(&ttm_bo_glob.lru_lock);
+	list_add_tail(&bo->bo, &adev->device_bo_list);
+	spin_unlock(&ttm_bo_glob.lru_lock);
+
 	return 0;
 
 fail_unreserve:
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_object.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_object.h
index 9ac3756..5ae8555 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_object.h
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_object.h
@@ -110,6 +110,8 @@  struct amdgpu_bo {
 	struct list_head		shadow_list;
 
 	struct kgd_mem                  *kfd_bo;
+
+	struct list_head		bo;
 };
 
 static inline struct amdgpu_bo *ttm_to_amdgpu_bo(struct ttm_buffer_object *tbo)