Message ID | 20210428151207.1212258-17-andrey.grodzovsky@amd.com (mailing list archive) |
---|---|
State | Superseded |
Headers | show |
Series | RFC Support hot device unplug in amdgpu | expand |
Am 28.04.21 um 17:11 schrieb Andrey Grodzovsky: > Access to those must be prevented post pci_remove That is certainly a no-go. We want to get rid of the kernel pointers in BOs, not add another one. Christian. > > Signed-off-by: Andrey Grodzovsky <andrey.grodzovsky@amd.com> > --- > drivers/gpu/drm/amd/amdgpu/amdgpu.h | 5 +++ > drivers/gpu/drm/amd/amdgpu/amdgpu_device.c | 38 ++++++++++++++++++++-- > drivers/gpu/drm/amd/amdgpu/amdgpu_object.c | 28 ++++++++++++++-- > drivers/gpu/drm/amd/amdgpu/amdgpu_object.h | 5 +++ > 4 files changed, 71 insertions(+), 5 deletions(-) > > diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu.h b/drivers/gpu/drm/amd/amdgpu/amdgpu.h > index 30a24db5f4d1..3e4755fc10c8 100644 > --- a/drivers/gpu/drm/amd/amdgpu/amdgpu.h > +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu.h > @@ -1056,6 +1056,11 @@ struct amdgpu_device { > struct pci_saved_state *pci_state; > > struct list_head device_bo_list; > + > + /* List of all MMIO BOs */ > + struct list_head mmio_list; > + struct mutex mmio_list_lock; > + > }; > > static inline struct amdgpu_device *drm_to_adev(struct drm_device *ddev) > diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c > index 22b09c4db255..3ddad6cba62d 100644 > --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c > +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c > @@ -3320,6 +3320,9 @@ int amdgpu_device_init(struct amdgpu_device *adev, > INIT_LIST_HEAD(&adev->shadow_list); > mutex_init(&adev->shadow_list_lock); > > + INIT_LIST_HEAD(&adev->mmio_list); > + mutex_init(&adev->mmio_list_lock); > + > INIT_DELAYED_WORK(&adev->delayed_init_work, > amdgpu_device_delayed_init_work_handler); > INIT_DELAYED_WORK(&adev->gfx.gfx_off_delay_work, > @@ -3636,6 +3639,36 @@ static void amdgpu_clear_dma_mappings(struct amdgpu_device *adev) > spin_unlock(&adev->mman.bdev.lru_lock); > } > > +static void amdgpu_device_unmap_mmio(struct amdgpu_device *adev) > +{ > + struct amdgpu_bo *bo; > + > + /* Clear all CPU mappings 
pointing to this device */ > + unmap_mapping_range(adev->ddev.anon_inode->i_mapping, 0, 0, 1); > + > + /* Unmap all MMIO mapped kernel BOs */ > + mutex_lock(&adev->mmio_list_lock); > + list_for_each_entry(bo, &adev->mmio_list, mmio_list) { > + amdgpu_bo_kunmap(bo); > + if (*bo->kmap_ptr) > + *bo->kmap_ptr = NULL; > + } > + mutex_unlock(&adev->mmio_list_lock); > + > + /* Unmap all mapped bars - Doorbell, registers and VRAM */ > + amdgpu_device_doorbell_fini(adev); > + > + iounmap(adev->rmmio); > + adev->rmmio = NULL; > + if (adev->mman.aper_base_kaddr) > + iounmap(adev->mman.aper_base_kaddr); > + adev->mman.aper_base_kaddr = NULL; > + > + /* Memory manager related */ > + arch_phys_wc_del(adev->gmc.vram_mtrr); > + arch_io_free_memtype_wc(adev->gmc.aper_base, adev->gmc.aper_size); > +} > + > /** > * amdgpu_device_fini - tear down the driver > * > @@ -3683,6 +3716,8 @@ void amdgpu_device_fini_hw(struct amdgpu_device *adev) > amdgpu_clear_dma_mappings(adev); > > amdgpu_gart_dummy_page_fini(adev); > + > + amdgpu_device_unmap_mmio(adev); > } > > void amdgpu_device_fini_sw(struct amdgpu_device *adev) > @@ -3713,9 +3748,6 @@ void amdgpu_device_fini_sw(struct amdgpu_device *adev) > if (adev->rio_mem) > pci_iounmap(adev->pdev, adev->rio_mem); > adev->rio_mem = NULL; > - iounmap(adev->rmmio); > - adev->rmmio = NULL; > - amdgpu_device_doorbell_fini(adev); > > if (IS_ENABLED(CONFIG_PERF_EVENTS)) > amdgpu_pmu_fini(adev); > diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_object.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_object.c > index 62d829f5e62c..9b05e3b96fa0 100644 > --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_object.c > +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_object.c > @@ -531,6 +531,9 @@ static int amdgpu_bo_do_create(struct amdgpu_device *adev, > return -ENOMEM; > drm_gem_private_object_init(adev_to_drm(adev), &bo->tbo.base, size); > INIT_LIST_HEAD(&bo->shadow_list); > + > + INIT_LIST_HEAD(&bo->mmio_list); > + > bo->vm_bo = NULL; > bo->preferred_domains = bp->preferred_domain ? 
bp->preferred_domain : > bp->domain; > @@ -774,9 +777,21 @@ int amdgpu_bo_kmap(struct amdgpu_bo *bo, void **ptr) > if (r) > return r; > > - if (ptr) > + if (bo->kmap.bo_kmap_type == ttm_bo_map_iomap) { > + struct amdgpu_device *adev = amdgpu_ttm_adev(bo->tbo.bdev); > + > + mutex_lock(&adev->mmio_list_lock); > + list_add_tail(&bo->mmio_list, &adev->mmio_list); > + mutex_unlock(&adev->mmio_list_lock); > + } > + > + if (ptr) { > *ptr = amdgpu_bo_kptr(bo); > > + if (bo->kmap.bo_kmap_type == ttm_bo_map_iomap) > + bo->kmap_ptr = ptr; > + } > + > return 0; > } > > @@ -804,8 +819,17 @@ void *amdgpu_bo_kptr(struct amdgpu_bo *bo) > */ > void amdgpu_bo_kunmap(struct amdgpu_bo *bo) > { > - if (bo->kmap.bo) > + struct amdgpu_device *adev = amdgpu_ttm_adev(bo->tbo.bdev); > + > + if (bo->kmap.bo) { > + if (bo->kmap.bo_kmap_type == ttm_bo_map_iomap) { > + mutex_lock(&adev->mmio_list_lock); > + list_del_init(&bo->mmio_list); > + mutex_unlock(&adev->mmio_list_lock); > + } > + > ttm_bo_kunmap(&bo->kmap); > + } > } > > /** > diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_object.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_object.h > index 5ae8555ef275..3129d9bbfa22 100644 > --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_object.h > +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_object.h > @@ -112,6 +112,11 @@ struct amdgpu_bo { > struct kgd_mem *kfd_bo; > > struct list_head bo; > + > + struct list_head mmio_list; > + /* Address of kernel VA pointer to MMIO so they can be updated post remap */ > + void **kmap_ptr; > + > }; > > static inline struct amdgpu_bo *ttm_to_amdgpu_bo(struct ttm_buffer_object *tbo)
On 2021-04-29 3:19 a.m., Christian König wrote: > Am 28.04.21 um 17:11 schrieb Andrey Grodzovsky: >> Access to those must be prevented post pci_remove > > That is certainly a no-go. We want to get rid of the kernel pointers in > BOs, not add another one. > > Christian. As we discussed internally, will drop the entire explicit BOs unmapping approach as unmapping the VRAM bar alone will give the same results. Andrey > >> >> Signed-off-by: Andrey Grodzovsky <andrey.grodzovsky@amd.com> >> --- >> drivers/gpu/drm/amd/amdgpu/amdgpu.h | 5 +++ >> drivers/gpu/drm/amd/amdgpu/amdgpu_device.c | 38 ++++++++++++++++++++-- >> drivers/gpu/drm/amd/amdgpu/amdgpu_object.c | 28 ++++++++++++++-- >> drivers/gpu/drm/amd/amdgpu/amdgpu_object.h | 5 +++ >> 4 files changed, 71 insertions(+), 5 deletions(-) >> >> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu.h >> b/drivers/gpu/drm/amd/amdgpu/amdgpu.h >> index 30a24db5f4d1..3e4755fc10c8 100644 >> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu.h >> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu.h >> @@ -1056,6 +1056,11 @@ struct amdgpu_device { >> struct pci_saved_state *pci_state; >> struct list_head device_bo_list; >> + >> + /* List of all MMIO BOs */ >> + struct list_head mmio_list; >> + struct mutex mmio_list_lock; >> + >> }; >> static inline struct amdgpu_device *drm_to_adev(struct drm_device >> *ddev) >> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c >> b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c >> index 22b09c4db255..3ddad6cba62d 100644 >> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c >> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c >> @@ -3320,6 +3320,9 @@ int amdgpu_device_init(struct amdgpu_device *adev, >> INIT_LIST_HEAD(&adev->shadow_list); >> mutex_init(&adev->shadow_list_lock); >> + INIT_LIST_HEAD(&adev->mmio_list); >> + mutex_init(&adev->mmio_list_lock); >> + >> INIT_DELAYED_WORK(&adev->delayed_init_work, >> amdgpu_device_delayed_init_work_handler); >> INIT_DELAYED_WORK(&adev->gfx.gfx_off_delay_work, >> @@ -3636,6 +3639,36 @@ 
static void amdgpu_clear_dma_mappings(struct >> amdgpu_device *adev) >> spin_unlock(&adev->mman.bdev.lru_lock); >> } >> +static void amdgpu_device_unmap_mmio(struct amdgpu_device *adev) >> +{ >> + struct amdgpu_bo *bo; >> + >> + /* Clear all CPU mappings pointing to this device */ >> + unmap_mapping_range(adev->ddev.anon_inode->i_mapping, 0, 0, 1); >> + >> + /* Unmap all MMIO mapped kernel BOs */ >> + mutex_lock(&adev->mmio_list_lock); >> + list_for_each_entry(bo, &adev->mmio_list, mmio_list) { >> + amdgpu_bo_kunmap(bo); >> + if (*bo->kmap_ptr) >> + *bo->kmap_ptr = NULL; >> + } >> + mutex_unlock(&adev->mmio_list_lock); >> + >> + /* Unmap all mapped bars - Doorbell, registers and VRAM */ >> + amdgpu_device_doorbell_fini(adev); >> + >> + iounmap(adev->rmmio); >> + adev->rmmio = NULL; >> + if (adev->mman.aper_base_kaddr) >> + iounmap(adev->mman.aper_base_kaddr); >> + adev->mman.aper_base_kaddr = NULL; >> + >> + /* Memory manager related */ >> + arch_phys_wc_del(adev->gmc.vram_mtrr); >> + arch_io_free_memtype_wc(adev->gmc.aper_base, adev->gmc.aper_size); >> +} >> + >> /** >> * amdgpu_device_fini - tear down the driver >> * >> @@ -3683,6 +3716,8 @@ void amdgpu_device_fini_hw(struct amdgpu_device >> *adev) >> amdgpu_clear_dma_mappings(adev); >> amdgpu_gart_dummy_page_fini(adev); >> + >> + amdgpu_device_unmap_mmio(adev); >> } >> void amdgpu_device_fini_sw(struct amdgpu_device *adev) >> @@ -3713,9 +3748,6 @@ void amdgpu_device_fini_sw(struct amdgpu_device >> *adev) >> if (adev->rio_mem) >> pci_iounmap(adev->pdev, adev->rio_mem); >> adev->rio_mem = NULL; >> - iounmap(adev->rmmio); >> - adev->rmmio = NULL; >> - amdgpu_device_doorbell_fini(adev); >> if (IS_ENABLED(CONFIG_PERF_EVENTS)) >> amdgpu_pmu_fini(adev); >> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_object.c >> b/drivers/gpu/drm/amd/amdgpu/amdgpu_object.c >> index 62d829f5e62c..9b05e3b96fa0 100644 >> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_object.c >> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_object.c >> @@ -531,6 
+531,9 @@ static int amdgpu_bo_do_create(struct >> amdgpu_device *adev, >> return -ENOMEM; >> drm_gem_private_object_init(adev_to_drm(adev), &bo->tbo.base, >> size); >> INIT_LIST_HEAD(&bo->shadow_list); >> + >> + INIT_LIST_HEAD(&bo->mmio_list); >> + >> bo->vm_bo = NULL; >> bo->preferred_domains = bp->preferred_domain ? >> bp->preferred_domain : >> bp->domain; >> @@ -774,9 +777,21 @@ int amdgpu_bo_kmap(struct amdgpu_bo *bo, void **ptr) >> if (r) >> return r; >> - if (ptr) >> + if (bo->kmap.bo_kmap_type == ttm_bo_map_iomap) { >> + struct amdgpu_device *adev = amdgpu_ttm_adev(bo->tbo.bdev); >> + >> + mutex_lock(&adev->mmio_list_lock); >> + list_add_tail(&bo->mmio_list, &adev->mmio_list); >> + mutex_unlock(&adev->mmio_list_lock); >> + } >> + >> + if (ptr) { >> *ptr = amdgpu_bo_kptr(bo); >> + if (bo->kmap.bo_kmap_type == ttm_bo_map_iomap) >> + bo->kmap_ptr = ptr; >> + } >> + >> return 0; >> } >> @@ -804,8 +819,17 @@ void *amdgpu_bo_kptr(struct amdgpu_bo *bo) >> */ >> void amdgpu_bo_kunmap(struct amdgpu_bo *bo) >> { >> - if (bo->kmap.bo) >> + struct amdgpu_device *adev = amdgpu_ttm_adev(bo->tbo.bdev); >> + >> + if (bo->kmap.bo) { >> + if (bo->kmap.bo_kmap_type == ttm_bo_map_iomap) { >> + mutex_lock(&adev->mmio_list_lock); >> + list_del_init(&bo->mmio_list); >> + mutex_unlock(&adev->mmio_list_lock); >> + } >> + >> ttm_bo_kunmap(&bo->kmap); >> + } >> } >> /** >> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_object.h >> b/drivers/gpu/drm/amd/amdgpu/amdgpu_object.h >> index 5ae8555ef275..3129d9bbfa22 100644 >> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_object.h >> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_object.h >> @@ -112,6 +112,11 @@ struct amdgpu_bo { >> struct kgd_mem *kfd_bo; >> struct list_head bo; >> + >> + struct list_head mmio_list; >> + /* Address of kernel VA pointer to MMIO so they can be updated >> post remap */ >> + void **kmap_ptr; >> + >> }; >> static inline struct amdgpu_bo *ttm_to_amdgpu_bo(struct >> ttm_buffer_object *tbo) >
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu.h b/drivers/gpu/drm/amd/amdgpu/amdgpu.h index 30a24db5f4d1..3e4755fc10c8 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu.h +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu.h @@ -1056,6 +1056,11 @@ struct amdgpu_device { struct pci_saved_state *pci_state; struct list_head device_bo_list; + + /* List of all MMIO BOs */ + struct list_head mmio_list; + struct mutex mmio_list_lock; + }; static inline struct amdgpu_device *drm_to_adev(struct drm_device *ddev) diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c index 22b09c4db255..3ddad6cba62d 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c @@ -3320,6 +3320,9 @@ int amdgpu_device_init(struct amdgpu_device *adev, INIT_LIST_HEAD(&adev->shadow_list); mutex_init(&adev->shadow_list_lock); + INIT_LIST_HEAD(&adev->mmio_list); + mutex_init(&adev->mmio_list_lock); + INIT_DELAYED_WORK(&adev->delayed_init_work, amdgpu_device_delayed_init_work_handler); INIT_DELAYED_WORK(&adev->gfx.gfx_off_delay_work, @@ -3636,6 +3639,36 @@ static void amdgpu_clear_dma_mappings(struct amdgpu_device *adev) spin_unlock(&adev->mman.bdev.lru_lock); } +static void amdgpu_device_unmap_mmio(struct amdgpu_device *adev) +{ + struct amdgpu_bo *bo; + + /* Clear all CPU mappings pointing to this device */ + unmap_mapping_range(adev->ddev.anon_inode->i_mapping, 0, 0, 1); + + /* Unmap all MMIO mapped kernel BOs */ + mutex_lock(&adev->mmio_list_lock); + list_for_each_entry(bo, &adev->mmio_list, mmio_list) { + amdgpu_bo_kunmap(bo); + if (*bo->kmap_ptr) + *bo->kmap_ptr = NULL; + } + mutex_unlock(&adev->mmio_list_lock); + + /* Unmap all mapped bars - Doorbell, registers and VRAM */ + amdgpu_device_doorbell_fini(adev); + + iounmap(adev->rmmio); + adev->rmmio = NULL; + if (adev->mman.aper_base_kaddr) + iounmap(adev->mman.aper_base_kaddr); + adev->mman.aper_base_kaddr = NULL; + + /* Memory manager related */ + 
arch_phys_wc_del(adev->gmc.vram_mtrr); + arch_io_free_memtype_wc(adev->gmc.aper_base, adev->gmc.aper_size); +} + /** * amdgpu_device_fini - tear down the driver * @@ -3683,6 +3716,8 @@ void amdgpu_device_fini_hw(struct amdgpu_device *adev) amdgpu_clear_dma_mappings(adev); amdgpu_gart_dummy_page_fini(adev); + + amdgpu_device_unmap_mmio(adev); } void amdgpu_device_fini_sw(struct amdgpu_device *adev) @@ -3713,9 +3748,6 @@ void amdgpu_device_fini_sw(struct amdgpu_device *adev) if (adev->rio_mem) pci_iounmap(adev->pdev, adev->rio_mem); adev->rio_mem = NULL; - iounmap(adev->rmmio); - adev->rmmio = NULL; - amdgpu_device_doorbell_fini(adev); if (IS_ENABLED(CONFIG_PERF_EVENTS)) amdgpu_pmu_fini(adev); diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_object.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_object.c index 62d829f5e62c..9b05e3b96fa0 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_object.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_object.c @@ -531,6 +531,9 @@ static int amdgpu_bo_do_create(struct amdgpu_device *adev, return -ENOMEM; drm_gem_private_object_init(adev_to_drm(adev), &bo->tbo.base, size); INIT_LIST_HEAD(&bo->shadow_list); + + INIT_LIST_HEAD(&bo->mmio_list); + bo->vm_bo = NULL; bo->preferred_domains = bp->preferred_domain ? 
bp->preferred_domain : bp->domain; @@ -774,9 +777,21 @@ int amdgpu_bo_kmap(struct amdgpu_bo *bo, void **ptr) if (r) return r; - if (ptr) + if (bo->kmap.bo_kmap_type == ttm_bo_map_iomap) { + struct amdgpu_device *adev = amdgpu_ttm_adev(bo->tbo.bdev); + + mutex_lock(&adev->mmio_list_lock); + list_add_tail(&bo->mmio_list, &adev->mmio_list); + mutex_unlock(&adev->mmio_list_lock); + } + + if (ptr) { *ptr = amdgpu_bo_kptr(bo); + if (bo->kmap.bo_kmap_type == ttm_bo_map_iomap) + bo->kmap_ptr = ptr; + } + return 0; } @@ -804,8 +819,17 @@ void *amdgpu_bo_kptr(struct amdgpu_bo *bo) */ void amdgpu_bo_kunmap(struct amdgpu_bo *bo) { - if (bo->kmap.bo) + struct amdgpu_device *adev = amdgpu_ttm_adev(bo->tbo.bdev); + + if (bo->kmap.bo) { + if (bo->kmap.bo_kmap_type == ttm_bo_map_iomap) { + mutex_lock(&adev->mmio_list_lock); + list_del_init(&bo->mmio_list); + mutex_unlock(&adev->mmio_list_lock); + } + ttm_bo_kunmap(&bo->kmap); + } } /** diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_object.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_object.h index 5ae8555ef275..3129d9bbfa22 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_object.h +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_object.h @@ -112,6 +112,11 @@ struct amdgpu_bo { struct kgd_mem *kfd_bo; struct list_head bo; + + struct list_head mmio_list; + /* Address of kernel VA pointer to MMIO so they can be updated post remap */ + void **kmap_ptr; + }; static inline struct amdgpu_bo *ttm_to_amdgpu_bo(struct ttm_buffer_object *tbo)
Access to the device's MMIO mappings (kernel-mapped MMIO BOs and the doorbell, register and VRAM BARs) must be prevented after pci_remove.

Signed-off-by: Andrey Grodzovsky <andrey.grodzovsky@amd.com>
---
 drivers/gpu/drm/amd/amdgpu/amdgpu.h        |  5 +++
 drivers/gpu/drm/amd/amdgpu/amdgpu_device.c | 38 ++++++++++++++++++++--
 drivers/gpu/drm/amd/amdgpu/amdgpu_object.c | 28 ++++++++++++++--
 drivers/gpu/drm/amd/amdgpu/amdgpu_object.h |  5 +++
 4 files changed, 71 insertions(+), 5 deletions(-)