diff mbox series

[v2,4/8] drm/amdgpu: Split amdgpu_device_fini into early and late

Message ID 1592719388-13819-5-git-send-email-andrey.grodzovsky@amd.com (mailing list archive)
State New, archived
Headers show
Series RFC Support hot device unplug in amdgpu | expand

Commit Message

Andrey Grodzovsky June 21, 2020, 6:03 a.m. UTC
Some of the stuff in amdgpu_device_fini such as HW interrupts
disable and pending fences finilization must be done right away on
pci_remove while most of the stuff which relates to finilizing and releasing
driver data structures can be kept until drm_driver.release hook is called, i.e.
when the last device reference is dropped.

Signed-off-by: Andrey Grodzovsky <andrey.grodzovsky@amd.com>
---
 drivers/gpu/drm/amd/amdgpu/amdgpu.h        |  6 +++++-
 drivers/gpu/drm/amd/amdgpu/amdgpu_device.c | 15 +++++++++++----
 drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c    |  6 ++----
 drivers/gpu/drm/amd/amdgpu/amdgpu_irq.c    | 24 +++++++++++++++---------
 drivers/gpu/drm/amd/amdgpu/amdgpu_irq.h    |  1 +
 drivers/gpu/drm/amd/amdgpu/amdgpu_kms.c    | 23 +++++++++++++++++------
 drivers/gpu/drm/amd/amdgpu/amdgpu_ras.c    |  3 +++
 7 files changed, 54 insertions(+), 24 deletions(-)

Comments

Daniel Vetter June 22, 2020, 9:48 a.m. UTC | #1
On Sun, Jun 21, 2020 at 02:03:04AM -0400, Andrey Grodzovsky wrote:
> Some of the stuff in amdgpu_device_fini such as HW interrupts
> disable and pending fences finilization must be done right away on
> pci_remove while most of the stuff which relates to finilizing and releasing
> driver data structures can be kept until drm_driver.release hook is called, i.e.
> when the last device reference is dropped.
> 
> Signed-off-by: Andrey Grodzovsky <andrey.grodzovsky@amd.com>

Long term I think best if as much of this code is converted over to devm
(for hw stuff) and drmm (for sw stuff and allocations). Doing this all
manually is very error prone.

I've started various such patches and others followed, but thus far only
very simple drivers tackled. But it should be doable step by step at
least, so you should have incremental benefits in code complexity right
away I hope.
-Daniel

> ---
>  drivers/gpu/drm/amd/amdgpu/amdgpu.h        |  6 +++++-
>  drivers/gpu/drm/amd/amdgpu/amdgpu_device.c | 15 +++++++++++----
>  drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c    |  6 ++----
>  drivers/gpu/drm/amd/amdgpu/amdgpu_irq.c    | 24 +++++++++++++++---------
>  drivers/gpu/drm/amd/amdgpu/amdgpu_irq.h    |  1 +
>  drivers/gpu/drm/amd/amdgpu/amdgpu_kms.c    | 23 +++++++++++++++++------
>  drivers/gpu/drm/amd/amdgpu/amdgpu_ras.c    |  3 +++
>  7 files changed, 54 insertions(+), 24 deletions(-)
> 
> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu.h b/drivers/gpu/drm/amd/amdgpu/amdgpu.h
> index 2a806cb..604a681 100644
> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu.h
> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu.h
> @@ -1003,7 +1003,9 @@ int amdgpu_device_init(struct amdgpu_device *adev,
>  		       struct drm_device *ddev,
>  		       struct pci_dev *pdev,
>  		       uint32_t flags);
> -void amdgpu_device_fini(struct amdgpu_device *adev);
> +void amdgpu_device_fini_early(struct amdgpu_device *adev);
> +void amdgpu_device_fini_late(struct amdgpu_device *adev);
> +
>  int amdgpu_gpu_wait_for_idle(struct amdgpu_device *adev);
>  
>  void amdgpu_device_vram_access(struct amdgpu_device *adev, loff_t pos,
> @@ -1188,6 +1190,8 @@ void amdgpu_driver_lastclose_kms(struct drm_device *dev);
>  int amdgpu_driver_open_kms(struct drm_device *dev, struct drm_file *file_priv);
>  void amdgpu_driver_postclose_kms(struct drm_device *dev,
>  				 struct drm_file *file_priv);
> +void amdgpu_driver_release_kms(struct drm_device *dev);
> +
>  int amdgpu_device_ip_suspend(struct amdgpu_device *adev);
>  int amdgpu_device_suspend(struct drm_device *dev, bool fbcon);
>  int amdgpu_device_resume(struct drm_device *dev, bool fbcon);
> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c
> index cc41e8f..e7b9065 100644
> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c
> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c
> @@ -2309,6 +2309,8 @@ static int amdgpu_device_ip_fini(struct amdgpu_device *adev)
>  {
>  	int i, r;
>  
> +	//DRM_ERROR("adev 0x%llx", (long long unsigned int)adev);
> +
>  	amdgpu_ras_pre_fini(adev);
>  
>  	if (adev->gmc.xgmi.num_physical_nodes > 1)
> @@ -3304,10 +3306,8 @@ int amdgpu_device_init(struct amdgpu_device *adev,
>   * Tear down the driver info (all asics).
>   * Called at driver shutdown.
>   */
> -void amdgpu_device_fini(struct amdgpu_device *adev)
> +void amdgpu_device_fini_early(struct amdgpu_device *adev)
>  {
> -	int r;
> -
>  	DRM_INFO("amdgpu: finishing device.\n");
>  	flush_delayed_work(&adev->delayed_init_work);
>  	adev->shutdown = true;
> @@ -3330,7 +3330,13 @@ void amdgpu_device_fini(struct amdgpu_device *adev)
>  	if (adev->pm_sysfs_en)
>  		amdgpu_pm_sysfs_fini(adev);
>  	amdgpu_fbdev_fini(adev);
> -	r = amdgpu_device_ip_fini(adev);
> +
> +	amdgpu_irq_fini_early(adev);
> +}
> +
> +void amdgpu_device_fini_late(struct amdgpu_device *adev)
> +{
> +	amdgpu_device_ip_fini(adev);
>  	if (adev->firmware.gpu_info_fw) {
>  		release_firmware(adev->firmware.gpu_info_fw);
>  		adev->firmware.gpu_info_fw = NULL;
> @@ -3368,6 +3374,7 @@ void amdgpu_device_fini(struct amdgpu_device *adev)
>  		amdgpu_pmu_fini(adev);
>  	if (amdgpu_discovery && adev->asic_type >= CHIP_NAVI10)
>  		amdgpu_discovery_fini(adev);
> +
>  }
>  
>  
> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c
> index 9e5afa5..43592dc 100644
> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c
> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c
> @@ -1134,12 +1134,9 @@ amdgpu_pci_remove(struct pci_dev *pdev)
>  {
>  	struct drm_device *dev = pci_get_drvdata(pdev);
>  
> -#ifdef MODULE
> -	if (THIS_MODULE->state != MODULE_STATE_GOING)
> -#endif
> -		DRM_ERROR("Hotplug removal is not supported\n");
>  	drm_dev_unplug(dev);
>  	amdgpu_driver_unload_kms(dev);
> +
>  	pci_disable_device(pdev);
>  	pci_set_drvdata(pdev, NULL);
>  	drm_dev_put(dev);
> @@ -1445,6 +1442,7 @@ static struct drm_driver kms_driver = {
>  	.dumb_create = amdgpu_mode_dumb_create,
>  	.dumb_map_offset = amdgpu_mode_dumb_mmap,
>  	.fops = &amdgpu_driver_kms_fops,
> +	.release = &amdgpu_driver_release_kms,
>  
>  	.prime_handle_to_fd = drm_gem_prime_handle_to_fd,
>  	.prime_fd_to_handle = drm_gem_prime_fd_to_handle,
> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_irq.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_irq.c
> index 0cc4c67..1697655 100644
> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_irq.c
> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_irq.c
> @@ -49,6 +49,7 @@
>  #include <drm/drm_irq.h>
>  #include <drm/drm_vblank.h>
>  #include <drm/amdgpu_drm.h>
> +#include <drm/drm_drv.h>
>  #include "amdgpu.h"
>  #include "amdgpu_ih.h"
>  #include "atom.h"
> @@ -297,6 +298,20 @@ int amdgpu_irq_init(struct amdgpu_device *adev)
>  	return 0;
>  }
>  
> +
> +void amdgpu_irq_fini_early(struct amdgpu_device *adev)
> +{
> +	if (adev->irq.installed) {
> +		drm_irq_uninstall(adev->ddev);
> +		adev->irq.installed = false;
> +		if (adev->irq.msi_enabled)
> +			pci_free_irq_vectors(adev->pdev);
> +
> +		if (!amdgpu_device_has_dc_support(adev))
> +			flush_work(&adev->hotplug_work);
> +	}
> +}
> +
>  /**
>   * amdgpu_irq_fini - shut down interrupt handling
>   *
> @@ -310,15 +325,6 @@ void amdgpu_irq_fini(struct amdgpu_device *adev)
>  {
>  	unsigned i, j;
>  
> -	if (adev->irq.installed) {
> -		drm_irq_uninstall(adev->ddev);
> -		adev->irq.installed = false;
> -		if (adev->irq.msi_enabled)
> -			pci_free_irq_vectors(adev->pdev);
> -		if (!amdgpu_device_has_dc_support(adev))
> -			flush_work(&adev->hotplug_work);
> -	}
> -
>  	for (i = 0; i < AMDGPU_IRQ_CLIENTID_MAX; ++i) {
>  		if (!adev->irq.client[i].sources)
>  			continue;
> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_irq.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_irq.h
> index c718e94..718c70f 100644
> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_irq.h
> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_irq.h
> @@ -104,6 +104,7 @@ irqreturn_t amdgpu_irq_handler(int irq, void *arg);
>  
>  int amdgpu_irq_init(struct amdgpu_device *adev);
>  void amdgpu_irq_fini(struct amdgpu_device *adev);
> +void amdgpu_irq_fini_early(struct amdgpu_device *adev);
>  int amdgpu_irq_add_id(struct amdgpu_device *adev,
>  		      unsigned client_id, unsigned src_id,
>  		      struct amdgpu_irq_src *source);
> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_kms.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_kms.c
> index c0b1904..9d0af22 100644
> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_kms.c
> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_kms.c
> @@ -29,6 +29,7 @@
>  #include "amdgpu.h"
>  #include <drm/drm_debugfs.h>
>  #include <drm/amdgpu_drm.h>
> +#include <drm/drm_drv.h>
>  #include "amdgpu_sched.h"
>  #include "amdgpu_uvd.h"
>  #include "amdgpu_vce.h"
> @@ -86,7 +87,7 @@ void amdgpu_driver_unload_kms(struct drm_device *dev)
>  	amdgpu_unregister_gpu_instance(adev);
>  
>  	if (adev->rmmio == NULL)
> -		goto done_free;
> +		return;
>  
>  	if (adev->runpm) {
>  		pm_runtime_get_sync(dev->dev);
> @@ -95,11 +96,7 @@ void amdgpu_driver_unload_kms(struct drm_device *dev)
>  
>  	amdgpu_acpi_fini(adev);
>  
> -	amdgpu_device_fini(adev);
> -
> -done_free:
> -	kfree(adev);
> -	dev->dev_private = NULL;
> +	amdgpu_device_fini_early(adev);
>  }
>  
>  void amdgpu_register_gpu_instance(struct amdgpu_device *adev)
> @@ -1108,6 +1105,20 @@ void amdgpu_driver_postclose_kms(struct drm_device *dev,
>  	pm_runtime_put_autosuspend(dev->dev);
>  }
>  
> +
> +void amdgpu_driver_release_kms (struct drm_device *dev)
> +{
> +	struct amdgpu_device *adev = dev->dev_private;
> +
> +	amdgpu_device_fini_late(adev);
> +
> +	kfree(adev);
> +	dev->dev_private = NULL;
> +
> +	drm_dev_fini(dev);
> +	kfree(dev);
> +}
> +
>  /*
>   * VBlank related functions.
>   */
> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.c
> index 7348619..169c2239 100644
> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.c
> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.c
> @@ -2056,9 +2056,12 @@ int amdgpu_ras_pre_fini(struct amdgpu_device *adev)
>  {
>  	struct amdgpu_ras *con = amdgpu_ras_get_context(adev);
>  
> +	//DRM_ERROR("adev 0x%llx", (long long unsigned int)adev);
> +
>  	if (!con)
>  		return 0;
>  
> +
>  	/* Need disable ras on all IPs here before ip [hw/sw]fini */
>  	amdgpu_ras_disable_all_features(adev, 0);
>  	amdgpu_ras_recovery_fini(adev);
> -- 
> 2.7.4
>
Andrey Grodzovsky Nov. 12, 2020, 4:19 a.m. UTC | #2
On 6/22/20 5:48 AM, Daniel Vetter wrote:
> On Sun, Jun 21, 2020 at 02:03:04AM -0400, Andrey Grodzovsky wrote:
>> Some of the stuff in amdgpu_device_fini such as HW interrupts
>> disable and pending fences finilization must be done right away on
>> pci_remove while most of the stuff which relates to finilizing and releasing
>> driver data structures can be kept until drm_driver.release hook is called, i.e.
>> when the last device reference is dropped.
>>
>> Signed-off-by: Andrey Grodzovsky <andrey.grodzovsky@amd.com>
> Long term I think best if as much of this code is converted over to devm
> (for hw stuff) and drmm (for sw stuff and allocations). Doing this all
> manually is very error prone.
>
> I've started various such patches and others followed, but thus far only
> very simple drivers tackled. But it should be doable step by step at
> least, so you should have incremental benefits in code complexity right
> away I hope.
> -Daniel


Sure, I will definitely add this to my TODOs for after landing (hopefully) this 
patch set (after a few more iterations)
as indeed the required changes for using devm and drmm are non trivial and I prefer
to avoid diverging here into multiple directions at once.

Andrey


>
>> ---
>>   drivers/gpu/drm/amd/amdgpu/amdgpu.h        |  6 +++++-
>>   drivers/gpu/drm/amd/amdgpu/amdgpu_device.c | 15 +++++++++++----
>>   drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c    |  6 ++----
>>   drivers/gpu/drm/amd/amdgpu/amdgpu_irq.c    | 24 +++++++++++++++---------
>>   drivers/gpu/drm/amd/amdgpu/amdgpu_irq.h    |  1 +
>>   drivers/gpu/drm/amd/amdgpu/amdgpu_kms.c    | 23 +++++++++++++++++------
>>   drivers/gpu/drm/amd/amdgpu/amdgpu_ras.c    |  3 +++
>>   7 files changed, 54 insertions(+), 24 deletions(-)
>>
>> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu.h b/drivers/gpu/drm/amd/amdgpu/amdgpu.h
>> index 2a806cb..604a681 100644
>> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu.h
>> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu.h
>> @@ -1003,7 +1003,9 @@ int amdgpu_device_init(struct amdgpu_device *adev,
>>   		       struct drm_device *ddev,
>>   		       struct pci_dev *pdev,
>>   		       uint32_t flags);
>> -void amdgpu_device_fini(struct amdgpu_device *adev);
>> +void amdgpu_device_fini_early(struct amdgpu_device *adev);
>> +void amdgpu_device_fini_late(struct amdgpu_device *adev);
>> +
>>   int amdgpu_gpu_wait_for_idle(struct amdgpu_device *adev);
>>   
>>   void amdgpu_device_vram_access(struct amdgpu_device *adev, loff_t pos,
>> @@ -1188,6 +1190,8 @@ void amdgpu_driver_lastclose_kms(struct drm_device *dev);
>>   int amdgpu_driver_open_kms(struct drm_device *dev, struct drm_file *file_priv);
>>   void amdgpu_driver_postclose_kms(struct drm_device *dev,
>>   				 struct drm_file *file_priv);
>> +void amdgpu_driver_release_kms(struct drm_device *dev);
>> +
>>   int amdgpu_device_ip_suspend(struct amdgpu_device *adev);
>>   int amdgpu_device_suspend(struct drm_device *dev, bool fbcon);
>>   int amdgpu_device_resume(struct drm_device *dev, bool fbcon);
>> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c
>> index cc41e8f..e7b9065 100644
>> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c
>> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c
>> @@ -2309,6 +2309,8 @@ static int amdgpu_device_ip_fini(struct amdgpu_device *adev)
>>   {
>>   	int i, r;
>>   
>> +	//DRM_ERROR("adev 0x%llx", (long long unsigned int)adev);
>> +
>>   	amdgpu_ras_pre_fini(adev);
>>   
>>   	if (adev->gmc.xgmi.num_physical_nodes > 1)
>> @@ -3304,10 +3306,8 @@ int amdgpu_device_init(struct amdgpu_device *adev,
>>    * Tear down the driver info (all asics).
>>    * Called at driver shutdown.
>>    */
>> -void amdgpu_device_fini(struct amdgpu_device *adev)
>> +void amdgpu_device_fini_early(struct amdgpu_device *adev)
>>   {
>> -	int r;
>> -
>>   	DRM_INFO("amdgpu: finishing device.\n");
>>   	flush_delayed_work(&adev->delayed_init_work);
>>   	adev->shutdown = true;
>> @@ -3330,7 +3330,13 @@ void amdgpu_device_fini(struct amdgpu_device *adev)
>>   	if (adev->pm_sysfs_en)
>>   		amdgpu_pm_sysfs_fini(adev);
>>   	amdgpu_fbdev_fini(adev);
>> -	r = amdgpu_device_ip_fini(adev);
>> +
>> +	amdgpu_irq_fini_early(adev);
>> +}
>> +
>> +void amdgpu_device_fini_late(struct amdgpu_device *adev)
>> +{
>> +	amdgpu_device_ip_fini(adev);
>>   	if (adev->firmware.gpu_info_fw) {
>>   		release_firmware(adev->firmware.gpu_info_fw);
>>   		adev->firmware.gpu_info_fw = NULL;
>> @@ -3368,6 +3374,7 @@ void amdgpu_device_fini(struct amdgpu_device *adev)
>>   		amdgpu_pmu_fini(adev);
>>   	if (amdgpu_discovery && adev->asic_type >= CHIP_NAVI10)
>>   		amdgpu_discovery_fini(adev);
>> +
>>   }
>>   
>>   
>> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c
>> index 9e5afa5..43592dc 100644
>> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c
>> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c
>> @@ -1134,12 +1134,9 @@ amdgpu_pci_remove(struct pci_dev *pdev)
>>   {
>>   	struct drm_device *dev = pci_get_drvdata(pdev);
>>   
>> -#ifdef MODULE
>> -	if (THIS_MODULE->state != MODULE_STATE_GOING)
>> -#endif
>> -		DRM_ERROR("Hotplug removal is not supported\n");
>>   	drm_dev_unplug(dev);
>>   	amdgpu_driver_unload_kms(dev);
>> +
>>   	pci_disable_device(pdev);
>>   	pci_set_drvdata(pdev, NULL);
>>   	drm_dev_put(dev);
>> @@ -1445,6 +1442,7 @@ static struct drm_driver kms_driver = {
>>   	.dumb_create = amdgpu_mode_dumb_create,
>>   	.dumb_map_offset = amdgpu_mode_dumb_mmap,
>>   	.fops = &amdgpu_driver_kms_fops,
>> +	.release = &amdgpu_driver_release_kms,
>>   
>>   	.prime_handle_to_fd = drm_gem_prime_handle_to_fd,
>>   	.prime_fd_to_handle = drm_gem_prime_fd_to_handle,
>> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_irq.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_irq.c
>> index 0cc4c67..1697655 100644
>> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_irq.c
>> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_irq.c
>> @@ -49,6 +49,7 @@
>>   #include <drm/drm_irq.h>
>>   #include <drm/drm_vblank.h>
>>   #include <drm/amdgpu_drm.h>
>> +#include <drm/drm_drv.h>
>>   #include "amdgpu.h"
>>   #include "amdgpu_ih.h"
>>   #include "atom.h"
>> @@ -297,6 +298,20 @@ int amdgpu_irq_init(struct amdgpu_device *adev)
>>   	return 0;
>>   }
>>   
>> +
>> +void amdgpu_irq_fini_early(struct amdgpu_device *adev)
>> +{
>> +	if (adev->irq.installed) {
>> +		drm_irq_uninstall(adev->ddev);
>> +		adev->irq.installed = false;
>> +		if (adev->irq.msi_enabled)
>> +			pci_free_irq_vectors(adev->pdev);
>> +
>> +		if (!amdgpu_device_has_dc_support(adev))
>> +			flush_work(&adev->hotplug_work);
>> +	}
>> +}
>> +
>>   /**
>>    * amdgpu_irq_fini - shut down interrupt handling
>>    *
>> @@ -310,15 +325,6 @@ void amdgpu_irq_fini(struct amdgpu_device *adev)
>>   {
>>   	unsigned i, j;
>>   
>> -	if (adev->irq.installed) {
>> -		drm_irq_uninstall(adev->ddev);
>> -		adev->irq.installed = false;
>> -		if (adev->irq.msi_enabled)
>> -			pci_free_irq_vectors(adev->pdev);
>> -		if (!amdgpu_device_has_dc_support(adev))
>> -			flush_work(&adev->hotplug_work);
>> -	}
>> -
>>   	for (i = 0; i < AMDGPU_IRQ_CLIENTID_MAX; ++i) {
>>   		if (!adev->irq.client[i].sources)
>>   			continue;
>> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_irq.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_irq.h
>> index c718e94..718c70f 100644
>> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_irq.h
>> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_irq.h
>> @@ -104,6 +104,7 @@ irqreturn_t amdgpu_irq_handler(int irq, void *arg);
>>   
>>   int amdgpu_irq_init(struct amdgpu_device *adev);
>>   void amdgpu_irq_fini(struct amdgpu_device *adev);
>> +void amdgpu_irq_fini_early(struct amdgpu_device *adev);
>>   int amdgpu_irq_add_id(struct amdgpu_device *adev,
>>   		      unsigned client_id, unsigned src_id,
>>   		      struct amdgpu_irq_src *source);
>> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_kms.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_kms.c
>> index c0b1904..9d0af22 100644
>> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_kms.c
>> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_kms.c
>> @@ -29,6 +29,7 @@
>>   #include "amdgpu.h"
>>   #include <drm/drm_debugfs.h>
>>   #include <drm/amdgpu_drm.h>
>> +#include <drm/drm_drv.h>
>>   #include "amdgpu_sched.h"
>>   #include "amdgpu_uvd.h"
>>   #include "amdgpu_vce.h"
>> @@ -86,7 +87,7 @@ void amdgpu_driver_unload_kms(struct drm_device *dev)
>>   	amdgpu_unregister_gpu_instance(adev);
>>   
>>   	if (adev->rmmio == NULL)
>> -		goto done_free;
>> +		return;
>>   
>>   	if (adev->runpm) {
>>   		pm_runtime_get_sync(dev->dev);
>> @@ -95,11 +96,7 @@ void amdgpu_driver_unload_kms(struct drm_device *dev)
>>   
>>   	amdgpu_acpi_fini(adev);
>>   
>> -	amdgpu_device_fini(adev);
>> -
>> -done_free:
>> -	kfree(adev);
>> -	dev->dev_private = NULL;
>> +	amdgpu_device_fini_early(adev);
>>   }
>>   
>>   void amdgpu_register_gpu_instance(struct amdgpu_device *adev)
>> @@ -1108,6 +1105,20 @@ void amdgpu_driver_postclose_kms(struct drm_device *dev,
>>   	pm_runtime_put_autosuspend(dev->dev);
>>   }
>>   
>> +
>> +void amdgpu_driver_release_kms (struct drm_device *dev)
>> +{
>> +	struct amdgpu_device *adev = dev->dev_private;
>> +
>> +	amdgpu_device_fini_late(adev);
>> +
>> +	kfree(adev);
>> +	dev->dev_private = NULL;
>> +
>> +	drm_dev_fini(dev);
>> +	kfree(dev);
>> +}
>> +
>>   /*
>>    * VBlank related functions.
>>    */
>> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.c
>> index 7348619..169c2239 100644
>> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.c
>> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.c
>> @@ -2056,9 +2056,12 @@ int amdgpu_ras_pre_fini(struct amdgpu_device *adev)
>>   {
>>   	struct amdgpu_ras *con = amdgpu_ras_get_context(adev);
>>   
>> +	//DRM_ERROR("adev 0x%llx", (long long unsigned int)adev);
>> +
>>   	if (!con)
>>   		return 0;
>>   
>> +
>>   	/* Need disable ras on all IPs here before ip [hw/sw]fini */
>>   	amdgpu_ras_disable_all_features(adev, 0);
>>   	amdgpu_ras_recovery_fini(adev);
>> -- 
>> 2.7.4
>>
Daniel Vetter Nov. 12, 2020, 9:29 a.m. UTC | #3
On Wed, Nov 11, 2020 at 11:19:04PM -0500, Andrey Grodzovsky wrote:
> 
> On 6/22/20 5:48 AM, Daniel Vetter wrote:
> > On Sun, Jun 21, 2020 at 02:03:04AM -0400, Andrey Grodzovsky wrote:
> > > Some of the stuff in amdgpu_device_fini such as HW interrupts
> > > disable and pending fences finilization must be done right away on
> > > pci_remove while most of the stuff which relates to finilizing and releasing
> > > driver data structures can be kept until drm_driver.release hook is called, i.e.
> > > when the last device reference is dropped.
> > > 
> > > Signed-off-by: Andrey Grodzovsky <andrey.grodzovsky@amd.com>
> > Long term I think best if as much of this code is converted over to devm
> > (for hw stuff) and drmm (for sw stuff and allocations). Doing this all
> > manually is very error prone.
> > 
> > I've started various such patches and others followed, but thus far only
> > very simple drivers tackled. But it should be doable step by step at
> > least, so you should have incremental benefits in code complexity right
> > away I hope.
> > -Daniel
> 
> 
> Sure, I will definitely add this to my TODOs for after landing (hopefully)
> this patch set (after a few more iterations)
> as indeed the required changes for using devm and drmm are non trivial and I prefer
> to avoid diverging here into multiple directions at once.

For the display side there's a very nice patch series from Philip Zabel
pending:

https://lore.kernel.org/dri-devel/20200911135724.25833-1-p.zabel@pengutronix.de/

I think you'll want to use this. It's not landed yet, so a nudge from
someone else also using it would help I think.

Cheers, Daniel

> 
> Andrey
> 
> 
> > 
> > > ---
> > >   drivers/gpu/drm/amd/amdgpu/amdgpu.h        |  6 +++++-
> > >   drivers/gpu/drm/amd/amdgpu/amdgpu_device.c | 15 +++++++++++----
> > >   drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c    |  6 ++----
> > >   drivers/gpu/drm/amd/amdgpu/amdgpu_irq.c    | 24 +++++++++++++++---------
> > >   drivers/gpu/drm/amd/amdgpu/amdgpu_irq.h    |  1 +
> > >   drivers/gpu/drm/amd/amdgpu/amdgpu_kms.c    | 23 +++++++++++++++++------
> > >   drivers/gpu/drm/amd/amdgpu/amdgpu_ras.c    |  3 +++
> > >   7 files changed, 54 insertions(+), 24 deletions(-)
> > > 
> > > diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu.h b/drivers/gpu/drm/amd/amdgpu/amdgpu.h
> > > index 2a806cb..604a681 100644
> > > --- a/drivers/gpu/drm/amd/amdgpu/amdgpu.h
> > > +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu.h
> > > @@ -1003,7 +1003,9 @@ int amdgpu_device_init(struct amdgpu_device *adev,
> > >   		       struct drm_device *ddev,
> > >   		       struct pci_dev *pdev,
> > >   		       uint32_t flags);
> > > -void amdgpu_device_fini(struct amdgpu_device *adev);
> > > +void amdgpu_device_fini_early(struct amdgpu_device *adev);
> > > +void amdgpu_device_fini_late(struct amdgpu_device *adev);
> > > +
> > >   int amdgpu_gpu_wait_for_idle(struct amdgpu_device *adev);
> > >   void amdgpu_device_vram_access(struct amdgpu_device *adev, loff_t pos,
> > > @@ -1188,6 +1190,8 @@ void amdgpu_driver_lastclose_kms(struct drm_device *dev);
> > >   int amdgpu_driver_open_kms(struct drm_device *dev, struct drm_file *file_priv);
> > >   void amdgpu_driver_postclose_kms(struct drm_device *dev,
> > >   				 struct drm_file *file_priv);
> > > +void amdgpu_driver_release_kms(struct drm_device *dev);
> > > +
> > >   int amdgpu_device_ip_suspend(struct amdgpu_device *adev);
> > >   int amdgpu_device_suspend(struct drm_device *dev, bool fbcon);
> > >   int amdgpu_device_resume(struct drm_device *dev, bool fbcon);
> > > diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c
> > > index cc41e8f..e7b9065 100644
> > > --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c
> > > +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c
> > > @@ -2309,6 +2309,8 @@ static int amdgpu_device_ip_fini(struct amdgpu_device *adev)
> > >   {
> > >   	int i, r;
> > > +	//DRM_ERROR("adev 0x%llx", (long long unsigned int)adev);
> > > +
> > >   	amdgpu_ras_pre_fini(adev);
> > >   	if (adev->gmc.xgmi.num_physical_nodes > 1)
> > > @@ -3304,10 +3306,8 @@ int amdgpu_device_init(struct amdgpu_device *adev,
> > >    * Tear down the driver info (all asics).
> > >    * Called at driver shutdown.
> > >    */
> > > -void amdgpu_device_fini(struct amdgpu_device *adev)
> > > +void amdgpu_device_fini_early(struct amdgpu_device *adev)
> > >   {
> > > -	int r;
> > > -
> > >   	DRM_INFO("amdgpu: finishing device.\n");
> > >   	flush_delayed_work(&adev->delayed_init_work);
> > >   	adev->shutdown = true;
> > > @@ -3330,7 +3330,13 @@ void amdgpu_device_fini(struct amdgpu_device *adev)
> > >   	if (adev->pm_sysfs_en)
> > >   		amdgpu_pm_sysfs_fini(adev);
> > >   	amdgpu_fbdev_fini(adev);
> > > -	r = amdgpu_device_ip_fini(adev);
> > > +
> > > +	amdgpu_irq_fini_early(adev);
> > > +}
> > > +
> > > +void amdgpu_device_fini_late(struct amdgpu_device *adev)
> > > +{
> > > +	amdgpu_device_ip_fini(adev);
> > >   	if (adev->firmware.gpu_info_fw) {
> > >   		release_firmware(adev->firmware.gpu_info_fw);
> > >   		adev->firmware.gpu_info_fw = NULL;
> > > @@ -3368,6 +3374,7 @@ void amdgpu_device_fini(struct amdgpu_device *adev)
> > >   		amdgpu_pmu_fini(adev);
> > >   	if (amdgpu_discovery && adev->asic_type >= CHIP_NAVI10)
> > >   		amdgpu_discovery_fini(adev);
> > > +
> > >   }
> > > diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c
> > > index 9e5afa5..43592dc 100644
> > > --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c
> > > +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c
> > > @@ -1134,12 +1134,9 @@ amdgpu_pci_remove(struct pci_dev *pdev)
> > >   {
> > >   	struct drm_device *dev = pci_get_drvdata(pdev);
> > > -#ifdef MODULE
> > > -	if (THIS_MODULE->state != MODULE_STATE_GOING)
> > > -#endif
> > > -		DRM_ERROR("Hotplug removal is not supported\n");
> > >   	drm_dev_unplug(dev);
> > >   	amdgpu_driver_unload_kms(dev);
> > > +
> > >   	pci_disable_device(pdev);
> > >   	pci_set_drvdata(pdev, NULL);
> > >   	drm_dev_put(dev);
> > > @@ -1445,6 +1442,7 @@ static struct drm_driver kms_driver = {
> > >   	.dumb_create = amdgpu_mode_dumb_create,
> > >   	.dumb_map_offset = amdgpu_mode_dumb_mmap,
> > >   	.fops = &amdgpu_driver_kms_fops,
> > > +	.release = &amdgpu_driver_release_kms,
> > >   	.prime_handle_to_fd = drm_gem_prime_handle_to_fd,
> > >   	.prime_fd_to_handle = drm_gem_prime_fd_to_handle,
> > > diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_irq.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_irq.c
> > > index 0cc4c67..1697655 100644
> > > --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_irq.c
> > > +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_irq.c
> > > @@ -49,6 +49,7 @@
> > >   #include <drm/drm_irq.h>
> > >   #include <drm/drm_vblank.h>
> > >   #include <drm/amdgpu_drm.h>
> > > +#include <drm/drm_drv.h>
> > >   #include "amdgpu.h"
> > >   #include "amdgpu_ih.h"
> > >   #include "atom.h"
> > > @@ -297,6 +298,20 @@ int amdgpu_irq_init(struct amdgpu_device *adev)
> > >   	return 0;
> > >   }
> > > +
> > > +void amdgpu_irq_fini_early(struct amdgpu_device *adev)
> > > +{
> > > +	if (adev->irq.installed) {
> > > +		drm_irq_uninstall(adev->ddev);
> > > +		adev->irq.installed = false;
> > > +		if (adev->irq.msi_enabled)
> > > +			pci_free_irq_vectors(adev->pdev);
> > > +
> > > +		if (!amdgpu_device_has_dc_support(adev))
> > > +			flush_work(&adev->hotplug_work);
> > > +	}
> > > +}
> > > +
> > >   /**
> > >    * amdgpu_irq_fini - shut down interrupt handling
> > >    *
> > > @@ -310,15 +325,6 @@ void amdgpu_irq_fini(struct amdgpu_device *adev)
> > >   {
> > >   	unsigned i, j;
> > > -	if (adev->irq.installed) {
> > > -		drm_irq_uninstall(adev->ddev);
> > > -		adev->irq.installed = false;
> > > -		if (adev->irq.msi_enabled)
> > > -			pci_free_irq_vectors(adev->pdev);
> > > -		if (!amdgpu_device_has_dc_support(adev))
> > > -			flush_work(&adev->hotplug_work);
> > > -	}
> > > -
> > >   	for (i = 0; i < AMDGPU_IRQ_CLIENTID_MAX; ++i) {
> > >   		if (!adev->irq.client[i].sources)
> > >   			continue;
> > > diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_irq.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_irq.h
> > > index c718e94..718c70f 100644
> > > --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_irq.h
> > > +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_irq.h
> > > @@ -104,6 +104,7 @@ irqreturn_t amdgpu_irq_handler(int irq, void *arg);
> > >   int amdgpu_irq_init(struct amdgpu_device *adev);
> > >   void amdgpu_irq_fini(struct amdgpu_device *adev);
> > > +void amdgpu_irq_fini_early(struct amdgpu_device *adev);
> > >   int amdgpu_irq_add_id(struct amdgpu_device *adev,
> > >   		      unsigned client_id, unsigned src_id,
> > >   		      struct amdgpu_irq_src *source);
> > > diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_kms.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_kms.c
> > > index c0b1904..9d0af22 100644
> > > --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_kms.c
> > > +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_kms.c
> > > @@ -29,6 +29,7 @@
> > >   #include "amdgpu.h"
> > >   #include <drm/drm_debugfs.h>
> > >   #include <drm/amdgpu_drm.h>
> > > +#include <drm/drm_drv.h>
> > >   #include "amdgpu_sched.h"
> > >   #include "amdgpu_uvd.h"
> > >   #include "amdgpu_vce.h"
> > > @@ -86,7 +87,7 @@ void amdgpu_driver_unload_kms(struct drm_device *dev)
> > >   	amdgpu_unregister_gpu_instance(adev);
> > >   	if (adev->rmmio == NULL)
> > > -		goto done_free;
> > > +		return;
> > >   	if (adev->runpm) {
> > >   		pm_runtime_get_sync(dev->dev);
> > > @@ -95,11 +96,7 @@ void amdgpu_driver_unload_kms(struct drm_device *dev)
> > >   	amdgpu_acpi_fini(adev);
> > > -	amdgpu_device_fini(adev);
> > > -
> > > -done_free:
> > > -	kfree(adev);
> > > -	dev->dev_private = NULL;
> > > +	amdgpu_device_fini_early(adev);
> > >   }
> > >   void amdgpu_register_gpu_instance(struct amdgpu_device *adev)
> > > @@ -1108,6 +1105,20 @@ void amdgpu_driver_postclose_kms(struct drm_device *dev,
> > >   	pm_runtime_put_autosuspend(dev->dev);
> > >   }
> > > +
> > > +void amdgpu_driver_release_kms (struct drm_device *dev)
> > > +{
> > > +	struct amdgpu_device *adev = dev->dev_private;
> > > +
> > > +	amdgpu_device_fini_late(adev);
> > > +
> > > +	kfree(adev);
> > > +	dev->dev_private = NULL;
> > > +
> > > +	drm_dev_fini(dev);
> > > +	kfree(dev);
> > > +}
> > > +
> > >   /*
> > >    * VBlank related functions.
> > >    */
> > > diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.c
> > > index 7348619..169c2239 100644
> > > --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.c
> > > +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.c
> > > @@ -2056,9 +2056,12 @@ int amdgpu_ras_pre_fini(struct amdgpu_device *adev)
> > >   {
> > >   	struct amdgpu_ras *con = amdgpu_ras_get_context(adev);
> > > +	//DRM_ERROR("adev 0x%llx", (long long unsigned int)adev);
> > > +
> > >   	if (!con)
> > >   		return 0;
> > > +
> > >   	/* Need disable ras on all IPs here before ip [hw/sw]fini */
> > >   	amdgpu_ras_disable_all_features(adev, 0);
> > >   	amdgpu_ras_recovery_fini(adev);
> > > -- 
> > > 2.7.4
> > >
diff mbox series

Patch

diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu.h b/drivers/gpu/drm/amd/amdgpu/amdgpu.h
index 2a806cb..604a681 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu.h
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu.h
@@ -1003,7 +1003,9 @@  int amdgpu_device_init(struct amdgpu_device *adev,
 		       struct drm_device *ddev,
 		       struct pci_dev *pdev,
 		       uint32_t flags);
-void amdgpu_device_fini(struct amdgpu_device *adev);
+void amdgpu_device_fini_early(struct amdgpu_device *adev);
+void amdgpu_device_fini_late(struct amdgpu_device *adev);
+
 int amdgpu_gpu_wait_for_idle(struct amdgpu_device *adev);
 
 void amdgpu_device_vram_access(struct amdgpu_device *adev, loff_t pos,
@@ -1188,6 +1190,8 @@  void amdgpu_driver_lastclose_kms(struct drm_device *dev);
 int amdgpu_driver_open_kms(struct drm_device *dev, struct drm_file *file_priv);
 void amdgpu_driver_postclose_kms(struct drm_device *dev,
 				 struct drm_file *file_priv);
+void amdgpu_driver_release_kms(struct drm_device *dev);
+
 int amdgpu_device_ip_suspend(struct amdgpu_device *adev);
 int amdgpu_device_suspend(struct drm_device *dev, bool fbcon);
 int amdgpu_device_resume(struct drm_device *dev, bool fbcon);
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c
index cc41e8f..e7b9065 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c
@@ -2309,6 +2309,8 @@  static int amdgpu_device_ip_fini(struct amdgpu_device *adev)
 {
 	int i, r;
 
+	//DRM_ERROR("adev 0x%llx", (long long unsigned int)adev);
+
 	amdgpu_ras_pre_fini(adev);
 
 	if (adev->gmc.xgmi.num_physical_nodes > 1)
@@ -3304,10 +3306,8 @@  int amdgpu_device_init(struct amdgpu_device *adev,
  * Tear down the driver info (all asics).
  * Called at driver shutdown.
  */
-void amdgpu_device_fini(struct amdgpu_device *adev)
+void amdgpu_device_fini_early(struct amdgpu_device *adev)
 {
-	int r;
-
 	DRM_INFO("amdgpu: finishing device.\n");
 	flush_delayed_work(&adev->delayed_init_work);
 	adev->shutdown = true;
@@ -3330,7 +3330,13 @@  void amdgpu_device_fini(struct amdgpu_device *adev)
 	if (adev->pm_sysfs_en)
 		amdgpu_pm_sysfs_fini(adev);
 	amdgpu_fbdev_fini(adev);
-	r = amdgpu_device_ip_fini(adev);
+
+	amdgpu_irq_fini_early(adev);
+}
+
+void amdgpu_device_fini_late(struct amdgpu_device *adev)
+{
+	amdgpu_device_ip_fini(adev);
 	if (adev->firmware.gpu_info_fw) {
 		release_firmware(adev->firmware.gpu_info_fw);
 		adev->firmware.gpu_info_fw = NULL;
@@ -3368,6 +3374,7 @@  void amdgpu_device_fini(struct amdgpu_device *adev)
 		amdgpu_pmu_fini(adev);
 	if (amdgpu_discovery && adev->asic_type >= CHIP_NAVI10)
 		amdgpu_discovery_fini(adev);
+
 }
 
 
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c
index 9e5afa5..43592dc 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c
@@ -1134,12 +1134,9 @@  amdgpu_pci_remove(struct pci_dev *pdev)
 {
 	struct drm_device *dev = pci_get_drvdata(pdev);
 
-#ifdef MODULE
-	if (THIS_MODULE->state != MODULE_STATE_GOING)
-#endif
-		DRM_ERROR("Hotplug removal is not supported\n");
 	drm_dev_unplug(dev);
 	amdgpu_driver_unload_kms(dev);
+
 	pci_disable_device(pdev);
 	pci_set_drvdata(pdev, NULL);
 	drm_dev_put(dev);
@@ -1445,6 +1442,7 @@  static struct drm_driver kms_driver = {
 	.dumb_create = amdgpu_mode_dumb_create,
 	.dumb_map_offset = amdgpu_mode_dumb_mmap,
 	.fops = &amdgpu_driver_kms_fops,
+	.release = &amdgpu_driver_release_kms,
 
 	.prime_handle_to_fd = drm_gem_prime_handle_to_fd,
 	.prime_fd_to_handle = drm_gem_prime_fd_to_handle,
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_irq.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_irq.c
index 0cc4c67..1697655 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_irq.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_irq.c
@@ -49,6 +49,7 @@ 
 #include <drm/drm_irq.h>
 #include <drm/drm_vblank.h>
 #include <drm/amdgpu_drm.h>
+#include <drm/drm_drv.h>
 #include "amdgpu.h"
 #include "amdgpu_ih.h"
 #include "atom.h"
@@ -297,6 +298,20 @@  int amdgpu_irq_init(struct amdgpu_device *adev)
 	return 0;
 }
 
+
+void amdgpu_irq_fini_early(struct amdgpu_device *adev)
+{
+	if (adev->irq.installed) {
+		drm_irq_uninstall(adev->ddev);
+		adev->irq.installed = false;
+		if (adev->irq.msi_enabled)
+			pci_free_irq_vectors(adev->pdev);
+
+		if (!amdgpu_device_has_dc_support(adev))
+			flush_work(&adev->hotplug_work);
+	}
+}
+
 /**
  * amdgpu_irq_fini - shut down interrupt handling
  *
@@ -310,15 +325,6 @@  void amdgpu_irq_fini(struct amdgpu_device *adev)
 {
 	unsigned i, j;
 
-	if (adev->irq.installed) {
-		drm_irq_uninstall(adev->ddev);
-		adev->irq.installed = false;
-		if (adev->irq.msi_enabled)
-			pci_free_irq_vectors(adev->pdev);
-		if (!amdgpu_device_has_dc_support(adev))
-			flush_work(&adev->hotplug_work);
-	}
-
 	for (i = 0; i < AMDGPU_IRQ_CLIENTID_MAX; ++i) {
 		if (!adev->irq.client[i].sources)
 			continue;
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_irq.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_irq.h
index c718e94..718c70f 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_irq.h
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_irq.h
@@ -104,6 +104,7 @@  irqreturn_t amdgpu_irq_handler(int irq, void *arg);
 
 int amdgpu_irq_init(struct amdgpu_device *adev);
 void amdgpu_irq_fini(struct amdgpu_device *adev);
+void amdgpu_irq_fini_early(struct amdgpu_device *adev);
 int amdgpu_irq_add_id(struct amdgpu_device *adev,
 		      unsigned client_id, unsigned src_id,
 		      struct amdgpu_irq_src *source);
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_kms.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_kms.c
index c0b1904..9d0af22 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_kms.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_kms.c
@@ -29,6 +29,7 @@ 
 #include "amdgpu.h"
 #include <drm/drm_debugfs.h>
 #include <drm/amdgpu_drm.h>
+#include <drm/drm_drv.h>
 #include "amdgpu_sched.h"
 #include "amdgpu_uvd.h"
 #include "amdgpu_vce.h"
@@ -86,7 +87,7 @@  void amdgpu_driver_unload_kms(struct drm_device *dev)
 	amdgpu_unregister_gpu_instance(adev);
 
 	if (adev->rmmio == NULL)
-		goto done_free;
+		return;
 
 	if (adev->runpm) {
 		pm_runtime_get_sync(dev->dev);
@@ -95,11 +96,7 @@  void amdgpu_driver_unload_kms(struct drm_device *dev)
 
 	amdgpu_acpi_fini(adev);
 
-	amdgpu_device_fini(adev);
-
-done_free:
-	kfree(adev);
-	dev->dev_private = NULL;
+	amdgpu_device_fini_early(adev);
 }
 
 void amdgpu_register_gpu_instance(struct amdgpu_device *adev)
@@ -1108,6 +1105,20 @@  void amdgpu_driver_postclose_kms(struct drm_device *dev,
 	pm_runtime_put_autosuspend(dev->dev);
 }
 
+
+void amdgpu_driver_release_kms (struct drm_device *dev)
+{
+	struct amdgpu_device *adev = dev->dev_private;
+
+	amdgpu_device_fini_late(adev);
+
+	kfree(adev);
+	dev->dev_private = NULL;
+
+	drm_dev_fini(dev);
+	kfree(dev);
+}
+
 /*
  * VBlank related functions.
  */
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.c
index 7348619..169c2239 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.c
@@ -2056,9 +2056,12 @@  int amdgpu_ras_pre_fini(struct amdgpu_device *adev)
 {
 	struct amdgpu_ras *con = amdgpu_ras_get_context(adev);
 
+	//DRM_ERROR("adev 0x%llx", (long long unsigned int)adev);
+
 	if (!con)
 		return 0;
 
+
 	/* Need disable ras on all IPs here before ip [hw/sw]fini */
 	amdgpu_ras_disable_all_features(adev, 0);
 	amdgpu_ras_recovery_fini(adev);