diff mbox series

[v2] drm/amdgpu: Fix potential double free and null pointer dereference

Message ID 20221122042849.1097369-1-windhl@126.com (mailing list archive)
State New, archived
Headers show
Series [v2] drm/amdgpu: Fix potential double free and null pointer dereference | expand

Commit Message

Liang He Nov. 22, 2022, 4:28 a.m. UTC
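In amdgpu_get_xgmi_hive(), we should not call kfree() after
kobject_put(): dropping the last reference invokes the kobject's
release method, which already frees *hive*, so the extra kfree()
is a double free.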

In amdgpu_device_ip_init(), we need to check whether the *hive*
returned by amdgpu_get_xgmi_hive() is NULL before dereferencing it.

Signed-off-by: Liang He <windhl@126.com>
---
 v1->v2: we need the extra GET to keep *hive* alive; removing
 the GET in v1 was my mistake.


 drivers/gpu/drm/amd/amdgpu/amdgpu_device.c | 5 +++++
 drivers/gpu/drm/amd/amdgpu/amdgpu_xgmi.c   | 2 --
 2 files changed, 5 insertions(+), 2 deletions(-)

Comments

Luben Tuikov Nov. 23, 2022, 12:10 a.m. UTC | #1
amdgpu_xgmi_hive_type does provide a release method which frees the allocated "hive",
so we don't need a kfree() after a kobject_put().

Reviewed-by: Luben Tuikov <luben.tuikov@amd.com>

Regards,
Luben

On 2022-11-21 23:28, Liang He wrote:
> In amdgpu_get_xgmi_hive(), we should not call kfree() after
> kobject_put() as the PUT will call kfree().
> 
> In amdgpu_device_ip_init(), we need to check the returned *hive*
> which can be NULL before we dereference it.
> 
> Signed-off-by: Liang He <windhl@126.com>
> ---
>  v1->v2: we need the extra GET to keep *hive* alive, it is
>  my fault to remove the GET in v1.
> 
> 
>  drivers/gpu/drm/amd/amdgpu/amdgpu_device.c | 5 +++++
>  drivers/gpu/drm/amd/amdgpu/amdgpu_xgmi.c   | 2 --
>  2 files changed, 5 insertions(+), 2 deletions(-)
> 
> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c
> index f1e9663b4051..00976e15b698 100644
> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c
> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c
> @@ -2462,6 +2462,11 @@ static int amdgpu_device_ip_init(struct amdgpu_device *adev)
>  			if (!amdgpu_sriov_vf(adev)) {
>  				struct amdgpu_hive_info *hive = amdgpu_get_xgmi_hive(adev);
>  
> +				if (WARN_ON(!hive)) {
> +					r = -ENOENT;
> +					goto init_failed;
> +				}
> +
>  				if (!hive->reset_domain ||
>  				    !amdgpu_reset_get_reset_domain(hive->reset_domain)) {
>  					r = -ENOENT;
> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_xgmi.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_xgmi.c
> index 47159e9a0884..4b9e7b050ccd 100644
> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_xgmi.c
> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_xgmi.c
> @@ -386,7 +386,6 @@ struct amdgpu_hive_info *amdgpu_get_xgmi_hive(struct amdgpu_device *adev)
>  	if (ret) {
>  		dev_err(adev->dev, "XGMI: failed initializing kobject for xgmi hive\n");
>  		kobject_put(&hive->kobj);
> -		kfree(hive);
>  		hive = NULL;
>  		goto pro_end;
>  	}
> @@ -410,7 +409,6 @@ struct amdgpu_hive_info *amdgpu_get_xgmi_hive(struct amdgpu_device *adev)
>  				dev_err(adev->dev, "XGMI: failed initializing reset domain for xgmi hive\n");
>  				ret = -ENOMEM;
>  				kobject_put(&hive->kobj);
> -				kfree(hive);
>  				hive = NULL;
>  				goto pro_end;
>  			}
Luben Tuikov Nov. 24, 2022, 4:44 p.m. UTC | #2
Applied.

Regards,
Luben

On 2022-11-22 19:10, Luben Tuikov wrote:
> amdgpu_xgmi_hive_type does provide a release method which frees the allocated "hive",
> so we don't need a kfree() after a kobject_put().
> 
> Reviewed-by: Luben Tuikov <luben.tuikov@amd.com>
> 
> Regards,
> Luben
> 
> On 2022-11-21 23:28, Liang He wrote:
>> In amdgpu_get_xgmi_hive(), we should not call kfree() after
>> kobject_put() as the PUT will call kfree().
>>
>> In amdgpu_device_ip_init(), we need to check the returned *hive*
>> which can be NULL before we dereference it.
>>
>> Signed-off-by: Liang He <windhl@126.com>
>> ---
>>  v1->v2: we need the extra GET to keep *hive* alive, it is
>>  my fault to remove the GET in v1.
>>
>>
>>  drivers/gpu/drm/amd/amdgpu/amdgpu_device.c | 5 +++++
>>  drivers/gpu/drm/amd/amdgpu/amdgpu_xgmi.c   | 2 --
>>  2 files changed, 5 insertions(+), 2 deletions(-)
>>
>> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c
>> index f1e9663b4051..00976e15b698 100644
>> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c
>> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c
>> @@ -2462,6 +2462,11 @@ static int amdgpu_device_ip_init(struct amdgpu_device *adev)
>>  			if (!amdgpu_sriov_vf(adev)) {
>>  				struct amdgpu_hive_info *hive = amdgpu_get_xgmi_hive(adev);
>>  
>> +				if (WARN_ON(!hive)) {
>> +					r = -ENOENT;
>> +					goto init_failed;
>> +				}
>> +
>>  				if (!hive->reset_domain ||
>>  				    !amdgpu_reset_get_reset_domain(hive->reset_domain)) {
>>  					r = -ENOENT;
>> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_xgmi.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_xgmi.c
>> index 47159e9a0884..4b9e7b050ccd 100644
>> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_xgmi.c
>> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_xgmi.c
>> @@ -386,7 +386,6 @@ struct amdgpu_hive_info *amdgpu_get_xgmi_hive(struct amdgpu_device *adev)
>>  	if (ret) {
>>  		dev_err(adev->dev, "XGMI: failed initializing kobject for xgmi hive\n");
>>  		kobject_put(&hive->kobj);
>> -		kfree(hive);
>>  		hive = NULL;
>>  		goto pro_end;
>>  	}
>> @@ -410,7 +409,6 @@ struct amdgpu_hive_info *amdgpu_get_xgmi_hive(struct amdgpu_device *adev)
>>  				dev_err(adev->dev, "XGMI: failed initializing reset domain for xgmi hive\n");
>>  				ret = -ENOMEM;
>>  				kobject_put(&hive->kobj);
>> -				kfree(hive);
>>  				hive = NULL;
>>  				goto pro_end;
>>  			}
>
diff mbox series

Patch

diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c
index f1e9663b4051..00976e15b698 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c
@@ -2462,6 +2462,11 @@  static int amdgpu_device_ip_init(struct amdgpu_device *adev)
 			if (!amdgpu_sriov_vf(adev)) {
 				struct amdgpu_hive_info *hive = amdgpu_get_xgmi_hive(adev);
 
+				if (WARN_ON(!hive)) {
+					r = -ENOENT;
+					goto init_failed;
+				}
+
 				if (!hive->reset_domain ||
 				    !amdgpu_reset_get_reset_domain(hive->reset_domain)) {
 					r = -ENOENT;
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_xgmi.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_xgmi.c
index 47159e9a0884..4b9e7b050ccd 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_xgmi.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_xgmi.c
@@ -386,7 +386,6 @@  struct amdgpu_hive_info *amdgpu_get_xgmi_hive(struct amdgpu_device *adev)
 	if (ret) {
 		dev_err(adev->dev, "XGMI: failed initializing kobject for xgmi hive\n");
 		kobject_put(&hive->kobj);
-		kfree(hive);
 		hive = NULL;
 		goto pro_end;
 	}
@@ -410,7 +409,6 @@  struct amdgpu_hive_info *amdgpu_get_xgmi_hive(struct amdgpu_device *adev)
 				dev_err(adev->dev, "XGMI: failed initializing reset domain for xgmi hive\n");
 				ret = -ENOMEM;
 				kobject_put(&hive->kobj);
-				kfree(hive);
 				hive = NULL;
 				goto pro_end;
 			}