diff mbox series

drm/amdkfd: rework criu_restore_bos error handling

Message ID 20220218173913.3376948-1-trix@redhat.com (mailing list archive)
State New, archived
Headers show
Series drm/amdkfd: rework criu_restore_bos error handling | expand

Commit Message

Tom Rix Feb. 18, 2022, 5:39 p.m. UTC
From: Tom Rix <trix@redhat.com>

Clang static analysis reports this problem
kfd_chardev.c:2327:2: warning: 1st function call argument
  is an uninitialized value
  kvfree(bo_privs);
  ^~~~~~~~~~~~~~~~

If the copy_from_user(bo_buckets, ...) call fails, there is a jump to
the generic error handler at exit:, which calls kvfree(bo_privs) before
bo_privs has been assigned.  On this early failure path the freeing of
bo_privs and the unwinding of the dmabuf_fd loop do not need to be done.

Add some specific labels for the early failures.
Reorder the frees to be the reverse of their allocs.

Move the initialization of 'i' back to the loop.
The problem with the early frees predates the loop
unwinding problem.

Fixes: 73fa13b6a511 ("drm/amdkfd: CRIU Implement KFD restore ioctl")
Signed-off-by: Tom Rix <trix@redhat.com>
---
 drivers/gpu/drm/amd/amdkfd/kfd_chardev.c | 15 +++++++++------
 1 file changed, 9 insertions(+), 6 deletions(-)

Comments

Felix Kuehling Feb. 18, 2022, 6:35 p.m. UTC | #1
Am 2022-02-18 um 12:39 schrieb trix@redhat.com:
> From: Tom Rix <trix@redhat.com>
>
> Clang static analysis reports this problem
> kfd_chardev.c:2327:2: warning: 1st function call argument
>    is an uninitialized value
>    kvfree(bo_privs);
>    ^~~~~~~~~~~~~~~~
>
> If the copy_from_users(bo_buckets, ...) fails, there is a jump to
> the generic error handler at exit:.  The freeing of bo_privs and
> unwinding of the dmabuf_fd loop do not need to be done.
>
> Add some specific labels for the early failures.
> Reorder the frees to be the reverse of their allocs.
>
> Move the initialize of 'i' back to the loop.
> The problem with the early frees predates the loop
> unwinding problem.

I think the existing error handling strategy in this function is fine. 
Having only one exit label avoids potential issues when using the wrong 
label. Freeing NULL pointers is not a problem. The loop becomes a noop 
if i==0 (this was fixed by you in a previous patch). The only real 
problem I see is that bo_privs is not initialized. So this should really 
be a one-line or maybe two-line fix:

	struct kfd_criu_bo_bucket *bo_buckets = NULL;
	struct kfd_criu_bo_priv_data *bo_privs = NULL;

Regards,
   Felix


>
> Fixes: 73fa13b6a511 ("drm/amdkfd: CRIU Implement KFD restore ioctl")
> Signed-off-by: Tom Rix <trix@redhat.com>
> ---
>   drivers/gpu/drm/amd/amdkfd/kfd_chardev.c | 15 +++++++++------
>   1 file changed, 9 insertions(+), 6 deletions(-)
>
> diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_chardev.c b/drivers/gpu/drm/amd/amdkfd/kfd_chardev.c
> index 965af2a08bc0..1d5f41ac3832 100644
> --- a/drivers/gpu/drm/amd/amdkfd/kfd_chardev.c
> +++ b/drivers/gpu/drm/amd/amdkfd/kfd_chardev.c
> @@ -2102,7 +2102,7 @@ static int criu_restore_bos(struct kfd_process *p,
>   	const bool criu_resume = true;
>   	bool flush_tlbs = false;
>   	int ret = 0, j = 0;
> -	uint32_t i = 0;
> +	uint32_t i;
>   
>   	if (*priv_offset + (args->num_bos * sizeof(*bo_privs)) > max_priv_data_size)
>   		return -EINVAL;
> @@ -2119,13 +2119,13 @@ static int criu_restore_bos(struct kfd_process *p,
>   	if (ret) {
>   		pr_err("Failed to copy BOs information from user\n");
>   		ret = -EFAULT;
> -		goto exit;
> +		goto free_buckets;
>   	}
>   
>   	bo_privs = kvmalloc_array(args->num_bos, sizeof(*bo_privs), GFP_KERNEL);
>   	if (!bo_privs) {
>   		ret = -ENOMEM;
> -		goto exit;
> +		goto free_buckets;
>   	}
>   
>   	ret = copy_from_user(bo_privs, (void __user *)args->priv_data + *priv_offset,
> @@ -2133,12 +2133,12 @@ static int criu_restore_bos(struct kfd_process *p,
>   	if (ret) {
>   		pr_err("Failed to copy BOs information from user\n");
>   		ret = -EFAULT;
> -		goto exit;
> +		goto free_privs;
>   	}
>   	*priv_offset += args->num_bos * sizeof(*bo_privs);
>   
>   	/* Create and map new BOs */
> -	for (; i < args->num_bos; i++) {
> +	for (i = 0; i < args->num_bos; i++) {
>   		struct kfd_criu_bo_bucket *bo_bucket;
>   		struct kfd_criu_bo_priv_data *bo_priv;
>   		struct kfd_dev *dev;
> @@ -2323,8 +2323,11 @@ static int criu_restore_bos(struct kfd_process *p,
>   		if (bo_buckets[i].alloc_flags & KFD_IOC_ALLOC_MEM_FLAGS_VRAM)
>   			close_fd(bo_buckets[i].dmabuf_fd);
>   	}
> -	kvfree(bo_buckets);
> +free_privs:
>   	kvfree(bo_privs);
> +free_buckets:
> +	kvfree(bo_buckets);
> +
>   	return ret;
>   }
>
Tom Rix Feb. 19, 2022, 2:34 a.m. UTC | #2
On 2/18/22 10:35 AM, Felix Kuehling wrote:
> Am 2022-02-18 um 12:39 schrieb trix@redhat.com:
>> From: Tom Rix <trix@redhat.com>
>>
>> Clang static analysis reports this problem
>> kfd_chardev.c:2327:2: warning: 1st function call argument
>>    is an uninitialized value
>>    kvfree(bo_privs);
>>    ^~~~~~~~~~~~~~~~
>>
>> If the copy_from_users(bo_buckets, ...) fails, there is a jump to
>> the generic error handler at exit:.  The freeing of bo_privs and
>> unwinding of the dmabuf_fd loop do not need to be done.
>>
>> Add some specific labels for the early failures.
>> Reorder the frees to be the reverse of their allocs.
>>
>> Move the initialize of 'i' back to the loop.
>> The problem with the early frees predates the loop
>> unwinding problem.
>
> I think the existing error handling strategy in this function is fine. 
> Having only one exit label avoids potential issues when using the 
> wrong label. Freeing NULL pointers is not a problem. The loop becomes 
> a noop if i==0 (this was fixed by you in a previous patch). The only 
> real problem I see is that bo_privs is not initialized. So this should 
> really be a one-line or maybe two-line fix:
>
>     struct kfd_criu_bo_bucket *bo_buckets = NULL;
>     struct kfd_criu_bo_priv_data *bo_privs = NULL;

This is the other way I considered to fix the problem. So it will work.

Tom

>
> Regards,
>   Felix
>
>
>>
>> Fixes: 73fa13b6a511 ("drm/amdkfd: CRIU Implement KFD restore ioctl")
>> Signed-off-by: Tom Rix <trix@redhat.com>
>> ---
>>   drivers/gpu/drm/amd/amdkfd/kfd_chardev.c | 15 +++++++++------
>>   1 file changed, 9 insertions(+), 6 deletions(-)
>>
>> diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_chardev.c 
>> b/drivers/gpu/drm/amd/amdkfd/kfd_chardev.c
>> index 965af2a08bc0..1d5f41ac3832 100644
>> --- a/drivers/gpu/drm/amd/amdkfd/kfd_chardev.c
>> +++ b/drivers/gpu/drm/amd/amdkfd/kfd_chardev.c
>> @@ -2102,7 +2102,7 @@ static int criu_restore_bos(struct kfd_process *p,
>>       const bool criu_resume = true;
>>       bool flush_tlbs = false;
>>       int ret = 0, j = 0;
>> -    uint32_t i = 0;
>> +    uint32_t i;
>>         if (*priv_offset + (args->num_bos * sizeof(*bo_privs)) > 
>> max_priv_data_size)
>>           return -EINVAL;
>> @@ -2119,13 +2119,13 @@ static int criu_restore_bos(struct 
>> kfd_process *p,
>>       if (ret) {
>>           pr_err("Failed to copy BOs information from user\n");
>>           ret = -EFAULT;
>> -        goto exit;
>> +        goto free_buckets;
>>       }
>>         bo_privs = kvmalloc_array(args->num_bos, sizeof(*bo_privs), 
>> GFP_KERNEL);
>>       if (!bo_privs) {
>>           ret = -ENOMEM;
>> -        goto exit;
>> +        goto free_buckets;
>>       }
>>         ret = copy_from_user(bo_privs, (void __user *)args->priv_data 
>> + *priv_offset,
>> @@ -2133,12 +2133,12 @@ static int criu_restore_bos(struct 
>> kfd_process *p,
>>       if (ret) {
>>           pr_err("Failed to copy BOs information from user\n");
>>           ret = -EFAULT;
>> -        goto exit;
>> +        goto free_privs;
>>       }
>>       *priv_offset += args->num_bos * sizeof(*bo_privs);
>>         /* Create and map new BOs */
>> -    for (; i < args->num_bos; i++) {
>> +    for (i = 0; i < args->num_bos; i++) {
>>           struct kfd_criu_bo_bucket *bo_bucket;
>>           struct kfd_criu_bo_priv_data *bo_priv;
>>           struct kfd_dev *dev;
>> @@ -2323,8 +2323,11 @@ static int criu_restore_bos(struct kfd_process 
>> *p,
>>           if (bo_buckets[i].alloc_flags & KFD_IOC_ALLOC_MEM_FLAGS_VRAM)
>>               close_fd(bo_buckets[i].dmabuf_fd);
>>       }
>> -    kvfree(bo_buckets);
>> +free_privs:
>>       kvfree(bo_privs);
>> +free_buckets:
>> +    kvfree(bo_buckets);
>> +
>>       return ret;
>>   }
>
Felix Kuehling Feb. 19, 2022, 3:03 a.m. UTC | #3
Am 2022-02-18 um 21:34 schrieb Tom Rix:
>
> On 2/18/22 10:35 AM, Felix Kuehling wrote:
>> Am 2022-02-18 um 12:39 schrieb trix@redhat.com:
>>> From: Tom Rix <trix@redhat.com>
>>>
>>> Clang static analysis reports this problem
>>> kfd_chardev.c:2327:2: warning: 1st function call argument
>>>    is an uninitialized value
>>>    kvfree(bo_privs);
>>>    ^~~~~~~~~~~~~~~~
>>>
>>> If the copy_from_users(bo_buckets, ...) fails, there is a jump to
>>> the generic error handler at exit:.  The freeing of bo_privs and
>>> unwinding of the dmabuf_fd loop do not need to be done.
>>>
>>> Add some specific labels for the early failures.
>>> Reorder the frees to be the reverse of their allocs.
>>>
>>> Move the initialize of 'i' back to the loop.
>>> The problem with the early frees predates the loop
>>> unwinding problem.
>>
>> I think the existing error handling strategy in this function is 
>> fine. Having only one exit label avoids potential issues when using 
>> the wrong label. Freeing NULL pointers is not a problem. The loop 
>> becomes a noop if i==0 (this was fixed by you in a previous patch). 
>> The only real problem I see is that bo_privs is not initialized. So 
>> this should really be a one-line or maybe two-line fix:
>>
>>     struct kfd_criu_bo_bucket *bo_buckets = NULL;
>>     struct kfd_criu_bo_priv_data *bo_privs = NULL;
>
> This is the other way I considered to fix the problem. So it will work.

OK. I have already submitted this version to amd-staging-drm-next. Thank 
you for reporting the problem.

Regards,
   Felix


>
> Tom
>
>>
>> Regards,
>>   Felix
>>
>>
>>>
>>> Fixes: 73fa13b6a511 ("drm/amdkfd: CRIU Implement KFD restore ioctl")
>>> Signed-off-by: Tom Rix <trix@redhat.com>
>>> ---
>>>   drivers/gpu/drm/amd/amdkfd/kfd_chardev.c | 15 +++++++++------
>>>   1 file changed, 9 insertions(+), 6 deletions(-)
>>>
>>> diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_chardev.c 
>>> b/drivers/gpu/drm/amd/amdkfd/kfd_chardev.c
>>> index 965af2a08bc0..1d5f41ac3832 100644
>>> --- a/drivers/gpu/drm/amd/amdkfd/kfd_chardev.c
>>> +++ b/drivers/gpu/drm/amd/amdkfd/kfd_chardev.c
>>> @@ -2102,7 +2102,7 @@ static int criu_restore_bos(struct kfd_process 
>>> *p,
>>>       const bool criu_resume = true;
>>>       bool flush_tlbs = false;
>>>       int ret = 0, j = 0;
>>> -    uint32_t i = 0;
>>> +    uint32_t i;
>>>         if (*priv_offset + (args->num_bos * sizeof(*bo_privs)) > 
>>> max_priv_data_size)
>>>           return -EINVAL;
>>> @@ -2119,13 +2119,13 @@ static int criu_restore_bos(struct 
>>> kfd_process *p,
>>>       if (ret) {
>>>           pr_err("Failed to copy BOs information from user\n");
>>>           ret = -EFAULT;
>>> -        goto exit;
>>> +        goto free_buckets;
>>>       }
>>>         bo_privs = kvmalloc_array(args->num_bos, sizeof(*bo_privs), 
>>> GFP_KERNEL);
>>>       if (!bo_privs) {
>>>           ret = -ENOMEM;
>>> -        goto exit;
>>> +        goto free_buckets;
>>>       }
>>>         ret = copy_from_user(bo_privs, (void __user 
>>> *)args->priv_data + *priv_offset,
>>> @@ -2133,12 +2133,12 @@ static int criu_restore_bos(struct 
>>> kfd_process *p,
>>>       if (ret) {
>>>           pr_err("Failed to copy BOs information from user\n");
>>>           ret = -EFAULT;
>>> -        goto exit;
>>> +        goto free_privs;
>>>       }
>>>       *priv_offset += args->num_bos * sizeof(*bo_privs);
>>>         /* Create and map new BOs */
>>> -    for (; i < args->num_bos; i++) {
>>> +    for (i = 0; i < args->num_bos; i++) {
>>>           struct kfd_criu_bo_bucket *bo_bucket;
>>>           struct kfd_criu_bo_priv_data *bo_priv;
>>>           struct kfd_dev *dev;
>>> @@ -2323,8 +2323,11 @@ static int criu_restore_bos(struct 
>>> kfd_process *p,
>>>           if (bo_buckets[i].alloc_flags & KFD_IOC_ALLOC_MEM_FLAGS_VRAM)
>>>               close_fd(bo_buckets[i].dmabuf_fd);
>>>       }
>>> -    kvfree(bo_buckets);
>>> +free_privs:
>>>       kvfree(bo_privs);
>>> +free_buckets:
>>> +    kvfree(bo_buckets);
>>> +
>>>       return ret;
>>>   }
>>
>
Christian König Feb. 21, 2022, 7:09 a.m. UTC | #4
Am 18.02.22 um 19:35 schrieb Felix Kuehling:
> Am 2022-02-18 um 12:39 schrieb trix@redhat.com:
>> From: Tom Rix <trix@redhat.com>
>>
>> Clang static analysis reports this problem
>> kfd_chardev.c:2327:2: warning: 1st function call argument
>>    is an uninitialized value
>>    kvfree(bo_privs);
>>    ^~~~~~~~~~~~~~~~
>>
>> If the copy_from_users(bo_buckets, ...) fails, there is a jump to
>> the generic error handler at exit:.  The freeing of bo_privs and
>> unwinding of the dmabuf_fd loop do not need to be done.
>>
>> Add some specific labels for the early failures.
>> Reorder the frees to be the reverse of their allocs.
>>
>> Move the initialize of 'i' back to the loop.
>> The problem with the early frees predates the loop
>> unwinding problem.
>
> I think the existing error handling strategy in this function is fine. 
> Having only one exit label avoids potential issues when using the 
> wrong label. Freeing NULL pointers is not a problem. The loop becomes 
> a noop if i==0 (this was fixed by you in a previous patch). The only 
> real problem I see is that bo_privs is not initialized. So this should 
> really be a one-line or maybe two-line fix:
>
>     struct kfd_criu_bo_bucket *bo_buckets = NULL;
>     struct kfd_criu_bo_priv_data *bo_privs = NULL;

That is usually seen as very bad practice and is strongly
discouraged.

Using multiple error handling labels is preferred because it reflects 
the cleanup procedure you need to do for each step.

In other words one error label and one kfree() for each kmalloc() (or 
other function which can go wrong) is the desired way to go.

Regards,
Christian.

>
> Regards,
>   Felix
>
>
>>
>> Fixes: 73fa13b6a511 ("drm/amdkfd: CRIU Implement KFD restore ioctl")
>> Signed-off-by: Tom Rix <trix@redhat.com>
>> ---
>>   drivers/gpu/drm/amd/amdkfd/kfd_chardev.c | 15 +++++++++------
>>   1 file changed, 9 insertions(+), 6 deletions(-)
>>
>> diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_chardev.c 
>> b/drivers/gpu/drm/amd/amdkfd/kfd_chardev.c
>> index 965af2a08bc0..1d5f41ac3832 100644
>> --- a/drivers/gpu/drm/amd/amdkfd/kfd_chardev.c
>> +++ b/drivers/gpu/drm/amd/amdkfd/kfd_chardev.c
>> @@ -2102,7 +2102,7 @@ static int criu_restore_bos(struct kfd_process *p,
>>       const bool criu_resume = true;
>>       bool flush_tlbs = false;
>>       int ret = 0, j = 0;
>> -    uint32_t i = 0;
>> +    uint32_t i;
>>         if (*priv_offset + (args->num_bos * sizeof(*bo_privs)) > 
>> max_priv_data_size)
>>           return -EINVAL;
>> @@ -2119,13 +2119,13 @@ static int criu_restore_bos(struct 
>> kfd_process *p,
>>       if (ret) {
>>           pr_err("Failed to copy BOs information from user\n");
>>           ret = -EFAULT;
>> -        goto exit;
>> +        goto free_buckets;
>>       }
>>         bo_privs = kvmalloc_array(args->num_bos, sizeof(*bo_privs), 
>> GFP_KERNEL);
>>       if (!bo_privs) {
>>           ret = -ENOMEM;
>> -        goto exit;
>> +        goto free_buckets;
>>       }
>>         ret = copy_from_user(bo_privs, (void __user *)args->priv_data 
>> + *priv_offset,
>> @@ -2133,12 +2133,12 @@ static int criu_restore_bos(struct 
>> kfd_process *p,
>>       if (ret) {
>>           pr_err("Failed to copy BOs information from user\n");
>>           ret = -EFAULT;
>> -        goto exit;
>> +        goto free_privs;
>>       }
>>       *priv_offset += args->num_bos * sizeof(*bo_privs);
>>         /* Create and map new BOs */
>> -    for (; i < args->num_bos; i++) {
>> +    for (i = 0; i < args->num_bos; i++) {
>>           struct kfd_criu_bo_bucket *bo_bucket;
>>           struct kfd_criu_bo_priv_data *bo_priv;
>>           struct kfd_dev *dev;
>> @@ -2323,8 +2323,11 @@ static int criu_restore_bos(struct kfd_process 
>> *p,
>>           if (bo_buckets[i].alloc_flags & KFD_IOC_ALLOC_MEM_FLAGS_VRAM)
>>               close_fd(bo_buckets[i].dmabuf_fd);
>>       }
>> -    kvfree(bo_buckets);
>> +free_privs:
>>       kvfree(bo_privs);
>> +free_buckets:
>> +    kvfree(bo_buckets);
>> +
>>       return ret;
>>   }
diff mbox series

Patch

diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_chardev.c b/drivers/gpu/drm/amd/amdkfd/kfd_chardev.c
index 965af2a08bc0..1d5f41ac3832 100644
--- a/drivers/gpu/drm/amd/amdkfd/kfd_chardev.c
+++ b/drivers/gpu/drm/amd/amdkfd/kfd_chardev.c
@@ -2102,7 +2102,7 @@  static int criu_restore_bos(struct kfd_process *p,
 	const bool criu_resume = true;
 	bool flush_tlbs = false;
 	int ret = 0, j = 0;
-	uint32_t i = 0;
+	uint32_t i;
 
 	if (*priv_offset + (args->num_bos * sizeof(*bo_privs)) > max_priv_data_size)
 		return -EINVAL;
@@ -2119,13 +2119,13 @@  static int criu_restore_bos(struct kfd_process *p,
 	if (ret) {
 		pr_err("Failed to copy BOs information from user\n");
 		ret = -EFAULT;
-		goto exit;
+		goto free_buckets;
 	}
 
 	bo_privs = kvmalloc_array(args->num_bos, sizeof(*bo_privs), GFP_KERNEL);
 	if (!bo_privs) {
 		ret = -ENOMEM;
-		goto exit;
+		goto free_buckets;
 	}
 
 	ret = copy_from_user(bo_privs, (void __user *)args->priv_data + *priv_offset,
@@ -2133,12 +2133,12 @@  static int criu_restore_bos(struct kfd_process *p,
 	if (ret) {
 		pr_err("Failed to copy BOs information from user\n");
 		ret = -EFAULT;
-		goto exit;
+		goto free_privs;
 	}
 	*priv_offset += args->num_bos * sizeof(*bo_privs);
 
 	/* Create and map new BOs */
-	for (; i < args->num_bos; i++) {
+	for (i = 0; i < args->num_bos; i++) {
 		struct kfd_criu_bo_bucket *bo_bucket;
 		struct kfd_criu_bo_priv_data *bo_priv;
 		struct kfd_dev *dev;
@@ -2323,8 +2323,11 @@  static int criu_restore_bos(struct kfd_process *p,
 		if (bo_buckets[i].alloc_flags & KFD_IOC_ALLOC_MEM_FLAGS_VRAM)
 			close_fd(bo_buckets[i].dmabuf_fd);
 	}
-	kvfree(bo_buckets);
+free_privs:
 	kvfree(bo_privs);
+free_buckets:
+	kvfree(bo_buckets);
+
 	return ret;
 }