diff mbox series

[7/7] drm/syncobj: Add a fast path to drm_syncobj_array_find

Message ID 20250318155424.78552-8-tvrtko.ursulin@igalia.com (mailing list archive)
State New, archived
Headers show
Series A few drm_syncobj optimisations | expand

Commit Message

Tvrtko Ursulin March 18, 2025, 3:54 p.m. UTC
Running the Cyberpunk 2077 benchmark we can observe that the lookup helper
is relatively hot, but 97% of the calls are for a single object (~3% are
for two points, and never more than three points), while a more trivial
workload like vkmark under Plasma is even more skewed towards single point
lookups.

Therefore let's add a fast path which bypasses the kmalloc_array/kfree and
uses a pre-allocated stack array for those cases.

Signed-off-by: Tvrtko Ursulin <tvrtko.ursulin@igalia.com>
---
 drivers/gpu/drm/drm_syncobj.c | 53 +++++++++++++++++++++++++++--------
 1 file changed, 41 insertions(+), 12 deletions(-)

Comments

Maíra Canal March 24, 2025, 11:06 p.m. UTC | #1
Hi Tvrtko,

Some nits inline, mostly personal comments. In any case,

Reviewed-by: Maíra Canal <mcanal@igalia.com>


On 18/03/25 12:54, Tvrtko Ursulin wrote:
> Running the Cyberpunk 2077 benchmark we can observe that the lookup helper
> is relatively hot, but the 97% of the calls are for a single object. (~3%
> for two points, and never more than three points. While a more trivial
> workload like vkmark under Plasma is even more skewed to single point
> lookups.)
> 
> Therefore lets add a fast path to bypass the kmalloc_array/kfree and use a
> pre-allocated stack array for those cases.
> 
> Signed-off-by: Tvrtko Ursulin <tvrtko.ursulin@igalia.com>
> ---
>   drivers/gpu/drm/drm_syncobj.c | 53 +++++++++++++++++++++++++++--------
>   1 file changed, 41 insertions(+), 12 deletions(-)
> 
> diff --git a/drivers/gpu/drm/drm_syncobj.c b/drivers/gpu/drm/drm_syncobj.c
> index 94932b89298f..233bdef53c87 100644
> --- a/drivers/gpu/drm/drm_syncobj.c
> +++ b/drivers/gpu/drm/drm_syncobj.c
> @@ -1223,6 +1223,8 @@ EXPORT_SYMBOL(drm_timeout_abs_to_jiffies);
>   static int drm_syncobj_array_find(struct drm_file *file_private,
>   				  u32 __user *handles,
>   				  uint32_t count,
> +				  struct drm_syncobj **stack_syncobjs,
> +				  u32 stack_count,
>   				  struct drm_syncobj ***syncobjs_out)
>   {
>   	struct drm_syncobj **syncobjs;
> @@ -1232,9 +1234,13 @@ static int drm_syncobj_array_find(struct drm_file *file_private,
>   	if (!access_ok(handles, count * sizeof(*handles)))
>   		return -EFAULT;
>   
> -	syncobjs = kmalloc_array(count, sizeof(*syncobjs), GFP_KERNEL);
> -	if (!syncobjs)
> -		return -ENOMEM;
> +	if (count > stack_count) {

I believe it's worth adding a comment mentioning that using the stack
syncobj is a fast-path that covers most cases.

> +		syncobjs = kmalloc_array(count, sizeof(*syncobjs), GFP_KERNEL);
> +		if (!syncobjs)
> +			return -ENOMEM;
> +	} else {
> +		syncobjs = stack_syncobjs;
> +	}
>   
>   	for (i = 0; i < count; i++) {
>   		u64 handle;
> @@ -1260,25 +1266,31 @@ static int drm_syncobj_array_find(struct drm_file *file_private,
>   			drm_syncobj_put(syncobjs[i]);
>   		i--;
>   	}
> -	kfree(syncobjs);
> +
> +	if (syncobjs != stack_syncobjs)

Again, I have a slight preference to make `syncobjs = NULL` and avoid
this if condition. But it's just a personal preference.

> +		kfree(syncobjs);
>   
>   	return ret;
>   }
>   
>   static void drm_syncobj_array_free(struct drm_syncobj **syncobjs,
> -				   uint32_t count)
> +				   uint32_t count,
> +				   struct drm_syncobj **stack_syncobjs)

IMO, I think the order `syncobjs, stack_syncobjs, count` is a bit more
intuitive.

>   {
>   	uint32_t i;
>   
>   	for (i = 0; i < count; i++)
>   		drm_syncobj_put(syncobjs[i]);
> -	kfree(syncobjs);
> +
> +	if (syncobjs != stack_syncobjs)
> +		kfree(syncobjs);
>   }
>   
>   int
>   drm_syncobj_wait_ioctl(struct drm_device *dev, void *data,
>   		       struct drm_file *file_private)
>   {
> +	struct drm_syncobj *stack_syncobjs[4];
>   	struct drm_syncobj_wait *args = data;
>   	ktime_t deadline, *pdeadline = NULL;
>   	u32 count = args->count_handles;
> @@ -1304,6 +1316,8 @@ drm_syncobj_wait_ioctl(struct drm_device *dev, void *data,
>   	ret = drm_syncobj_array_find(file_private,
>   				     u64_to_user_ptr(args->handles),
>   				     count,
> +				     stack_syncobjs,
> +				     ARRAY_SIZE(stack_syncobjs),
>   				     &syncobjs);
>   	if (ret < 0)
>   		return ret;
> @@ -1321,7 +1335,7 @@ drm_syncobj_wait_ioctl(struct drm_device *dev, void *data,
>   						 &first,
>   						 pdeadline);
>   
> -	drm_syncobj_array_free(syncobjs, count);
> +	drm_syncobj_array_free(syncobjs, count, stack_syncobjs);
>   
>   	if (timeout < 0)
>   		return timeout;
> @@ -1336,6 +1350,7 @@ drm_syncobj_timeline_wait_ioctl(struct drm_device *dev, void *data,
>   				struct drm_file *file_private)
>   {
>   	struct drm_syncobj_timeline_wait *args = data;
> +	struct drm_syncobj *stack_syncobjs[4];

Zero initialize it?

Best Regards,
- Maíra

>   	ktime_t deadline, *pdeadline = NULL;
>   	u32 count = args->count_handles;
>   	struct drm_syncobj **syncobjs;
> @@ -1361,6 +1376,8 @@ drm_syncobj_timeline_wait_ioctl(struct drm_device *dev, void *data,
>   	ret = drm_syncobj_array_find(file_private,
>   				     u64_to_user_ptr(args->handles),
>   				     count,
> +				     stack_syncobjs,
> +				     ARRAY_SIZE(stack_syncobjs),
>   				     &syncobjs);
>   	if (ret < 0)
>   		return ret;
> @@ -1378,7 +1395,7 @@ drm_syncobj_timeline_wait_ioctl(struct drm_device *dev, void *data,
>   						 &first,
>   						 pdeadline);
>   
> -	drm_syncobj_array_free(syncobjs, count);
> +	drm_syncobj_array_free(syncobjs, count, stack_syncobjs);
>   
>   	if (timeout < 0)
>   		return timeout;
> @@ -1496,6 +1513,7 @@ drm_syncobj_reset_ioctl(struct drm_device *dev, void *data,
>   			struct drm_file *file_private)
>   {
>   	struct drm_syncobj_array *args = data;
> +	struct drm_syncobj *stack_syncobjs[4];
>   	struct drm_syncobj **syncobjs;
>   	uint32_t i;
>   	int ret;
> @@ -1512,6 +1530,8 @@ drm_syncobj_reset_ioctl(struct drm_device *dev, void *data,
>   	ret = drm_syncobj_array_find(file_private,
>   				     u64_to_user_ptr(args->handles),
>   				     args->count_handles,
> +				     stack_syncobjs,
> +				     ARRAY_SIZE(stack_syncobjs),
>   				     &syncobjs);
>   	if (ret < 0)
>   		return ret;
> @@ -1519,7 +1539,7 @@ drm_syncobj_reset_ioctl(struct drm_device *dev, void *data,
>   	for (i = 0; i < args->count_handles; i++)
>   		drm_syncobj_replace_fence(syncobjs[i], NULL);
>   
> -	drm_syncobj_array_free(syncobjs, args->count_handles);
> +	drm_syncobj_array_free(syncobjs, args->count_handles, stack_syncobjs);
>   
>   	return 0;
>   }
> @@ -1529,6 +1549,7 @@ drm_syncobj_signal_ioctl(struct drm_device *dev, void *data,
>   			 struct drm_file *file_private)
>   {
>   	struct drm_syncobj_array *args = data;
> +	struct drm_syncobj *stack_syncobjs[4];
>   	struct drm_syncobj **syncobjs;
>   	uint32_t i;
>   	int ret;
> @@ -1545,6 +1566,8 @@ drm_syncobj_signal_ioctl(struct drm_device *dev, void *data,
>   	ret = drm_syncobj_array_find(file_private,
>   				     u64_to_user_ptr(args->handles),
>   				     args->count_handles,
> +				     stack_syncobjs,
> +				     ARRAY_SIZE(stack_syncobjs),
>   				     &syncobjs);
>   	if (ret < 0)
>   		return ret;
> @@ -1555,7 +1578,7 @@ drm_syncobj_signal_ioctl(struct drm_device *dev, void *data,
>   			break;
>   	}
>   
> -	drm_syncobj_array_free(syncobjs, args->count_handles);
> +	drm_syncobj_array_free(syncobjs, args->count_handles, stack_syncobjs);
>   
>   	return ret;
>   }
> @@ -1567,6 +1590,7 @@ drm_syncobj_timeline_signal_ioctl(struct drm_device *dev, void *data,
>   	struct drm_syncobj_timeline_array *args = data;
>   	uint64_t __user *points = u64_to_user_ptr(args->points);
>   	uint32_t i, j, count = args->count_handles;
> +	struct drm_syncobj *stack_syncobjs[4];
>   	struct drm_syncobj **syncobjs;
>   	struct dma_fence_chain **chains;
>   	int ret;
> @@ -1586,6 +1610,8 @@ drm_syncobj_timeline_signal_ioctl(struct drm_device *dev, void *data,
>   	ret = drm_syncobj_array_find(file_private,
>   				     u64_to_user_ptr(args->handles),
>   				     count,
> +				     stack_syncobjs,
> +				     ARRAY_SIZE(stack_syncobjs),
>   				     &syncobjs);
>   	if (ret < 0)
>   		return ret;
> @@ -1622,7 +1648,7 @@ drm_syncobj_timeline_signal_ioctl(struct drm_device *dev, void *data,
>   err_chains:
>   	kfree(chains);
>   out:
> -	drm_syncobj_array_free(syncobjs, count);
> +	drm_syncobj_array_free(syncobjs, count, stack_syncobjs);
>   
>   	return ret;
>   }
> @@ -1631,6 +1657,7 @@ int drm_syncobj_query_ioctl(struct drm_device *dev, void *data,
>   			    struct drm_file *file_private)
>   {
>   	struct drm_syncobj_timeline_array *args = data;
> +	struct drm_syncobj *stack_syncobjs[4];
>   	struct drm_syncobj **syncobjs;
>   	uint64_t __user *points = u64_to_user_ptr(args->points);
>   	uint32_t i;
> @@ -1651,6 +1678,8 @@ int drm_syncobj_query_ioctl(struct drm_device *dev, void *data,
>   	ret = drm_syncobj_array_find(file_private,
>   				     u64_to_user_ptr(args->handles),
>   				     args->count_handles,
> +				     stack_syncobjs,
> +				     ARRAY_SIZE(stack_syncobjs),
>   				     &syncobjs);
>   	if (ret < 0)
>   		return ret;
> @@ -1694,7 +1723,7 @@ int drm_syncobj_query_ioctl(struct drm_device *dev, void *data,
>   			break;
>   		}
>   	}
> -	drm_syncobj_array_free(syncobjs, args->count_handles);
> +	drm_syncobj_array_free(syncobjs, args->count_handles, stack_syncobjs);
>   
>   	return ret;
>   }
Tvrtko Ursulin March 25, 2025, 9:54 a.m. UTC | #2
On 24/03/2025 23:06, Maíra Canal wrote:
> Hi Tvrtko,
> 
> Some nits inline, mostly personal comments. In any case,
> 
> Reviewed-by: Maíra Canal <mcanal@igalia.com>
> 
> 
> On 18/03/25 12:54, Tvrtko Ursulin wrote:
>> Running the Cyberpunk 2077 benchmark we can observe that the lookup 
>> helper
>> is relatively hot, but the 97% of the calls are for a single object. (~3%
>> for two points, and never more than three points. While a more trivial
>> workload like vkmark under Plasma is even more skewed to single point
>> lookups.)
>>
>> Therefore lets add a fast path to bypass the kmalloc_array/kfree and 
>> use a
>> pre-allocated stack array for those cases.
>>
>> Signed-off-by: Tvrtko Ursulin <tvrtko.ursulin@igalia.com>
>> ---
>>   drivers/gpu/drm/drm_syncobj.c | 53 +++++++++++++++++++++++++++--------
>>   1 file changed, 41 insertions(+), 12 deletions(-)
>>
>> diff --git a/drivers/gpu/drm/drm_syncobj.c b/drivers/gpu/drm/ 
>> drm_syncobj.c
>> index 94932b89298f..233bdef53c87 100644
>> --- a/drivers/gpu/drm/drm_syncobj.c
>> +++ b/drivers/gpu/drm/drm_syncobj.c
>> @@ -1223,6 +1223,8 @@ EXPORT_SYMBOL(drm_timeout_abs_to_jiffies);
>>   static int drm_syncobj_array_find(struct drm_file *file_private,
>>                     u32 __user *handles,
>>                     uint32_t count,
>> +                  struct drm_syncobj **stack_syncobjs,
>> +                  u32 stack_count,
>>                     struct drm_syncobj ***syncobjs_out)
>>   {
>>       struct drm_syncobj **syncobjs;
>> @@ -1232,9 +1234,13 @@ static int drm_syncobj_array_find(struct 
>> drm_file *file_private,
>>       if (!access_ok(handles, count * sizeof(*handles)))
>>           return -EFAULT;
>> -    syncobjs = kmalloc_array(count, sizeof(*syncobjs), GFP_KERNEL);
>> -    if (!syncobjs)
>> -        return -ENOMEM;
>> +    if (count > stack_count) {
> 
> I believe it's worth adding a comment mentioning that using the stack
> syncobj is a fast-path that covers most cases.

Yep. But this didn't feel like the right place, so I added comments
where the callers size the arrays. That, however, means there are two
duplicated comments. Okay with you?

>> +        syncobjs = kmalloc_array(count, sizeof(*syncobjs), GFP_KERNEL);
>> +        if (!syncobjs)
>> +            return -ENOMEM;
>> +    } else {
>> +        syncobjs = stack_syncobjs;
>> +    }
>>       for (i = 0; i < count; i++) {
>>           u64 handle;
>> @@ -1260,25 +1266,31 @@ static int drm_syncobj_array_find(struct 
>> drm_file *file_private,
>>               drm_syncobj_put(syncobjs[i]);
>>           i--;
>>       }
>> -    kfree(syncobjs);
>> +
>> +    if (syncobjs != stack_syncobjs)
> 
> Again, I have a slight preference to make `syncobjs = NULL` and avoid
> this if condition. But it's just a personal preference.

Pending clarifications from the other patch.

> 
>> +        kfree(syncobjs);
>>       return ret;
>>   }
>>   static void drm_syncobj_array_free(struct drm_syncobj **syncobjs,
>> -                   uint32_t count)
>> +                   uint32_t count,
>> +                   struct drm_syncobj **stack_syncobjs)
> 
> IMO, I think the order `syncobjs, stack_syncobjs, count` is a bit more
> intuitive.

But count is not directly related to the size of the stack array in this 
function. I could make it a boolean perhaps like this:

static void drm_syncobj_array_free(struct drm_syncobj **syncobjs,
				   uint32_t count,
				   bool free_array)

And then in the callers:

drm_syncobj_array_free(syncobjs, count, syncobjs != stack_syncobjs);

Would that be clearer?

> 
>>   {
>>       uint32_t i;
>>       for (i = 0; i < count; i++)
>>           drm_syncobj_put(syncobjs[i]);
>> -    kfree(syncobjs);
>> +
>> +    if (syncobjs != stack_syncobjs)
>> +        kfree(syncobjs);
>>   }
>>   int
>>   drm_syncobj_wait_ioctl(struct drm_device *dev, void *data,
>>                  struct drm_file *file_private)
>>   {
>> +    struct drm_syncobj *stack_syncobjs[4];
>>       struct drm_syncobj_wait *args = data;
>>       ktime_t deadline, *pdeadline = NULL;
>>       u32 count = args->count_handles;
>> @@ -1304,6 +1316,8 @@ drm_syncobj_wait_ioctl(struct drm_device *dev, 
>> void *data,
>>       ret = drm_syncobj_array_find(file_private,
>>                        u64_to_user_ptr(args->handles),
>>                        count,
>> +                     stack_syncobjs,
>> +                     ARRAY_SIZE(stack_syncobjs),
>>                        &syncobjs);
>>       if (ret < 0)
>>           return ret;
>> @@ -1321,7 +1335,7 @@ drm_syncobj_wait_ioctl(struct drm_device *dev, 
>> void *data,
>>                            &first,
>>                            pdeadline);
>> -    drm_syncobj_array_free(syncobjs, count);
>> +    drm_syncobj_array_free(syncobjs, count, stack_syncobjs);
>>       if (timeout < 0)
>>           return timeout;
>> @@ -1336,6 +1350,7 @@ drm_syncobj_timeline_wait_ioctl(struct 
>> drm_device *dev, void *data,
>>                   struct drm_file *file_private)
>>   {
>>       struct drm_syncobj_timeline_wait *args = data;
>> +    struct drm_syncobj *stack_syncobjs[4];
> 
> Zero initialize it?

Do you see it as required?

Regards,

Tvrtko

>>       ktime_t deadline, *pdeadline = NULL;
>>       u32 count = args->count_handles;
>>       struct drm_syncobj **syncobjs;
>> @@ -1361,6 +1376,8 @@ drm_syncobj_timeline_wait_ioctl(struct 
>> drm_device *dev, void *data,
>>       ret = drm_syncobj_array_find(file_private,
>>                        u64_to_user_ptr(args->handles),
>>                        count,
>> +                     stack_syncobjs,
>> +                     ARRAY_SIZE(stack_syncobjs),
>>                        &syncobjs);
>>       if (ret < 0)
>>           return ret;
>> @@ -1378,7 +1395,7 @@ drm_syncobj_timeline_wait_ioctl(struct 
>> drm_device *dev, void *data,
>>                            &first,
>>                            pdeadline);
>> -    drm_syncobj_array_free(syncobjs, count);
>> +    drm_syncobj_array_free(syncobjs, count, stack_syncobjs);
>>       if (timeout < 0)
>>           return timeout;
>> @@ -1496,6 +1513,7 @@ drm_syncobj_reset_ioctl(struct drm_device *dev, 
>> void *data,
>>               struct drm_file *file_private)
>>   {
>>       struct drm_syncobj_array *args = data;
>> +    struct drm_syncobj *stack_syncobjs[4];
>>       struct drm_syncobj **syncobjs;
>>       uint32_t i;
>>       int ret;
>> @@ -1512,6 +1530,8 @@ drm_syncobj_reset_ioctl(struct drm_device *dev, 
>> void *data,
>>       ret = drm_syncobj_array_find(file_private,
>>                        u64_to_user_ptr(args->handles),
>>                        args->count_handles,
>> +                     stack_syncobjs,
>> +                     ARRAY_SIZE(stack_syncobjs),
>>                        &syncobjs);
>>       if (ret < 0)
>>           return ret;
>> @@ -1519,7 +1539,7 @@ drm_syncobj_reset_ioctl(struct drm_device *dev, 
>> void *data,
>>       for (i = 0; i < args->count_handles; i++)
>>           drm_syncobj_replace_fence(syncobjs[i], NULL);
>> -    drm_syncobj_array_free(syncobjs, args->count_handles);
>> +    drm_syncobj_array_free(syncobjs, args->count_handles, 
>> stack_syncobjs);
>>       return 0;
>>   }
>> @@ -1529,6 +1549,7 @@ drm_syncobj_signal_ioctl(struct drm_device *dev, 
>> void *data,
>>                struct drm_file *file_private)
>>   {
>>       struct drm_syncobj_array *args = data;
>> +    struct drm_syncobj *stack_syncobjs[4];
>>       struct drm_syncobj **syncobjs;
>>       uint32_t i;
>>       int ret;
>> @@ -1545,6 +1566,8 @@ drm_syncobj_signal_ioctl(struct drm_device *dev, 
>> void *data,
>>       ret = drm_syncobj_array_find(file_private,
>>                        u64_to_user_ptr(args->handles),
>>                        args->count_handles,
>> +                     stack_syncobjs,
>> +                     ARRAY_SIZE(stack_syncobjs),
>>                        &syncobjs);
>>       if (ret < 0)
>>           return ret;
>> @@ -1555,7 +1578,7 @@ drm_syncobj_signal_ioctl(struct drm_device *dev, 
>> void *data,
>>               break;
>>       }
>> -    drm_syncobj_array_free(syncobjs, args->count_handles);
>> +    drm_syncobj_array_free(syncobjs, args->count_handles, 
>> stack_syncobjs);
>>       return ret;
>>   }
>> @@ -1567,6 +1590,7 @@ drm_syncobj_timeline_signal_ioctl(struct 
>> drm_device *dev, void *data,
>>       struct drm_syncobj_timeline_array *args = data;
>>       uint64_t __user *points = u64_to_user_ptr(args->points);
>>       uint32_t i, j, count = args->count_handles;
>> +    struct drm_syncobj *stack_syncobjs[4];
>>       struct drm_syncobj **syncobjs;
>>       struct dma_fence_chain **chains;
>>       int ret;
>> @@ -1586,6 +1610,8 @@ drm_syncobj_timeline_signal_ioctl(struct 
>> drm_device *dev, void *data,
>>       ret = drm_syncobj_array_find(file_private,
>>                        u64_to_user_ptr(args->handles),
>>                        count,
>> +                     stack_syncobjs,
>> +                     ARRAY_SIZE(stack_syncobjs),
>>                        &syncobjs);
>>       if (ret < 0)
>>           return ret;
>> @@ -1622,7 +1648,7 @@ drm_syncobj_timeline_signal_ioctl(struct 
>> drm_device *dev, void *data,
>>   err_chains:
>>       kfree(chains);
>>   out:
>> -    drm_syncobj_array_free(syncobjs, count);
>> +    drm_syncobj_array_free(syncobjs, count, stack_syncobjs);
>>       return ret;
>>   }
>> @@ -1631,6 +1657,7 @@ int drm_syncobj_query_ioctl(struct drm_device 
>> *dev, void *data,
>>                   struct drm_file *file_private)
>>   {
>>       struct drm_syncobj_timeline_array *args = data;
>> +    struct drm_syncobj *stack_syncobjs[4];
>>       struct drm_syncobj **syncobjs;
>>       uint64_t __user *points = u64_to_user_ptr(args->points);
>>       uint32_t i;
>> @@ -1651,6 +1678,8 @@ int drm_syncobj_query_ioctl(struct drm_device 
>> *dev, void *data,
>>       ret = drm_syncobj_array_find(file_private,
>>                        u64_to_user_ptr(args->handles),
>>                        args->count_handles,
>> +                     stack_syncobjs,
>> +                     ARRAY_SIZE(stack_syncobjs),
>>                        &syncobjs);
>>       if (ret < 0)
>>           return ret;
>> @@ -1694,7 +1723,7 @@ int drm_syncobj_query_ioctl(struct drm_device 
>> *dev, void *data,
>>               break;
>>           }
>>       }
>> -    drm_syncobj_array_free(syncobjs, args->count_handles);
>> +    drm_syncobj_array_free(syncobjs, args->count_handles, 
>> stack_syncobjs);
>>       return ret;
>>   }
>
Maíra Canal March 25, 2025, 8:12 p.m. UTC | #3
Hi Tvrtko,

On 25/03/25 06:54, Tvrtko Ursulin wrote:
> 
> On 24/03/2025 23:06, Maíra Canal wrote:
>> Hi Tvrtko,
>>
>> Some nits inline, mostly personal comments. In any case,
>>
>> Reviewed-by: Maíra Canal <mcanal@igalia.com>
>>
>>
>> On 18/03/25 12:54, Tvrtko Ursulin wrote:
>>> Running the Cyberpunk 2077 benchmark we can observe that the lookup 
>>> helper
>>> is relatively hot, but the 97% of the calls are for a single object. 
>>> (~3%
>>> for two points, and never more than three points. While a more trivial
>>> workload like vkmark under Plasma is even more skewed to single point
>>> lookups.)
>>>
>>> Therefore lets add a fast path to bypass the kmalloc_array/kfree and 
>>> use a
>>> pre-allocated stack array for those cases.
>>>
>>> Signed-off-by: Tvrtko Ursulin <tvrtko.ursulin@igalia.com>
>>> ---
>>>   drivers/gpu/drm/drm_syncobj.c | 53 +++++++++++++++++++++++++++--------
>>>   1 file changed, 41 insertions(+), 12 deletions(-)
>>>
>>> diff --git a/drivers/gpu/drm/drm_syncobj.c b/drivers/gpu/drm/ 
>>> drm_syncobj.c
>>> index 94932b89298f..233bdef53c87 100644
>>> --- a/drivers/gpu/drm/drm_syncobj.c
>>> +++ b/drivers/gpu/drm/drm_syncobj.c
>>> @@ -1223,6 +1223,8 @@ EXPORT_SYMBOL(drm_timeout_abs_to_jiffies);
>>>   static int drm_syncobj_array_find(struct drm_file *file_private,
>>>                     u32 __user *handles,
>>>                     uint32_t count,
>>> +                  struct drm_syncobj **stack_syncobjs,
>>> +                  u32 stack_count,
>>>                     struct drm_syncobj ***syncobjs_out)
>>>   {
>>>       struct drm_syncobj **syncobjs;
>>> @@ -1232,9 +1234,13 @@ static int drm_syncobj_array_find(struct 
>>> drm_file *file_private,
>>>       if (!access_ok(handles, count * sizeof(*handles)))
>>>           return -EFAULT;
>>> -    syncobjs = kmalloc_array(count, sizeof(*syncobjs), GFP_KERNEL);
>>> -    if (!syncobjs)
>>> -        return -ENOMEM;
>>> +    if (count > stack_count) {
>>
>> I believe it's worth adding a comment mentioning that using the stack
>> syncobj is a fast-path that covers most cases.
> 
> Yep. But it didn't feel like here is the place so I added comments to 
> where callers size the arrays. That however means there are two 
> duplicated comments. Okay with you?

Sure.

> 
>>> +        syncobjs = kmalloc_array(count, sizeof(*syncobjs), GFP_KERNEL);
>>> +        if (!syncobjs)
>>> +            return -ENOMEM;
>>> +    } else {
>>> +        syncobjs = stack_syncobjs;
>>> +    }
>>>       for (i = 0; i < count; i++) {
>>>           u64 handle;
>>> @@ -1260,25 +1266,31 @@ static int drm_syncobj_array_find(struct 
>>> drm_file *file_private,
>>>               drm_syncobj_put(syncobjs[i]);
>>>           i--;
>>>       }
>>> -    kfree(syncobjs);
>>> +
>>> +    if (syncobjs != stack_syncobjs)
>>
>> Again, I have a slight preference to make `syncobjs = NULL` and avoid
>> this if condition. But it's just a personal preference.
> 
> Pending clarifications from the other patch.

Nvm, it wasn't a good idea.

> 
>>
>>> +        kfree(syncobjs);
>>>       return ret;
>>>   }
>>>   static void drm_syncobj_array_free(struct drm_syncobj **syncobjs,
>>> -                   uint32_t count)
>>> +                   uint32_t count,
>>> +                   struct drm_syncobj **stack_syncobjs)
>>
>> IMO, I think the order `syncobjs, stack_syncobjs, count` is a bit more
>> intuitive.
> 
> But count is not directly related to the size of the stack array in this 
> function. I could make it a boolean perhaps like this:
>
> static void drm_syncobj_array_free(struct drm_syncobj **syncobjs,
>                     uint32_t count,
>                     bool free_array)
> 
> And then in the callers:
> 
> drm_syncobj_array_free(syncobjs, count, syncobjs != stack_syncobjs);
> 
> Would that be clearer?

Yeah, it would.

> 
>>
>>>   {
>>>       uint32_t i;
>>>       for (i = 0; i < count; i++)
>>>           drm_syncobj_put(syncobjs[i]);
>>> -    kfree(syncobjs);
>>> +
>>> +    if (syncobjs != stack_syncobjs)
>>> +        kfree(syncobjs);
>>>   }
>>>   int
>>>   drm_syncobj_wait_ioctl(struct drm_device *dev, void *data,
>>>                  struct drm_file *file_private)
>>>   {
>>> +    struct drm_syncobj *stack_syncobjs[4];
>>>       struct drm_syncobj_wait *args = data;
>>>       ktime_t deadline, *pdeadline = NULL;
>>>       u32 count = args->count_handles;
>>> @@ -1304,6 +1316,8 @@ drm_syncobj_wait_ioctl(struct drm_device *dev, 
>>> void *data,
>>>       ret = drm_syncobj_array_find(file_private,
>>>                        u64_to_user_ptr(args->handles),
>>>                        count,
>>> +                     stack_syncobjs,
>>> +                     ARRAY_SIZE(stack_syncobjs),
>>>                        &syncobjs);
>>>       if (ret < 0)
>>>           return ret;
>>> @@ -1321,7 +1335,7 @@ drm_syncobj_wait_ioctl(struct drm_device *dev, 
>>> void *data,
>>>                            &first,
>>>                            pdeadline);
>>> -    drm_syncobj_array_free(syncobjs, count);
>>> +    drm_syncobj_array_free(syncobjs, count, stack_syncobjs);
>>>       if (timeout < 0)
>>>           return timeout;
>>> @@ -1336,6 +1350,7 @@ drm_syncobj_timeline_wait_ioctl(struct 
>>> drm_device *dev, void *data,
>>>                   struct drm_file *file_private)
>>>   {
>>>       struct drm_syncobj_timeline_wait *args = data;
>>> +    struct drm_syncobj *stack_syncobjs[4];
>>
>> Zero initialize it?
> 
> Do you see it is required?

Not required; I was just suggesting double-checking that it indeed
isn't needed.

Best Regards,
- Maíra

> 
> Regards,
> 
> Tvrtko
> 
>>>       ktime_t deadline, *pdeadline = NULL;
>>>       u32 count = args->count_handles;
>>>       struct drm_syncobj **syncobjs;
>>> @@ -1361,6 +1376,8 @@ drm_syncobj_timeline_wait_ioctl(struct 
>>> drm_device *dev, void *data,
>>>       ret = drm_syncobj_array_find(file_private,
>>>                        u64_to_user_ptr(args->handles),
>>>                        count,
>>> +                     stack_syncobjs,
>>> +                     ARRAY_SIZE(stack_syncobjs),
>>>                        &syncobjs);
>>>       if (ret < 0)
>>>           return ret;
>>> @@ -1378,7 +1395,7 @@ drm_syncobj_timeline_wait_ioctl(struct 
>>> drm_device *dev, void *data,
>>>                            &first,
>>>                            pdeadline);
>>> -    drm_syncobj_array_free(syncobjs, count);
>>> +    drm_syncobj_array_free(syncobjs, count, stack_syncobjs);
>>>       if (timeout < 0)
>>>           return timeout;
>>> @@ -1496,6 +1513,7 @@ drm_syncobj_reset_ioctl(struct drm_device *dev, 
>>> void *data,
>>>               struct drm_file *file_private)
>>>   {
>>>       struct drm_syncobj_array *args = data;
>>> +    struct drm_syncobj *stack_syncobjs[4];
>>>       struct drm_syncobj **syncobjs;
>>>       uint32_t i;
>>>       int ret;
>>> @@ -1512,6 +1530,8 @@ drm_syncobj_reset_ioctl(struct drm_device *dev, 
>>> void *data,
>>>       ret = drm_syncobj_array_find(file_private,
>>>                        u64_to_user_ptr(args->handles),
>>>                        args->count_handles,
>>> +                     stack_syncobjs,
>>> +                     ARRAY_SIZE(stack_syncobjs),
>>>                        &syncobjs);
>>>       if (ret < 0)
>>>           return ret;
>>> @@ -1519,7 +1539,7 @@ drm_syncobj_reset_ioctl(struct drm_device *dev, 
>>> void *data,
>>>       for (i = 0; i < args->count_handles; i++)
>>>           drm_syncobj_replace_fence(syncobjs[i], NULL);
>>> -    drm_syncobj_array_free(syncobjs, args->count_handles);
>>> +    drm_syncobj_array_free(syncobjs, args->count_handles, 
>>> stack_syncobjs);
>>>       return 0;
>>>   }
>>> @@ -1529,6 +1549,7 @@ drm_syncobj_signal_ioctl(struct drm_device 
>>> *dev, void *data,
>>>                struct drm_file *file_private)
>>>   {
>>>       struct drm_syncobj_array *args = data;
>>> +    struct drm_syncobj *stack_syncobjs[4];
>>>       struct drm_syncobj **syncobjs;
>>>       uint32_t i;
>>>       int ret;
>>> @@ -1545,6 +1566,8 @@ drm_syncobj_signal_ioctl(struct drm_device 
>>> *dev, void *data,
>>>       ret = drm_syncobj_array_find(file_private,
>>>                        u64_to_user_ptr(args->handles),
>>>                        args->count_handles,
>>> +                     stack_syncobjs,
>>> +                     ARRAY_SIZE(stack_syncobjs),
>>>                        &syncobjs);
>>>       if (ret < 0)
>>>           return ret;
>>> @@ -1555,7 +1578,7 @@ drm_syncobj_signal_ioctl(struct drm_device 
>>> *dev, void *data,
>>>               break;
>>>       }
>>> -    drm_syncobj_array_free(syncobjs, args->count_handles);
>>> +    drm_syncobj_array_free(syncobjs, args->count_handles, 
>>> stack_syncobjs);
>>>       return ret;
>>>   }
>>> @@ -1567,6 +1590,7 @@ drm_syncobj_timeline_signal_ioctl(struct 
>>> drm_device *dev, void *data,
>>>       struct drm_syncobj_timeline_array *args = data;
>>>       uint64_t __user *points = u64_to_user_ptr(args->points);
>>>       uint32_t i, j, count = args->count_handles;
>>> +    struct drm_syncobj *stack_syncobjs[4];
>>>       struct drm_syncobj **syncobjs;
>>>       struct dma_fence_chain **chains;
>>>       int ret;
>>> @@ -1586,6 +1610,8 @@ drm_syncobj_timeline_signal_ioctl(struct 
>>> drm_device *dev, void *data,
>>>       ret = drm_syncobj_array_find(file_private,
>>>                        u64_to_user_ptr(args->handles),
>>>                        count,
>>> +                     stack_syncobjs,
>>> +                     ARRAY_SIZE(stack_syncobjs),
>>>                        &syncobjs);
>>>       if (ret < 0)
>>>           return ret;
>>> @@ -1622,7 +1648,7 @@ drm_syncobj_timeline_signal_ioctl(struct 
>>> drm_device *dev, void *data,
>>>   err_chains:
>>>       kfree(chains);
>>>   out:
>>> -    drm_syncobj_array_free(syncobjs, count);
>>> +    drm_syncobj_array_free(syncobjs, count, stack_syncobjs);
>>>       return ret;
>>>   }
>>> @@ -1631,6 +1657,7 @@ int drm_syncobj_query_ioctl(struct drm_device 
>>> *dev, void *data,
>>>                   struct drm_file *file_private)
>>>   {
>>>       struct drm_syncobj_timeline_array *args = data;
>>> +    struct drm_syncobj *stack_syncobjs[4];
>>>       struct drm_syncobj **syncobjs;
>>>       uint64_t __user *points = u64_to_user_ptr(args->points);
>>>       uint32_t i;
>>> @@ -1651,6 +1678,8 @@ int drm_syncobj_query_ioctl(struct drm_device 
>>> *dev, void *data,
>>>       ret = drm_syncobj_array_find(file_private,
>>>                        u64_to_user_ptr(args->handles),
>>>                        args->count_handles,
>>> +                     stack_syncobjs,
>>> +                     ARRAY_SIZE(stack_syncobjs),
>>>                        &syncobjs);
>>>       if (ret < 0)
>>>           return ret;
>>> @@ -1694,7 +1723,7 @@ int drm_syncobj_query_ioctl(struct drm_device 
>>> *dev, void *data,
>>>               break;
>>>           }
>>>       }
>>> -    drm_syncobj_array_free(syncobjs, args->count_handles);
>>> +    drm_syncobj_array_free(syncobjs, args->count_handles, 
>>> stack_syncobjs);
>>>       return ret;
>>>   }
>>
>
diff mbox series

Patch

diff --git a/drivers/gpu/drm/drm_syncobj.c b/drivers/gpu/drm/drm_syncobj.c
index 94932b89298f..233bdef53c87 100644
--- a/drivers/gpu/drm/drm_syncobj.c
+++ b/drivers/gpu/drm/drm_syncobj.c
@@ -1223,6 +1223,8 @@  EXPORT_SYMBOL(drm_timeout_abs_to_jiffies);
 static int drm_syncobj_array_find(struct drm_file *file_private,
 				  u32 __user *handles,
 				  uint32_t count,
+				  struct drm_syncobj **stack_syncobjs,
+				  u32 stack_count,
 				  struct drm_syncobj ***syncobjs_out)
 {
 	struct drm_syncobj **syncobjs;
@@ -1232,9 +1234,13 @@  static int drm_syncobj_array_find(struct drm_file *file_private,
 	if (!access_ok(handles, count * sizeof(*handles)))
 		return -EFAULT;
 
-	syncobjs = kmalloc_array(count, sizeof(*syncobjs), GFP_KERNEL);
-	if (!syncobjs)
-		return -ENOMEM;
+	if (count > stack_count) {
+		syncobjs = kmalloc_array(count, sizeof(*syncobjs), GFP_KERNEL);
+		if (!syncobjs)
+			return -ENOMEM;
+	} else {
+		syncobjs = stack_syncobjs;
+	}
 
 	for (i = 0; i < count; i++) {
 		u64 handle;
@@ -1260,25 +1266,31 @@  static int drm_syncobj_array_find(struct drm_file *file_private,
 			drm_syncobj_put(syncobjs[i]);
 		i--;
 	}
-	kfree(syncobjs);
+
+	if (syncobjs != stack_syncobjs)
+		kfree(syncobjs);
 
 	return ret;
 }
 
 static void drm_syncobj_array_free(struct drm_syncobj **syncobjs,
-				   uint32_t count)
+				   uint32_t count,
+				   struct drm_syncobj **stack_syncobjs)
 {
 	uint32_t i;
 
 	for (i = 0; i < count; i++)
 		drm_syncobj_put(syncobjs[i]);
-	kfree(syncobjs);
+
+	if (syncobjs != stack_syncobjs)
+		kfree(syncobjs);
 }
 
 int
 drm_syncobj_wait_ioctl(struct drm_device *dev, void *data,
 		       struct drm_file *file_private)
 {
+	struct drm_syncobj *stack_syncobjs[4];
 	struct drm_syncobj_wait *args = data;
 	ktime_t deadline, *pdeadline = NULL;
 	u32 count = args->count_handles;
@@ -1304,6 +1316,8 @@  drm_syncobj_wait_ioctl(struct drm_device *dev, void *data,
 	ret = drm_syncobj_array_find(file_private,
 				     u64_to_user_ptr(args->handles),
 				     count,
+				     stack_syncobjs,
+				     ARRAY_SIZE(stack_syncobjs),
 				     &syncobjs);
 	if (ret < 0)
 		return ret;
@@ -1321,7 +1335,7 @@  drm_syncobj_wait_ioctl(struct drm_device *dev, void *data,
 						 &first,
 						 pdeadline);
 
-	drm_syncobj_array_free(syncobjs, count);
+	drm_syncobj_array_free(syncobjs, count, stack_syncobjs);
 
 	if (timeout < 0)
 		return timeout;
@@ -1336,6 +1350,7 @@  drm_syncobj_timeline_wait_ioctl(struct drm_device *dev, void *data,
 				struct drm_file *file_private)
 {
 	struct drm_syncobj_timeline_wait *args = data;
+	struct drm_syncobj *stack_syncobjs[4];
 	ktime_t deadline, *pdeadline = NULL;
 	u32 count = args->count_handles;
 	struct drm_syncobj **syncobjs;
@@ -1361,6 +1376,8 @@  drm_syncobj_timeline_wait_ioctl(struct drm_device *dev, void *data,
 	ret = drm_syncobj_array_find(file_private,
 				     u64_to_user_ptr(args->handles),
 				     count,
+				     stack_syncobjs,
+				     ARRAY_SIZE(stack_syncobjs),
 				     &syncobjs);
 	if (ret < 0)
 		return ret;
@@ -1378,7 +1395,7 @@  drm_syncobj_timeline_wait_ioctl(struct drm_device *dev, void *data,
 						 &first,
 						 pdeadline);
 
-	drm_syncobj_array_free(syncobjs, count);
+	drm_syncobj_array_free(syncobjs, count, stack_syncobjs);
 
 	if (timeout < 0)
 		return timeout;
@@ -1496,6 +1513,7 @@  drm_syncobj_reset_ioctl(struct drm_device *dev, void *data,
 			struct drm_file *file_private)
 {
 	struct drm_syncobj_array *args = data;
+	struct drm_syncobj *stack_syncobjs[4];
 	struct drm_syncobj **syncobjs;
 	uint32_t i;
 	int ret;
@@ -1512,6 +1530,8 @@  drm_syncobj_reset_ioctl(struct drm_device *dev, void *data,
 	ret = drm_syncobj_array_find(file_private,
 				     u64_to_user_ptr(args->handles),
 				     args->count_handles,
+				     stack_syncobjs,
+				     ARRAY_SIZE(stack_syncobjs),
 				     &syncobjs);
 	if (ret < 0)
 		return ret;
@@ -1519,7 +1539,7 @@  drm_syncobj_reset_ioctl(struct drm_device *dev, void *data,
 	for (i = 0; i < args->count_handles; i++)
 		drm_syncobj_replace_fence(syncobjs[i], NULL);
 
-	drm_syncobj_array_free(syncobjs, args->count_handles);
+	drm_syncobj_array_free(syncobjs, args->count_handles, stack_syncobjs);
 
 	return 0;
 }
@@ -1529,6 +1549,7 @@  drm_syncobj_signal_ioctl(struct drm_device *dev, void *data,
 			 struct drm_file *file_private)
 {
 	struct drm_syncobj_array *args = data;
+	struct drm_syncobj *stack_syncobjs[4];
 	struct drm_syncobj **syncobjs;
 	uint32_t i;
 	int ret;
@@ -1545,6 +1566,8 @@  drm_syncobj_signal_ioctl(struct drm_device *dev, void *data,
 	ret = drm_syncobj_array_find(file_private,
 				     u64_to_user_ptr(args->handles),
 				     args->count_handles,
+				     stack_syncobjs,
+				     ARRAY_SIZE(stack_syncobjs),
 				     &syncobjs);
 	if (ret < 0)
 		return ret;
@@ -1555,7 +1578,7 @@  drm_syncobj_signal_ioctl(struct drm_device *dev, void *data,
 			break;
 	}
 
-	drm_syncobj_array_free(syncobjs, args->count_handles);
+	drm_syncobj_array_free(syncobjs, args->count_handles, stack_syncobjs);
 
 	return ret;
 }
@@ -1567,6 +1590,7 @@  drm_syncobj_timeline_signal_ioctl(struct drm_device *dev, void *data,
 	struct drm_syncobj_timeline_array *args = data;
 	uint64_t __user *points = u64_to_user_ptr(args->points);
 	uint32_t i, j, count = args->count_handles;
+	struct drm_syncobj *stack_syncobjs[4];
 	struct drm_syncobj **syncobjs;
 	struct dma_fence_chain **chains;
 	int ret;
@@ -1586,6 +1610,8 @@  drm_syncobj_timeline_signal_ioctl(struct drm_device *dev, void *data,
 	ret = drm_syncobj_array_find(file_private,
 				     u64_to_user_ptr(args->handles),
 				     count,
+				     stack_syncobjs,
+				     ARRAY_SIZE(stack_syncobjs),
 				     &syncobjs);
 	if (ret < 0)
 		return ret;
@@ -1622,7 +1648,7 @@  drm_syncobj_timeline_signal_ioctl(struct drm_device *dev, void *data,
 err_chains:
 	kfree(chains);
 out:
-	drm_syncobj_array_free(syncobjs, count);
+	drm_syncobj_array_free(syncobjs, count, stack_syncobjs);
 
 	return ret;
 }
@@ -1631,6 +1657,7 @@  int drm_syncobj_query_ioctl(struct drm_device *dev, void *data,
 			    struct drm_file *file_private)
 {
 	struct drm_syncobj_timeline_array *args = data;
+	struct drm_syncobj *stack_syncobjs[4];
 	struct drm_syncobj **syncobjs;
 	uint64_t __user *points = u64_to_user_ptr(args->points);
 	uint32_t i;
@@ -1651,6 +1678,8 @@  int drm_syncobj_query_ioctl(struct drm_device *dev, void *data,
 	ret = drm_syncobj_array_find(file_private,
 				     u64_to_user_ptr(args->handles),
 				     args->count_handles,
+				     stack_syncobjs,
+				     ARRAY_SIZE(stack_syncobjs),
 				     &syncobjs);
 	if (ret < 0)
 		return ret;
@@ -1694,7 +1723,7 @@  int drm_syncobj_query_ioctl(struct drm_device *dev, void *data,
 			break;
 		}
 	}
-	drm_syncobj_array_free(syncobjs, args->count_handles);
+	drm_syncobj_array_free(syncobjs, args->count_handles, stack_syncobjs);
 
 	return ret;
 }