diff mbox series

[8/9] drm/syncobj: add timeline signal ioctl for syncobj v5

Message ID 20190325083224.2983-8-david1.zhou@amd.com (mailing list archive)
State New, archived
Headers show
Series [1/9] dma-buf: add new dma_fence_chain container v7 | expand

Commit Message

Chunming Zhou March 25, 2019, 8:32 a.m. UTC
v2: individually allocate chain array, since chain node is free independently.
v3: all existing points must be already signaled before cpu perform signal operation,
    so add check condition for that.
v4: remove v3 change and add checking to prevent out-of-order
v5: unify binary and timeline

Signed-off-by: Chunming Zhou <david1.zhou@amd.com>
Cc: Tobias Hector <Tobias.Hector@amd.com>
Cc: Jason Ekstrand <jason@jlekstrand.net>
Cc: Dave Airlie <airlied@redhat.com>
Cc: Chris Wilson <chris@chris-wilson.co.uk>
Cc: Lionel Landwerlin <lionel.g.landwerlin@intel.com>
---
 drivers/gpu/drm/drm_internal.h |  2 +
 drivers/gpu/drm/drm_ioctl.c    |  2 +
 drivers/gpu/drm/drm_syncobj.c  | 73 ++++++++++++++++++++++++++++++++++
 include/uapi/drm/drm.h         |  1 +
 4 files changed, 78 insertions(+)

Comments

Lionel Landwerlin March 28, 2019, 12:53 p.m. UTC | #1
On 25/03/2019 08:32, Chunming Zhou wrote:
> v2: individually allocate chain array, since chain node is free independently.
> v3: all existing points must be already signaled before cpu perform signal operation,
>      so add check condition for that.
> v4: remove v3 change and add checking to prevent out-of-order
> v5: unify binary and timeline
>
> Signed-off-by: Chunming Zhou <david1.zhou@amd.com>
> Cc: Tobias Hector <Tobias.Hector@amd.com>
> Cc: Jason Ekstrand <jason@jlekstrand.net>
> Cc: Dave Airlie <airlied@redhat.com>
> Cc: Chris Wilson <chris@chris-wilson.co.uk>
> Cc: Lionel Landwerlin <lionel.g.landwerlin@intel.com>
> ---
>   drivers/gpu/drm/drm_internal.h |  2 +
>   drivers/gpu/drm/drm_ioctl.c    |  2 +
>   drivers/gpu/drm/drm_syncobj.c  | 73 ++++++++++++++++++++++++++++++++++
>   include/uapi/drm/drm.h         |  1 +
>   4 files changed, 78 insertions(+)
>
> diff --git a/drivers/gpu/drm/drm_internal.h b/drivers/gpu/drm/drm_internal.h
> index dd11ae5f1eef..d9a483a5fce0 100644
> --- a/drivers/gpu/drm/drm_internal.h
> +++ b/drivers/gpu/drm/drm_internal.h
> @@ -190,6 +190,8 @@ int drm_syncobj_reset_ioctl(struct drm_device *dev, void *data,
>   			    struct drm_file *file_private);
>   int drm_syncobj_signal_ioctl(struct drm_device *dev, void *data,
>   			     struct drm_file *file_private);
> +int drm_syncobj_timeline_signal_ioctl(struct drm_device *dev, void *data,
> +				      struct drm_file *file_private);
>   int drm_syncobj_query_ioctl(struct drm_device *dev, void *data,
>   			    struct drm_file *file_private);
>   
> diff --git a/drivers/gpu/drm/drm_ioctl.c b/drivers/gpu/drm/drm_ioctl.c
> index 92b3b7b2fd81..d337f161909c 100644
> --- a/drivers/gpu/drm/drm_ioctl.c
> +++ b/drivers/gpu/drm/drm_ioctl.c
> @@ -696,6 +696,8 @@ static const struct drm_ioctl_desc drm_ioctls[] = {
>   		      DRM_UNLOCKED|DRM_RENDER_ALLOW),
>   	DRM_IOCTL_DEF(DRM_IOCTL_SYNCOBJ_SIGNAL, drm_syncobj_signal_ioctl,
>   		      DRM_UNLOCKED|DRM_RENDER_ALLOW),
> +	DRM_IOCTL_DEF(DRM_IOCTL_SYNCOBJ_TIMELINE_SIGNAL, drm_syncobj_timeline_signal_ioctl,
> +		      DRM_UNLOCKED|DRM_RENDER_ALLOW),
>   	DRM_IOCTL_DEF(DRM_IOCTL_SYNCOBJ_QUERY, drm_syncobj_query_ioctl,
>   		      DRM_UNLOCKED|DRM_RENDER_ALLOW),
>   	DRM_IOCTL_DEF(DRM_IOCTL_CRTC_GET_SEQUENCE, drm_crtc_get_sequence_ioctl, DRM_UNLOCKED),
> diff --git a/drivers/gpu/drm/drm_syncobj.c b/drivers/gpu/drm/drm_syncobj.c
> index ee2d66e047e7..099596190845 100644
> --- a/drivers/gpu/drm/drm_syncobj.c
> +++ b/drivers/gpu/drm/drm_syncobj.c
> @@ -1183,6 +1183,79 @@ drm_syncobj_signal_ioctl(struct drm_device *dev, void *data,
>   	return ret;
>   }
>   
> +int
> +drm_syncobj_timeline_signal_ioctl(struct drm_device *dev, void *data,
> +				  struct drm_file *file_private)
> +{
> +	struct drm_syncobj_timeline_array *args = data;
> +	struct drm_syncobj **syncobjs;
> +	struct dma_fence_chain **chains;
> +	uint64_t *points;
> +	uint32_t i, j;
> +	int ret;
> +
> +	if (!drm_core_check_feature(dev, DRIVER_SYNCOBJ))
> +		return -EOPNOTSUPP;
> +
> +	if (args->pad != 0)
> +		return -EINVAL;
> +
> +	if (args->count_handles == 0)
> +		return -EINVAL;
> +
> +	ret = drm_syncobj_array_find(file_private,
> +				     u64_to_user_ptr(args->handles),
> +				     args->count_handles,
> +				     &syncobjs);
> +	if (ret < 0)
> +		return ret;
> +
> +	points = kmalloc_array(args->count_handles, sizeof(*points),
> +			       GFP_KERNEL);
> +	if (!points) {
> +		ret = -ENOMEM;
> +		goto out;
> +	}
> +	if (!u64_to_user_ptr(args->points)) {
> +		memset(points, 0, args->count_handles * sizeof(uint64_t));
> +	} else if (copy_from_user(points, u64_to_user_ptr(args->points),
> +				  sizeof(uint64_t) * args->count_handles)) {
> +		ret = -EFAULT;
> +		goto err_points;
> +	}
> +
> +	chains = kmalloc_array(args->count_handles, sizeof(void *), GFP_KERNEL);
> +	if (!chains) {
> +		ret = -ENOMEM;
> +		goto err_points;
> +	}
> +	for (i = 0; i < args->count_handles; i++) {
> +		chains[i] = kzalloc(sizeof(struct dma_fence_chain), GFP_KERNEL);
> +		if (!chains[i]) {
> +			for (j = 0; j < i; j++)
> +				kfree(chains[j]);
> +			ret = -ENOMEM;
> +			goto err_chains;
> +		}
> +	}
> +
> +	for (i = 0; i < args->count_handles; i++) {
> +		struct dma_fence *fence = dma_fence_get_stub();
> +
> +		drm_syncobj_add_point(syncobjs[i], chains[i],
> +				      fence, points[i]);
> +		dma_fence_put(fence);
> +	}


I think I found an issue where the implementation doesn't match what the 
spec requires on host signaling.

Consider the following :


Timeline semaphore has a value of 4.

A device submits a commands to signal point 5.

The host signals point 6.


The value of the timeline will remain 4 until the device commands 
complete, at which point it will change to 6.

But the specification seems to say that the timeline value should be 6 
once the host signaling completes.


The only option that I see working would be to add point 6 in the 
timeline and wait on that point to complete.


Christian, David, what do you think?


Thanks,


-Lionel


> +err_chains:
> +	kfree(chains);
> +err_points:
> +	kfree(points);
> +out:
> +	drm_syncobj_array_free(syncobjs, args->count_handles);
> +
> +	return ret;
> +}
> +
>   int drm_syncobj_query_ioctl(struct drm_device *dev, void *data,
>   			    struct drm_file *file_private)
>   {
> diff --git a/include/uapi/drm/drm.h b/include/uapi/drm/drm.h
> index e8d0d6b51875..236b01a1fabf 100644
> --- a/include/uapi/drm/drm.h
> +++ b/include/uapi/drm/drm.h
> @@ -943,6 +943,7 @@ extern "C" {
>   #define DRM_IOCTL_SYNCOBJ_TIMELINE_WAIT	DRM_IOWR(0xCA, struct drm_syncobj_timeline_wait)
>   #define DRM_IOCTL_SYNCOBJ_QUERY		DRM_IOWR(0xCB, struct drm_syncobj_timeline_array)
>   #define DRM_IOCTL_SYNCOBJ_TRANSFER	DRM_IOWR(0xCC, struct drm_syncobj_transfer)
> +#define DRM_IOCTL_SYNCOBJ_TIMELINE_SIGNAL	DRM_IOWR(0xCD, struct drm_syncobj_timeline_array)
>   
>   /**
>    * Device specific ioctls should only be in their respective headers
Chunming Zhou March 28, 2019, 1:08 p.m. UTC | #2
在 2019/3/28 20:53, Lionel Landwerlin 写道:
> On 25/03/2019 08:32, Chunming Zhou wrote:
>> v2: individually allocate chain array, since chain node is free 
>> independently.
>> v3: all existing points must be already signaled before cpu perform 
>> signal operation,
>>      so add check condition for that.
>> v4: remove v3 change and add checking to prevent out-of-order
>> v5: unify binary and timeline
>>
>> Signed-off-by: Chunming Zhou <david1.zhou@amd.com>
>> Cc: Tobias Hector <Tobias.Hector@amd.com>
>> Cc: Jason Ekstrand <jason@jlekstrand.net>
>> Cc: Dave Airlie <airlied@redhat.com>
>> Cc: Chris Wilson <chris@chris-wilson.co.uk>
>> Cc: Lionel Landwerlin <lionel.g.landwerlin@intel.com>
>> ---
>>   drivers/gpu/drm/drm_internal.h |  2 +
>>   drivers/gpu/drm/drm_ioctl.c    |  2 +
>>   drivers/gpu/drm/drm_syncobj.c  | 73 ++++++++++++++++++++++++++++++++++
>>   include/uapi/drm/drm.h         |  1 +
>>   4 files changed, 78 insertions(+)
>>
>> diff --git a/drivers/gpu/drm/drm_internal.h 
>> b/drivers/gpu/drm/drm_internal.h
>> index dd11ae5f1eef..d9a483a5fce0 100644
>> --- a/drivers/gpu/drm/drm_internal.h
>> +++ b/drivers/gpu/drm/drm_internal.h
>> @@ -190,6 +190,8 @@ int drm_syncobj_reset_ioctl(struct drm_device 
>> *dev, void *data,
>>                   struct drm_file *file_private);
>>   int drm_syncobj_signal_ioctl(struct drm_device *dev, void *data,
>>                    struct drm_file *file_private);
>> +int drm_syncobj_timeline_signal_ioctl(struct drm_device *dev, void 
>> *data,
>> +                      struct drm_file *file_private);
>>   int drm_syncobj_query_ioctl(struct drm_device *dev, void *data,
>>                   struct drm_file *file_private);
>>   diff --git a/drivers/gpu/drm/drm_ioctl.c b/drivers/gpu/drm/drm_ioctl.c
>> index 92b3b7b2fd81..d337f161909c 100644
>> --- a/drivers/gpu/drm/drm_ioctl.c
>> +++ b/drivers/gpu/drm/drm_ioctl.c
>> @@ -696,6 +696,8 @@ static const struct drm_ioctl_desc drm_ioctls[] = {
>>                 DRM_UNLOCKED|DRM_RENDER_ALLOW),
>>       DRM_IOCTL_DEF(DRM_IOCTL_SYNCOBJ_SIGNAL, drm_syncobj_signal_ioctl,
>>                 DRM_UNLOCKED|DRM_RENDER_ALLOW),
>> +    DRM_IOCTL_DEF(DRM_IOCTL_SYNCOBJ_TIMELINE_SIGNAL, 
>> drm_syncobj_timeline_signal_ioctl,
>> +              DRM_UNLOCKED|DRM_RENDER_ALLOW),
>>       DRM_IOCTL_DEF(DRM_IOCTL_SYNCOBJ_QUERY, drm_syncobj_query_ioctl,
>>                 DRM_UNLOCKED|DRM_RENDER_ALLOW),
>>       DRM_IOCTL_DEF(DRM_IOCTL_CRTC_GET_SEQUENCE, 
>> drm_crtc_get_sequence_ioctl, DRM_UNLOCKED),
>> diff --git a/drivers/gpu/drm/drm_syncobj.c 
>> b/drivers/gpu/drm/drm_syncobj.c
>> index ee2d66e047e7..099596190845 100644
>> --- a/drivers/gpu/drm/drm_syncobj.c
>> +++ b/drivers/gpu/drm/drm_syncobj.c
>> @@ -1183,6 +1183,79 @@ drm_syncobj_signal_ioctl(struct drm_device 
>> *dev, void *data,
>>       return ret;
>>   }
>>   +int
>> +drm_syncobj_timeline_signal_ioctl(struct drm_device *dev, void *data,
>> +                  struct drm_file *file_private)
>> +{
>> +    struct drm_syncobj_timeline_array *args = data;
>> +    struct drm_syncobj **syncobjs;
>> +    struct dma_fence_chain **chains;
>> +    uint64_t *points;
>> +    uint32_t i, j;
>> +    int ret;
>> +
>> +    if (!drm_core_check_feature(dev, DRIVER_SYNCOBJ))
>> +        return -EOPNOTSUPP;
>> +
>> +    if (args->pad != 0)
>> +        return -EINVAL;
>> +
>> +    if (args->count_handles == 0)
>> +        return -EINVAL;
>> +
>> +    ret = drm_syncobj_array_find(file_private,
>> +                     u64_to_user_ptr(args->handles),
>> +                     args->count_handles,
>> +                     &syncobjs);
>> +    if (ret < 0)
>> +        return ret;
>> +
>> +    points = kmalloc_array(args->count_handles, sizeof(*points),
>> +                   GFP_KERNEL);
>> +    if (!points) {
>> +        ret = -ENOMEM;
>> +        goto out;
>> +    }
>> +    if (!u64_to_user_ptr(args->points)) {
>> +        memset(points, 0, args->count_handles * sizeof(uint64_t));
>> +    } else if (copy_from_user(points, u64_to_user_ptr(args->points),
>> +                  sizeof(uint64_t) * args->count_handles)) {
>> +        ret = -EFAULT;
>> +        goto err_points;
>> +    }
>> +
>> +    chains = kmalloc_array(args->count_handles, sizeof(void *), 
>> GFP_KERNEL);
>> +    if (!chains) {
>> +        ret = -ENOMEM;
>> +        goto err_points;
>> +    }
>> +    for (i = 0; i < args->count_handles; i++) {
>> +        chains[i] = kzalloc(sizeof(struct dma_fence_chain), 
>> GFP_KERNEL);
>> +        if (!chains[i]) {
>> +            for (j = 0; j < i; j++)
>> +                kfree(chains[j]);
>> +            ret = -ENOMEM;
>> +            goto err_chains;
>> +        }
>> +    }
>> +
>> +    for (i = 0; i < args->count_handles; i++) {
>> +        struct dma_fence *fence = dma_fence_get_stub();
>> +
>> +        drm_syncobj_add_point(syncobjs[i], chains[i],
>> +                      fence, points[i]);
>> +        dma_fence_put(fence);
>> +    }
>
>
> I think I found an issue where the implementation doesn't match what 
> the spec requires on host signaling.
>
> Consider the following :
>
>
> Timeline semaphore has a value of 4.
>
> A device submits a commands to signal point 5.
>
> The host signals point 6.
>
>
> The value of the timeline will remain 4 until the device commands 
> complete, at which point it will change to 6.
>
> But the specification seems to say that the timeline value should be 6 
> once the host signaling completes.
>
>
> The only option that I see working would be to add point 6 in the 
> timeline and wait on that point to complete.
>
>
> Christian, David, what do you think?

This is exactly what I said before, see V3 comment in the patch head.

But Jeson said this should be guranteed by user and application.

Btw, if we wait all previous points completion this ioctl could be blocked.


-David


>
>
> Thanks,
>
>
> -Lionel
>
>
>> +err_chains:
>> +    kfree(chains);
>> +err_points:
>> +    kfree(points);
>> +out:
>> +    drm_syncobj_array_free(syncobjs, args->count_handles);
>> +
>> +    return ret;
>> +}
>> +
>>   int drm_syncobj_query_ioctl(struct drm_device *dev, void *data,
>>                   struct drm_file *file_private)
>>   {
>> diff --git a/include/uapi/drm/drm.h b/include/uapi/drm/drm.h
>> index e8d0d6b51875..236b01a1fabf 100644
>> --- a/include/uapi/drm/drm.h
>> +++ b/include/uapi/drm/drm.h
>> @@ -943,6 +943,7 @@ extern "C" {
>>   #define DRM_IOCTL_SYNCOBJ_TIMELINE_WAIT    DRM_IOWR(0xCA, struct 
>> drm_syncobj_timeline_wait)
>>   #define DRM_IOCTL_SYNCOBJ_QUERY        DRM_IOWR(0xCB, struct 
>> drm_syncobj_timeline_array)
>>   #define DRM_IOCTL_SYNCOBJ_TRANSFER    DRM_IOWR(0xCC, struct 
>> drm_syncobj_transfer)
>> +#define DRM_IOCTL_SYNCOBJ_TIMELINE_SIGNAL    DRM_IOWR(0xCD, struct 
>> drm_syncobj_timeline_array)
>>     /**
>>    * Device specific ioctls should only be in their respective headers
>
>
Lionel Landwerlin March 28, 2019, 1:33 p.m. UTC | #3
On 28/03/2019 13:08, Chunming Zhou wrote:
> 在 2019/3/28 20:53, Lionel Landwerlin 写道:
>> On 25/03/2019 08:32, Chunming Zhou wrote:
>>> v2: individually allocate chain array, since chain node is free
>>> independently.
>>> v3: all existing points must be already signaled before cpu perform
>>> signal operation,
>>>       so add check condition for that.
>>> v4: remove v3 change and add checking to prevent out-of-order
>>> v5: unify binary and timeline
>>>
>>> Signed-off-by: Chunming Zhou <david1.zhou@amd.com>
>>> Cc: Tobias Hector <Tobias.Hector@amd.com>
>>> Cc: Jason Ekstrand <jason@jlekstrand.net>
>>> Cc: Dave Airlie <airlied@redhat.com>
>>> Cc: Chris Wilson <chris@chris-wilson.co.uk>
>>> Cc: Lionel Landwerlin <lionel.g.landwerlin@intel.com>
>>> ---
>>>    drivers/gpu/drm/drm_internal.h |  2 +
>>>    drivers/gpu/drm/drm_ioctl.c    |  2 +
>>>    drivers/gpu/drm/drm_syncobj.c  | 73 ++++++++++++++++++++++++++++++++++
>>>    include/uapi/drm/drm.h         |  1 +
>>>    4 files changed, 78 insertions(+)
>>>
>>> diff --git a/drivers/gpu/drm/drm_internal.h
>>> b/drivers/gpu/drm/drm_internal.h
>>> index dd11ae5f1eef..d9a483a5fce0 100644
>>> --- a/drivers/gpu/drm/drm_internal.h
>>> +++ b/drivers/gpu/drm/drm_internal.h
>>> @@ -190,6 +190,8 @@ int drm_syncobj_reset_ioctl(struct drm_device
>>> *dev, void *data,
>>>                    struct drm_file *file_private);
>>>    int drm_syncobj_signal_ioctl(struct drm_device *dev, void *data,
>>>                     struct drm_file *file_private);
>>> +int drm_syncobj_timeline_signal_ioctl(struct drm_device *dev, void
>>> *data,
>>> +                      struct drm_file *file_private);
>>>    int drm_syncobj_query_ioctl(struct drm_device *dev, void *data,
>>>                    struct drm_file *file_private);
>>>    diff --git a/drivers/gpu/drm/drm_ioctl.c b/drivers/gpu/drm/drm_ioctl.c
>>> index 92b3b7b2fd81..d337f161909c 100644
>>> --- a/drivers/gpu/drm/drm_ioctl.c
>>> +++ b/drivers/gpu/drm/drm_ioctl.c
>>> @@ -696,6 +696,8 @@ static const struct drm_ioctl_desc drm_ioctls[] = {
>>>                  DRM_UNLOCKED|DRM_RENDER_ALLOW),
>>>        DRM_IOCTL_DEF(DRM_IOCTL_SYNCOBJ_SIGNAL, drm_syncobj_signal_ioctl,
>>>                  DRM_UNLOCKED|DRM_RENDER_ALLOW),
>>> +    DRM_IOCTL_DEF(DRM_IOCTL_SYNCOBJ_TIMELINE_SIGNAL,
>>> drm_syncobj_timeline_signal_ioctl,
>>> +              DRM_UNLOCKED|DRM_RENDER_ALLOW),
>>>        DRM_IOCTL_DEF(DRM_IOCTL_SYNCOBJ_QUERY, drm_syncobj_query_ioctl,
>>>                  DRM_UNLOCKED|DRM_RENDER_ALLOW),
>>>        DRM_IOCTL_DEF(DRM_IOCTL_CRTC_GET_SEQUENCE,
>>> drm_crtc_get_sequence_ioctl, DRM_UNLOCKED),
>>> diff --git a/drivers/gpu/drm/drm_syncobj.c
>>> b/drivers/gpu/drm/drm_syncobj.c
>>> index ee2d66e047e7..099596190845 100644
>>> --- a/drivers/gpu/drm/drm_syncobj.c
>>> +++ b/drivers/gpu/drm/drm_syncobj.c
>>> @@ -1183,6 +1183,79 @@ drm_syncobj_signal_ioctl(struct drm_device
>>> *dev, void *data,
>>>        return ret;
>>>    }
>>>    +int
>>> +drm_syncobj_timeline_signal_ioctl(struct drm_device *dev, void *data,
>>> +                  struct drm_file *file_private)
>>> +{
>>> +    struct drm_syncobj_timeline_array *args = data;
>>> +    struct drm_syncobj **syncobjs;
>>> +    struct dma_fence_chain **chains;
>>> +    uint64_t *points;
>>> +    uint32_t i, j;
>>> +    int ret;
>>> +
>>> +    if (!drm_core_check_feature(dev, DRIVER_SYNCOBJ))
>>> +        return -EOPNOTSUPP;
>>> +
>>> +    if (args->pad != 0)
>>> +        return -EINVAL;
>>> +
>>> +    if (args->count_handles == 0)
>>> +        return -EINVAL;
>>> +
>>> +    ret = drm_syncobj_array_find(file_private,
>>> +                     u64_to_user_ptr(args->handles),
>>> +                     args->count_handles,
>>> +                     &syncobjs);
>>> +    if (ret < 0)
>>> +        return ret;
>>> +
>>> +    points = kmalloc_array(args->count_handles, sizeof(*points),
>>> +                   GFP_KERNEL);
>>> +    if (!points) {
>>> +        ret = -ENOMEM;
>>> +        goto out;
>>> +    }
>>> +    if (!u64_to_user_ptr(args->points)) {
>>> +        memset(points, 0, args->count_handles * sizeof(uint64_t));
>>> +    } else if (copy_from_user(points, u64_to_user_ptr(args->points),
>>> +                  sizeof(uint64_t) * args->count_handles)) {
>>> +        ret = -EFAULT;
>>> +        goto err_points;
>>> +    }
>>> +
>>> +    chains = kmalloc_array(args->count_handles, sizeof(void *),
>>> GFP_KERNEL);
>>> +    if (!chains) {
>>> +        ret = -ENOMEM;
>>> +        goto err_points;
>>> +    }
>>> +    for (i = 0; i < args->count_handles; i++) {
>>> +        chains[i] = kzalloc(sizeof(struct dma_fence_chain),
>>> GFP_KERNEL);
>>> +        if (!chains[i]) {
>>> +            for (j = 0; j < i; j++)
>>> +                kfree(chains[j]);
>>> +            ret = -ENOMEM;
>>> +            goto err_chains;
>>> +        }
>>> +    }
>>> +
>>> +    for (i = 0; i < args->count_handles; i++) {
>>> +        struct dma_fence *fence = dma_fence_get_stub();
>>> +
>>> +        drm_syncobj_add_point(syncobjs[i], chains[i],
>>> +                      fence, points[i]);
>>> +        dma_fence_put(fence);
>>> +    }
>>
>> I think I found an issue where the implementation doesn't match what
>> the spec requires on host signaling.
>>
>> Consider the following :
>>
>>
>> Timeline semaphore has a value of 4.
>>
>> A device submits a commands to signal point 5.
>>
>> The host signals point 6.
>>
>>
>> The value of the timeline will remain 4 until the device commands
>> complete, at which point it will change to 6.
>>
>> But the specification seems to say that the timeline value should be 6
>> once the host signaling completes.
>>
>>
>> The only option that I see working would be to add point 6 in the
>> timeline and wait on that point to complete.
>>
>>
>> Christian, David, what do you think?
> This is exactly what I said before, see V3 comment in the patch head.
>
> But Jeson said this should be guranteed by user and application.
>
> Btw, if we wait all previous points completion this ioctl could be blocked.
>
>
> -David


I haven't seen anything that explicitly forbids that in the spec.

Trying to get a clarification.


-Lionel


>
>
>>
>> Thanks,
>>
>>
>> -Lionel
>>
>>
>>> +err_chains:
>>> +    kfree(chains);
>>> +err_points:
>>> +    kfree(points);
>>> +out:
>>> +    drm_syncobj_array_free(syncobjs, args->count_handles);
>>> +
>>> +    return ret;
>>> +}
>>> +
>>>    int drm_syncobj_query_ioctl(struct drm_device *dev, void *data,
>>>                    struct drm_file *file_private)
>>>    {
>>> diff --git a/include/uapi/drm/drm.h b/include/uapi/drm/drm.h
>>> index e8d0d6b51875..236b01a1fabf 100644
>>> --- a/include/uapi/drm/drm.h
>>> +++ b/include/uapi/drm/drm.h
>>> @@ -943,6 +943,7 @@ extern "C" {
>>>    #define DRM_IOCTL_SYNCOBJ_TIMELINE_WAIT    DRM_IOWR(0xCA, struct
>>> drm_syncobj_timeline_wait)
>>>    #define DRM_IOCTL_SYNCOBJ_QUERY        DRM_IOWR(0xCB, struct
>>> drm_syncobj_timeline_array)
>>>    #define DRM_IOCTL_SYNCOBJ_TRANSFER    DRM_IOWR(0xCC, struct
>>> drm_syncobj_transfer)
>>> +#define DRM_IOCTL_SYNCOBJ_TIMELINE_SIGNAL    DRM_IOWR(0xCD, struct
>>> drm_syncobj_timeline_array)
>>>      /**
>>>     * Device specific ioctls should only be in their respective headers
>>
Lionel Landwerlin March 30, 2019, 2:05 p.m. UTC | #4
On 28/03/2019 13:33, Lionel Landwerlin wrote:
> On 28/03/2019 13:08, Chunming Zhou wrote:
>> 在 2019/3/28 20:53, Lionel Landwerlin 写道:
>>> On 25/03/2019 08:32, Chunming Zhou wrote:
>>>> v2: individually allocate chain array, since chain node is free
>>>> independently.
>>>> v3: all existing points must be already signaled before cpu perform
>>>> signal operation,
>>>>       so add check condition for that.
>>>> v4: remove v3 change and add checking to prevent out-of-order
>>>> v5: unify binary and timeline
>>>>
>>>> Signed-off-by: Chunming Zhou <david1.zhou@amd.com>
>>>> Cc: Tobias Hector <Tobias.Hector@amd.com>
>>>> Cc: Jason Ekstrand <jason@jlekstrand.net>
>>>> Cc: Dave Airlie <airlied@redhat.com>
>>>> Cc: Chris Wilson <chris@chris-wilson.co.uk>
>>>> Cc: Lionel Landwerlin <lionel.g.landwerlin@intel.com>
>>>> ---
>>>>    drivers/gpu/drm/drm_internal.h |  2 +
>>>>    drivers/gpu/drm/drm_ioctl.c    |  2 +
>>>>    drivers/gpu/drm/drm_syncobj.c  | 73 
>>>> ++++++++++++++++++++++++++++++++++
>>>>    include/uapi/drm/drm.h         |  1 +
>>>>    4 files changed, 78 insertions(+)
>>>>
>>>> diff --git a/drivers/gpu/drm/drm_internal.h
>>>> b/drivers/gpu/drm/drm_internal.h
>>>> index dd11ae5f1eef..d9a483a5fce0 100644
>>>> --- a/drivers/gpu/drm/drm_internal.h
>>>> +++ b/drivers/gpu/drm/drm_internal.h
>>>> @@ -190,6 +190,8 @@ int drm_syncobj_reset_ioctl(struct drm_device
>>>> *dev, void *data,
>>>>                    struct drm_file *file_private);
>>>>    int drm_syncobj_signal_ioctl(struct drm_device *dev, void *data,
>>>>                     struct drm_file *file_private);
>>>> +int drm_syncobj_timeline_signal_ioctl(struct drm_device *dev, void
>>>> *data,
>>>> +                      struct drm_file *file_private);
>>>>    int drm_syncobj_query_ioctl(struct drm_device *dev, void *data,
>>>>                    struct drm_file *file_private);
>>>>    diff --git a/drivers/gpu/drm/drm_ioctl.c 
>>>> b/drivers/gpu/drm/drm_ioctl.c
>>>> index 92b3b7b2fd81..d337f161909c 100644
>>>> --- a/drivers/gpu/drm/drm_ioctl.c
>>>> +++ b/drivers/gpu/drm/drm_ioctl.c
>>>> @@ -696,6 +696,8 @@ static const struct drm_ioctl_desc drm_ioctls[] 
>>>> = {
>>>>                  DRM_UNLOCKED|DRM_RENDER_ALLOW),
>>>>        DRM_IOCTL_DEF(DRM_IOCTL_SYNCOBJ_SIGNAL, 
>>>> drm_syncobj_signal_ioctl,
>>>>                  DRM_UNLOCKED|DRM_RENDER_ALLOW),
>>>> +    DRM_IOCTL_DEF(DRM_IOCTL_SYNCOBJ_TIMELINE_SIGNAL,
>>>> drm_syncobj_timeline_signal_ioctl,
>>>> +              DRM_UNLOCKED|DRM_RENDER_ALLOW),
>>>>        DRM_IOCTL_DEF(DRM_IOCTL_SYNCOBJ_QUERY, drm_syncobj_query_ioctl,
>>>>                  DRM_UNLOCKED|DRM_RENDER_ALLOW),
>>>>        DRM_IOCTL_DEF(DRM_IOCTL_CRTC_GET_SEQUENCE,
>>>> drm_crtc_get_sequence_ioctl, DRM_UNLOCKED),
>>>> diff --git a/drivers/gpu/drm/drm_syncobj.c
>>>> b/drivers/gpu/drm/drm_syncobj.c
>>>> index ee2d66e047e7..099596190845 100644
>>>> --- a/drivers/gpu/drm/drm_syncobj.c
>>>> +++ b/drivers/gpu/drm/drm_syncobj.c
>>>> @@ -1183,6 +1183,79 @@ drm_syncobj_signal_ioctl(struct drm_device
>>>> *dev, void *data,
>>>>        return ret;
>>>>    }
>>>>    +int
>>>> +drm_syncobj_timeline_signal_ioctl(struct drm_device *dev, void *data,
>>>> +                  struct drm_file *file_private)
>>>> +{
>>>> +    struct drm_syncobj_timeline_array *args = data;
>>>> +    struct drm_syncobj **syncobjs;
>>>> +    struct dma_fence_chain **chains;
>>>> +    uint64_t *points;
>>>> +    uint32_t i, j;
>>>> +    int ret;
>>>> +
>>>> +    if (!drm_core_check_feature(dev, DRIVER_SYNCOBJ))
>>>> +        return -EOPNOTSUPP;
>>>> +
>>>> +    if (args->pad != 0)
>>>> +        return -EINVAL;
>>>> +
>>>> +    if (args->count_handles == 0)
>>>> +        return -EINVAL;
>>>> +
>>>> +    ret = drm_syncobj_array_find(file_private,
>>>> +                     u64_to_user_ptr(args->handles),
>>>> +                     args->count_handles,
>>>> +                     &syncobjs);
>>>> +    if (ret < 0)
>>>> +        return ret;
>>>> +
>>>> +    points = kmalloc_array(args->count_handles, sizeof(*points),
>>>> +                   GFP_KERNEL);
>>>> +    if (!points) {
>>>> +        ret = -ENOMEM;
>>>> +        goto out;
>>>> +    }
>>>> +    if (!u64_to_user_ptr(args->points)) {
>>>> +        memset(points, 0, args->count_handles * sizeof(uint64_t));
>>>> +    } else if (copy_from_user(points, u64_to_user_ptr(args->points),
>>>> +                  sizeof(uint64_t) * args->count_handles)) {
>>>> +        ret = -EFAULT;
>>>> +        goto err_points;
>>>> +    }
>>>> +
>>>> +    chains = kmalloc_array(args->count_handles, sizeof(void *),
>>>> GFP_KERNEL);
>>>> +    if (!chains) {
>>>> +        ret = -ENOMEM;
>>>> +        goto err_points;
>>>> +    }
>>>> +    for (i = 0; i < args->count_handles; i++) {
>>>> +        chains[i] = kzalloc(sizeof(struct dma_fence_chain),
>>>> GFP_KERNEL);
>>>> +        if (!chains[i]) {
>>>> +            for (j = 0; j < i; j++)
>>>> +                kfree(chains[j]);
>>>> +            ret = -ENOMEM;
>>>> +            goto err_chains;
>>>> +        }
>>>> +    }
>>>> +
>>>> +    for (i = 0; i < args->count_handles; i++) {
>>>> +        struct dma_fence *fence = dma_fence_get_stub();
>>>> +
>>>> +        drm_syncobj_add_point(syncobjs[i], chains[i],
>>>> +                      fence, points[i]);
>>>> +        dma_fence_put(fence);
>>>> +    }
>>>
>>> I think I found an issue where the implementation doesn't match what
>>> the spec requires on host signaling.
>>>
>>> Consider the following :
>>>
>>>
>>> Timeline semaphore has a value of 4.
>>>
>>> A device submits a commands to signal point 5.
>>>
>>> The host signals point 6.
>>>
>>>
>>> The value of the timeline will remain 4 until the device commands
>>> complete, at which point it will change to 6.
>>>
>>> But the specification seems to say that the timeline value should be 6
>>> once the host signaling completes.
>>>
>>>
>>> The only option that I see working would be to add point 6 in the
>>> timeline and wait on that point to complete.
>>>
>>>
>>> Christian, David, what do you think?
>> This is exactly what I said before, see V3 comment in the patch head.
>>
>> But Jeson said this should be guranteed by user and application.
>>
>> Btw, if we wait all previous points completion this ioctl could be 
>> blocked.
>>
>>
>> -David
>
>
> I haven't seen anything that explicitly forbids that in the spec.
>
> Trying to get a clarification.
>
>
> -Lionel


So it looks like this won't be allowed by the vulkan spec.

Sorry, I genuinely thought this was allowed because we can end up in 
such situation on the device side :(


Not quite sure how useful it would be to have some checks here as most 
of the error handling can only be done within the add_point() function 
and requires taking the syncobj lock.


As much as I could like to see more error checking, I guess this isn't 
doable without more locking.


-Lionel


>
>
>>
>>
>>>
>>> Thanks,
>>>
>>>
>>> -Lionel
>>>
>>>
>>>> +err_chains:
>>>> +    kfree(chains);
>>>> +err_points:
>>>> +    kfree(points);
>>>> +out:
>>>> +    drm_syncobj_array_free(syncobjs, args->count_handles);
>>>> +
>>>> +    return ret;
>>>> +}
>>>> +
>>>>    int drm_syncobj_query_ioctl(struct drm_device *dev, void *data,
>>>>                    struct drm_file *file_private)
>>>>    {
>>>> diff --git a/include/uapi/drm/drm.h b/include/uapi/drm/drm.h
>>>> index e8d0d6b51875..236b01a1fabf 100644
>>>> --- a/include/uapi/drm/drm.h
>>>> +++ b/include/uapi/drm/drm.h
>>>> @@ -943,6 +943,7 @@ extern "C" {
>>>>    #define DRM_IOCTL_SYNCOBJ_TIMELINE_WAIT    DRM_IOWR(0xCA, struct
>>>> drm_syncobj_timeline_wait)
>>>>    #define DRM_IOCTL_SYNCOBJ_QUERY        DRM_IOWR(0xCB, struct
>>>> drm_syncobj_timeline_array)
>>>>    #define DRM_IOCTL_SYNCOBJ_TRANSFER    DRM_IOWR(0xCC, struct
>>>> drm_syncobj_transfer)
>>>> +#define DRM_IOCTL_SYNCOBJ_TIMELINE_SIGNAL    DRM_IOWR(0xCD, struct
>>>> drm_syncobj_timeline_array)
>>>>      /**
>>>>     * Device specific ioctls should only be in their respective 
>>>> headers
>>>
>
> _______________________________________________
> dri-devel mailing list
> dri-devel@lists.freedesktop.org
> https://lists.freedesktop.org/mailman/listinfo/dri-devel
diff mbox series

Patch

diff --git a/drivers/gpu/drm/drm_internal.h b/drivers/gpu/drm/drm_internal.h
index dd11ae5f1eef..d9a483a5fce0 100644
--- a/drivers/gpu/drm/drm_internal.h
+++ b/drivers/gpu/drm/drm_internal.h
@@ -190,6 +190,8 @@  int drm_syncobj_reset_ioctl(struct drm_device *dev, void *data,
 			    struct drm_file *file_private);
 int drm_syncobj_signal_ioctl(struct drm_device *dev, void *data,
 			     struct drm_file *file_private);
+int drm_syncobj_timeline_signal_ioctl(struct drm_device *dev, void *data,
+				      struct drm_file *file_private);
 int drm_syncobj_query_ioctl(struct drm_device *dev, void *data,
 			    struct drm_file *file_private);
 
diff --git a/drivers/gpu/drm/drm_ioctl.c b/drivers/gpu/drm/drm_ioctl.c
index 92b3b7b2fd81..d337f161909c 100644
--- a/drivers/gpu/drm/drm_ioctl.c
+++ b/drivers/gpu/drm/drm_ioctl.c
@@ -696,6 +696,8 @@  static const struct drm_ioctl_desc drm_ioctls[] = {
 		      DRM_UNLOCKED|DRM_RENDER_ALLOW),
 	DRM_IOCTL_DEF(DRM_IOCTL_SYNCOBJ_SIGNAL, drm_syncobj_signal_ioctl,
 		      DRM_UNLOCKED|DRM_RENDER_ALLOW),
+	DRM_IOCTL_DEF(DRM_IOCTL_SYNCOBJ_TIMELINE_SIGNAL, drm_syncobj_timeline_signal_ioctl,
+		      DRM_UNLOCKED|DRM_RENDER_ALLOW),
 	DRM_IOCTL_DEF(DRM_IOCTL_SYNCOBJ_QUERY, drm_syncobj_query_ioctl,
 		      DRM_UNLOCKED|DRM_RENDER_ALLOW),
 	DRM_IOCTL_DEF(DRM_IOCTL_CRTC_GET_SEQUENCE, drm_crtc_get_sequence_ioctl, DRM_UNLOCKED),
diff --git a/drivers/gpu/drm/drm_syncobj.c b/drivers/gpu/drm/drm_syncobj.c
index ee2d66e047e7..099596190845 100644
--- a/drivers/gpu/drm/drm_syncobj.c
+++ b/drivers/gpu/drm/drm_syncobj.c
@@ -1183,6 +1183,79 @@  drm_syncobj_signal_ioctl(struct drm_device *dev, void *data,
 	return ret;
 }
 
+int
+drm_syncobj_timeline_signal_ioctl(struct drm_device *dev, void *data,
+				  struct drm_file *file_private)
+{
+	struct drm_syncobj_timeline_array *args = data;
+	struct drm_syncobj **syncobjs;
+	struct dma_fence_chain **chains;
+	uint64_t *points;
+	uint32_t i, j;
+	int ret;
+
+	if (!drm_core_check_feature(dev, DRIVER_SYNCOBJ))
+		return -EOPNOTSUPP;
+
+	if (args->pad != 0)
+		return -EINVAL;
+
+	if (args->count_handles == 0)
+		return -EINVAL;
+
+	ret = drm_syncobj_array_find(file_private,
+				     u64_to_user_ptr(args->handles),
+				     args->count_handles,
+				     &syncobjs);
+	if (ret < 0)
+		return ret;
+
+	points = kmalloc_array(args->count_handles, sizeof(*points),
+			       GFP_KERNEL);
+	if (!points) {
+		ret = -ENOMEM;
+		goto out;
+	}
+	if (!u64_to_user_ptr(args->points)) {
+		memset(points, 0, args->count_handles * sizeof(uint64_t));
+	} else if (copy_from_user(points, u64_to_user_ptr(args->points),
+				  sizeof(uint64_t) * args->count_handles)) {
+		ret = -EFAULT;
+		goto err_points;
+	}
+
+	chains = kmalloc_array(args->count_handles, sizeof(void *), GFP_KERNEL);
+	if (!chains) {
+		ret = -ENOMEM;
+		goto err_points;
+	}
+	for (i = 0; i < args->count_handles; i++) {
+		chains[i] = kzalloc(sizeof(struct dma_fence_chain), GFP_KERNEL);
+		if (!chains[i]) {
+			for (j = 0; j < i; j++)
+				kfree(chains[j]);
+			ret = -ENOMEM;
+			goto err_chains;
+		}
+	}
+
+	for (i = 0; i < args->count_handles; i++) {
+		struct dma_fence *fence = dma_fence_get_stub();
+
+		drm_syncobj_add_point(syncobjs[i], chains[i],
+				      fence, points[i]);
+		dma_fence_put(fence);
+	}
+err_chains:
+	kfree(chains);
+err_points:
+	kfree(points);
+out:
+	drm_syncobj_array_free(syncobjs, args->count_handles);
+
+	return ret;
+}
+
 int drm_syncobj_query_ioctl(struct drm_device *dev, void *data,
 			    struct drm_file *file_private)
 {
diff --git a/include/uapi/drm/drm.h b/include/uapi/drm/drm.h
index e8d0d6b51875..236b01a1fabf 100644
--- a/include/uapi/drm/drm.h
+++ b/include/uapi/drm/drm.h
@@ -943,6 +943,7 @@  extern "C" {
 #define DRM_IOCTL_SYNCOBJ_TIMELINE_WAIT	DRM_IOWR(0xCA, struct drm_syncobj_timeline_wait)
 #define DRM_IOCTL_SYNCOBJ_QUERY		DRM_IOWR(0xCB, struct drm_syncobj_timeline_array)
 #define DRM_IOCTL_SYNCOBJ_TRANSFER	DRM_IOWR(0xCC, struct drm_syncobj_transfer)
+#define DRM_IOCTL_SYNCOBJ_TIMELINE_SIGNAL	DRM_IOWR(0xCD, struct drm_syncobj_timeline_array)
 
 /**
  * Device specific ioctls should only be in their respective headers