
[v5,07/21] gpu: host1x: Introduce UAPI header

Message ID 20210111130019.3515669-8-mperttunen@nvidia.com (mailing list archive)
State: New, archived
Series: Host1x/TegraDRM UAPI

Commit Message

Mikko Perttunen Jan. 11, 2021, 1 p.m. UTC
Add the userspace interface header, specifying interfaces
for allocating and accessing syncpoints from userspace,
and for creating sync_file-based fences from syncpoint
thresholds.

Signed-off-by: Mikko Perttunen <mperttunen@nvidia.com>
---
 include/uapi/linux/host1x.h | 134 ++++++++++++++++++++++++++++++++++++
 1 file changed, 134 insertions(+)
 create mode 100644 include/uapi/linux/host1x.h

Comments

Thierry Reding March 23, 2021, 10:52 a.m. UTC | #1
On Mon, Jan 11, 2021 at 03:00:05PM +0200, Mikko Perttunen wrote:
> Add the userspace interface header, specifying interfaces
> for allocating and accessing syncpoints from userspace,
> and for creating sync_file-based fences from syncpoint
> thresholds.
> 
> Signed-off-by: Mikko Perttunen <mperttunen@nvidia.com>
> ---
>  include/uapi/linux/host1x.h | 134 ++++++++++++++++++++++++++++++++++++
>  1 file changed, 134 insertions(+)
>  create mode 100644 include/uapi/linux/host1x.h

How many of these syncpoints do we expect userspace to create?
There's a limited number of open file descriptors available by
default, so this needs to be kept reasonably low.

> diff --git a/include/uapi/linux/host1x.h b/include/uapi/linux/host1x.h
> new file mode 100644
> index 000000000000..9c8fb9425cb2
> --- /dev/null
> +++ b/include/uapi/linux/host1x.h
> @@ -0,0 +1,134 @@
> +/* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */
> +/* Copyright (c) 2020 NVIDIA Corporation */
> +
> +#ifndef _UAPI__LINUX_HOST1X_H
> +#define _UAPI__LINUX_HOST1X_H
> +
> +#include <linux/ioctl.h>
> +#include <linux/types.h>
> +
> +#if defined(__cplusplus)
> +extern "C" {
> +#endif
> +
> +struct host1x_allocate_syncpoint {
> +	/**
> +	 * @fd: [out]
> +	 *
> +	 * New file descriptor representing the allocated syncpoint.
> +	 */
> +	__s32 fd;
> +
> +	__u32 reserved[3];
> +};
> +
> +struct host1x_syncpoint_info {
> +	/**
> +	 * @id: [out]
> +	 *
> +	 * System-global ID of the syncpoint.
> +	 */
> +	__u32 id;
> +
> +	__u32 reserved[3];
> +};

Given that this has only out parameters, I expect this will be called on
the FD returned by HOST1X_IOCTL_ALLOCATE_SYNCPOINT? It might be worth
pointing that out explicitly in a comment.
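
For illustration, the expected two-step flow would presumably look like
the sketch below. Note that the device node path /dev/host1x is an
assumption here; the patch itself doesn't name the node.

#include <fcntl.h>
#include <stdio.h>
#include <sys/ioctl.h>
#include <unistd.h>
#include <linux/host1x.h>

int main(void)
{
	struct host1x_allocate_syncpoint alloc = { 0 };
	struct host1x_syncpoint_info info = { 0 };
	int host1x, sp;

	/* Assumed device node path; not specified by this patch. */
	host1x = open("/dev/host1x", O_RDWR);
	if (host1x < 0)
		return 1;

	/* Allocate a syncpoint; a new FD comes back in alloc.fd. */
	if (ioctl(host1x, HOST1X_IOCTL_ALLOCATE_SYNCPOINT, &alloc) < 0)
		return 1;
	sp = alloc.fd;

	/* Query the global ID on the syncpoint FD, not the device node. */
	if (ioctl(sp, HOST1X_IOCTL_SYNCPOINT_INFO, &info) < 0)
		return 1;

	printf("allocated syncpoint ID %u\n", info.id);

	close(sp);
	close(host1x);
	return 0;
}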

> +
> +struct host1x_syncpoint_increment {
> +	/**
> +	 * @count: [in]
> +	 *
> +	 * Number of times to increment the syncpoint. The syncpoint can
> +	 * be observed at in-between values, but each increment is atomic.
> +	 */
> +	__u32 count;
> +};

This seems like it would have to be called on the FD as well...
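
Assuming so, an increment would presumably look like this, continuing
the sketch above (sp being the FD returned by
HOST1X_IOCTL_ALLOCATE_SYNCPOINT):

	/* Increment the owned syncpoint twice; each increment is atomic. */
	struct host1x_syncpoint_increment inc = { .count = 2 };

	if (ioctl(sp, HOST1X_IOCTL_SYNCPOINT_INCREMENT, &inc) < 0)
		return 1;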

> +
> +struct host1x_read_syncpoint {
> +	/**
> +	 * @id: [in]
> +	 *
> +	 * ID of the syncpoint to read.
> +	 */
> +	__u32 id;
> +
> +	/**
> +	 * @value: [out]
> +	 *
> +	 * Current value of the syncpoint.
> +	 */
> +	__u32 value;
> +};

... but then, all of a sudden you seem to switch things around and allow
reading the value of an arbitrary syncpoint specified by ID.

Now, I suspect that's because reading the syncpoint is harmless and does
not allow abuse, whereas incrementing could be abused if allowed on an
arbitrary syncpoint ID. But I think it's worth spelling all that out in
some documentation to make this clear from a security point of view and
from a usability point of view for people trying to figure out how to
use these interfaces.
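
Under that model, a read would presumably go through the host1x device
node and accept any syncpoint ID, not just one the caller owns.
Continuing the sketch above:

	/* Read the current value of an arbitrary syncpoint by ID. */
	struct host1x_read_syncpoint read_args = { .id = info.id };

	if (ioctl(host1x, HOST1X_IOCTL_READ_SYNCPOINT, &read_args) == 0)
		printf("syncpoint %u = %u\n", read_args.id, read_args.value);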

> +
> +struct host1x_create_fence {
> +	/**
> +	 * @id: [in]
> +	 *
> +	 * ID of the syncpoint to create a fence for.
> +	 */
> +	__u32 id;
> +
> +	/**
> +	 * @threshold: [in]
> +	 *
> +	 * When the syncpoint reaches this value, the fence will be signaled.
> +	 * The syncpoint is considered to have reached the threshold when the
> +	 * following condition is true:
> +	 *
> +	 * 	((value - threshold) & 0x80000000U) == 0U
> +	 *
> +	 */
> +	__u32 threshold;
> +
> +	/**
> +	 * @fence_fd: [out]
> +	 *
> +	 * New sync_file file descriptor containing the created fence.
> +	 */
> +	__s32 fence_fd;
> +
> +	__u32 reserved[1];
> +};

Again this takes an arbitrary syncpoint ID as input, so I expect that
the corresponding IOCTL will have to be called on the host1x device
node? Again, I think it would be good to either point that out for each
structure or IOCTL, or alternatively maybe reorder these such that this
becomes clearer.
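
If so, fence creation would continue the sketch from the read above.
Note that the threshold comparison is wraparound-safe: for example,
value 0x00000001 satisfies threshold 0xffffffff, since
((1 - 0xffffffff) & 0x80000000) == 0 in 32-bit arithmetic.

	/* Create a sync_file that signals one increment from now. */
	struct host1x_create_fence fence = {
		.id = info.id,
		.threshold = read_args.value + 1,
	};

	if (ioctl(host1x, HOST1X_IOCTL_CREATE_FENCE, &fence) < 0)
		return 1;

	/* fence.fence_fd is now a sync_file FD: poll()able and shareable
	 * with any API that accepts sync_files. */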

> +
> +struct host1x_fence_extract_fence {
> +	__u32 id;
> +	__u32 threshold;
> +};
> +
> +struct host1x_fence_extract {
> +	/**
> +	 * @fence_fd: [in]
> +	 *
> +	 * sync_file file descriptor
> +	 */
> +	__s32 fence_fd;
> +
> +	/**
> +	 * @num_fences: [in,out]
> +	 *
> +	 * In: size of the `fences_ptr` array counted in elements.
> +	 * Out: required size of the `fences_ptr` array counted in elements.
> +	 */
> +	__u32 num_fences;
> +
> +	/**
> +	 * @fences_ptr: [in]
> +	 *
> +	 * Pointer to array of `struct host1x_fence_extract_fence`.
> +	 */
> +	__u64 fences_ptr;
> +
> +	__u32 reserved[2];
> +};

For the others it's pretty clear to me what the purpose is, but I'm at a
complete loss with this one. What's the use-case for this?

In general I think it'd make sense to add a bit more documentation about
how all these IOCTLs are meant to be used to give people a better
understanding of why these are needed.

Thierry

> +
> +#define HOST1X_IOCTL_ALLOCATE_SYNCPOINT  _IOWR('X', 0x00, struct host1x_allocate_syncpoint)
> +#define HOST1X_IOCTL_READ_SYNCPOINT      _IOR ('X', 0x01, struct host1x_read_syncpoint)
> +#define HOST1X_IOCTL_CREATE_FENCE        _IOWR('X', 0x02, struct host1x_create_fence)
> +#define HOST1X_IOCTL_SYNCPOINT_INFO      _IOWR('X', 0x03, struct host1x_syncpoint_info)
> +#define HOST1X_IOCTL_SYNCPOINT_INCREMENT _IOWR('X', 0x04, struct host1x_syncpoint_increment)
> +#define HOST1X_IOCTL_FENCE_EXTRACT       _IOWR('X', 0x05, struct host1x_fence_extract)
> +
> +#if defined(__cplusplus)
> +}
> +#endif
> +
> +#endif
> -- 
> 2.30.0
>
Mikko Perttunen March 23, 2021, 11:12 a.m. UTC | #2
On 3/23/21 12:52 PM, Thierry Reding wrote:
> On Mon, Jan 11, 2021 at 03:00:05PM +0200, Mikko Perttunen wrote:
>> Add the userspace interface header, specifying interfaces
>> for allocating and accessing syncpoints from userspace,
>> and for creating sync_file-based fences from syncpoint
>> thresholds.
>>
>> Signed-off-by: Mikko Perttunen <mperttunen@nvidia.com>
>> ---
>>   include/uapi/linux/host1x.h | 134 ++++++++++++++++++++++++++++++++++++
>>   1 file changed, 134 insertions(+)
>>   create mode 100644 include/uapi/linux/host1x.h
> 
> How many of these syncpoints do we expect userspace to create?
> There's a limited number of open file descriptors available by
> default, so this needs to be kept reasonably low.
> 
>> diff --git a/include/uapi/linux/host1x.h b/include/uapi/linux/host1x.h
>> new file mode 100644
>> index 000000000000..9c8fb9425cb2
>> --- /dev/null
>> +++ b/include/uapi/linux/host1x.h
>> @@ -0,0 +1,134 @@
>> +/* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */
>> +/* Copyright (c) 2020 NVIDIA Corporation */
>> +
>> +#ifndef _UAPI__LINUX_HOST1X_H
>> +#define _UAPI__LINUX_HOST1X_H
>> +
>> +#include <linux/ioctl.h>
>> +#include <linux/types.h>
>> +
>> +#if defined(__cplusplus)
>> +extern "C" {
>> +#endif
>> +
>> +struct host1x_allocate_syncpoint {
>> +	/**
>> +	 * @fd: [out]
>> +	 *
>> +	 * New file descriptor representing the allocated syncpoint.
>> +	 */
>> +	__s32 fd;
>> +
>> +	__u32 reserved[3];
>> +};
>> +
>> +struct host1x_syncpoint_info {
>> +	/**
>> +	 * @id: [out]
>> +	 *
>> +	 * System-global ID of the syncpoint.
>> +	 */
>> +	__u32 id;
>> +
>> +	__u32 reserved[3];
>> +};
> 
> Given that this has only out parameters, I expect this will be called on
> the FD returned by HOST1X_IOCTL_ALLOCATE_SYNCPOINT? It might be worth
> pointing that out explicitly in a comment.
> 

Correct.

>> +
>> +struct host1x_syncpoint_increment {
>> +	/**
>> +	 * @count: [in]
>> +	 *
>> +	 * Number of times to increment the syncpoint. The syncpoint can
>> +	 * be observed at in-between values, but each increment is atomic.
>> +	 */
>> +	__u32 count;
>> +};
> 
> This seems like it would have to be called on the FD as well...

Yep.

> 
>> +
>> +struct host1x_read_syncpoint {
>> +	/**
>> +	 * @id: [in]
>> +	 *
>> +	 * ID of the syncpoint to read.
>> +	 */
>> +	__u32 id;
>> +
>> +	/**
>> +	 * @value: [out]
>> +	 *
>> +	 * Current value of the syncpoint.
>> +	 */
>> +	__u32 value;
>> +};
> 
> ... but then, all of a sudden you seem to switch things around and allow
> reading the value of an arbitrary syncpoint specified by ID.
> 
> Now, I suspect that's because reading the syncpoint is harmless and does
> not allow abuse, whereas incrementing could be abused if allowed on an
> arbitrary syncpoint ID. But I think it's worth spelling all that out in
> some documentation to make this clear from a security point of view and
> from a usability point of view for people trying to figure out how to
> use these interfaces.

Yeah. The model is that reading any syncpoint is OK but writing is not. 
I think these things were mentioned in the original proposal text but I 
did not carry them over to the comments. Will fix (however, see below).

> 
>> +
>> +struct host1x_create_fence {
>> +	/**
>> +	 * @id: [in]
>> +	 *
>> +	 * ID of the syncpoint to create a fence for.
>> +	 */
>> +	__u32 id;
>> +
>> +	/**
>> +	 * @threshold: [in]
>> +	 *
>> +	 * When the syncpoint reaches this value, the fence will be signaled.
>> +	 * The syncpoint is considered to have reached the threshold when the
>> +	 * following condition is true:
>> +	 *
>> +	 * 	((value - threshold) & 0x80000000U) == 0U
>> +	 *
>> +	 */
>> +	__u32 threshold;
>> +
>> +	/**
>> +	 * @fence_fd: [out]
>> +	 *
>> +	 * New sync_file file descriptor containing the created fence.
>> +	 */
>> +	__s32 fence_fd;
>> +
>> +	__u32 reserved[1];
>> +};
> 
> Again this takes an arbitrary syncpoint ID as input, so I expect that
> the corresponding IOCTL will have to be called on the host1x device
> node? Again, I think it would be good to either point that out for each
> structure or IOCTL, or alternatively maybe reorder these such that this
> becomes clearer.
> 
>> +
>> +struct host1x_fence_extract_fence {
>> +	__u32 id;
>> +	__u32 threshold;
>> +};
>> +
>> +struct host1x_fence_extract {
>> +	/**
>> +	 * @fence_fd: [in]
>> +	 *
>> +	 * sync_file file descriptor
>> +	 */
>> +	__s32 fence_fd;
>> +
>> +	/**
>> +	 * @num_fences: [in,out]
>> +	 *
>> +	 * In: size of the `fences_ptr` array counted in elements.
>> +	 * Out: required size of the `fences_ptr` array counted in elements.
>> +	 */
>> +	__u32 num_fences;
>> +
>> +	/**
>> +	 * @fences_ptr: [in]
>> +	 *
>> +	 * Pointer to array of `struct host1x_fence_extract_fence`.
>> +	 */
>> +	__u64 fences_ptr;
>> +
>> +	__u32 reserved[2];
>> +};
> 
> For the others it's pretty clear to me what the purpose is, but I'm at a
> complete loss with this one. What's the use-case for this?

This is needed to process incoming prefences for userspace-programmed 
engines -- mainly, the GPU with usermode submit enabled.
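
As a sketch of the intended two-pass pattern via the in/out num_fences
field (same assumptions as the earlier sketches, including that this
ioctl, like fence creation, is issued on the device node; the fragment
additionally needs stdlib.h and stdint.h):

	/* Pass 1: with num_fences == 0, ask how many (id, threshold)
	 * pairs the sync_file contains; the kernel is assumed to fill
	 * in the required count. */
	struct host1x_fence_extract extract = { .fence_fd = fence.fence_fd };
	struct host1x_fence_extract_fence *fences;

	ioctl(host1x, HOST1X_IOCTL_FENCE_EXTRACT, &extract);

	/* Pass 2: extract the pairs into a suitably sized array. These
	 * are what userspace would program as prefence waits. */
	fences = calloc(extract.num_fences, sizeof(*fences));
	extract.fences_ptr = (__u64)(uintptr_t)fences;

	if (ioctl(host1x, HOST1X_IOCTL_FENCE_EXTRACT, &extract) == 0) {
		for (unsigned int i = 0; i < extract.num_fences; i++)
			printf("prefence: id=%u threshold=%u\n",
			       fences[i].id, fences[i].threshold);
	}

	free(fences);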

To align with other upstream code, I've been thinking of removing this 
whole UAPI; moving the syncpoint allocation part to the DRM UAPI, and 
dropping the sync_file stuff altogether (if we have support for job 
submission outputting syncobjs, those could still be converted into 
sync_files). This doesn't support use-cases like GPU usermode submit, so 
for downstream we'll have to add it back in, though. Would like to hear 
your opinion on it as well.

Mikko

> 
> In general I think it'd make sense to add a bit more documentation about
> how all these IOCTLs are meant to be used to give people a better
> understanding of why these are needed.
> 
> Thierry
> 
>> +
>> +#define HOST1X_IOCTL_ALLOCATE_SYNCPOINT  _IOWR('X', 0x00, struct host1x_allocate_syncpoint)
>> +#define HOST1X_IOCTL_READ_SYNCPOINT      _IOR ('X', 0x01, struct host1x_read_syncpoint)
>> +#define HOST1X_IOCTL_CREATE_FENCE        _IOWR('X', 0x02, struct host1x_create_fence)
>> +#define HOST1X_IOCTL_SYNCPOINT_INFO      _IOWR('X', 0x03, struct host1x_syncpoint_info)
>> +#define HOST1X_IOCTL_SYNCPOINT_INCREMENT _IOWR('X', 0x04, struct host1x_syncpoint_increment)
>> +#define HOST1X_IOCTL_FENCE_EXTRACT       _IOWR('X', 0x05, struct host1x_fence_extract)
>> +
>> +#if defined(__cplusplus)
>> +}
>> +#endif
>> +
>> +#endif
>> -- 
>> 2.30.0
>>
Thierry Reding March 23, 2021, 11:43 a.m. UTC | #3
On Tue, Mar 23, 2021 at 01:12:36PM +0200, Mikko Perttunen wrote:
> On 3/23/21 12:52 PM, Thierry Reding wrote:
> > On Mon, Jan 11, 2021 at 03:00:05PM +0200, Mikko Perttunen wrote:
[...]
> > > +struct host1x_fence_extract_fence {
> > > +	__u32 id;
> > > +	__u32 threshold;
> > > +};
> > > +
> > > +struct host1x_fence_extract {
> > > +	/**
> > > +	 * @fence_fd: [in]
> > > +	 *
> > > +	 * sync_file file descriptor
> > > +	 */
> > > +	__s32 fence_fd;
> > > +
> > > +	/**
> > > +	 * @num_fences: [in,out]
> > > +	 *
> > > +	 * In: size of the `fences_ptr` array counted in elements.
> > > +	 * Out: required size of the `fences_ptr` array counted in elements.
> > > +	 */
> > > +	__u32 num_fences;
> > > +
> > > +	/**
> > > +	 * @fences_ptr: [in]
> > > +	 *
> > > +	 * Pointer to array of `struct host1x_fence_extract_fence`.
> > > +	 */
> > > +	__u64 fences_ptr;
> > > +
> > > +	__u32 reserved[2];
> > > +};
> > 
> > For the others it's pretty clear to me what the purpose is, but I'm at a
> > complete loss with this one. What's the use-case for this?
> 
> This is needed to process incoming prefences for userspace-programmed
> engines -- mainly, the GPU with usermode submit enabled.

I'm not sure what GPU usermode submit is. The name would imply that it's
somehow a mechanism to submit work to the GPU without getting the kernel
involved at all. That's something we'd have to clarify with the Nouveau
team to see if it's something they'd consider implementing, or implement
it ourselves.

Currently there's no interoperation at the syncpoint level between
Nouveau and Tegra DRM, so Nouveau on Tegra doesn't use any syncpoints
at all, and hence there's no use for this kind of API yet.

> To align with other upstream code, I've been thinking of removing this whole
> UAPI; moving the syncpoint allocation part to the DRM UAPI, and dropping the
> sync_file stuff altogether (if we have support for job submission outputting
> syncobjs, those could still be converted into sync_files). This doesn't
> support use-cases like GPU usermode submit, so for downstream we'll have to
> add it back in, though. Would like to hear your opinion on it as well.

That certainly sounds like a much easier sell because we have use-cases
for all of that. Along with your patches for NVDEC, the existing
userspace for VIC and your work-in-progress NVDEC userspace, this should
cover all the requirements.

Long story short, I think we have some ground to cover before we can
start thinking about how to do GPU usermode submits in an upstream
stack. As such we have no clear idea of what this is going to look like
in the end, or if it's going to be supported at all, so I think it'd be
best to move forward with your alternate proposal and move the syncpoint
functionality into Tegra DRM so that it can be used for VIC, NVDEC and
potentially other engines. If we ever get to the point of having to
support GPU usermode submit, we can take another look at how best to
support it.

Thierry

Patch

diff --git a/include/uapi/linux/host1x.h b/include/uapi/linux/host1x.h
new file mode 100644
index 000000000000..9c8fb9425cb2
--- /dev/null
+++ b/include/uapi/linux/host1x.h
@@ -0,0 +1,134 @@
+/* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */
+/* Copyright (c) 2020 NVIDIA Corporation */
+
+#ifndef _UAPI__LINUX_HOST1X_H
+#define _UAPI__LINUX_HOST1X_H
+
+#include <linux/ioctl.h>
+#include <linux/types.h>
+
+#if defined(__cplusplus)
+extern "C" {
+#endif
+
+struct host1x_allocate_syncpoint {
+	/**
+	 * @fd: [out]
+	 *
+	 * New file descriptor representing the allocated syncpoint.
+	 */
+	__s32 fd;
+
+	__u32 reserved[3];
+};
+
+struct host1x_syncpoint_info {
+	/**
+	 * @id: [out]
+	 *
+	 * System-global ID of the syncpoint.
+	 */
+	__u32 id;
+
+	__u32 reserved[3];
+};
+
+struct host1x_syncpoint_increment {
+	/**
+	 * @count: [in]
+	 *
+	 * Number of times to increment the syncpoint. The syncpoint can
+	 * be observed at in-between values, but each increment is atomic.
+	 */
+	__u32 count;
+};
+
+struct host1x_read_syncpoint {
+	/**
+	 * @id: [in]
+	 *
+	 * ID of the syncpoint to read.
+	 */
+	__u32 id;
+
+	/**
+	 * @value: [out]
+	 *
+	 * Current value of the syncpoint.
+	 */
+	__u32 value;
+};
+
+struct host1x_create_fence {
+	/**
+	 * @id: [in]
+	 *
+	 * ID of the syncpoint to create a fence for.
+	 */
+	__u32 id;
+
+	/**
+	 * @threshold: [in]
+	 *
+	 * When the syncpoint reaches this value, the fence will be signaled.
+	 * The syncpoint is considered to have reached the threshold when the
+	 * following condition is true:
+	 *
+	 * 	((value - threshold) & 0x80000000U) == 0U
+	 *
+	 */
+	__u32 threshold;
+
+	/**
+	 * @fence_fd: [out]
+	 *
+	 * New sync_file file descriptor containing the created fence.
+	 */
+	__s32 fence_fd;
+
+	__u32 reserved[1];
+};
+
+struct host1x_fence_extract_fence {
+	__u32 id;
+	__u32 threshold;
+};
+
+struct host1x_fence_extract {
+	/**
+	 * @fence_fd: [in]
+	 *
+	 * sync_file file descriptor
+	 */
+	__s32 fence_fd;
+
+	/**
+	 * @num_fences: [in,out]
+	 *
+	 * In: size of the `fences_ptr` array counted in elements.
+	 * Out: required size of the `fences_ptr` array counted in elements.
+	 */
+	__u32 num_fences;
+
+	/**
+	 * @fences_ptr: [in]
+	 *
+	 * Pointer to array of `struct host1x_fence_extract_fence`.
+	 */
+	__u64 fences_ptr;
+
+	__u32 reserved[2];
+};
+
+#define HOST1X_IOCTL_ALLOCATE_SYNCPOINT  _IOWR('X', 0x00, struct host1x_allocate_syncpoint)
+#define HOST1X_IOCTL_READ_SYNCPOINT      _IOR ('X', 0x01, struct host1x_read_syncpoint)
+#define HOST1X_IOCTL_CREATE_FENCE        _IOWR('X', 0x02, struct host1x_create_fence)
+#define HOST1X_IOCTL_SYNCPOINT_INFO      _IOWR('X', 0x03, struct host1x_syncpoint_info)
+#define HOST1X_IOCTL_SYNCPOINT_INCREMENT _IOWR('X', 0x04, struct host1x_syncpoint_increment)
+#define HOST1X_IOCTL_FENCE_EXTRACT       _IOWR('X', 0x05, struct host1x_fence_extract)
+
+#if defined(__cplusplus)
+}
+#endif
+
+#endif