diff mbox series

[v3,1/4] dma-buf: add support for virtio exported objects

Message ID 20200311112004.47138-2-stevensd@chromium.org (mailing list archive)
State New, archived
Headers show
Series Support virtio cross-device resources | expand

Commit Message

David Stevens March 11, 2020, 11:20 a.m. UTC
This change adds a new dma-buf operation that allows dma-bufs to be used
by virtio drivers to share exported objects. The new operation allows
the importing driver to query the exporting driver for the UUID which
identifies the underlying exported object.

Signed-off-by: David Stevens <stevensd@chromium.org>
---
 drivers/dma-buf/dma-buf.c | 12 ++++++++++++
 include/linux/dma-buf.h   | 18 ++++++++++++++++++
 2 files changed, 30 insertions(+)

Comments

Daniel Vetter May 13, 2020, 3:45 p.m. UTC | #1
On Wed, Mar 11, 2020 at 12:20 PM David Stevens <stevensd@chromium.org> wrote:
>
> This change adds a new dma-buf operation that allows dma-bufs to be used
> by virtio drivers to share exported objects. The new operation allows
> the importing driver to query the exporting driver for the UUID which
> identifies the underlying exported object.
>
> Signed-off-by: David Stevens <stevensd@chromium.org>

Adding Tomasz Figa, I've discussed this with him at elce last year I
think. Just to make sure.

Bunch of things:
- obviously we need the users of this in a few drivers, can't really
review anything stand-alone
- adding very specific ops to the generic interface is rather awkward,
eventually everyone wants that and we end up in a mess. I think the
best solution here would be if we create a struct virtio_dma_buf which
subclasses dma-buf, add a (hopefully safe) runtime upcasting
functions, and then a virtio_dma_buf_get_uuid() function. Just storing
the uuid should be doable (assuming this doesn't change during the
lifetime of the buffer), so no need for a callback.
- for the runtime upcasting the usual approach is to check the ->ops
pointer. Which means that would need to be the same for all virtio
dma_bufs, which might get a bit awkward. But I'd really prefer we not
add allocator specific stuff like this to dma-buf.
-Daniel

> ---
>  drivers/dma-buf/dma-buf.c | 12 ++++++++++++
>  include/linux/dma-buf.h   | 18 ++++++++++++++++++
>  2 files changed, 30 insertions(+)
>
> diff --git a/drivers/dma-buf/dma-buf.c b/drivers/dma-buf/dma-buf.c
> index d4097856c86b..fa5210ba6aaa 100644
> --- a/drivers/dma-buf/dma-buf.c
> +++ b/drivers/dma-buf/dma-buf.c
> @@ -1158,6 +1158,18 @@ void dma_buf_vunmap(struct dma_buf *dmabuf, void *vaddr)
>  }
>  EXPORT_SYMBOL_GPL(dma_buf_vunmap);
>
> +int dma_buf_get_uuid(struct dma_buf *dmabuf, uuid_t *uuid)
> +{
> +       if (WARN_ON(!dmabuf) || !uuid)
> +               return -EINVAL;
> +
> +       if (!dmabuf->ops->get_uuid)
> +               return -ENODEV;
> +
> +       return dmabuf->ops->get_uuid(dmabuf, uuid);
> +}
> +EXPORT_SYMBOL_GPL(dma_buf_get_uuid);
> +
>  #ifdef CONFIG_DEBUG_FS
>  static int dma_buf_debug_show(struct seq_file *s, void *unused)
>  {
> diff --git a/include/linux/dma-buf.h b/include/linux/dma-buf.h
> index abf5459a5b9d..00758523597d 100644
> --- a/include/linux/dma-buf.h
> +++ b/include/linux/dma-buf.h
> @@ -251,6 +251,21 @@ struct dma_buf_ops {
>
>         void *(*vmap)(struct dma_buf *);
>         void (*vunmap)(struct dma_buf *, void *vaddr);
> +
> +       /**
> +        * @get_uuid
> +        *
> +        * This is called by dma_buf_get_uuid to get the UUID which identifies
> +        * the buffer to virtio devices.
> +        *
> +        * This callback is optional.
> +        *
> +        * Returns:
> +        *
> +        * 0 on success or a negative error code on failure. On success uuid
> +        * will be populated with the buffer's UUID.
> +        */
> +       int (*get_uuid)(struct dma_buf *dmabuf, uuid_t *uuid);
>  };
>
>  /**
> @@ -444,4 +459,7 @@ int dma_buf_mmap(struct dma_buf *, struct vm_area_struct *,
>                  unsigned long);
>  void *dma_buf_vmap(struct dma_buf *);
>  void dma_buf_vunmap(struct dma_buf *, void *vaddr);
> +
> +int dma_buf_get_uuid(struct dma_buf *dmabuf, uuid_t *uuid);
> +
>  #endif /* __DMA_BUF_H__ */
> --
> 2.25.1.481.gfbce0eb801-goog
>
David Stevens May 14, 2020, 2:08 a.m. UTC | #2
On Thu, May 14, 2020 at 12:45 AM Daniel Vetter <daniel@ffwll.ch> wrote:
> On Wed, Mar 11, 2020 at 12:20 PM David Stevens <stevensd@chromium.org> wrote:
> >
> > This change adds a new dma-buf operation that allows dma-bufs to be used
> > by virtio drivers to share exported objects. The new operation allows
> > the importing driver to query the exporting driver for the UUID which
> > identifies the underlying exported object.
> >
> > Signed-off-by: David Stevens <stevensd@chromium.org>
>
> Adding Tomasz Figa, I've discussed this with him at elce last year I
> think. Just to make sure.
>
> Bunch of things:
> - obviously we need the users of this in a few drivers, can't really
> review anything stand-alone

Here is a link to the usage of this feature by the currently under
development virtio-video driver:
https://markmail.org/thread/j4xlqaaim266qpks

> - adding very specific ops to the generic interface is rather awkward,
> eventually everyone wants that and we end up in a mess. I think the
> best solution here would be if we create a struct virtio_dma_buf which
> subclasses dma-buf, add a (hopefully safe) runtime upcasting
> functions, and then a virtio_dma_buf_get_uuid() function. Just storing
> the uuid should be doable (assuming this doesn't change during the
> lifetime of the buffer), so no need for a callback.

So you would prefer a solution similar to the original version of this
patchset? https://markmail.org/message/z7if4u56q5fmaok4
Gerd Hoffmann May 14, 2020, 7:59 a.m. UTC | #3
Hi,

> - for the runtime upcasting the usual approach is to check the ->ops
> pointer. Which means that would need to be the same for all virtio
> dma_bufs, which might get a bit awkward. But I'd really prefer we not
> add allocator specific stuff like this to dma-buf.

This is exactly the problem, it gets messy quickly, also when it comes
to using the drm_prime.c helpers ...

take care,
  Gerd
David Stevens May 14, 2020, 8:19 a.m. UTC | #4
Sorry for the duplicate reply, didn't notice this until now.

> Just storing
> the uuid should be doable (assuming this doesn't change during the
> lifetime of the buffer), so no need for a callback.

Directly storing the uuid doesn't work that well because of
synchronization issues. The uuid needs to be shared between multiple
virtio devices with independent command streams, so to prevent races
between importing and exporting, the exporting driver can't share the
uuid with other drivers until it knows that the device has finished
registering the uuid. That requires a round trip to and then back from
the device. Using a callback allows the latency from that round trip
registration to be hidden.

-David
Daniel Vetter May 14, 2020, 12:28 p.m. UTC | #5
On Thu, May 14, 2020 at 11:08:52AM +0900, David Stevens wrote:
> On Thu, May 14, 2020 at 12:45 AM Daniel Vetter <daniel@ffwll.ch> wrote:
> > On Wed, Mar 11, 2020 at 12:20 PM David Stevens <stevensd@chromium.org> wrote:
> > >
> > > This change adds a new dma-buf operation that allows dma-bufs to be used
> > > by virtio drivers to share exported objects. The new operation allows
> > > the importing driver to query the exporting driver for the UUID which
> > > identifies the underlying exported object.
> > >
> > > Signed-off-by: David Stevens <stevensd@chromium.org>
> >
> > Adding Tomasz Figa, I've discussed this with him at elce last year I
> > think. Just to make sure.
> >
> > Bunch of things:
> > - obviously we need the users of this in a few drivers, can't really
> > review anything stand-alone
> 
> Here is a link to the usage of this feature by the currently under
> development virtio-video driver:
> https://markmail.org/thread/j4xlqaaim266qpks
> 
> > - adding very specific ops to the generic interface is rather awkward,
> > eventually everyone wants that and we end up in a mess. I think the
> > best solution here would be if we create a struct virtio_dma_buf which
> > subclasses dma-buf, add a (hopefully safe) runtime upcasting
> > functions, and then a virtio_dma_buf_get_uuid() function. Just storing
> > the uuid should be doable (assuming this doesn't change during the
> > lifetime of the buffer), so no need for a callback.
> 
> So you would prefer a solution similar to the original version of this
> patchset? https://markmail.org/message/z7if4u56q5fmaok4

yup.
-Daniel
Daniel Vetter May 14, 2020, 12:30 p.m. UTC | #6
On Thu, May 14, 2020 at 05:19:40PM +0900, David Stevens wrote:
> Sorry for the duplicate reply, didn't notice this until now.
> 
> > Just storing
> > the uuid should be doable (assuming this doesn't change during the
> > lifetime of the buffer), so no need for a callback.
> 
> Directly storing the uuid doesn't work that well because of
> synchronization issues. The uuid needs to be shared between multiple
> virtio devices with independent command streams, so to prevent races
> between importing and exporting, the exporting driver can't share the
> uuid with other drivers until it knows that the device has finished
> registering the uuid. That requires a round trip to and then back from
> the device. Using a callback allows the latency from that round trip
> registration to be hidden.

Uh, that means you actually do something and there's locking involved.
Makes stuff more complicated, invariant attributes are a lot easier
generally. Registering that uuid just always doesn't work, and blocking
when you're exporting?
-Daniel
Daniel Vetter May 14, 2020, 12:31 p.m. UTC | #7
On Thu, May 14, 2020 at 09:59:52AM +0200, Gerd Hoffmann wrote:
>   Hi,
> 
> > - for the runtime upcasting the usual approach is to check the ->ops
> > pointer. Which means that would need to be the same for all virtio
> > dma_bufs, which might get a bit awkward. But I'd really prefer we not
> > add allocator specific stuff like this to dma-buf.
> 
> This is exactly the problem, it gets messy quickly, also when it comes
> to using the drm_prime.c helpers ...

drm_prime.c helpers (not the core bits) exist becaues nvidia needed
something that wasnt EXPORT_SYMBOL_GPL.

I wouldn't shed a big tear if they don't fit anymore, they're kinda not
great to begin with. Much midlayer, not much of valued added, but at least
the _GPL is gone.
-Daniel
David Stevens May 15, 2020, 5:07 a.m. UTC | #8
On Thu, May 14, 2020 at 9:30 PM Daniel Vetter <daniel@ffwll.ch> wrote:
> On Thu, May 14, 2020 at 05:19:40PM +0900, David Stevens wrote:
> > Sorry for the duplicate reply, didn't notice this until now.
> >
> > > Just storing
> > > the uuid should be doable (assuming this doesn't change during the
> > > lifetime of the buffer), so no need for a callback.
> >
> > Directly storing the uuid doesn't work that well because of
> > synchronization issues. The uuid needs to be shared between multiple
> > virtio devices with independent command streams, so to prevent races
> > between importing and exporting, the exporting driver can't share the
> > uuid with other drivers until it knows that the device has finished
> > registering the uuid. That requires a round trip to and then back from
> > the device. Using a callback allows the latency from that round trip
> > registration to be hidden.
>
> Uh, that means you actually do something and there's locking involved.
> Makes stuff more complicated, invariant attributes are a lot easier
> generally. Registering that uuid just always doesn't work, and blocking
> when you're exporting?

Registering the id at creation and blocking in gem export is feasible,
but it doesn't work well for systems with a centralized buffer
allocator that doesn't support batch allocations (e.g. gralloc). In
such a system, the round trip latency would almost certainly be
included in the buffer allocation time. At least on the system I'm
working on, I suspect that would add 10s of milliseconds of startup
latency to video pipelines (although I haven't benchmarked the
difference). Doing the blocking as late as possible means most or all
of the latency can be hidden behind other pipeline setup work.

In terms of complexity, I think the synchronization would be basically
the same in either approach, just in different locations. All it would
do is alleviate the need for a callback to fetch the UUID.

-David
Daniel Vetter May 15, 2020, 2:03 p.m. UTC | #9
On Fri, May 15, 2020 at 02:07:06PM +0900, David Stevens wrote:
> On Thu, May 14, 2020 at 9:30 PM Daniel Vetter <daniel@ffwll.ch> wrote:
> > On Thu, May 14, 2020 at 05:19:40PM +0900, David Stevens wrote:
> > > Sorry for the duplicate reply, didn't notice this until now.
> > >
> > > > Just storing
> > > > the uuid should be doable (assuming this doesn't change during the
> > > > lifetime of the buffer), so no need for a callback.
> > >
> > > Directly storing the uuid doesn't work that well because of
> > > synchronization issues. The uuid needs to be shared between multiple
> > > virtio devices with independent command streams, so to prevent races
> > > between importing and exporting, the exporting driver can't share the
> > > uuid with other drivers until it knows that the device has finished
> > > registering the uuid. That requires a round trip to and then back from
> > > the device. Using a callback allows the latency from that round trip
> > > registration to be hidden.
> >
> > Uh, that means you actually do something and there's locking involved.
> > Makes stuff more complicated, invariant attributes are a lot easier
> > generally. Registering that uuid just always doesn't work, and blocking
> > when you're exporting?
> 
> Registering the id at creation and blocking in gem export is feasible,
> but it doesn't work well for systems with a centralized buffer
> allocator that doesn't support batch allocations (e.g. gralloc). In
> such a system, the round trip latency would almost certainly be
> included in the buffer allocation time. At least on the system I'm
> working on, I suspect that would add 10s of milliseconds of startup
> latency to video pipelines (although I haven't benchmarked the
> difference). Doing the blocking as late as possible means most or all
> of the latency can be hidden behind other pipeline setup work.
> 
> In terms of complexity, I think the synchronization would be basically
> the same in either approach, just in different locations. All it would
> do is alleviate the need for a callback to fetch the UUID.

Hm ok. I guess if we go with the older patch, where this all is a lot more
just code in virtio, doing an extra function to allocate the uuid sounds
fine. Then synchronization is entirely up to the virtio subsystem and not
a dma-buf problem (and hence not mine). You can use dma_resv_lock or so,
but no need to. But with callbacks potentially going both ways things
always get a bit interesting wrt locking - this is what makes peer2peer
dma-buf so painful right now. Hence I'd like to avoid that if needed, at
least at the dma-buf level. virtio code I don't mind what you do there :-)

Cheers, Daniel
Sumit Semwal May 18, 2020, 11:17 a.m. UTC | #10
Hello David,

On Fri, 15 May 2020 at 19:33, Daniel Vetter <daniel@ffwll.ch> wrote:
>
> On Fri, May 15, 2020 at 02:07:06PM +0900, David Stevens wrote:
> > On Thu, May 14, 2020 at 9:30 PM Daniel Vetter <daniel@ffwll.ch> wrote:
> > > On Thu, May 14, 2020 at 05:19:40PM +0900, David Stevens wrote:
> > > > Sorry for the duplicate reply, didn't notice this until now.
> > > >
> > > > > Just storing
> > > > > the uuid should be doable (assuming this doesn't change during the
> > > > > lifetime of the buffer), so no need for a callback.
> > > >
> > > > Directly storing the uuid doesn't work that well because of
> > > > synchronization issues. The uuid needs to be shared between multiple
> > > > virtio devices with independent command streams, so to prevent races
> > > > between importing and exporting, the exporting driver can't share the
> > > > uuid with other drivers until it knows that the device has finished
> > > > registering the uuid. That requires a round trip to and then back from
> > > > the device. Using a callback allows the latency from that round trip
> > > > registration to be hidden.
> > >
> > > Uh, that means you actually do something and there's locking involved.
> > > Makes stuff more complicated, invariant attributes are a lot easier
> > > generally. Registering that uuid just always doesn't work, and blocking
> > > when you're exporting?
> >
> > Registering the id at creation and blocking in gem export is feasible,
> > but it doesn't work well for systems with a centralized buffer
> > allocator that doesn't support batch allocations (e.g. gralloc). In
> > such a system, the round trip latency would almost certainly be
> > included in the buffer allocation time. At least on the system I'm
> > working on, I suspect that would add 10s of milliseconds of startup
> > latency to video pipelines (although I haven't benchmarked the
> > difference). Doing the blocking as late as possible means most or all
> > of the latency can be hidden behind other pipeline setup work.
> >
> > In terms of complexity, I think the synchronization would be basically
> > the same in either approach, just in different locations. All it would
> > do is alleviate the need for a callback to fetch the UUID.
>
I think I agree with Daniel there - this seems best suited for code
within virtio.

> Hm ok. I guess if we go with the older patch, where this all is a lot more
> just code in virtio, doing an extra function to allocate the uuid sounds
> fine. Then synchronization is entirely up to the virtio subsystem and not
> a dma-buf problem (and hence not mine). You can use dma_resv_lock or so,
> but no need to. But with callbacks potentially going both ways things
> always get a bit interesting wrt locking - this is what makes peer2peer
> dma-buf so painful right now. Hence I'd like to avoid that if needed, at
> least at the dma-buf level. virtio code I don't mind what you do there :-)
>
> Cheers, Daniel
> --
> Daniel Vetter
> Software Engineer, Intel Corporation
> http://blog.ffwll.ch

Best,
Sumit.
diff mbox series

Patch

diff --git a/drivers/dma-buf/dma-buf.c b/drivers/dma-buf/dma-buf.c
index d4097856c86b..fa5210ba6aaa 100644
--- a/drivers/dma-buf/dma-buf.c
+++ b/drivers/dma-buf/dma-buf.c
@@ -1158,6 +1158,18 @@  void dma_buf_vunmap(struct dma_buf *dmabuf, void *vaddr)
 }
 EXPORT_SYMBOL_GPL(dma_buf_vunmap);
 
+int dma_buf_get_uuid(struct dma_buf *dmabuf, uuid_t *uuid)
+{
+	if (WARN_ON(!dmabuf) || !uuid)
+		return -EINVAL;
+
+	if (!dmabuf->ops->get_uuid)
+		return -ENODEV;
+
+	return dmabuf->ops->get_uuid(dmabuf, uuid);
+}
+EXPORT_SYMBOL_GPL(dma_buf_get_uuid);
+
 #ifdef CONFIG_DEBUG_FS
 static int dma_buf_debug_show(struct seq_file *s, void *unused)
 {
diff --git a/include/linux/dma-buf.h b/include/linux/dma-buf.h
index abf5459a5b9d..00758523597d 100644
--- a/include/linux/dma-buf.h
+++ b/include/linux/dma-buf.h
@@ -251,6 +251,21 @@  struct dma_buf_ops {
 
 	void *(*vmap)(struct dma_buf *);
 	void (*vunmap)(struct dma_buf *, void *vaddr);
+
+	/**
+	 * @get_uuid
+	 *
+	 * This is called by dma_buf_get_uuid to get the UUID which identifies
+	 * the buffer to virtio devices.
+	 *
+	 * This callback is optional.
+	 *
+	 * Returns:
+	 *
+	 * 0 on success or a negative error code on failure. On success uuid
+	 * will be populated with the buffer's UUID.
+	 */
+	int (*get_uuid)(struct dma_buf *dmabuf, uuid_t *uuid);
 };
 
 /**
@@ -444,4 +459,7 @@  int dma_buf_mmap(struct dma_buf *, struct vm_area_struct *,
 		 unsigned long);
 void *dma_buf_vmap(struct dma_buf *);
 void dma_buf_vunmap(struct dma_buf *, void *vaddr);
+
+int dma_buf_get_uuid(struct dma_buf *dmabuf, uuid_t *uuid);
+
 #endif /* __DMA_BUF_H__ */