
[v3,3/4] usb: gadget: functionfs: Add DMABUF import interface

Message ID 20240108120056.22165-4-paul@crapouillou.net (mailing list archive)
State Superseded
Series usb: gadget: functionfs: DMABUF import interface

Commit Message

Paul Cercueil Jan. 8, 2024, noon UTC
This patch introduces three new ioctls. They should all be called on a
data endpoint (i.e. not ep0). They are:

- FUNCTIONFS_DMABUF_ATTACH, which takes the file descriptor of a DMABUF
  object to attach to the endpoint.

- FUNCTIONFS_DMABUF_DETACH, which takes the file descriptor of the
  DMABUF to detach from the endpoint. Note that closing the endpoint's
  file descriptor will automatically detach all attached DMABUFs.

- FUNCTIONFS_DMABUF_TRANSFER, which requests a data transfer from / to
  the given DMABUF. Its argument is a structure that packs the DMABUF's
  file descriptor, the size in bytes to transfer (which should generally
  be set to the size of the DMABUF), and a 'flags' field which is unused
  for now.
  Before this ioctl can be used, the related DMABUF must be attached
  with FUNCTIONFS_DMABUF_ATTACH.

These three ioctls enable the FunctionFS code to transfer data between
the USB stack and a DMABUF object, which can be provided by a driver
from a completely different subsystem, in a zero-copy fashion.
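
For illustration, a minimal userspace sequence exercising these ioctls
could look like the sketch below (error handling trimmed; the DMABUF fd
is assumed to come from an exporter in another subsystem, and the
helper name is made up):

  #include <stdint.h>
  #include <sys/ioctl.h>
  #include <linux/usb/functionfs.h>

  /* 'ep_fd' is an open FunctionFS data endpoint (not ep0); 'buf_fd'
   * is a DMABUF file descriptor obtained from an exporting driver. */
  static int do_dmabuf_transfer(int ep_fd, int buf_fd, uint64_t size)
  {
          struct usb_ffs_dmabuf_transfer_req req = {
                  .fd = buf_fd,
                  .flags = 0,             /* no flags defined yet */
                  .length = size,         /* generally the DMABUF's size */
          };
          int ret;

          /* Attach once before any transfer on this endpoint */
          ret = ioctl(ep_fd, FUNCTIONFS_DMABUF_ATTACH, &buf_fd);
          if (ret)
                  return ret;

          /* Enqueue the zero-copy transfer */
          ret = ioctl(ep_fd, FUNCTIONFS_DMABUF_TRANSFER, &req);

          /* Explicit detach; closing ep_fd would also detach it */
          ioctl(ep_fd, FUNCTIONFS_DMABUF_DETACH, &buf_fd);
          return ret;
  }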

Signed-off-by: Paul Cercueil <paul@crapouillou.net>

---
v2:
- Make ffs_dma_resv_lock() static
- Add MODULE_IMPORT_NS(DMA_BUF);
- The attach/detach functions are now performed without locking the
  eps_lock spinlock. The transfer function starts with the spinlock
  unlocked, then locks it before allocating and queueing the USB
  transfer.

v3:
- Inline to_ffs_dma_fence() which was called only once.
- Simplify ffs_dma_resv_lock()
- Add comment explaining why we unref twice in ffs_dmabuf_detach()
- Document uapi struct usb_ffs_dmabuf_transfer_req and IOCTLs
---
 drivers/usb/gadget/function/f_fs.c  | 417 ++++++++++++++++++++++++++++
 include/uapi/linux/usb/functionfs.h |  41 +++
 2 files changed, 458 insertions(+)

Comments

Christian König Jan. 8, 2024, 12:28 p.m. UTC | #1
Am 08.01.24 um 13:00 schrieb Paul Cercueil:
> This patch introduces three new ioctls. They all should be called on a
> data endpoint (ie. not ep0). They are:
>
> - FUNCTIONFS_DMABUF_ATTACH, which takes the file descriptor of a DMABUF
>    object to attach to the endpoint.
>
> - FUNCTIONFS_DMABUF_DETACH, which takes the file descriptor of the
>    DMABUF to detach from the endpoint. Note that closing the endpoint's
>    file descriptor will automatically detach all attached DMABUFs.
>
> - FUNCTIONFS_DMABUF_TRANSFER, which requests a data transfer from / to
>    the given DMABUF. Its argument is a structure that packs the DMABUF's
>    file descriptor, the size in bytes to transfer (which should generally
>    be set to the size of the DMABUF), and a 'flags' field which is unused
>    for now.
>    Before this ioctl can be used, the related DMABUF must be attached
>    with FUNCTIONFS_DMABUF_ATTACH.
>
> These three ioctls enable the FunctionFS code to transfer data between
> the USB stack and a DMABUF object, which can be provided by a driver
> from a completely different subsystem, in a zero-copy fashion.
>
> Signed-off-by: Paul Cercueil <paul@crapouillou.net>
>
> ---
> v2:
> - Make ffs_dma_resv_lock() static
> - Add MODULE_IMPORT_NS(DMA_BUF);
> - The attach/detach functions are now performed without locking the
>    eps_lock spinlock. The transfer function starts with the spinlock
>    unlocked, then locks it before allocating and queueing the USB
>    transfer.
>
> v3:
> - Inline to_ffs_dma_fence() which was called only once.
> - Simplify ffs_dma_resv_lock()
> - Add comment explaining why we unref twice in ffs_dmabuf_detach()
> - Document uapi struct usb_ffs_dmabuf_transfer_req and IOCTLs
> ---
>   drivers/usb/gadget/function/f_fs.c  | 417 ++++++++++++++++++++++++++++
>   include/uapi/linux/usb/functionfs.h |  41 +++
>   2 files changed, 458 insertions(+)
>
> diff --git a/drivers/usb/gadget/function/f_fs.c b/drivers/usb/gadget/function/f_fs.c
> index ed2a6d5fcef7..9df1f5abb0d4 100644
> --- a/drivers/usb/gadget/function/f_fs.c
> +++ b/drivers/usb/gadget/function/f_fs.c
> @@ -15,6 +15,9 @@
>   /* #define VERBOSE_DEBUG */
>   
>   #include <linux/blkdev.h>
> +#include <linux/dma-buf.h>
> +#include <linux/dma-fence.h>
> +#include <linux/dma-resv.h>
>   #include <linux/pagemap.h>
>   #include <linux/export.h>
>   #include <linux/fs_parser.h>
> @@ -43,6 +46,8 @@
>   
>   #define FUNCTIONFS_MAGIC	0xa647361 /* Chosen by a honest dice roll ;) */
>   
> +MODULE_IMPORT_NS(DMA_BUF);
> +
>   /* Reference counter handling */
>   static void ffs_data_get(struct ffs_data *ffs);
>   static void ffs_data_put(struct ffs_data *ffs);
> @@ -124,6 +129,21 @@ struct ffs_ep {
>   	u8				num;
>   };
>   
> +struct ffs_dmabuf_priv {
> +	struct list_head entry;
> +	struct kref ref;
> +	struct dma_buf_attachment *attach;
> +	spinlock_t lock;
> +	u64 context;
> +};
> +
> +struct ffs_dma_fence {
> +	struct dma_fence base;
> +	struct ffs_dmabuf_priv *priv;
> +	struct sg_table *sgt;
> +	enum dma_data_direction dir;
> +};
> +
>   struct ffs_epfile {
>   	/* Protects ep->ep and ep->req. */
>   	struct mutex			mutex;
> @@ -197,6 +217,8 @@ struct ffs_epfile {
>   	unsigned char			isoc;	/* P: ffs->eps_lock */
>   
>   	unsigned char			_pad;
> +
> +	struct list_head		dmabufs;
>   };
>   
>   struct ffs_buffer {
> @@ -1271,10 +1293,44 @@ static ssize_t ffs_epfile_read_iter(struct kiocb *kiocb, struct iov_iter *to)
>   	return res;
>   }
>   
> +static void ffs_dmabuf_release(struct kref *ref)
> +{
> +	struct ffs_dmabuf_priv *priv = container_of(ref, struct ffs_dmabuf_priv, ref);
> +	struct dma_buf_attachment *attach = priv->attach;
> +	struct dma_buf *dmabuf = attach->dmabuf;
> +
> +	pr_debug("FFS DMABUF release\n");
> +	dma_buf_detach(attach->dmabuf, attach);
> +	dma_buf_put(dmabuf);
> +
> +	list_del(&priv->entry);
> +	kfree(priv);
> +}
> +
> +static void ffs_dmabuf_get(struct dma_buf_attachment *attach)
> +{
> +	struct ffs_dmabuf_priv *priv = attach->importer_priv;
> +
> +	kref_get(&priv->ref);
> +}
> +
> +static void ffs_dmabuf_put(struct dma_buf_attachment *attach)
> +{
> +	struct ffs_dmabuf_priv *priv = attach->importer_priv;
> +
> +	kref_put(&priv->ref, ffs_dmabuf_release);
> +}
> +
>   static int
>   ffs_epfile_release(struct inode *inode, struct file *file)
>   {
>   	struct ffs_epfile *epfile = inode->i_private;
> +	struct ffs_dmabuf_priv *priv, *tmp;
> +
> +	/* Close all attached DMABUFs */
> +	list_for_each_entry_safe(priv, tmp, &epfile->dmabufs, entry) {
> +		ffs_dmabuf_put(priv->attach);
> +	}
>   
>   	__ffs_epfile_read_buffer_free(epfile);
>   	ffs_data_closed(epfile->ffs);
> @@ -1282,6 +1338,328 @@ ffs_epfile_release(struct inode *inode, struct file *file)
>   	return 0;
>   }
>   
> +static void ffs_dmabuf_signal_done(struct ffs_dma_fence *dma_fence, int ret)
> +{
> +	struct ffs_dmabuf_priv *priv = dma_fence->priv;
> +	struct dma_fence *fence = &dma_fence->base;
> +
> +	dma_fence_get(fence);
> +	fence->error = ret;
> +	dma_fence_signal(fence);
> +
> +	dma_buf_unmap_attachment(priv->attach, dma_fence->sgt, dma_fence->dir);
> +	dma_fence_put(fence);
> +	ffs_dmabuf_put(priv->attach);
> +}
> +
> +static void ffs_epfile_dmabuf_io_complete(struct usb_ep *ep,
> +					  struct usb_request *req)
> +{
> +	pr_debug("FFS: DMABUF transfer complete, status=%d\n", req->status);
> +	ffs_dmabuf_signal_done(req->context, req->status);
> +	usb_ep_free_request(ep, req);
> +}
> +
> +static const char *ffs_dmabuf_get_driver_name(struct dma_fence *fence)
> +{
> +	return "functionfs";
> +}
> +
> +static const char *ffs_dmabuf_get_timeline_name(struct dma_fence *fence)
> +{
> +	return "";
> +}
> +
> +static void ffs_dmabuf_fence_release(struct dma_fence *fence)
> +{
> +	struct ffs_dma_fence *dma_fence =
> +		container_of(fence, struct ffs_dma_fence, base);
> +
> +	kfree(dma_fence);
> +}
> +
> +static const struct dma_fence_ops ffs_dmabuf_fence_ops = {
> +	.get_driver_name	= ffs_dmabuf_get_driver_name,
> +	.get_timeline_name	= ffs_dmabuf_get_timeline_name,
> +	.release		= ffs_dmabuf_fence_release,
> +};
> +
> +static int ffs_dma_resv_lock(struct dma_buf *dmabuf, bool nonblock)
> +{
> +	int ret;
> +
> +	ret = dma_resv_lock_interruptible(dmabuf->resv, NULL);
> +	if (ret) {
> +		if (ret != -EDEADLK)
> +			return ret;
> +		if (nonblock)
> +			return -EBUSY;
> +
> +		ret = dma_resv_lock_slow_interruptible(dmabuf->resv, NULL);
> +	}
> +
> +	return ret;
> +}
> +
> +static struct dma_buf_attachment *
> +ffs_dmabuf_find_attachment(struct device *dev, struct dma_buf *dmabuf,
> +			   bool nonblock)
> +{
> +	struct dma_buf_attachment *elm, *attach = NULL;
> +	int ret;
> +
> +	ret = ffs_dma_resv_lock(dmabuf, nonblock);
> +	if (ret)
> +		return ERR_PTR(ret);
> +
> +	list_for_each_entry(elm, &dmabuf->attachments, node) {

Mhm, this is messing with the internals of DMA-buf and is probably a no-go.

We might be able to have this as a dma_buf function with proper
documentation if you really need it, but see below.
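
For reference, such a helper could look roughly like this sketch
(dma_buf_find_attachment() is hypothetical, not an existing dma-buf
API; and since attachments carry no refcount of their own, the importer
would still need its own lifetime handling on top):

  static struct dma_buf_attachment *
  dma_buf_find_attachment(struct dma_buf *dmabuf, struct device *dev)
  {
          struct dma_buf_attachment *elm, *attach = NULL;

          /* Cannot fail without an acquire context */
          dma_resv_lock(dmabuf->resv, NULL);

          list_for_each_entry(elm, &dmabuf->attachments, node) {
                  if (elm->dev == dev) {
                          attach = elm;
                          break;
                  }
          }

          dma_resv_unlock(dmabuf->resv);

          return attach;
  }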

> +		if (elm->dev == dev) {
> +			attach = elm;
> +			break;
> +		}
> +	}
> +
> +	if (attach)
> +		ffs_dmabuf_get(elm);
> +
> +	dma_resv_unlock(dmabuf->resv);
> +
> +	return attach ?: ERR_PTR(-EPERM);
> +}
> +
> +static int ffs_dmabuf_attach(struct file *file, int fd)
> +{
> +	struct ffs_epfile *epfile = file->private_data;
> +	struct usb_gadget *gadget = epfile->ffs->gadget;
> +	struct dma_buf_attachment *attach;
> +	struct ffs_dmabuf_priv *priv;
> +	struct dma_buf *dmabuf;
> +	int err;
> +
> +	if (!gadget || !gadget->sg_supported)
> +		return -EPERM;
> +
> +	dmabuf = dma_buf_get(fd);
> +	if (IS_ERR(dmabuf))
> +		return PTR_ERR(dmabuf);
> +
> +	attach = dma_buf_attach(dmabuf, gadget->dev.parent);
> +	if (IS_ERR(attach)) {
> +		err = PTR_ERR(attach);
> +		goto err_dmabuf_put;
> +	}
> +
> +	priv = kzalloc(sizeof(*priv), GFP_KERNEL);
> +	if (!priv) {
> +		err = -ENOMEM;
> +		goto err_dmabuf_detach;
> +	}
> +
> +	attach->importer_priv = priv;
> +
> +	priv->attach = attach;
> +	spin_lock_init(&priv->lock);
> +	kref_init(&priv->ref);
> +	priv->context = dma_fence_context_alloc(1);
> +
> +	list_add(&priv->entry, &epfile->dmabufs);
> +
> +	return 0;
> +
> +err_dmabuf_detach:
> +	dma_buf_detach(dmabuf, attach);
> +err_dmabuf_put:
> +	dma_buf_put(dmabuf);
> +
> +	return err;
> +}
> +
> +static int ffs_dmabuf_detach(struct file *file, int fd)
> +{
> +	struct ffs_epfile *epfile = file->private_data;
> +	struct usb_gadget *gadget = epfile->ffs->gadget;
> +	bool nonblock = file->f_flags & O_NONBLOCK;
> +	struct dma_buf_attachment *attach;
> +	struct dma_buf *dmabuf;
> +	int ret = 0;
> +
> +	dmabuf = dma_buf_get(fd);
> +	if (IS_ERR(dmabuf))
> +		return PTR_ERR(dmabuf);
> +
> +	attach = ffs_dmabuf_find_attachment(gadget->dev.parent,
> +					    dmabuf, nonblock);
> +	if (IS_ERR(attach)) {
> +		ret = PTR_ERR(attach);
> +		goto out_dmabuf_put;
> +	}
> +
> +	/*
> +	 * Unref twice to release the reference obtained with
> +	 * ffs_dmabuf_find_attachment() above, and the one obtained in
> +	 * ffs_dmabuf_attach().
> +	 */
> +	ffs_dmabuf_put(attach);
> +	ffs_dmabuf_put(attach);

What is preventing this function from being called multiple times
concurrently?

If I'm not completely mistaken, that can break trivially.

Regards,
Christian.

> +
> +out_dmabuf_put:
> +	dma_buf_put(dmabuf);
> +	return ret;
> +}
> +
> +static int ffs_dmabuf_transfer(struct file *file,
> +			       const struct usb_ffs_dmabuf_transfer_req *req)
> +{
> +	bool dma_to_ram, nonblock = file->f_flags & O_NONBLOCK;
> +	struct ffs_epfile *epfile = file->private_data;
> +	struct usb_gadget *gadget = epfile->ffs->gadget;
> +	struct dma_buf_attachment *attach;
> +	struct ffs_dmabuf_priv *priv;
> +	enum dma_data_direction dir;
> +	struct ffs_dma_fence *fence;
> +	struct usb_request *usb_req;
> +	struct sg_table *sg_table;
> +	struct dma_buf *dmabuf;
> +	struct ffs_ep *ep;
> +	int ret;
> +
> +	if (req->flags & ~USB_FFS_DMABUF_TRANSFER_MASK)
> +		return -EINVAL;
> +
> +	dmabuf = dma_buf_get(req->fd);
> +	if (IS_ERR(dmabuf))
> +		return PTR_ERR(dmabuf);
> +
> +	if (req->length > dmabuf->size || req->length == 0) {
> +		ret = -EINVAL;
> +		goto err_dmabuf_put;
> +	}
> +
> +	attach = ffs_dmabuf_find_attachment(gadget->dev.parent,
> +					    dmabuf, nonblock);
> +	if (IS_ERR(attach)) {
> +		ret = PTR_ERR(attach);
> +		goto err_dmabuf_put;
> +	}
> +
> +	priv = attach->importer_priv;
> +
> +	if (epfile->in)
> +		dir = DMA_FROM_DEVICE;
> +	else
> +		dir = DMA_TO_DEVICE;
> +
> +	sg_table = dma_buf_map_attachment(attach, dir);
> +	if (IS_ERR(sg_table)) {
> +		ret = PTR_ERR(sg_table);
> +		goto err_attachment_put;
> +	}
> +
> +	ep = ffs_epfile_wait_ep(file);
> +	if (IS_ERR(ep)) {
> +		ret = PTR_ERR(ep);
> +		goto err_unmap_attachment;
> +	}
> +
> +	ret = ffs_dma_resv_lock(dmabuf, nonblock);
> +	if (ret)
> +		goto err_unmap_attachment;
> +
> +	/* Make sure we don't have writers */
> +	if (!dma_resv_test_signaled(dmabuf->resv, DMA_RESV_USAGE_WRITE)) {
> +		pr_debug("FFS WRITE fence is not signaled\n");
> +		ret = -EBUSY;
> +		goto err_resv_unlock;
> +	}
> +
> +	dma_to_ram = dir == DMA_FROM_DEVICE;
> +
> +	/* If we're writing to the DMABUF, make sure we don't have readers */
> +	if (dma_to_ram &&
> +	    !dma_resv_test_signaled(dmabuf->resv, DMA_RESV_USAGE_READ)) {
> +		pr_debug("FFS READ fence is not signaled\n");
> +		ret = -EBUSY;
> +		goto err_resv_unlock;
> +	}
> +
> +	ret = dma_resv_reserve_fences(dmabuf->resv, 1);
> +	if (ret)
> +		goto err_resv_unlock;
> +
> +	fence = kmalloc(sizeof(*fence), GFP_KERNEL);
> +	if (!fence) {
> +		ret = -ENOMEM;
> +		goto err_resv_unlock;
> +	}
> +
> +	fence->sgt = sg_table;
> +	fence->dir = dir;
> +	fence->priv = priv;
> +
> +	dma_fence_init(&fence->base, &ffs_dmabuf_fence_ops,
> +		       &priv->lock, priv->context, 0);
> +
> +	spin_lock_irq(&epfile->ffs->eps_lock);
> +
> +	/* In the meantime, endpoint got disabled or changed. */
> +	if (epfile->ep != ep) {
> +		ret = -ESHUTDOWN;
> +		goto err_fence_put;
> +	}
> +
> +	usb_req = usb_ep_alloc_request(ep->ep, GFP_ATOMIC);
> +	if (!usb_req) {
> +		ret = -ENOMEM;
> +		goto err_fence_put;
> +	}
> +
> +	dma_resv_add_fence(dmabuf->resv, &fence->base,
> +			   dma_resv_usage_rw(dma_to_ram));
> +	dma_resv_unlock(dmabuf->resv);
> +
> +	/* Now that the dma_fence is in place, queue the transfer. */
> +
> +	usb_req->length = req->length;
> +	usb_req->buf = NULL;
> +	usb_req->sg = sg_table->sgl;
> +	usb_req->num_sgs = sg_nents_for_len(sg_table->sgl, req->length);
> +	usb_req->sg_was_mapped = true;
> +	usb_req->context  = fence;
> +	usb_req->complete = ffs_epfile_dmabuf_io_complete;
> +
> +	ret = usb_ep_queue(ep->ep, usb_req, GFP_ATOMIC);
> +	if (ret) {
> +		usb_ep_free_request(ep->ep, usb_req);
> +
> +		spin_unlock_irq(&epfile->ffs->eps_lock);
> +
> +		pr_warn("FFS: Failed to queue DMABUF: %d\n", ret);
> +		ffs_dmabuf_signal_done(fence, ret);
> +		goto out_dma_buf_put;
> +	}
> +
> +	spin_unlock_irq(&epfile->ffs->eps_lock);
> +
> +out_dma_buf_put:
> +	dma_buf_put(dmabuf);
> +
> +	return ret;
> +
> +err_fence_put:
> +	spin_unlock_irq(&epfile->ffs->eps_lock);
> +	dma_fence_put(&fence->base);
> +err_resv_unlock:
> +	dma_resv_unlock(dmabuf->resv);
> +err_unmap_attachment:
> +	dma_buf_unmap_attachment(attach, sg_table, dir);
> +err_attachment_put:
> +	ffs_dmabuf_put(attach);
> +err_dmabuf_put:
> +	dma_buf_put(dmabuf);
> +
> +	return ret;
> +}
> +
>   static long ffs_epfile_ioctl(struct file *file, unsigned code,
>   			     unsigned long value)
>   {
> @@ -1292,6 +1670,44 @@ static long ffs_epfile_ioctl(struct file *file, unsigned code,
>   	if (WARN_ON(epfile->ffs->state != FFS_ACTIVE))
>   		return -ENODEV;
>   
> +	switch (code) {
> +	case FUNCTIONFS_DMABUF_ATTACH:
> +	{
> +		int fd;
> +
> +		if (copy_from_user(&fd, (void __user *)value, sizeof(fd))) {
> +			ret = -EFAULT;
> +			break;
> +		}
> +
> +		return ffs_dmabuf_attach(file, fd);
> +	}
> +	case FUNCTIONFS_DMABUF_DETACH:
> +	{
> +		int fd;
> +
> +		if (copy_from_user(&fd, (void __user *)value, sizeof(fd))) {
> +			ret = -EFAULT;
> +			break;
> +		}
> +
> +		return ffs_dmabuf_detach(file, fd);
> +	}
> +	case FUNCTIONFS_DMABUF_TRANSFER:
> +	{
> +		struct usb_ffs_dmabuf_transfer_req req;
> +
> +		if (copy_from_user(&req, (void __user *)value, sizeof(req))) {
> +			ret = -EFAULT;
> +			break;
> +		}
> +
> +		return ffs_dmabuf_transfer(file, &req);
> +	}
> +	default:
> +		break;
> +	}
> +
>   	/* Wait for endpoint to be enabled */
>   	ep = ffs_epfile_wait_ep(file);
>   	if (IS_ERR(ep))
> @@ -1869,6 +2285,7 @@ static int ffs_epfiles_create(struct ffs_data *ffs)
>   	for (i = 1; i <= count; ++i, ++epfile) {
>   		epfile->ffs = ffs;
>   		mutex_init(&epfile->mutex);
> +		INIT_LIST_HEAD(&epfile->dmabufs);
>   		if (ffs->user_flags & FUNCTIONFS_VIRTUAL_ADDR)
>   			sprintf(epfile->name, "ep%02x", ffs->eps_addrmap[i]);
>   		else
> diff --git a/include/uapi/linux/usb/functionfs.h b/include/uapi/linux/usb/functionfs.h
> index 078098e73fd3..9f88de9c3d66 100644
> --- a/include/uapi/linux/usb/functionfs.h
> +++ b/include/uapi/linux/usb/functionfs.h
> @@ -86,6 +86,22 @@ struct usb_ext_prop_desc {
>   	__le16	wPropertyNameLength;
>   } __attribute__((packed));
>   
> +/* Flags for usb_ffs_dmabuf_transfer_req->flags (none for now) */
> +#define USB_FFS_DMABUF_TRANSFER_MASK	0x0
> +
> +/**
> + * struct usb_ffs_dmabuf_transfer_req - Transfer request for a DMABUF object
> + * @fd:		file descriptor of the DMABUF object
> + * @flags:	one or more USB_FFS_DMABUF_TRANSFER_* flags
> + * @length:	number of bytes used in this DMABUF for the data transfer.
> + *		Should generally be set to the DMABUF's size.
> + */
> +struct usb_ffs_dmabuf_transfer_req {
> +	int fd;
> +	__u32 flags;
> +	__u64 length;
> +} __attribute__((packed));
> +
>   #ifndef __KERNEL__
>   
>   /*
> @@ -290,6 +306,31 @@ struct usb_functionfs_event {
>   #define	FUNCTIONFS_ENDPOINT_DESC	_IOR('g', 130, \
>   					     struct usb_endpoint_descriptor)
>   
> +/*
> + * Attach the DMABUF object, identified by its file descriptor, to the
> + * data endpoint. Returns zero on success, and a negative errno value
> + * on error.
> + */
> +#define FUNCTIONFS_DMABUF_ATTACH	_IOW('g', 131, int)
> +
>   
> +/*
> + * Detach the given DMABUF object, identified by its file descriptor,
> + * from the data endpoint. Returns zero on success, and a negative
> + * errno value on error. Note that closing the endpoint's file
> + * descriptor will automatically detach all attached DMABUFs.
> + */
> +#define FUNCTIONFS_DMABUF_DETACH	_IOW('g', 132, int)
> +
> +/*
> + * Enqueue the previously attached DMABUF to the transfer queue.
> + * The argument is a structure that packs the DMABUF's file descriptor,
> + * the size in bytes to transfer (which should generally correspond to
> + * the size of the DMABUF), and a 'flags' field which is unused
> + * for now. Returns zero on success, and a negative errno value on
> + * error.
> + */
> +#define FUNCTIONFS_DMABUF_TRANSFER	_IOW('g', 133, \
> +					     struct usb_ffs_dmabuf_transfer_req)
>   
>   #endif /* _UAPI__LINUX_FUNCTIONFS_H__ */
Daniel Vetter Jan. 8, 2024, 12:39 p.m. UTC | #2
On Mon, Jan 08, 2024 at 01:00:55PM +0100, Paul Cercueil wrote:
> This patch introduces three new ioctls. They all should be called on a
> data endpoint (ie. not ep0). They are:
> 
> - FUNCTIONFS_DMABUF_ATTACH, which takes the file descriptor of a DMABUF
>   object to attach to the endpoint.
> 
> - FUNCTIONFS_DMABUF_DETACH, which takes the file descriptor of the
>   DMABUF to detach from the endpoint. Note that closing the endpoint's
>   file descriptor will automatically detach all attached DMABUFs.
> 
> - FUNCTIONFS_DMABUF_TRANSFER, which requests a data transfer from / to
>   the given DMABUF. Its argument is a structure that packs the DMABUF's
>   file descriptor, the size in bytes to transfer (which should generally
>   be set to the size of the DMABUF), and a 'flags' field which is unused
>   for now.
>   Before this ioctl can be used, the related DMABUF must be attached
>   with FUNCTIONFS_DMABUF_ATTACH.
> 
> These three ioctls enable the FunctionFS code to transfer data between
> the USB stack and a DMABUF object, which can be provided by a driver
> from a completely different subsystem, in a zero-copy fashion.
> 
> Signed-off-by: Paul Cercueil <paul@crapouillou.net>
> 
> ---
> v2:
> - Make ffs_dma_resv_lock() static
> - Add MODULE_IMPORT_NS(DMA_BUF);
> - The attach/detach functions are now performed without locking the
>   eps_lock spinlock. The transfer function starts with the spinlock
>   unlocked, then locks it before allocating and queueing the USB
>   transfer.
> 
> v3:
> - Inline to_ffs_dma_fence() which was called only once.
> - Simplify ffs_dma_resv_lock()
> - Add comment explaining why we unref twice in ffs_dmabuf_detach()
> - Document uapi struct usb_ffs_dmabuf_transfer_req and IOCTLs
> ---
>  drivers/usb/gadget/function/f_fs.c  | 417 ++++++++++++++++++++++++++++
>  include/uapi/linux/usb/functionfs.h |  41 +++
>  2 files changed, 458 insertions(+)
> 
> diff --git a/drivers/usb/gadget/function/f_fs.c b/drivers/usb/gadget/function/f_fs.c
> index ed2a6d5fcef7..9df1f5abb0d4 100644
> --- a/drivers/usb/gadget/function/f_fs.c
> +++ b/drivers/usb/gadget/function/f_fs.c
> @@ -15,6 +15,9 @@
>  /* #define VERBOSE_DEBUG */
>  
>  #include <linux/blkdev.h>
> +#include <linux/dma-buf.h>
> +#include <linux/dma-fence.h>
> +#include <linux/dma-resv.h>
>  #include <linux/pagemap.h>
>  #include <linux/export.h>
>  #include <linux/fs_parser.h>
> @@ -43,6 +46,8 @@
>  
>  #define FUNCTIONFS_MAGIC	0xa647361 /* Chosen by a honest dice roll ;) */
>  
> +MODULE_IMPORT_NS(DMA_BUF);
> +
>  /* Reference counter handling */
>  static void ffs_data_get(struct ffs_data *ffs);
>  static void ffs_data_put(struct ffs_data *ffs);
> @@ -124,6 +129,21 @@ struct ffs_ep {
>  	u8				num;
>  };
>  
> +struct ffs_dmabuf_priv {
> +	struct list_head entry;
> +	struct kref ref;
> +	struct dma_buf_attachment *attach;
> +	spinlock_t lock;
> +	u64 context;
> +};
> +
> +struct ffs_dma_fence {
> +	struct dma_fence base;
> +	struct ffs_dmabuf_priv *priv;
> +	struct sg_table *sgt;
> +	enum dma_data_direction dir;
> +};
> +
>  struct ffs_epfile {
>  	/* Protects ep->ep and ep->req. */
>  	struct mutex			mutex;
> @@ -197,6 +217,8 @@ struct ffs_epfile {
>  	unsigned char			isoc;	/* P: ffs->eps_lock */
>  
>  	unsigned char			_pad;
> +
> +	struct list_head		dmabufs;
>  };
>  
>  struct ffs_buffer {
> @@ -1271,10 +1293,44 @@ static ssize_t ffs_epfile_read_iter(struct kiocb *kiocb, struct iov_iter *to)
>  	return res;
>  }
>  
> +static void ffs_dmabuf_release(struct kref *ref)
> +{
> +	struct ffs_dmabuf_priv *priv = container_of(ref, struct ffs_dmabuf_priv, ref);
> +	struct dma_buf_attachment *attach = priv->attach;
> +	struct dma_buf *dmabuf = attach->dmabuf;
> +
> +	pr_debug("FFS DMABUF release\n");
> +	dma_buf_detach(attach->dmabuf, attach);
> +	dma_buf_put(dmabuf);
> +
> +	list_del(&priv->entry);

I didn't find any locking for this list here.
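
For instance (a sketch; the dmabufs_mutex member is hypothetical,
nothing like it exists in this patch):

  struct ffs_epfile {
          ...
          struct list_head        dmabufs;        /* P: dmabufs_mutex */
          struct mutex            dmabufs_mutex;
  };

with every add/del/walk of epfile->dmabufs done under that mutex —
which likely means moving the list_del() out of ffs_dmabuf_release();
see the detach discussion below.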

> +	kfree(priv);
> +}
> +
> +static void ffs_dmabuf_get(struct dma_buf_attachment *attach)
> +{
> +	struct ffs_dmabuf_priv *priv = attach->importer_priv;
> +
> +	kref_get(&priv->ref);
> +}
> +
> +static void ffs_dmabuf_put(struct dma_buf_attachment *attach)
> +{
> +	struct ffs_dmabuf_priv *priv = attach->importer_priv;
> +
> +	kref_put(&priv->ref, ffs_dmabuf_release);
> +}
> +
>  static int
>  ffs_epfile_release(struct inode *inode, struct file *file)
>  {
>  	struct ffs_epfile *epfile = inode->i_private;
> +	struct ffs_dmabuf_priv *priv, *tmp;
> +
> +	/* Close all attached DMABUFs */
> +	list_for_each_entry_safe(priv, tmp, &epfile->dmabufs, entry) {
> +		ffs_dmabuf_put(priv->attach);
> +	}
>  
>  	__ffs_epfile_read_buffer_free(epfile);
>  	ffs_data_closed(epfile->ffs);
> @@ -1282,6 +1338,328 @@ ffs_epfile_release(struct inode *inode, struct file *file)
>  	return 0;
>  }
>  
> +static void ffs_dmabuf_signal_done(struct ffs_dma_fence *dma_fence, int ret)
> +{
> +	struct ffs_dmabuf_priv *priv = dma_fence->priv;
> +	struct dma_fence *fence = &dma_fence->base;
> +
> +	dma_fence_get(fence);
> +	fence->error = ret;
> +	dma_fence_signal(fence);
> +
> +	dma_buf_unmap_attachment(priv->attach, dma_fence->sgt, dma_fence->dir);
> +	dma_fence_put(fence);
> +	ffs_dmabuf_put(priv->attach);

So this can in theory take the dma_resv lock, and if the usb completion
isn't an unlimited worker this could hold up completion of future
dma_fences, resulting in a deadlock.

It needs to be checked how usb works here; if stalling indefinitely in the
io_complete callback can hold up the usb stack, you need to:

- drop dma_fence_begin/end_signalling annotations in here
- pull out the unref stuff into a separate preallocated worker (or at
  least the final unrefs for ffs_dma_buf); see the sketch below.
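
A rough sketch of that split, assuming a hypothetical work_struct
member added to struct ffs_dma_fence (none of this is in the patch):

  static void ffs_dmabuf_cleanup_worker(struct work_struct *work)
  {
          struct ffs_dma_fence *dma_fence =
                  container_of(work, struct ffs_dma_fence, work);
          struct ffs_dmabuf_priv *priv = dma_fence->priv;

          /* Outside the fence-signalling critical section it is safe
           * to take dma_resv locks again. */
          dma_buf_unmap_attachment(priv->attach, dma_fence->sgt,
                                   dma_fence->dir);
          dma_fence_put(&dma_fence->base);
          ffs_dmabuf_put(priv->attach);
  }

  static void ffs_dmabuf_signal_done(struct ffs_dma_fence *dma_fence, int ret)
  {
          struct dma_fence *fence = &dma_fence->base;
          bool cookie = dma_fence_begin_signalling();

          dma_fence_get(fence);
          fence->error = ret;
          dma_fence_signal(fence);
          dma_fence_end_signalling(cookie);

          /* The unmap and the final unrefs may take the dma_resv
           * lock, so defer them instead of running them here. */
          INIT_WORK(&dma_fence->work, ffs_dmabuf_cleanup_worker);
          queue_work(system_wq, &dma_fence->work);
  }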

> +}
> +
> +static void ffs_epfile_dmabuf_io_complete(struct usb_ep *ep,
> +					  struct usb_request *req)
> +{
> +	pr_debug("FFS: DMABUF transfer complete, status=%d\n", req->status);
> +	ffs_dmabuf_signal_done(req->context, req->status);
> +	usb_ep_free_request(ep, req);
> +}
> +
> +static const char *ffs_dmabuf_get_driver_name(struct dma_fence *fence)
> +{
> +	return "functionfs";
> +}
> +
> +static const char *ffs_dmabuf_get_timeline_name(struct dma_fence *fence)
> +{
> +	return "";
> +}
> +
> +static void ffs_dmabuf_fence_release(struct dma_fence *fence)
> +{
> +	struct ffs_dma_fence *dma_fence =
> +		container_of(fence, struct ffs_dma_fence, base);
> +
> +	kfree(dma_fence);
> +}
> +
> +static const struct dma_fence_ops ffs_dmabuf_fence_ops = {
> +	.get_driver_name	= ffs_dmabuf_get_driver_name,
> +	.get_timeline_name	= ffs_dmabuf_get_timeline_name,
> +	.release		= ffs_dmabuf_fence_release,
> +};
> +
> +static int ffs_dma_resv_lock(struct dma_buf *dmabuf, bool nonblock)
> +{
> +	int ret;
> +
> +	ret = dma_resv_lock_interruptible(dmabuf->resv, NULL);
> +	if (ret) {
> +		if (ret != -EDEADLK)
> +			return ret;
> +		if (nonblock)
> +			return -EBUSY;
> +
> +		ret = dma_resv_lock_slow_interruptible(dmabuf->resv, NULL);

This is overkill: without a reservation context you will never get
-EDEADLK, and so never have to do slowpath locking. So it's just dead code.

If you want to check, build with CONFIG_DEBUG_WW_MUTEX_SLOWPATH=y
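
Without the slowpath, the helper could reduce to something like this
(a sketch; as a side effect it also makes the nonblock case truly
non-blocking, which the version above is not):

  static int ffs_dma_resv_lock(struct dma_buf *dmabuf, bool nonblock)
  {
          if (!nonblock)
                  return dma_resv_lock_interruptible(dmabuf->resv, NULL);

          if (!dma_resv_trylock(dmabuf->resv))
                  return -EBUSY;

          return 0;
  }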

> +	}
> +
> +	return ret;
> +}
> +
> +static struct dma_buf_attachment *
> +ffs_dmabuf_find_attachment(struct device *dev, struct dma_buf *dmabuf,
> +			   bool nonblock)
> +{
> +	struct dma_buf_attachment *elm, *attach = NULL;
> +	int ret;
> +
> +	ret = ffs_dma_resv_lock(dmabuf, nonblock);
> +	if (ret)
> +		return ERR_PTR(ret);
> +
> +	list_for_each_entry(elm, &dmabuf->attachments, node) {
> +		if (elm->dev == dev) {
> +			attach = elm;
> +			break;
> +		}
> +	}
> +
> +	if (attach)
> +		ffs_dmabuf_get(elm);

This needs a kref_get_unless_zero or you can race with the final free.

I'm not super keen that usb-gadget is noodling around in the attachment
list like this; your own lookup structure (you have the dma-buf list
already anyway to keep track of all attachments) would be much nicer. But
the get_unless_zero I think is mandatory here for this weak reference.
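
A sketch of what the private-list lookup with the weak-to-strong
upgrade could look like (the dmabufs_mutex protecting the list is
hypothetical, per the locking discussion above):

  static struct dma_buf_attachment *
  ffs_dmabuf_find_attachment(struct ffs_epfile *epfile,
                             struct dma_buf *dmabuf)
  {
          struct dma_buf_attachment *attach = NULL;
          struct ffs_dmabuf_priv *priv;

          mutex_lock(&epfile->dmabufs_mutex);

          list_for_each_entry(priv, &epfile->dmabufs, entry) {
                  if (priv->attach->dmabuf == dmabuf) {
                          /* Weak -> strong: the last ref may be gone */
                          if (kref_get_unless_zero(&priv->ref))
                                  attach = priv->attach;
                          break;
                  }
          }

          mutex_unlock(&epfile->dmabufs_mutex);

          return attach ?: ERR_PTR(-EPERM);
  }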

> +
> +	dma_resv_unlock(dmabuf->resv);
> +
> +	return attach ?: ERR_PTR(-EPERM);
> +}
> +
> +static int ffs_dmabuf_attach(struct file *file, int fd)
> +{
> +	struct ffs_epfile *epfile = file->private_data;
> +	struct usb_gadget *gadget = epfile->ffs->gadget;
> +	struct dma_buf_attachment *attach;
> +	struct ffs_dmabuf_priv *priv;
> +	struct dma_buf *dmabuf;
> +	int err;
> +
> +	if (!gadget || !gadget->sg_supported)
> +		return -EPERM;
> +
> +	dmabuf = dma_buf_get(fd);
> +	if (IS_ERR(dmabuf))
> +		return PTR_ERR(dmabuf);
> +
> +	attach = dma_buf_attach(dmabuf, gadget->dev.parent);
> +	if (IS_ERR(attach)) {
> +		err = PTR_ERR(attach);
> +		goto err_dmabuf_put;
> +	}
> +
> +	priv = kzalloc(sizeof(*priv), GFP_KERNEL);
> +	if (!priv) {
> +		err = -ENOMEM;
> +		goto err_dmabuf_detach;
> +	}
> +
> +	attach->importer_priv = priv;
> +
> +	priv->attach = attach;
> +	spin_lock_init(&priv->lock);
> +	kref_init(&priv->ref);
> +	priv->context = dma_fence_context_alloc(1);

Just to check: usb gadget guarantees that all requests on an ep are
ordered?

> +
> +	list_add(&priv->entry, &epfile->dmabufs);
> +
> +	return 0;
> +
> +err_dmabuf_detach:
> +	dma_buf_detach(dmabuf, attach);
> +err_dmabuf_put:
> +	dma_buf_put(dmabuf);
> +
> +	return err;
> +}
> +
> +static int ffs_dmabuf_detach(struct file *file, int fd)
> +{
> +	struct ffs_epfile *epfile = file->private_data;
> +	struct usb_gadget *gadget = epfile->ffs->gadget;
> +	bool nonblock = file->f_flags & O_NONBLOCK;
> +	struct dma_buf_attachment *attach;
> +	struct dma_buf *dmabuf;
> +	int ret = 0;
> +
> +	dmabuf = dma_buf_get(fd);
> +	if (IS_ERR(dmabuf))
> +		return PTR_ERR(dmabuf);
> +
> +	attach = ffs_dmabuf_find_attachment(gadget->dev.parent,
> +					    dmabuf, nonblock);
> +	if (IS_ERR(attach)) {
> +		ret = PTR_ERR(attach);
> +		goto out_dmabuf_put;
> +	}
> +
> +	/*
> +	 * Unref twice to release the reference obtained with
> +	 * ffs_dmabuf_find_attachment() above, and the one obtained in
> +	 * ffs_dmabuf_attach().
> +	 */
> +	ffs_dmabuf_put(attach);

This looks strange, what's stopping userspace from calling detach multiple
times while a transfer is pending (so that the destruction is delayed)?
That smells like a refcount underflow.

You probably need to tie the refcounts you acquire in ffs_dmabuf_attach to
epfile->dmabufs 1:1 to make sure there's no way userspace can pull you
over the table. This is also the reason why I looked for the locking of
that list, and didn't find it.

> +	ffs_dmabuf_put(attach);
> +
> +out_dmabuf_put:
> +	dma_buf_put(dmabuf);
> +	return ret;
> +}
> +
> +static int ffs_dmabuf_transfer(struct file *file,
> +			       const struct usb_ffs_dmabuf_transfer_req *req)
> +{
> +	bool dma_to_ram, nonblock = file->f_flags & O_NONBLOCK;
> +	struct ffs_epfile *epfile = file->private_data;
> +	struct usb_gadget *gadget = epfile->ffs->gadget;
> +	struct dma_buf_attachment *attach;
> +	struct ffs_dmabuf_priv *priv;
> +	enum dma_data_direction dir;
> +	struct ffs_dma_fence *fence;
> +	struct usb_request *usb_req;
> +	struct sg_table *sg_table;
> +	struct dma_buf *dmabuf;
> +	struct ffs_ep *ep;
> +	int ret;
> +
> +	if (req->flags & ~USB_FFS_DMABUF_TRANSFER_MASK)
> +		return -EINVAL;
> +
> +	dmabuf = dma_buf_get(req->fd);
> +	if (IS_ERR(dmabuf))
> +		return PTR_ERR(dmabuf);
> +
> +	if (req->length > dmabuf->size || req->length == 0) {
> +		ret = -EINVAL;
> +		goto err_dmabuf_put;
> +	}
> +
> +	attach = ffs_dmabuf_find_attachment(gadget->dev.parent,
> +					    dmabuf, nonblock);
> +	if (IS_ERR(attach)) {
> +		ret = PTR_ERR(attach);
> +		goto err_dmabuf_put;
> +	}
> +
> +	priv = attach->importer_priv;
> +
> +	if (epfile->in)
> +		dir = DMA_FROM_DEVICE;
> +	else
> +		dir = DMA_TO_DEVICE;
> +
> +	sg_table = dma_buf_map_attachment(attach, dir);
> +	if (IS_ERR(sg_table)) {
> +		ret = PTR_ERR(sg_table);
> +		goto err_attachment_put;
> +	}
> +
> +	ep = ffs_epfile_wait_ep(file);
> +	if (IS_ERR(ep)) {
> +		ret = PTR_ERR(ep);
> +		goto err_unmap_attachment;
> +	}
> +
> +	ret = ffs_dma_resv_lock(dmabuf, nonblock);
> +	if (ret)
> +		goto err_unmap_attachment;
> +
> +	/* Make sure we don't have writers */
> +	if (!dma_resv_test_signaled(dmabuf->resv, DMA_RESV_USAGE_WRITE)) {
> +		pr_debug("FFS WRITE fence is not signaled\n");
> +		ret = -EBUSY;
> +		goto err_resv_unlock;
> +	}
> +
> +	dma_to_ram = dir == DMA_FROM_DEVICE;
> +
> +	/* If we're writing to the DMABUF, make sure we don't have readers */
> +	if (dma_to_ram &&
> +	    !dma_resv_test_signaled(dmabuf->resv, DMA_RESV_USAGE_READ)) {
> +		pr_debug("FFS READ fence is not signaled\n");
> +		ret = -EBUSY;
> +		goto err_resv_unlock;
> +	}
> +
> +	ret = dma_resv_reserve_fences(dmabuf->resv, 1);
> +	if (ret)
> +		goto err_resv_unlock;
> +
> +	fence = kmalloc(sizeof(*fence), GFP_KERNEL);
> +	if (!fence) {
> +		ret = -ENOMEM;
> +		goto err_resv_unlock;
> +	}
> +
> +	fence->sgt = sg_table;
> +	fence->dir = dir;
> +	fence->priv = priv;
> +
> +	dma_fence_init(&fence->base, &ffs_dmabuf_fence_ops,
> +		       &priv->lock, priv->context, 0);

You need a real seqno here or things break with fence merging. Or
alternatively unordered dma_fences (which are implemented by allocating a
new context for each fence, maybe we should change that eventually ...).
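
For example (a sketch, with a hypothetical atomic64_t seqno counter
added to ffs_dmabuf_priv, and relying on usb_ep_queue() completing
requests in FIFO order so fences on this context signal in order):

          dma_fence_init(&fence->base, &ffs_dmabuf_fence_ops,
                         &priv->lock, priv->context,
                         atomic64_inc_return(&priv->seqno));
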
> +
> +	spin_lock_irq(&epfile->ffs->eps_lock);
> +
> +	/* In the meantime, endpoint got disabled or changed. */
> +	if (epfile->ep != ep) {
> +		ret = -ESHUTDOWN;
> +		goto err_fence_put;
> +	}
> +
> +	usb_req = usb_ep_alloc_request(ep->ep, GFP_ATOMIC);
> +	if (!usb_req) {
> +		ret = -ENOMEM;
> +		goto err_fence_put;
> +	}
> +
> +	dma_resv_add_fence(dmabuf->resv, &fence->base,
> +			   dma_resv_usage_rw(dma_to_ram));
> +	dma_resv_unlock(dmabuf->resv);
> +
> +	/* Now that the dma_fence is in place, queue the transfer. */
> +
> +	usb_req->length = req->length;
> +	usb_req->buf = NULL;
> +	usb_req->sg = sg_table->sgl;
> +	usb_req->num_sgs = sg_nents_for_len(sg_table->sgl, req->length);
> +	usb_req->sg_was_mapped = true;
> +	usb_req->context  = fence;
> +	usb_req->complete = ffs_epfile_dmabuf_io_complete;
> +
> +	ret = usb_ep_queue(ep->ep, usb_req, GFP_ATOMIC);
> +	if (ret) {
> +		usb_ep_free_request(ep->ep, usb_req);
> +
> +		spin_unlock_irq(&epfile->ffs->eps_lock);
> +
> +		pr_warn("FFS: Failed to queue DMABUF: %d\n", ret);
> +		ffs_dmabuf_signal_done(fence, ret);
> +		goto out_dma_buf_put;
> +	}
> +
> +	spin_unlock_irq(&epfile->ffs->eps_lock);
> +
> +out_dma_buf_put:
> +	dma_buf_put(dmabuf);
> +
> +	return ret;
> +
> +err_fence_put:
> +	spin_unlock_irq(&epfile->ffs->eps_lock);
> +	dma_fence_put(&fence->base);
> +err_resv_unlock:
> +	dma_resv_unlock(dmabuf->resv);
> +err_unmap_attachment:
> +	dma_buf_unmap_attachment(attach, sg_table, dir);
> +err_attachment_put:
> +	ffs_dmabuf_put(attach);
> +err_dmabuf_put:
> +	dma_buf_put(dmabuf);
> +
> +	return ret;
> +}
> +
>  static long ffs_epfile_ioctl(struct file *file, unsigned code,
>  			     unsigned long value)
>  {
> @@ -1292,6 +1670,44 @@ static long ffs_epfile_ioctl(struct file *file, unsigned code,
>  	if (WARN_ON(epfile->ffs->state != FFS_ACTIVE))
>  		return -ENODEV;
>  
> +	switch (code) {
> +	case FUNCTIONFS_DMABUF_ATTACH:
> +	{
> +		int fd;
> +
> +		if (copy_from_user(&fd, (void __user *)value, sizeof(fd))) {
> +			ret = -EFAULT;
> +			break;
> +		}
> +
> +		return ffs_dmabuf_attach(file, fd);
> +	}
> +	case FUNCTIONFS_DMABUF_DETACH:
> +	{
> +		int fd;
> +
> +		if (copy_from_user(&fd, (void __user *)value, sizeof(fd))) {
> +			ret = -EFAULT;
> +			break;
> +		}
> +
> +		return ffs_dmabuf_detach(file, fd);
> +	}
> +	case FUNCTIONFS_DMABUF_TRANSFER:
> +	{
> +		struct usb_ffs_dmabuf_transfer_req req;
> +
> +		if (copy_from_user(&req, (void __user *)value, sizeof(req))) {
> +			ret = -EFAULT;
> +			break;
> +		}
> +
> +		return ffs_dmabuf_transfer(file, &req);
> +	}
> +	default:
> +		break;
> +	}
> +
>  	/* Wait for endpoint to be enabled */
>  	ep = ffs_epfile_wait_ep(file);
>  	if (IS_ERR(ep))
> @@ -1869,6 +2285,7 @@ static int ffs_epfiles_create(struct ffs_data *ffs)
>  	for (i = 1; i <= count; ++i, ++epfile) {
>  		epfile->ffs = ffs;
>  		mutex_init(&epfile->mutex);
> +		INIT_LIST_HEAD(&epfile->dmabufs);
>  		if (ffs->user_flags & FUNCTIONFS_VIRTUAL_ADDR)
>  			sprintf(epfile->name, "ep%02x", ffs->eps_addrmap[i]);
>  		else
> diff --git a/include/uapi/linux/usb/functionfs.h b/include/uapi/linux/usb/functionfs.h
> index 078098e73fd3..9f88de9c3d66 100644
> --- a/include/uapi/linux/usb/functionfs.h
> +++ b/include/uapi/linux/usb/functionfs.h
> @@ -86,6 +86,22 @@ struct usb_ext_prop_desc {
>  	__le16	wPropertyNameLength;
>  } __attribute__((packed));
>  
> +/* Flags for usb_ffs_dmabuf_transfer_req->flags (none for now) */
> +#define USB_FFS_DMABUF_TRANSFER_MASK	0x0
> +
> +/**
> + * struct usb_ffs_dmabuf_transfer_req - Transfer request for a DMABUF object
> + * @fd:		file descriptor of the DMABUF object
> + * @flags:	one or more USB_FFS_DMABUF_TRANSFER_* flags
> + * @length:	number of bytes used in this DMABUF for the data transfer.
> + *		Should generally be set to the DMABUF's size.
> + */
> +struct usb_ffs_dmabuf_transfer_req {
> +	int fd;
> +	__u32 flags;
> +	__u64 length;
> +} __attribute__((packed));
> +
>  #ifndef __KERNEL__
>  
>  /*
> @@ -290,6 +306,31 @@ struct usb_functionfs_event {
>  #define	FUNCTIONFS_ENDPOINT_DESC	_IOR('g', 130, \
>  					     struct usb_endpoint_descriptor)
>  
> +/*
> + * Attach the DMABUF object, identified by its file descriptor, to the
> + * data endpoint. Returns zero on success, and a negative errno value
> + * on error.
> + */
> +#define FUNCTIONFS_DMABUF_ATTACH	_IOW('g', 131, int)
> +
>  
> +/*
> + * Detach the given DMABUF object, identified by its file descriptor,
> + * from the data endpoint. Returns zero on success, and a negative
> + * errno value on error. Note that closing the endpoint's file
> + * descriptor will automatically detach all attached DMABUFs.
> + */
> +#define FUNCTIONFS_DMABUF_DETACH	_IOW('g', 132, int)
> +
> +/*
> + * Enqueue the previously attached DMABUF to the transfer queue.
> + * The argument is a structure that packs the DMABUF's file descriptor,
> + * the size in bytes to transfer (which should generally correspond to
> + * the size of the DMABUF), and a 'flags' field which is unused
> + * for now. Returns zero on success, and a negative errno value on
> + * error.
> + */
> +#define FUNCTIONFS_DMABUF_TRANSFER	_IOW('g', 133, \
> +					     struct usb_ffs_dmabuf_transfer_req)
>  
>  #endif /* _UAPI__LINUX_FUNCTIONFS_H__ */

The only things I've found are (I think, at least) bugs in the usb gadget
logic, not directly in how dma-buf/fence is used. The only thing I've
noticed is the lack of an actual dma_fence seqno (which I think Christian
already pointed out in an earlier review, looking at the archives at least).
With that addressed:

Acked-by: Daniel Vetter <daniel.vetter@ffwll.ch>

Cheers, Sima
Paul Cercueil Jan. 8, 2024, 2:21 p.m. UTC | #3
Hi Daniel (Sima?),

Le lundi 08 janvier 2024 à 13:39 +0100, Daniel Vetter a écrit :
> On Mon, Jan 08, 2024 at 01:00:55PM +0100, Paul Cercueil wrote:
> > This patch introduces three new ioctls. They all should be called
> > on a
> > data endpoint (ie. not ep0). They are:
> > 
> > - FUNCTIONFS_DMABUF_ATTACH, which takes the file descriptor of a
> > DMABUF
> >   object to attach to the endpoint.
> > 
> > - FUNCTIONFS_DMABUF_DETACH, which takes the file descriptor of the
> >   DMABUF to detach from the endpoint. Note that closing the
> > endpoint's
> >   file descriptor will automatically detach all attached DMABUFs.
> > 
> > - FUNCTIONFS_DMABUF_TRANSFER, which requests a data transfer from /
> > to
> >   the given DMABUF. Its argument is a structure that packs the
> > DMABUF's
> >   file descriptor, the size in bytes to transfer (which should
> > generally
> >   be set to the size of the DMABUF), and a 'flags' field which is
> > unused
> >   for now.
> >   Before this ioctl can be used, the related DMABUF must be
> > attached
> >   with FUNCTIONFS_DMABUF_ATTACH.
> > 
> > These three ioctls enable the FunctionFS code to transfer data
> > between
> > the USB stack and a DMABUF object, which can be provided by a
> > driver
> > from a completely different subsystem, in a zero-copy fashion.
> > 
> > Signed-off-by: Paul Cercueil <paul@crapouillou.net>
> > 
> > ---
> > v2:
> > - Make ffs_dma_resv_lock() static
> > - Add MODULE_IMPORT_NS(DMA_BUF);
> > - The attach/detach functions are now performed without locking the
> >   eps_lock spinlock. The transfer function starts with the spinlock
> >   unlocked, then locks it before allocating and queueing the USB
> >   transfer.
> > 
> > v3:
> > - Inline to_ffs_dma_fence() which was called only once.
> > - Simplify ffs_dma_resv_lock()
> > - Add comment explaining why we unref twice in ffs_dmabuf_detach()
> > - Document uapi struct usb_ffs_dmabuf_transfer_req and IOCTLs
> > ---
> >  drivers/usb/gadget/function/f_fs.c  | 417
> > ++++++++++++++++++++++++++++
> >  include/uapi/linux/usb/functionfs.h |  41 +++
> >  2 files changed, 458 insertions(+)
> > 
> > diff --git a/drivers/usb/gadget/function/f_fs.c
> > b/drivers/usb/gadget/function/f_fs.c
> > index ed2a6d5fcef7..9df1f5abb0d4 100644
> > --- a/drivers/usb/gadget/function/f_fs.c
> > +++ b/drivers/usb/gadget/function/f_fs.c
> > @@ -15,6 +15,9 @@
> >  /* #define VERBOSE_DEBUG */
> >  
> >  #include <linux/blkdev.h>
> > +#include <linux/dma-buf.h>
> > +#include <linux/dma-fence.h>
> > +#include <linux/dma-resv.h>
> >  #include <linux/pagemap.h>
> >  #include <linux/export.h>
> >  #include <linux/fs_parser.h>
> > @@ -43,6 +46,8 @@
> >  
> >  #define FUNCTIONFS_MAGIC	0xa647361 /* Chosen by a honest
> > dice roll ;) */
> >  
> > +MODULE_IMPORT_NS(DMA_BUF);
> > +
> >  /* Reference counter handling */
> >  static void ffs_data_get(struct ffs_data *ffs);
> >  static void ffs_data_put(struct ffs_data *ffs);
> > @@ -124,6 +129,21 @@ struct ffs_ep {
> >  	u8				num;
> >  };
> >  
> > +struct ffs_dmabuf_priv {
> > +	struct list_head entry;
> > +	struct kref ref;
> > +	struct dma_buf_attachment *attach;
> > +	spinlock_t lock;
> > +	u64 context;
> > +};
> > +
> > +struct ffs_dma_fence {
> > +	struct dma_fence base;
> > +	struct ffs_dmabuf_priv *priv;
> > +	struct sg_table *sgt;
> > +	enum dma_data_direction dir;
> > +};
> > +
> >  struct ffs_epfile {
> >  	/* Protects ep->ep and ep->req. */
> >  	struct mutex			mutex;
> > @@ -197,6 +217,8 @@ struct ffs_epfile {
> >  	unsigned char			isoc;	/* P: ffs->eps_lock */
> >  
> >  	unsigned char			_pad;
> > +
> > +	struct list_head		dmabufs;
> >  };
> >  
> >  struct ffs_buffer {
> > @@ -1271,10 +1293,44 @@ static ssize_t ffs_epfile_read_iter(struct
> > kiocb *kiocb, struct iov_iter *to)
> >  	return res;
> >  }
> >  
> > +static void ffs_dmabuf_release(struct kref *ref)
> > +{
> > +	struct ffs_dmabuf_priv *priv = container_of(ref, struct
> > ffs_dmabuf_priv, ref);
> > +	struct dma_buf_attachment *attach = priv->attach;
> > +	struct dma_buf *dmabuf = attach->dmabuf;
> > +
> > +	pr_debug("FFS DMABUF release\n");
> > +	dma_buf_detach(attach->dmabuf, attach);
> > +	dma_buf_put(dmabuf);
> > +
> > +	list_del(&priv->entry);
> 
> I didn't find any locking for this list here.

Yeah. I'll add some.

> > +	kfree(priv);
> > +}
> > +
> > +static void ffs_dmabuf_get(struct dma_buf_attachment *attach)
> > +{
> > +	struct ffs_dmabuf_priv *priv = attach->importer_priv;
> > +
> > +	kref_get(&priv->ref);
> > +}
> > +
> > +static void ffs_dmabuf_put(struct dma_buf_attachment *attach)
> > +{
> > +	struct ffs_dmabuf_priv *priv = attach->importer_priv;
> > +
> > +	kref_put(&priv->ref, ffs_dmabuf_release);
> > +}
> > +
> >  static int
> >  ffs_epfile_release(struct inode *inode, struct file *file)
> >  {
> >  	struct ffs_epfile *epfile = inode->i_private;
> > +	struct ffs_dmabuf_priv *priv, *tmp;
> > +
> > +	/* Close all attached DMABUFs */
> > +	list_for_each_entry_safe(priv, tmp, &epfile->dmabufs,
> > entry) {
> > +		ffs_dmabuf_put(priv->attach);
> > +	}
> >  
> >  	__ffs_epfile_read_buffer_free(epfile);
> >  	ffs_data_closed(epfile->ffs);
> > @@ -1282,6 +1338,328 @@ ffs_epfile_release(struct inode *inode,
> > struct file *file)
> >  	return 0;
> >  }
> >  
> > +static void ffs_dmabuf_signal_done(struct ffs_dma_fence
> > *dma_fence, int ret)
> > +{
> > +	struct ffs_dmabuf_priv *priv = dma_fence->priv;
> > +	struct dma_fence *fence = &dma_fence->base;
> > +
> > +	dma_fence_get(fence);
> > +	fence->error = ret;
> > +	dma_fence_signal(fence);
> > +
> > +	dma_buf_unmap_attachment(priv->attach, dma_fence->sgt,
> > dma_fence->dir);
> > +	dma_fence_put(fence);
> > +	ffs_dmabuf_put(priv->attach);
> 
> So this can in theory take the dma_resv lock, and if the usb completion
> isn't an unlimited worker this could hold up completion of future
> dma_fences, resulting in a deadlock.
> 
> It needs to be checked how usb works here; if stalling indefinitely in the
> io_complete callback can hold up the usb stack, you need to:
> 
> - drop dma_fence_begin/end_signalling annotations in here
> - pull out the unref stuff into a separate preallocated worker (or at
>   least the final unrefs for ffs_dma_buf).

Only ffs_dmabuf_put() can attempt to take the dma_resv and would have
to be in a worker, right? Everything else would be inside the
dma_fence_begin/end_signalling() annotations?

> 
> > +}
> > +
> > +static void ffs_epfile_dmabuf_io_complete(struct usb_ep *ep,
> > +					  struct usb_request *req)
> > +{
> > +	pr_debug("FFS: DMABUF transfer complete, status=%d\n",
> > req->status);
> > +	ffs_dmabuf_signal_done(req->context, req->status);
> > +	usb_ep_free_request(ep, req);
> > +}
> > +
> > +static const char *ffs_dmabuf_get_driver_name(struct dma_fence
> > *fence)
> > +{
> > +	return "functionfs";
> > +}
> > +
> > +static const char *ffs_dmabuf_get_timeline_name(struct dma_fence
> > *fence)
> > +{
> > +	return "";
> > +}
> > +
> > +static void ffs_dmabuf_fence_release(struct dma_fence *fence)
> > +{
> > +	struct ffs_dma_fence *dma_fence =
> > +		container_of(fence, struct ffs_dma_fence, base);
> > +
> > +	kfree(dma_fence);
> > +}
> > +
> > +static const struct dma_fence_ops ffs_dmabuf_fence_ops = {
> > +	.get_driver_name	= ffs_dmabuf_get_driver_name,
> > +	.get_timeline_name	= ffs_dmabuf_get_timeline_name,
> > +	.release		= ffs_dmabuf_fence_release,
> > +};
> > +
> > +static int ffs_dma_resv_lock(struct dma_buf *dmabuf, bool
> > nonblock)
> > +{
> > +	int ret;
> > +
> > +	ret = dma_resv_lock_interruptible(dmabuf->resv, NULL);
> > +	if (ret) {
> > +		if (ret != -EDEADLK)
> > +			return ret;
> > +		if (nonblock)
> > +			return -EBUSY;
> > +
> > +		ret = dma_resv_lock_slow_interruptible(dmabuf->resv, NULL);
> 
> This is overkill: without a reservation context you will never get
> -EDEADLK, and so never have to do slowpath locking. So it's just dead code.
> 
> If you want to check, build with CONFIG_DEBUG_WW_MUTEX_SLOWPATH=y

Ok.

> 
> > +	}
> > +
> > +	return ret;
> > +}
> > +
> > +static struct dma_buf_attachment *
> > +ffs_dmabuf_find_attachment(struct device *dev, struct dma_buf
> > *dmabuf,
> > +			   bool nonblock)
> > +{
> > +	struct dma_buf_attachment *elm, *attach = NULL;
> > +	int ret;
> > +
> > +	ret = ffs_dma_resv_lock(dmabuf, nonblock);
> > +	if (ret)
> > +		return ERR_PTR(ret);
> > +
> > +	list_for_each_entry(elm, &dmabuf->attachments, node) {
> > +		if (elm->dev == dev) {
> > +			attach = elm;
> > +			break;
> > +		}
> > +	}
> > +
> > +	if (attach)
> > +		ffs_dmabuf_get(elm);
> 
> This needs a kref_get_unless_zero or you can race with the final free.
> 
> I'm not super keen that usb-gadget is noodling around in the attachment
> list like this; your own lookup structure (you have the dma-buf list
> already anyway to keep track of all attachments) would be much nicer. But
> the get_unless_zero I think is mandatory here for this weak reference.

Christian suggested moving that to a dma_buf function.
Alternatively, I can browse my epfile->dmabufs list, sure - that won't
be hard to do. That's probably a better idea too.

> 
> > +
> > +	dma_resv_unlock(dmabuf->resv);
> > +
> > +	return attach ?: ERR_PTR(-EPERM);
> > +}
> > +
> > +static int ffs_dmabuf_attach(struct file *file, int fd)
> > +{
> > +	struct ffs_epfile *epfile = file->private_data;
> > +	struct usb_gadget *gadget = epfile->ffs->gadget;
> > +	struct dma_buf_attachment *attach;
> > +	struct ffs_dmabuf_priv *priv;
> > +	struct dma_buf *dmabuf;
> > +	int err;
> > +
> > +	if (!gadget || !gadget->sg_supported)
> > +		return -EPERM;
> > +
> > +	dmabuf = dma_buf_get(fd);
> > +	if (IS_ERR(dmabuf))
> > +		return PTR_ERR(dmabuf);
> > +
> > +	attach = dma_buf_attach(dmabuf, gadget->dev.parent);
> > +	if (IS_ERR(attach)) {
> > +		err = PTR_ERR(attach);
> > +		goto err_dmabuf_put;
> > +	}
> > +
> > +	priv = kzalloc(sizeof(*priv), GFP_KERNEL);
> > +	if (!priv) {
> > +		err = -ENOMEM;
> > +		goto err_dmabuf_detach;
> > +	}
> > +
> > +	attach->importer_priv = priv;
> > +
> > +	priv->attach = attach;
> > +	spin_lock_init(&priv->lock);
> > +	kref_init(&priv->ref);
> > +	priv->context = dma_fence_context_alloc(1);
> 
> Just to check: usb gadget guarantees that all requests on an ep are
> ordered?

The documentation of usb_ep_queue() states that the transfer requests
are processed in FIFO order, yes.

> 
> > +
> > +	list_add(&priv->entry, &epfile->dmabufs);
> > +
> > +	return 0;
> > +
> > +err_dmabuf_detach:
> > +	dma_buf_detach(dmabuf, attach);
> > +err_dmabuf_put:
> > +	dma_buf_put(dmabuf);
> > +
> > +	return err;
> > +}
> > +
> > +static int ffs_dmabuf_detach(struct file *file, int fd)
> > +{
> > +	struct ffs_epfile *epfile = file->private_data;
> > +	struct usb_gadget *gadget = epfile->ffs->gadget;
> > +	bool nonblock = file->f_flags & O_NONBLOCK;
> > +	struct dma_buf_attachment *attach;
> > +	struct dma_buf *dmabuf;
> > +	int ret = 0;
> > +
> > +	dmabuf = dma_buf_get(fd);
> > +	if (IS_ERR(dmabuf))
> > +		return PTR_ERR(dmabuf);
> > +
> > +	attach = ffs_dmabuf_find_attachment(gadget->dev.parent,
> > +					    dmabuf, nonblock);
> > +	if (IS_ERR(attach)) {
> > +		ret = PTR_ERR(attach);
> > +		goto out_dmabuf_put;
> > +	}
> > +
> > +	/*
> > +	 * Unref twice to release the reference obtained with
> > +	 * ffs_dmabuf_find_attachment() above, and the one
> > obtained in
> > +	 * ffs_dmabuf_attach().
> > +	 */
> > +	ffs_dmabuf_put(attach);
> 
> This looks strange, what's stopping userspace from calling detach
> multiple times while a transfer is pending (so that the destruction is
> delayed)? That smells like a refcount underflow.

My idea was that the second ffs_dmabuf_put() would trigger
ffs_dmabuf_release(), which calls the list_del(); so a second call to
ffs_dmabuf_detach() would fail to find the attachment.

Indeed, if there's an ongoing transfer, the refcount is higher, and
this breaks miserably.

Christian pointed out that it breaks if ffs_dmabuf_detach() is called
concurrently, but this is even worse :)


> You probably need to tie the refcounts you acquire in ffs_dmabuf_attach
> to epfile->dmabufs 1:1 to make sure there's no way userspace can pull
> you over the table. This is also the reason why I looked for the locking
> of that list, and didn't find it.

I'll change the code to atomically get the dma_buf_attachment and
remove it from the list.
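
Something along these lines, perhaps (a sketch: dmabufs_mutex is
hypothetical, and the list_del() moves here out of
ffs_dmabuf_release(), so a repeated or concurrent detach simply no
longer finds the entry):

  static int ffs_dmabuf_detach(struct file *file, int fd)
  {
          struct ffs_epfile *epfile = file->private_data;
          struct ffs_dmabuf_priv *priv;
          struct dma_buf *dmabuf;
          int ret = -EPERM;

          dmabuf = dma_buf_get(fd);
          if (IS_ERR(dmabuf))
                  return PTR_ERR(dmabuf);

          mutex_lock(&epfile->dmabufs_mutex);

          list_for_each_entry(priv, &epfile->dmabufs, entry) {
                  if (priv->attach->dmabuf == dmabuf) {
                          /* Lookup and unlink are atomic under the mutex */
                          list_del(&priv->entry);

                          /* Drop the ref taken in ffs_dmabuf_attach() */
                          ffs_dmabuf_put(priv->attach);
                          ret = 0;
                          break;
                  }
          }

          mutex_unlock(&epfile->dmabufs_mutex);

          dma_buf_put(dmabuf);
          return ret;
  }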

> 
> > +	ffs_dmabuf_put(attach);
> > +
> > +out_dmabuf_put:
> > +	dma_buf_put(dmabuf);
> > +	return ret;
> > +}
> > +
> > +static int ffs_dmabuf_transfer(struct file *file,
> > +			       const struct
> > usb_ffs_dmabuf_transfer_req *req)
> > +{
> > +	bool dma_to_ram, nonblock = file->f_flags & O_NONBLOCK;
> > +	struct ffs_epfile *epfile = file->private_data;
> > +	struct usb_gadget *gadget = epfile->ffs->gadget;
> > +	struct dma_buf_attachment *attach;
> > +	struct ffs_dmabuf_priv *priv;
> > +	enum dma_data_direction dir;
> > +	struct ffs_dma_fence *fence;
> > +	struct usb_request *usb_req;
> > +	struct sg_table *sg_table;
> > +	struct dma_buf *dmabuf;
> > +	struct ffs_ep *ep;
> > +	int ret;
> > +
> > +	if (req->flags & ~USB_FFS_DMABUF_TRANSFER_MASK)
> > +		return -EINVAL;
> > +
> > +	dmabuf = dma_buf_get(req->fd);
> > +	if (IS_ERR(dmabuf))
> > +		return PTR_ERR(dmabuf);
> > +
> > +	if (req->length > dmabuf->size || req->length == 0) {
> > +		ret = -EINVAL;
> > +		goto err_dmabuf_put;
> > +	}
> > +
> > +	attach = ffs_dmabuf_find_attachment(gadget->dev.parent,
> > +					    dmabuf, nonblock);
> > +	if (IS_ERR(attach)) {
> > +		ret = PTR_ERR(attach);
> > +		goto err_dmabuf_put;
> > +	}
> > +
> > +	priv = attach->importer_priv;
> > +
> > +	if (epfile->in)
> > +		dir = DMA_FROM_DEVICE;
> > +	else
> > +		dir = DMA_TO_DEVICE;
> > +
> > +	sg_table = dma_buf_map_attachment(attach, dir);
> > +	if (IS_ERR(sg_table)) {
> > +		ret = PTR_ERR(sg_table);
> > +		goto err_attachment_put;
> > +	}
> > +
> > +	ep = ffs_epfile_wait_ep(file);
> > +	if (IS_ERR(ep)) {
> > +		ret = PTR_ERR(ep);
> > +		goto err_unmap_attachment;
> > +	}
> > +
> > +	ret = ffs_dma_resv_lock(dmabuf, nonblock);
> > +	if (ret)
> > +		goto err_unmap_attachment;
> > +
> > +	/* Make sure we don't have writers */
> > +	if (!dma_resv_test_signaled(dmabuf->resv,
> > DMA_RESV_USAGE_WRITE)) {
> > +		pr_debug("FFS WRITE fence is not signaled\n");
> > +		ret = -EBUSY;
> > +		goto err_resv_unlock;
> > +	}
> > +
> > +	dma_to_ram = dir == DMA_FROM_DEVICE;
> > +
> > +	/* If we're writing to the DMABUF, make sure we don't have
> > readers */
> > +	if (dma_to_ram &&
> > +	    !dma_resv_test_signaled(dmabuf->resv,
> > DMA_RESV_USAGE_READ)) {
> > +		pr_debug("FFS READ fence is not signaled\n");
> > +		ret = -EBUSY;
> > +		goto err_resv_unlock;
> > +	}
> > +
> > +	ret = dma_resv_reserve_fences(dmabuf->resv, 1);
> > +	if (ret)
> > +		goto err_resv_unlock;
> > +
> > +	fence = kmalloc(sizeof(*fence), GFP_KERNEL);
> > +	if (!fence) {
> > +		ret = -ENOMEM;
> > +		goto err_resv_unlock;
> > +	}
> > +
> > +	fence->sgt = sg_table;
> > +	fence->dir = dir;
> > +	fence->priv = priv;
> > +
> > +	dma_fence_init(&fence->base, &ffs_dmabuf_fence_ops,
> > +		       &priv->lock, priv->context, 0);
> 
> You need a real seqno here or things break with fence merging. Or
> alternatively unordered dma_fences (which are implemented by allocating
> a new context for each fence, maybe we should change that eventually ...).

Understood.

> > +
> > +	spin_lock_irq(&epfile->ffs->eps_lock);
> > +
> > +	/* In the meantime, endpoint got disabled or changed. */
> > +	if (epfile->ep != ep) {
> > +		ret = -ESHUTDOWN;
> > +		goto err_fence_put;
> > +	}
> > +
> > +	usb_req = usb_ep_alloc_request(ep->ep, GFP_ATOMIC);
> > +	if (!usb_req) {
> > +		ret = -ENOMEM;
> > +		goto err_fence_put;
> > +	}
> > +
> > +	dma_resv_add_fence(dmabuf->resv, &fence->base,
> > +			   dma_resv_usage_rw(dma_to_ram));
> > +	dma_resv_unlock(dmabuf->resv);
> > +
> > +	/* Now that the dma_fence is in place, queue the transfer.
> > */
> > +
> > +	usb_req->length = req->length;
> > +	usb_req->buf = NULL;
> > +	usb_req->sg = sg_table->sgl;
> > +	usb_req->num_sgs = sg_nents_for_len(sg_table->sgl, req->length);
> > +	usb_req->sg_was_mapped = true;
> > +	usb_req->context  = fence;
> > +	usb_req->complete = ffs_epfile_dmabuf_io_complete;
> > +
> > +	ret = usb_ep_queue(ep->ep, usb_req, GFP_ATOMIC);
> > +	if (ret) {
> > +		usb_ep_free_request(ep->ep, usb_req);
> > +
> > +		spin_unlock_irq(&epfile->ffs->eps_lock);
> > +
> > +		pr_warn("FFS: Failed to queue DMABUF: %d\n", ret);
> > +		ffs_dmabuf_signal_done(fence, ret);
> > +		goto out_dma_buf_put;
> > +	}
> > +
> > +	spin_unlock_irq(&epfile->ffs->eps_lock);
> > +
> > +out_dma_buf_put:
> > +	dma_buf_put(dmabuf);
> > +
> > +	return ret;
> > +
> > +err_fence_put:
> > +	spin_unlock_irq(&epfile->ffs->eps_lock);
> > +	dma_fence_put(&fence->base);
> > +err_resv_unlock:
> > +	dma_resv_unlock(dmabuf->resv);
> > +err_unmap_attachment:
> > +	dma_buf_unmap_attachment(attach, sg_table, dir);
> > +err_attachment_put:
> > +	ffs_dmabuf_put(attach);
> > +err_dmabuf_put:
> > +	dma_buf_put(dmabuf);
> > +
> > +	return ret;
> > +}
> > +
> >  static long ffs_epfile_ioctl(struct file *file, unsigned code,
> >  			     unsigned long value)
> >  {
> > @@ -1292,6 +1670,44 @@ static long ffs_epfile_ioctl(struct file
> > *file, unsigned code,
> >  	if (WARN_ON(epfile->ffs->state != FFS_ACTIVE))
> >  		return -ENODEV;
> >  
> > +	switch (code) {
> > +	case FUNCTIONFS_DMABUF_ATTACH:
> > +	{
> > +		int fd;
> > +
> > +		if (copy_from_user(&fd, (void __user *)value,
> > sizeof(fd))) {
> > +			ret = -EFAULT;
> > +			break;
> > +		}
> > +
> > +		return ffs_dmabuf_attach(file, fd);
> > +	}
> > +	case FUNCTIONFS_DMABUF_DETACH:
> > +	{
> > +		int fd;
> > +
> > +		if (copy_from_user(&fd, (void __user *)value,
> > sizeof(fd))) {
> > +			ret = -EFAULT;
> > +			break;
> > +		}
> > +
> > +		return ffs_dmabuf_detach(file, fd);
> > +	}
> > +	case FUNCTIONFS_DMABUF_TRANSFER:
> > +	{
> > +		struct usb_ffs_dmabuf_transfer_req req;
> > +
> > +		if (copy_from_user(&req, (void __user *)value,
> > sizeof(req))) {
> > +			ret = -EFAULT;
> > +			break;
> > +		}
> > +
> > +		return ffs_dmabuf_transfer(file, &req);
> > +	}
> > +	default:
> > +		break;
> > +	}
> > +
> >  	/* Wait for endpoint to be enabled */
> >  	ep = ffs_epfile_wait_ep(file);
> >  	if (IS_ERR(ep))
> > @@ -1869,6 +2285,7 @@ static int ffs_epfiles_create(struct ffs_data
> > *ffs)
> >  	for (i = 1; i <= count; ++i, ++epfile) {
> >  		epfile->ffs = ffs;
> >  		mutex_init(&epfile->mutex);
> > +		INIT_LIST_HEAD(&epfile->dmabufs);
> >  		if (ffs->user_flags & FUNCTIONFS_VIRTUAL_ADDR)
> >  		sprintf(epfile->name, "ep%02x", ffs->eps_addrmap[i]);
> >  		else
> > diff --git a/include/uapi/linux/usb/functionfs.h
> > b/include/uapi/linux/usb/functionfs.h
> > index 078098e73fd3..9f88de9c3d66 100644
> > --- a/include/uapi/linux/usb/functionfs.h
> > +++ b/include/uapi/linux/usb/functionfs.h
> > @@ -86,6 +86,22 @@ struct usb_ext_prop_desc {
> >  	__le16	wPropertyNameLength;
> >  } __attribute__((packed));
> >  
> > +/* Flags for usb_ffs_dmabuf_transfer_req->flags (none for now) */
> > +#define USB_FFS_DMABUF_TRANSFER_MASK	0x0
> > +
> > +/**
> > + * struct usb_ffs_dmabuf_transfer_req - Transfer request for a
> > DMABUF object
> > + * @fd:		file descriptor of the DMABUF object
> > + * @flags:	one or more USB_FFS_DMABUF_TRANSFER_* flags
> > + * @length:	number of bytes used in this DMABUF for the data
> > transfer.
> > + *		Should generally be set to the DMABUF's size.
> > + */
> > +struct usb_ffs_dmabuf_transfer_req {
> > +	int fd;
> > +	__u32 flags;
> > +	__u64 length;
> > +} __attribute__((packed));
> > +
> >  #ifndef __KERNEL__
> >  
> >  /*
> > @@ -290,6 +306,31 @@ struct usb_functionfs_event {
> >  #define	FUNCTIONFS_ENDPOINT_DESC	_IOR('g', 130, \
> >  					     struct
> > usb_endpoint_descriptor)
> >  
> > +/*
> > + * Attach the DMABUF object, identified by its file descriptor, to
> > the
> > + * data endpoint. Returns zero on success, and a negative errno
> > value
> > + * on error.
> > + */
> > +#define FUNCTIONFS_DMABUF_ATTACH	_IOW('g', 131, int)
> > +
> >  
> > +/*
> > + * Detach the given DMABUF object, identified by its file
> > descriptor,
> > + * from the data endpoint. Returns zero on success, and a negative
> > + * errno value on error. Note that closing the endpoint's file
> > + * descriptor will automatically detach all attached DMABUFs.
> > + */
> > +#define FUNCTIONFS_DMABUF_DETACH	_IOW('g', 132, int)
> > +
> > +/*
> > + * Enqueue the previously attached DMABUF to the transfer queue.
> > + * The argument is a structure that packs the DMABUF's file
> > descriptor,
> > + * the size in bytes to transfer (which should generally
> > correspond to
> > + * the size of the DMABUF), and a 'flags' field which is unused
> > + * for now. Returns zero on success, and a negative errno value on
> > + * error.
> > + */
> > +#define FUNCTIONFS_DMABUF_TRANSFER	_IOW('g', 133, \
> > +					     struct
> > usb_ffs_dmabuf_transfer_req)
> >  
> >  #endif /* _UAPI__LINUX_FUNCTIONFS_H__ */
> 
> Only things I've found are (I think at least) bugs in the usb gadget
> logic, not directly in how dma-buf/fence is used. The only thing I've
> noticed is the lack of actual dma_fence seqno (which I think
> Christian
> already pointed out in an earlier review, looking at archives at
> least).
> With that addressed:
> 
> Acked-by: Daniel Vetter <daniel.vetter@ffwll.ch>
> 
> Cheers, Sima

Thanks for the review!

Cheers,
-Paul
Daniel Vetter Jan. 8, 2024, 3:29 p.m. UTC | #4
On Mon, Jan 08, 2024 at 03:21:21PM +0100, Paul Cercueil wrote:
> Hi Daniel (Sima?),
> 
> Le lundi 08 janvier 2024 à 13:39 +0100, Daniel Vetter a écrit :
> > On Mon, Jan 08, 2024 at 01:00:55PM +0100, Paul Cercueil wrote:
> > > This patch introduces three new ioctls. They all should be called
> > > on a
> > > data endpoint (ie. not ep0). They are:
> > > 
> > > - FUNCTIONFS_DMABUF_ATTACH, which takes the file descriptor of a
> > > DMABUF
> > >   object to attach to the endpoint.
> > > 
> > > - FUNCTIONFS_DMABUF_DETACH, which takes the file descriptor of the
> > >   DMABUF to detach from the endpoint. Note that closing the
> > > endpoint's
> > >   file descriptor will automatically detach all attached DMABUFs.
> > > 
> > > - FUNCTIONFS_DMABUF_TRANSFER, which requests a data transfer from /
> > > to
> > >   the given DMABUF. Its argument is a structure that packs the
> > > DMABUF's
> > >   file descriptor, the size in bytes to transfer (which should
> > > generally
> > >   be set to the size of the DMABUF), and a 'flags' field which is
> > > unused
> > >   for now.
> > >   Before this ioctl can be used, the related DMABUF must be
> > > attached
> > >   with FUNCTIONFS_DMABUF_ATTACH.
> > > 
> > > These three ioctls enable the FunctionFS code to transfer data
> > > between
> > > the USB stack and a DMABUF object, which can be provided by a
> > > driver
> > > from a completely different subsystem, in a zero-copy fashion.
> > > 
> > > Signed-off-by: Paul Cercueil <paul@crapouillou.net>
> > > 
> > > ---
> > > v2:
> > > - Make ffs_dma_resv_lock() static
> > > - Add MODULE_IMPORT_NS(DMA_BUF);
> > > - The attach/detach functions are now performed without locking the
> > >   eps_lock spinlock. The transfer function starts with the spinlock
> > >   unlocked, then locks it before allocating and queueing the USB
> > >   transfer.
> > > 
> > > v3:
> > > - Inline to_ffs_dma_fence() which was called only once.
> > > - Simplify ffs_dma_resv_lock()
> > > - Add comment explaining why we unref twice in ffs_dmabuf_detach()
> > > - Document uapi struct usb_ffs_dmabuf_transfer_req and IOCTLs
> > > ---
> > >  drivers/usb/gadget/function/f_fs.c  | 417
> > > ++++++++++++++++++++++++++++
> > >  include/uapi/linux/usb/functionfs.h |  41 +++
> > >  2 files changed, 458 insertions(+)
> > > 
> > > diff --git a/drivers/usb/gadget/function/f_fs.c
> > > b/drivers/usb/gadget/function/f_fs.c
> > > index ed2a6d5fcef7..9df1f5abb0d4 100644
> > > --- a/drivers/usb/gadget/function/f_fs.c
> > > +++ b/drivers/usb/gadget/function/f_fs.c
> > > @@ -15,6 +15,9 @@
> > >  /* #define VERBOSE_DEBUG */
> > >  
> > >  #include <linux/blkdev.h>
> > > +#include <linux/dma-buf.h>
> > > +#include <linux/dma-fence.h>
> > > +#include <linux/dma-resv.h>
> > >  #include <linux/pagemap.h>
> > >  #include <linux/export.h>
> > >  #include <linux/fs_parser.h>
> > > @@ -43,6 +46,8 @@
> > >  
> > >  #define FUNCTIONFS_MAGIC	0xa647361 /* Chosen by a honest
> > > dice roll ;) */
> > >  
> > > +MODULE_IMPORT_NS(DMA_BUF);
> > > +
> > >  /* Reference counter handling */
> > >  static void ffs_data_get(struct ffs_data *ffs);
> > >  static void ffs_data_put(struct ffs_data *ffs);
> > > @@ -124,6 +129,21 @@ struct ffs_ep {
> > >  	u8				num;
> > >  };
> > >  
> > > +struct ffs_dmabuf_priv {
> > > +	struct list_head entry;
> > > +	struct kref ref;
> > > +	struct dma_buf_attachment *attach;
> > > +	spinlock_t lock;
> > > +	u64 context;
> > > +};
> > > +
> > > +struct ffs_dma_fence {
> > > +	struct dma_fence base;
> > > +	struct ffs_dmabuf_priv *priv;
> > > +	struct sg_table *sgt;
> > > +	enum dma_data_direction dir;
> > > +};
> > > +
> > >  struct ffs_epfile {
> > >  	/* Protects ep->ep and ep->req. */
> > >  	struct mutex			mutex;
> > > @@ -197,6 +217,8 @@ struct ffs_epfile {
> > >  	unsigned char			isoc;	/* P: ffs-
> > > >eps_lock */
> > >  
> > >  	unsigned char			_pad;
> > > +
> > > +	struct list_head		dmabufs;
> > >  };
> > >  
> > >  struct ffs_buffer {
> > > @@ -1271,10 +1293,44 @@ static ssize_t ffs_epfile_read_iter(struct
> > > kiocb *kiocb, struct iov_iter *to)
> > >  	return res;
> > >  }
> > >  
> > > +static void ffs_dmabuf_release(struct kref *ref)
> > > +{
> > > +	struct ffs_dmabuf_priv *priv = container_of(ref, struct
> > > ffs_dmabuf_priv, ref);
> > > +	struct dma_buf_attachment *attach = priv->attach;
> > > +	struct dma_buf *dmabuf = attach->dmabuf;
> > > +
> > > +	pr_debug("FFS DMABUF release\n");
> > > +	dma_buf_detach(attach->dmabuf, attach);
> > > +	dma_buf_put(dmabuf);
> > > +
> > > +	list_del(&priv->entry);
> > 
> > I didn't find any locking for this list here.
> 
> Yeah. I'll add some.
> 
> > > +	kfree(priv);
> > > +}
> > > +
> > > +static void ffs_dmabuf_get(struct dma_buf_attachment *attach)
> > > +{
> > > +	struct ffs_dmabuf_priv *priv = attach->importer_priv;
> > > +
> > > +	kref_get(&priv->ref);
> > > +}
> > > +
> > > +static void ffs_dmabuf_put(struct dma_buf_attachment *attach)
> > > +{
> > > +	struct ffs_dmabuf_priv *priv = attach->importer_priv;
> > > +
> > > +	kref_put(&priv->ref, ffs_dmabuf_release);
> > > +}
> > > +
> > >  static int
> > >  ffs_epfile_release(struct inode *inode, struct file *file)
> > >  {
> > >  	struct ffs_epfile *epfile = inode->i_private;
> > > +	struct ffs_dmabuf_priv *priv, *tmp;
> > > +
> > > +	/* Close all attached DMABUFs */
> > > +	list_for_each_entry_safe(priv, tmp, &epfile->dmabufs,
> > > entry) {
> > > +		ffs_dmabuf_put(priv->attach);
> > > +	}
> > >  
> > >  	__ffs_epfile_read_buffer_free(epfile);
> > >  	ffs_data_closed(epfile->ffs);
> > > @@ -1282,6 +1338,328 @@ ffs_epfile_release(struct inode *inode,
> > > struct file *file)
> > >  	return 0;
> > >  }
> > >  
> > > +static void ffs_dmabuf_signal_done(struct ffs_dma_fence
> > > *dma_fence, int ret)
> > > +{
> > > +	struct ffs_dmabuf_priv *priv = dma_fence->priv;
> > > +	struct dma_fence *fence = &dma_fence->base;
> > > +
> > > +	dma_fence_get(fence);
> > > +	fence->error = ret;
> > > +	dma_fence_signal(fence);
> > > +
> > > +	dma_buf_unmap_attachment(priv->attach, dma_fence->sgt,
> > > dma_fence->dir);
> > > +	dma_fence_put(fence);
> > > +	ffs_dmabuf_put(priv->attach);
> > 
> > So this can in theory take the dma_resv lock, and if the usb
> > completion
> > isn't an unlimited worker this could hold up completion of future
> > dma_fence, resulting in a deadlock.
> > 
> > It needs to be checked how usb works, and if stalling indefinitely
> > in the io_complete callback can hold up the usb stack you need to:
> > 
> > - drop a dma_fence_begin/end_signalling annotations in here
> > - pull out the unref stuff into a separate preallocated worker (or at
> >   least the final unrefs for ffs_dma_buf).
> 
> Only ffs_dmabuf_put() can attempt to take the dma_resv and would have
> to be in a worker, right? Everything else would be inside the
> dma_fence_begin/end_signalling() annotations?

Yup. Also I noticed that unlike the iio patches you don't have the
dma_buf_unmap here in the completion path (or I'm blind?), which helps a
lot with avoiding trouble.
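
Roughly what I have in mind, as a completely untested sketch (this
assumes a new unref_work member in struct ffs_dma_fence, set up with
INIT_WORK() next to the dma_fence_init() call):

static void ffs_dmabuf_cleanup(struct work_struct *work)
{
	struct ffs_dma_fence *dma_fence =
		container_of(work, struct ffs_dma_fence, unref_work);
	struct ffs_dmabuf_priv *priv = dma_fence->priv;

	/*
	 * We are out of the fence signalling critical path here, so the
	 * final unref may safely take dma_resv_lock.
	 */
	ffs_dmabuf_put(priv->attach);
	dma_fence_put(&dma_fence->base);
}

static void ffs_dmabuf_signal_done(struct ffs_dma_fence *dma_fence, int ret)
{
	struct dma_fence *fence = &dma_fence->base;
	bool cookie = dma_fence_begin_signalling();

	fence->error = ret;
	dma_fence_signal(fence);
	dma_fence_end_signalling(cookie);

	/* Defer the unrefs that may take dma_resv_lock to a worker */
	queue_work(system_unbound_wq, &dma_fence->unref_work);
}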

> > > +}
> > > +
> > > +static void ffs_epfile_dmabuf_io_complete(struct usb_ep *ep,
> > > +					  struct usb_request *req)
> > > +{
> > > +	pr_debug("FFS: DMABUF transfer complete, status=%d\n",
> > > req->status);
> > > +	ffs_dmabuf_signal_done(req->context, req->status);
> > > +	usb_ep_free_request(ep, req);
> > > +}
> > > +
> > > +static const char *ffs_dmabuf_get_driver_name(struct dma_fence
> > > *fence)
> > > +{
> > > +	return "functionfs";
> > > +}
> > > +
> > > +static const char *ffs_dmabuf_get_timeline_name(struct dma_fence
> > > *fence)
> > > +{
> > > +	return "";
> > > +}
> > > +
> > > +static void ffs_dmabuf_fence_release(struct dma_fence *fence)
> > > +{
> > > +	struct ffs_dma_fence *dma_fence =
> > > +		container_of(fence, struct ffs_dma_fence, base);
> > > +
> > > +	kfree(dma_fence);
> > > +}
> > > +
> > > +static const struct dma_fence_ops ffs_dmabuf_fence_ops = {
> > > +	.get_driver_name	= ffs_dmabuf_get_driver_name,
> > > +	.get_timeline_name	= ffs_dmabuf_get_timeline_name,
> > > +	.release		= ffs_dmabuf_fence_release,
> > > +};
> > > +
> > > +static int ffs_dma_resv_lock(struct dma_buf *dmabuf, bool
> > > nonblock)
> > > +{
> > > +	int ret;
> > > +
> > > +	ret = dma_resv_lock_interruptible(dmabuf->resv, NULL);
> > > +	if (ret) {
> > > +		if (ret != -EDEADLK)
> > > +			return ret;
> > > +		if (nonblock)
> > > +			return -EBUSY;
> > > +
> > > +		ret = dma_resv_lock_slow_interruptible(dmabuf-
> > > >resv, NULL);
> > 
> > This is overkill, without a reservation context you will never get
> > -EDEADLK and so never have to do slowpath locking. So just dead code.
> > 
> > If you want to check, build with CONFIG_DEBUG_WW_MUTEX_SLOWPATH=y
> 
> Ok.
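
Fwiw, with the -EDEADLK handling gone this can shrink to something like
the following (sketch, using dma_resv_trylock() so that nonblocking
callers never sleep on the lock):

static int ffs_dma_resv_lock(struct dma_buf *dmabuf, bool nonblock)
{
	if (!nonblock)
		return dma_resv_lock_interruptible(dmabuf->resv, NULL);

	/* O_NONBLOCK callers just bail out if the lock is contended */
	if (!dma_resv_trylock(dmabuf->resv))
		return -EBUSY;

	return 0;
}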
> 
> > 
> > > +	}
> > > +
> > > +	return ret;
> > > +}
> > > +
> > > +static struct dma_buf_attachment *
> > > +ffs_dmabuf_find_attachment(struct device *dev, struct dma_buf
> > > *dmabuf,
> > > +			   bool nonblock)
> > > +{
> > > +	struct dma_buf_attachment *elm, *attach = NULL;
> > > +	int ret;
> > > +
> > > +	ret = ffs_dma_resv_lock(dmabuf, nonblock);
> > > +	if (ret)
> > > +		return ERR_PTR(ret);
> > > +
> > > +	list_for_each_entry(elm, &dmabuf->attachments, node) {
> > > +		if (elm->dev == dev) {
> > > +			attach = elm;
> > > +			break;
> > > +		}
> > > +	}
> > > +
> > > +	if (attach)
> > > +		ffs_dmabuf_get(elm);
> > 
> > This needs a kref_get_unless_zero or you can race with the final
> > free.
> > 
> > I'm not super keen that usb-gadget is noodling around in the
> > attachment
> > list like this, your own lookup structure (you have the dma-buf list
> > already anyway to keep track of all attachments) would be much nicer.
> > But
> > the get_unless_zero I think is mandatory here for this weak
> > reference.
> 
> Christian suggested to move that to a dma_buf function.
> Alternatively I can browse my epfile->dmabufs list, sure - that won't
> be hard to do. That's probably a better idea too.

The trouble with the dma_buf function is that you need to have the
kref_get_unless_zero on the kref in your private thing _within_ the
dma_resv_lock critical section. Otherwise this lookup function and a
concurrent final kref_put might race. So that helper function would need
to require the caller to hold dma_resv_lock already.

It's that kind of locking context leaking across subsystems I don't really
like much. It's not buggy, but it is a bit too leaky an abstraction.
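
To illustrate, with your own list the lookup could look like this
(untested sketch; assumes a new epfile->dmabufs_mutex protecting the
dmabufs list, so the lookup doesn't need the dma_resv lock at all):

static struct dma_buf_attachment *
ffs_dmabuf_find_attachment(struct ffs_epfile *epfile, struct dma_buf *dmabuf)
{
	struct dma_buf_attachment *attach = NULL;
	struct ffs_dmabuf_priv *priv;

	mutex_lock(&epfile->dmabufs_mutex);

	list_for_each_entry(priv, &epfile->dmabufs, entry) {
		if (priv->attach->dmabuf == dmabuf) {
			/*
			 * Weak reference: skip the entry if the final
			 * kref_put() already ran.
			 */
			if (kref_get_unless_zero(&priv->ref))
				attach = priv->attach;
			break;
		}
	}

	mutex_unlock(&epfile->dmabufs_mutex);

	return attach ?: ERR_PTR(-EPERM);
}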

> > > +
> > > +	dma_resv_unlock(dmabuf->resv);
> > > +
> > > +	return attach ?: ERR_PTR(-EPERM);
> > > +}
> > > +
> > > +static int ffs_dmabuf_attach(struct file *file, int fd)
> > > +{
> > > +	struct ffs_epfile *epfile = file->private_data;
> > > +	struct usb_gadget *gadget = epfile->ffs->gadget;
> > > +	struct dma_buf_attachment *attach;
> > > +	struct ffs_dmabuf_priv *priv;
> > > +	struct dma_buf *dmabuf;
> > > +	int err;
> > > +
> > > +	if (!gadget || !gadget->sg_supported)
> > > +		return -EPERM;
> > > +
> > > +	dmabuf = dma_buf_get(fd);
> > > +	if (IS_ERR(dmabuf))
> > > +		return PTR_ERR(dmabuf);
> > > +
> > > +	attach = dma_buf_attach(dmabuf, gadget->dev.parent);
> > > +	if (IS_ERR(attach)) {
> > > +		err = PTR_ERR(attach);
> > > +		goto err_dmabuf_put;
> > > +	}
> > > +
> > > +	priv = kzalloc(sizeof(*priv), GFP_KERNEL);
> > > +	if (!priv) {
> > > +		err = -ENOMEM;
> > > +		goto err_dmabuf_detach;
> > > +	}
> > > +
> > > +	attach->importer_priv = priv;
> > > +
> > > +	priv->attach = attach;
> > > +	spin_lock_init(&priv->lock);
> > > +	kref_init(&priv->ref);
> > > +	priv->context = dma_fence_context_alloc(1);
> > 
> > Just to check: usb gadget guarantees that all requests on an ep are
> > ordered?
> 
> The documentation of usb_ep_queue() states that the transfer requests
> are processed in FIFO order, yes.

Might be good to document this in the commit message, or if you feel
like, in the seqno generation for each dma_fence.
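
E.g. something along these lines (sketch; assumes a new atomic_t seqno
counter in struct ffs_dmabuf_priv, zeroed in ffs_dmabuf_attach()):

	/*
	 * usb_ep_queue() processes the requests of an endpoint in FIFO
	 * order, so transfers complete in the order they were queued
	 * and a simple per-attachment counter is a valid seqno.
	 */
	seqno = atomic_add_return(1, &priv->seqno);

	dma_fence_init(&fence->base, &ffs_dmabuf_fence_ops,
		       &priv->lock, priv->context, seqno);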
> 
> > 
> > > +
> > > +	list_add(&priv->entry, &epfile->dmabufs);
> > > +
> > > +	return 0;
> > > +
> > > +err_dmabuf_detach:
> > > +	dma_buf_detach(dmabuf, attach);
> > > +err_dmabuf_put:
> > > +	dma_buf_put(dmabuf);
> > > +
> > > +	return err;
> > > +}
> > > +
> > > +static int ffs_dmabuf_detach(struct file *file, int fd)
> > > +{
> > > +	struct ffs_epfile *epfile = file->private_data;
> > > +	struct usb_gadget *gadget = epfile->ffs->gadget;
> > > +	bool nonblock = file->f_flags & O_NONBLOCK;
> > > +	struct dma_buf_attachment *attach;
> > > +	struct dma_buf *dmabuf;
> > > +	int ret = 0;
> > > +
> > > +	dmabuf = dma_buf_get(fd);
> > > +	if (IS_ERR(dmabuf))
> > > +		return PTR_ERR(dmabuf);
> > > +
> > > +	attach = ffs_dmabuf_find_attachment(gadget->dev.parent,
> > > +					    dmabuf, nonblock);
> > > +	if (IS_ERR(attach)) {
> > > +		ret = PTR_ERR(attach);
> > > +		goto out_dmabuf_put;
> > > +	}
> > > +
> > > +	/*
> > > +	 * Unref twice to release the reference obtained with
> > > +	 * ffs_dmabuf_find_attachment() above, and the one
> > > obtained in
> > > +	 * ffs_dmabuf_attach().
> > > +	 */
> > > +	ffs_dmabuf_put(attach);
> > 
> > This looks strange, what's stopping userspace from calling detach
> > multiple
> > times while a transfer is pending (so that the destruction is
> > delayed)?
> > That smells like a refcount underflow.
> 
> My idea was that the second ffs_dmabuf_put() would trigger
> ffs_dmabuf_release(), which calls the list_del(); so a second call to
> ffs_dmabuf_detach() would fail to find the attachment.
> 
> Indeed, if there's an on-going transfer, the refcount is higher, and
> this breaks miserably.

Yup that was the scenario I was thinking of.

> Christian pointed out that it breaks if ffs_dmabuf_detach() is called
> concurrently, but this is even worse :)

You need the 2nd detach ioctl call to actually make the refcount
underflow, but I guess it also means that with just one you destroy the
mapping/attachment before the hw is finished. Which I missed :-)

> > You probably need to tie the refcounts you acquire in
> > ffs_dmabuf_attach to
> > epfile->dmabufs 1:1 to make sure there's no way userspace can pull
> > you
> > over the table. This is also the reason why I looked for the locking
> > of
> > that list, and didn't find it.
> 
> I'll change the code to atomically get the dma_buf_attachment and
> remove it from the list.
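
Yeah. For the detach ioctl that would be something in this direction
(untested sketch; again assumes a new epfile->dmabufs_mutex, with the
list_del() moved out of ffs_dmabuf_release() and into the places that
unlink the entry):

static int ffs_dmabuf_detach(struct file *file, int fd)
{
	struct ffs_epfile *epfile = file->private_data;
	struct ffs_dmabuf_priv *priv;
	struct dma_buf *dmabuf;
	int ret = -EPERM;

	dmabuf = dma_buf_get(fd);
	if (IS_ERR(dmabuf))
		return PTR_ERR(dmabuf);

	mutex_lock(&epfile->dmabufs_mutex);

	list_for_each_entry(priv, &epfile->dmabufs, entry) {
		if (priv->attach->dmabuf == dmabuf) {
			/*
			 * Unlink atomically with the lookup, so that a
			 * second DETACH on the same fd can't find the
			 * entry again and underflow the refcount.
			 */
			list_del(&priv->entry);

			/* Drop the reference from ffs_dmabuf_attach() */
			ffs_dmabuf_put(priv->attach);
			ret = 0;
			break;
		}
	}

	mutex_unlock(&epfile->dmabufs_mutex);
	dma_buf_put(dmabuf);
	return ret;
}

Pending transfers still hold their own reference, so the actual
dma_buf_detach() only happens once the last queued transfer completed.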
> 
> > 
> > > +	ffs_dmabuf_put(attach);
> > > +
> > > +out_dmabuf_put:
> > > +	dma_buf_put(dmabuf);
> > > +	return ret;
> > > +}
> > > +
> > > +static int ffs_dmabuf_transfer(struct file *file,
> > > +			       const struct
> > > usb_ffs_dmabuf_transfer_req *req)
> > > +{
> > > +	bool dma_to_ram, nonblock = file->f_flags & O_NONBLOCK;
> > > +	struct ffs_epfile *epfile = file->private_data;
> > > +	struct usb_gadget *gadget = epfile->ffs->gadget;
> > > +	struct dma_buf_attachment *attach;
> > > +	struct ffs_dmabuf_priv *priv;
> > > +	enum dma_data_direction dir;
> > > +	struct ffs_dma_fence *fence;
> > > +	struct usb_request *usb_req;
> > > +	struct sg_table *sg_table;
> > > +	struct dma_buf *dmabuf;
> > > +	struct ffs_ep *ep;
> > > +	int ret;
> > > +
> > > +	if (req->flags & ~USB_FFS_DMABUF_TRANSFER_MASK)
> > > +		return -EINVAL;
> > > +
> > > +	dmabuf = dma_buf_get(req->fd);
> > > +	if (IS_ERR(dmabuf))
> > > +		return PTR_ERR(dmabuf);
> > > +
> > > +	if (req->length > dmabuf->size || req->length == 0) {
> > > +		ret = -EINVAL;
> > > +		goto err_dmabuf_put;
> > > +	}
> > > +
> > > +	attach = ffs_dmabuf_find_attachment(gadget->dev.parent,
> > > +					    dmabuf, nonblock);
> > > +	if (IS_ERR(attach)) {
> > > +		ret = PTR_ERR(attach);
> > > +		goto err_dmabuf_put;
> > > +	}
> > > +
> > > +	priv = attach->importer_priv;
> > > +
> > > +	if (epfile->in)
> > > +		dir = DMA_FROM_DEVICE;
> > > +	else
> > > +		dir = DMA_TO_DEVICE;
> > > +
> > > +	sg_table = dma_buf_map_attachment(attach, dir);
> > > +	if (IS_ERR(sg_table)) {
> > > +		ret = PTR_ERR(sg_table);
> > > +		goto err_attachment_put;
> > > +	}
> > > +
> > > +	ep = ffs_epfile_wait_ep(file);
> > > +	if (IS_ERR(ep)) {
> > > +		ret = PTR_ERR(ep);
> > > +		goto err_unmap_attachment;
> > > +	}
> > > +
> > > +	ret = ffs_dma_resv_lock(dmabuf, nonblock);
> > > +	if (ret)
> > > +		goto err_unmap_attachment;
> > > +
> > > +	/* Make sure we don't have writers */
> > > +	if (!dma_resv_test_signaled(dmabuf->resv,
> > > DMA_RESV_USAGE_WRITE)) {
> > > +		pr_debug("FFS WRITE fence is not signaled\n");
> > > +		ret = -EBUSY;
> > > +		goto err_resv_unlock;
> > > +	}
> > > +
> > > +	dma_to_ram = dir == DMA_FROM_DEVICE;
> > > +
> > > +	/* If we're writing to the DMABUF, make sure we don't have
> > > readers */
> > > +	if (dma_to_ram &&
> > > +	    !dma_resv_test_signaled(dmabuf->resv,
> > > DMA_RESV_USAGE_READ)) {
> > > +		pr_debug("FFS READ fence is not signaled\n");
> > > +		ret = -EBUSY;
> > > +		goto err_resv_unlock;
> > > +	}
> > > +
> > > +	ret = dma_resv_reserve_fences(dmabuf->resv, 1);
> > > +	if (ret)
> > > +		goto err_resv_unlock;
> > > +
> > > +	fence = kmalloc(sizeof(*fence), GFP_KERNEL);
> > > +	if (!fence) {
> > > +		ret = -ENOMEM;
> > > +		goto err_resv_unlock;
> > > +	}
> > > +
> > > +	fence->sgt = sg_table;
> > > +	fence->dir = dir;
> > > +	fence->priv = priv;
> > > +
> > > +	dma_fence_init(&fence->base, &ffs_dmabuf_fence_ops,
> > > +		       &priv->lock, priv->context, 0);
> > 
> > You need a real seqno here or things break with fence merging. Or
> > alternatively unordered dma_fence (which are implemented by
> > allocating a
> > new context for each fence, maybe we should change that eventually
> > ...).
> 
> Understood.
> 
> > > +
> > > +	spin_lock_irq(&epfile->ffs->eps_lock);
> > > +
> > > +	/* In the meantime, endpoint got disabled or changed. */
> > > +	if (epfile->ep != ep) {
> > > +		ret = -ESHUTDOWN;
> > > +		goto err_fence_put;
> > > +	}
> > > +
> > > +	usb_req = usb_ep_alloc_request(ep->ep, GFP_ATOMIC);
> > > +	if (!usb_req) {
> > > +		ret = -ENOMEM;
> > > +		goto err_fence_put;
> > > +	}
> > > +
> > > +	dma_resv_add_fence(dmabuf->resv, &fence->base,
> > > +			   dma_resv_usage_rw(dma_to_ram));
> > > +	dma_resv_unlock(dmabuf->resv);
> > > +
> > > +	/* Now that the dma_fence is in place, queue the transfer.
> > > */
> > > +
> > > +	usb_req->length = req->length;
> > > +	usb_req->buf = NULL;
> > > +	usb_req->sg = sg_table->sgl;
> > > +	usb_req->num_sgs = sg_nents_for_len(sg_table->sgl, req-
> > > >length);
> > > +	usb_req->sg_was_mapped = true;
> > > +	usb_req->context  = fence;
> > > +	usb_req->complete = ffs_epfile_dmabuf_io_complete;
> > > +
> > > +	ret = usb_ep_queue(ep->ep, usb_req, GFP_ATOMIC);
> > > +	if (ret) {
> > > +		usb_ep_free_request(ep->ep, usb_req);
> > > +
> > > +		spin_unlock_irq(&epfile->ffs->eps_lock);
> > > +
> > > +		pr_warn("FFS: Failed to queue DMABUF: %d\n", ret);
> > > +		ffs_dmabuf_signal_done(fence, ret);
> > > +		goto out_dma_buf_put;
> > > +	}
> > > +
> > > +	spin_unlock_irq(&epfile->ffs->eps_lock);
> > > +
> > > +out_dma_buf_put:
> > > +	dma_buf_put(dmabuf);
> > > +
> > > +	return ret;
> > > +
> > > +err_fence_put:
> > > +	spin_unlock_irq(&epfile->ffs->eps_lock);
> > > +	dma_fence_put(&fence->base);
> > > +err_resv_unlock:
> > > +	dma_resv_unlock(dmabuf->resv);
> > > +err_unmap_attachment:
> > > +	dma_buf_unmap_attachment(attach, sg_table, dir);
> > > +err_attachment_put:
> > > +	ffs_dmabuf_put(attach);
> > > +err_dmabuf_put:
> > > +	dma_buf_put(dmabuf);
> > > +
> > > +	return ret;
> > > +}
> > > +
> > >  static long ffs_epfile_ioctl(struct file *file, unsigned code,
> > >  			     unsigned long value)
> > >  {
> > > @@ -1292,6 +1670,44 @@ static long ffs_epfile_ioctl(struct file
> > > *file, unsigned code,
> > >  	if (WARN_ON(epfile->ffs->state != FFS_ACTIVE))
> > >  		return -ENODEV;
> > >  
> > > +	switch (code) {
> > > +	case FUNCTIONFS_DMABUF_ATTACH:
> > > +	{
> > > +		int fd;
> > > +
> > > +		if (copy_from_user(&fd, (void __user *)value,
> > > sizeof(fd))) {
> > > +			ret = -EFAULT;
> > > +			break;
> > > +		}
> > > +
> > > +		return ffs_dmabuf_attach(file, fd);
> > > +	}
> > > +	case FUNCTIONFS_DMABUF_DETACH:
> > > +	{
> > > +		int fd;
> > > +
> > > +		if (copy_from_user(&fd, (void __user *)value,
> > > sizeof(fd))) {
> > > +			ret = -EFAULT;
> > > +			break;
> > > +		}
> > > +
> > > +		return ffs_dmabuf_detach(file, fd);
> > > +	}
> > > +	case FUNCTIONFS_DMABUF_TRANSFER:
> > > +	{
> > > +		struct usb_ffs_dmabuf_transfer_req req;
> > > +
> > > +		if (copy_from_user(&req, (void __user *)value,
> > > sizeof(req))) {
> > > +			ret = -EFAULT;
> > > +			break;
> > > +		}
> > > +
> > > +		return ffs_dmabuf_transfer(file, &req);
> > > +	}
> > > +	default:
> > > +		break;
> > > +	}
> > > +
> > >  	/* Wait for endpoint to be enabled */
> > >  	ep = ffs_epfile_wait_ep(file);
> > >  	if (IS_ERR(ep))
> > > @@ -1869,6 +2285,7 @@ static int ffs_epfiles_create(struct ffs_data
> > > *ffs)
> > >  	for (i = 1; i <= count; ++i, ++epfile) {
> > >  		epfile->ffs = ffs;
> > >  		mutex_init(&epfile->mutex);
> > > +		INIT_LIST_HEAD(&epfile->dmabufs);
> > >  		if (ffs->user_flags & FUNCTIONFS_VIRTUAL_ADDR)
> > >  			sprintf(epfile->name, "ep%02x", ffs-
> > > >eps_addrmap[i]);
> > >  		else
> > > diff --git a/include/uapi/linux/usb/functionfs.h
> > > b/include/uapi/linux/usb/functionfs.h
> > > index 078098e73fd3..9f88de9c3d66 100644
> > > --- a/include/uapi/linux/usb/functionfs.h
> > > +++ b/include/uapi/linux/usb/functionfs.h
> > > @@ -86,6 +86,22 @@ struct usb_ext_prop_desc {
> > >  	__le16	wPropertyNameLength;
> > >  } __attribute__((packed));
> > >  
> > > +/* Flags for usb_ffs_dmabuf_transfer_req->flags (none for now) */
> > > +#define USB_FFS_DMABUF_TRANSFER_MASK	0x0
> > > +
> > > +/**
> > > + * struct usb_ffs_dmabuf_transfer_req - Transfer request for a
> > > DMABUF object
> > > + * @fd:		file descriptor of the DMABUF object
> > > + * @flags:	one or more USB_FFS_DMABUF_TRANSFER_* flags
> > > + * @length:	number of bytes used in this DMABUF for the data
> > > transfer.
> > > + *		Should generally be set to the DMABUF's size.
> > > + */
> > > +struct usb_ffs_dmabuf_transfer_req {
> > > +	int fd;
> > > +	__u32 flags;
> > > +	__u64 length;
> > > +} __attribute__((packed));
> > > +
> > >  #ifndef __KERNEL__
> > >  
> > >  /*
> > > @@ -290,6 +306,31 @@ struct usb_functionfs_event {
> > >  #define	FUNCTIONFS_ENDPOINT_DESC	_IOR('g', 130, \
> > >  					     struct
> > > usb_endpoint_descriptor)
> > >  
> > > +/*
> > > + * Attach the DMABUF object, identified by its file descriptor, to
> > > the
> > > + * data endpoint. Returns zero on success, and a negative errno
> > > value
> > > + * on error.
> > > + */
> > > +#define FUNCTIONFS_DMABUF_ATTACH	_IOW('g', 131, int)
> > > +
> > >  
> > > +/*
> > > + * Detach the given DMABUF object, identified by its file
> > > descriptor,
> > > + * from the data endpoint. Returns zero on success, and a negative
> > > + * errno value on error. Note that closing the endpoint's file
> > > + * descriptor will automatically detach all attached DMABUFs.
> > > + */
> > > +#define FUNCTIONFS_DMABUF_DETACH	_IOW('g', 132, int)
> > > +
> > > +/*
> > > + * Enqueue the previously attached DMABUF to the transfer queue.
> > > + * The argument is a structure that packs the DMABUF's file
> > > descriptor,
> > > + * the size in bytes to transfer (which should generally
> > > correspond to
> > > + * the size of the DMABUF), and a 'flags' field which is unused
> > > + * for now. Returns zero on success, and a negative errno value on
> > > + * error.
> > > + */
> > > +#define FUNCTIONFS_DMABUF_TRANSFER	_IOW('g', 133, \
> > > +					     struct
> > > usb_ffs_dmabuf_transfer_req)
> > >  
> > >  #endif /* _UAPI__LINUX_FUNCTIONFS_H__ */
> > 
> > Only things I've found are (I think at least) bugs in the usb gadget
> > logic, not directly in how dma-buf/fence is used. The only thing I've
> > noticed is the lack of actual dma_fence seqno (which I think
> > Christian
> > already pointed out in an earlier review, looking at archives at
> > least).
> > With that addressed:
> > 
> > Acked-by: Daniel Vetter <daniel.vetter@ffwll.ch>
> > 
> > Cheers, Sima
> 
> Thanks for the review!

btw if you address all the locking/lifetime issues I guess you can upgrade
to r-b: me on the next version.

Cheers, Daniel
Paul Cercueil Jan. 8, 2024, 4:27 p.m. UTC | #5
Le lundi 08 janvier 2024 à 16:29 +0100, Daniel Vetter a écrit :
> On Mon, Jan 08, 2024 at 03:21:21PM +0100, Paul Cercueil wrote:
> > Hi Daniel (Sima?),
> > 
> > Le lundi 08 janvier 2024 à 13:39 +0100, Daniel Vetter a écrit :
> > > On Mon, Jan 08, 2024 at 01:00:55PM +0100, Paul Cercueil wrote:
> > > > This patch introduces three new ioctls. They all should be
> > > > called
> > > > on a
> > > > data endpoint (ie. not ep0). They are:
> > > > 
> > > > - FUNCTIONFS_DMABUF_ATTACH, which takes the file descriptor of
> > > > a
> > > > DMABUF
> > > >   object to attach to the endpoint.
> > > > 
> > > > - FUNCTIONFS_DMABUF_DETACH, which takes the file descriptor of
> > > > the
> > > >   DMABUF to detach from the endpoint. Note that closing the
> > > > endpoint's
> > > >   file descriptor will automatically detach all attached
> > > > DMABUFs.
> > > > 
> > > > - FUNCTIONFS_DMABUF_TRANSFER, which requests a data transfer
> > > > from /
> > > > to
> > > >   the given DMABUF. Its argument is a structure that packs the
> > > > DMABUF's
> > > >   file descriptor, the size in bytes to transfer (which should
> > > > generally
> > > >   be set to the size of the DMABUF), and a 'flags' field which
> > > > is
> > > > unused
> > > >   for now.
> > > >   Before this ioctl can be used, the related DMABUF must be
> > > > attached
> > > >   with FUNCTIONFS_DMABUF_ATTACH.
> > > > 
> > > > These three ioctls enable the FunctionFS code to transfer data
> > > > between
> > > > the USB stack and a DMABUF object, which can be provided by a
> > > > driver
> > > > from a completely different subsystem, in a zero-copy fashion.
> > > > 
> > > > Signed-off-by: Paul Cercueil <paul@crapouillou.net>
> > > > 
> > > > ---
> > > > v2:
> > > > - Make ffs_dma_resv_lock() static
> > > > - Add MODULE_IMPORT_NS(DMA_BUF);
> > > > - The attach/detach functions are now performed without locking
> > > > the
> > > >   eps_lock spinlock. The transfer function starts with the
> > > > spinlock
> > > >   unlocked, then locks it before allocating and queueing the
> > > > USB
> > > >   transfer.
> > > > 
> > > > v3:
> > > > - Inline to_ffs_dma_fence() which was called only once.
> > > > - Simplify ffs_dma_resv_lock()
> > > > - Add comment explaining why we unref twice in
> > > > ffs_dmabuf_detach()
> > > > - Document uapi struct usb_ffs_dmabuf_transfer_req and IOCTLs
> > > > ---
> > > >  drivers/usb/gadget/function/f_fs.c  | 417
> > > > ++++++++++++++++++++++++++++
> > > >  include/uapi/linux/usb/functionfs.h |  41 +++
> > > >  2 files changed, 458 insertions(+)
> > > > 
> > > > diff --git a/drivers/usb/gadget/function/f_fs.c
> > > > b/drivers/usb/gadget/function/f_fs.c
> > > > index ed2a6d5fcef7..9df1f5abb0d4 100644
> > > > --- a/drivers/usb/gadget/function/f_fs.c
> > > > +++ b/drivers/usb/gadget/function/f_fs.c
> > > > @@ -15,6 +15,9 @@
> > > >  /* #define VERBOSE_DEBUG */
> > > >  
> > > >  #include <linux/blkdev.h>
> > > > +#include <linux/dma-buf.h>
> > > > +#include <linux/dma-fence.h>
> > > > +#include <linux/dma-resv.h>
> > > >  #include <linux/pagemap.h>
> > > >  #include <linux/export.h>
> > > >  #include <linux/fs_parser.h>
> > > > @@ -43,6 +46,8 @@
> > > >  
> > > >  #define FUNCTIONFS_MAGIC	0xa647361 /* Chosen by a
> > > > honest
> > > > dice roll ;) */
> > > >  
> > > > +MODULE_IMPORT_NS(DMA_BUF);
> > > > +
> > > >  /* Reference counter handling */
> > > >  static void ffs_data_get(struct ffs_data *ffs);
> > > >  static void ffs_data_put(struct ffs_data *ffs);
> > > > @@ -124,6 +129,21 @@ struct ffs_ep {
> > > >  	u8				num;
> > > >  };
> > > >  
> > > > +struct ffs_dmabuf_priv {
> > > > +	struct list_head entry;
> > > > +	struct kref ref;
> > > > +	struct dma_buf_attachment *attach;
> > > > +	spinlock_t lock;
> > > > +	u64 context;
> > > > +};
> > > > +
> > > > +struct ffs_dma_fence {
> > > > +	struct dma_fence base;
> > > > +	struct ffs_dmabuf_priv *priv;
> > > > +	struct sg_table *sgt;
> > > > +	enum dma_data_direction dir;
> > > > +};
> > > > +
> > > >  struct ffs_epfile {
> > > >  	/* Protects ep->ep and ep->req. */
> > > >  	struct mutex			mutex;
> > > > @@ -197,6 +217,8 @@ struct ffs_epfile {
> > > >  	unsigned char			isoc;	/* P: ffs-
> > > > > eps_lock */
> > > >  
> > > >  	unsigned char			_pad;
> > > > +
> > > > +	struct list_head		dmabufs;
> > > >  };
> > > >  
> > > >  struct ffs_buffer {
> > > > @@ -1271,10 +1293,44 @@ static ssize_t
> > > > ffs_epfile_read_iter(struct
> > > > kiocb *kiocb, struct iov_iter *to)
> > > >  	return res;
> > > >  }
> > > >  
> > > > +static void ffs_dmabuf_release(struct kref *ref)
> > > > +{
> > > > +	struct ffs_dmabuf_priv *priv = container_of(ref,
> > > > struct
> > > > ffs_dmabuf_priv, ref);
> > > > +	struct dma_buf_attachment *attach = priv->attach;
> > > > +	struct dma_buf *dmabuf = attach->dmabuf;
> > > > +
> > > > +	pr_debug("FFS DMABUF release\n");
> > > > +	dma_buf_detach(attach->dmabuf, attach);
> > > > +	dma_buf_put(dmabuf);
> > > > +
> > > > +	list_del(&priv->entry);
> > > 
> > > I didn't find any locking for this list here.
> > 
> > Yeah. I'll add some.
> > 
> > > > +	kfree(priv);
> > > > +}
> > > > +
> > > > +static void ffs_dmabuf_get(struct dma_buf_attachment *attach)
> > > > +{
> > > > +	struct ffs_dmabuf_priv *priv = attach->importer_priv;
> > > > +
> > > > +	kref_get(&priv->ref);
> > > > +}
> > > > +
> > > > +static void ffs_dmabuf_put(struct dma_buf_attachment *attach)
> > > > +{
> > > > +	struct ffs_dmabuf_priv *priv = attach->importer_priv;
> > > > +
> > > > +	kref_put(&priv->ref, ffs_dmabuf_release);
> > > > +}
> > > > +
> > > >  static int
> > > >  ffs_epfile_release(struct inode *inode, struct file *file)
> > > >  {
> > > >  	struct ffs_epfile *epfile = inode->i_private;
> > > > +	struct ffs_dmabuf_priv *priv, *tmp;
> > > > +
> > > > +	/* Close all attached DMABUFs */
> > > > +	list_for_each_entry_safe(priv, tmp, &epfile->dmabufs,
> > > > entry) {
> > > > +		ffs_dmabuf_put(priv->attach);
> > > > +	}
> > > >  
> > > >  	__ffs_epfile_read_buffer_free(epfile);
> > > >  	ffs_data_closed(epfile->ffs);
> > > > @@ -1282,6 +1338,328 @@ ffs_epfile_release(struct inode *inode,
> > > > struct file *file)
> > > >  	return 0;
> > > >  }
> > > >  
> > > > +static void ffs_dmabuf_signal_done(struct ffs_dma_fence
> > > > *dma_fence, int ret)
> > > > +{
> > > > +	struct ffs_dmabuf_priv *priv = dma_fence->priv;
> > > > +	struct dma_fence *fence = &dma_fence->base;
> > > > +
> > > > +	dma_fence_get(fence);
> > > > +	fence->error = ret;
> > > > +	dma_fence_signal(fence);
> > > > +
> > > > +	dma_buf_unmap_attachment(priv->attach, dma_fence->sgt,
> > > > dma_fence->dir);
> > > > +	dma_fence_put(fence);
> > > > +	ffs_dmabuf_put(priv->attach);
> > > 
> > > So this can in theory take the dma_resv lock, and if the usb
> > > completion
> > > isn't an unlimited worker this could hold up completion of future
> > > dma_fence, resulting in a deadlock.
> > > 
> > > It needs to be checked how usb works, and if stalling indefinitely
> > > in the io_complete callback can hold up the usb stack you need to:
> > > 
> > > - drop a dma_fence_begin/end_signalling annotations in here
> > > - pull out the unref stuff into a separate preallocated worker
> > > (or at
> > >   least the final unrefs for ffs_dma_buf).
> > 
> > Only ffs_dmabuf_put() can attempt to take the dma_resv and would
> > have
> > to be in a worker, right? Everything else would be inside the
> > dma_fence_begin/end_signalling() annotations?
> 
> Yup. Also I noticed that unlike the iio patches you don't have the
> dma_buf_unmap here in the completion path (or I'm blind?), which
> helps a
> lot with avoiding trouble.

They both call dma_buf_unmap_attachment() in the "signal done"
callback; the only difference I see is that it is called after the
dma_fence_put() in the iio patches, while it's called before
dma_fence_put() here.

> 
> > > > +}
> > > > +
> > > > +static void ffs_epfile_dmabuf_io_complete(struct usb_ep *ep,
> > > > +					  struct usb_request
> > > > *req)
> > > > +{
> > > > +	pr_debug("FFS: DMABUF transfer complete, status=%d\n",
> > > > req->status);
> > > > +	ffs_dmabuf_signal_done(req->context, req->status);
> > > > +	usb_ep_free_request(ep, req);
> > > > +}
> > > > +
> > > > +static const char *ffs_dmabuf_get_driver_name(struct dma_fence
> > > > *fence)
> > > > +{
> > > > +	return "functionfs";
> > > > +}
> > > > +
> > > > +static const char *ffs_dmabuf_get_timeline_name(struct
> > > > dma_fence
> > > > *fence)
> > > > +{
> > > > +	return "";
> > > > +}
> > > > +
> > > > +static void ffs_dmabuf_fence_release(struct dma_fence *fence)
> > > > +{
> > > > +	struct ffs_dma_fence *dma_fence =
> > > > +		container_of(fence, struct ffs_dma_fence,
> > > > base);
> > > > +
> > > > +	kfree(dma_fence);
> > > > +}
> > > > +
> > > > +static const struct dma_fence_ops ffs_dmabuf_fence_ops = {
> > > > +	.get_driver_name	= ffs_dmabuf_get_driver_name,
> > > > +	.get_timeline_name	=
> > > > ffs_dmabuf_get_timeline_name,
> > > > +	.release		= ffs_dmabuf_fence_release,
> > > > +};
> > > > +
> > > > +static int ffs_dma_resv_lock(struct dma_buf *dmabuf, bool
> > > > nonblock)
> > > > +{
> > > > +	int ret;
> > > > +
> > > > +	ret = dma_resv_lock_interruptible(dmabuf->resv, NULL);
> > > > +	if (ret) {
> > > > +		if (ret != -EDEADLK)
> > > > +			return ret;
> > > > +		if (nonblock)
> > > > +			return -EBUSY;
> > > > +
> > > > +		ret = dma_resv_lock_slow_interruptible(dmabuf-
> > > > > resv, NULL);
> > > 
> > > This is overkill, without a reservation context you will never
> > > get
> > > -EDEADLK and so never have to do slowpath locking. So just dead
> > > code.
> > > 
> > > If you want to check, build with CONFIG_DEBUG_WW_MUTEX_SLOWPATH=y
> > 
> > Ok.
> > 
> > > 
> > > > +	}
> > > > +
> > > > +	return ret;
> > > > +}
> > > > +
> > > > +static struct dma_buf_attachment *
> > > > +ffs_dmabuf_find_attachment(struct device *dev, struct dma_buf
> > > > *dmabuf,
> > > > +			   bool nonblock)
> > > > +{
> > > > +	struct dma_buf_attachment *elm, *attach = NULL;
> > > > +	int ret;
> > > > +
> > > > +	ret = ffs_dma_resv_lock(dmabuf, nonblock);
> > > > +	if (ret)
> > > > +		return ERR_PTR(ret);
> > > > +
> > > > +	list_for_each_entry(elm, &dmabuf->attachments, node) {
> > > > +		if (elm->dev == dev) {
> > > > +			attach = elm;
> > > > +			break;
> > > > +		}
> > > > +	}
> > > > +
> > > > +	if (attach)
> > > > +		ffs_dmabuf_get(elm);
> > > 
> > > This needs a kref_get_unless_zero or you can race with the final
> > > free.
> > > 
> > > I'm not super keen that usb-gadget is noodling around in the
> > > attachment
> > > list like this, your own lookup structure (you have the dma-buf
> > > list
> > > already anyway to keep track of all attachments) would be much
> > > nicer.
> > > But
> > > the get_unless_zero I think is mandatory here for this weak
> > > reference.
> > 
> > Christian suggested to move that to a dma_buf function.
> > Alternatively I can browse my epfile->dmabufs list, sure - that
> > won't
> > be hard to do. That's probably a better idea too.
> 
> The trouble with the dma_buf function is that you need to have the
> kref_get_unless_zero on the kref in your private thing _within_ the
> dma_resv_lock critical section. Otherwise this lookup function and a
> concurrent final kref_put might race. So that helper function would
> need
> to require the caller to hold dma_resv_lock already.
> 
> It's that kind of locking context leaking across subsystems I don't
> really
> like much. It's not buggy, but it is a bit too leaky an abstraction.
> 
> > > > +
> > > > +	dma_resv_unlock(dmabuf->resv);
> > > > +
> > > > +	return attach ?: ERR_PTR(-EPERM);
> > > > +}
> > > > +
> > > > +static int ffs_dmabuf_attach(struct file *file, int fd)
> > > > +{
> > > > +	struct ffs_epfile *epfile = file->private_data;
> > > > +	struct usb_gadget *gadget = epfile->ffs->gadget;
> > > > +	struct dma_buf_attachment *attach;
> > > > +	struct ffs_dmabuf_priv *priv;
> > > > +	struct dma_buf *dmabuf;
> > > > +	int err;
> > > > +
> > > > +	if (!gadget || !gadget->sg_supported)
> > > > +		return -EPERM;
> > > > +
> > > > +	dmabuf = dma_buf_get(fd);
> > > > +	if (IS_ERR(dmabuf))
> > > > +		return PTR_ERR(dmabuf);
> > > > +
> > > > +	attach = dma_buf_attach(dmabuf, gadget->dev.parent);
> > > > +	if (IS_ERR(attach)) {
> > > > +		err = PTR_ERR(attach);
> > > > +		goto err_dmabuf_put;
> > > > +	}
> > > > +
> > > > +	priv = kzalloc(sizeof(*priv), GFP_KERNEL);
> > > > +	if (!priv) {
> > > > +		err = -ENOMEM;
> > > > +		goto err_dmabuf_detach;
> > > > +	}
> > > > +
> > > > +	attach->importer_priv = priv;
> > > > +
> > > > +	priv->attach = attach;
> > > > +	spin_lock_init(&priv->lock);
> > > > +	kref_init(&priv->ref);
> > > > +	priv->context = dma_fence_context_alloc(1);
> > > 
> > > Just to check: usb gadget guarantees that all requests on an ep
> > > are ordered?
> > 
> > The documentation of usb_ep_queue() states that the transfer
> > requests
> > are processed in FIFO order, yes.
> 
> Might be good to document this in the commit message, or if you feel
> like, in the seqno generation for each dma_fence.
> > 
> > > 
> > > > +
> > > > +	list_add(&priv->entry, &epfile->dmabufs);
> > > > +
> > > > +	return 0;
> > > > +
> > > > +err_dmabuf_detach:
> > > > +	dma_buf_detach(dmabuf, attach);
> > > > +err_dmabuf_put:
> > > > +	dma_buf_put(dmabuf);
> > > > +
> > > > +	return err;
> > > > +}
> > > > +
> > > > +static int ffs_dmabuf_detach(struct file *file, int fd)
> > > > +{
> > > > +	struct ffs_epfile *epfile = file->private_data;
> > > > +	struct usb_gadget *gadget = epfile->ffs->gadget;
> > > > +	bool nonblock = file->f_flags & O_NONBLOCK;
> > > > +	struct dma_buf_attachment *attach;
> > > > +	struct dma_buf *dmabuf;
> > > > +	int ret = 0;
> > > > +
> > > > +	dmabuf = dma_buf_get(fd);
> > > > +	if (IS_ERR(dmabuf))
> > > > +		return PTR_ERR(dmabuf);
> > > > +
> > > > +	attach = ffs_dmabuf_find_attachment(gadget-
> > > > >dev.parent,
> > > > +					    dmabuf, nonblock);
> > > > +	if (IS_ERR(attach)) {
> > > > +		ret = PTR_ERR(attach);
> > > > +		goto out_dmabuf_put;
> > > > +	}
> > > > +
> > > > +	/*
> > > > +	 * Unref twice to release the reference obtained with
> > > > +	 * ffs_dmabuf_find_attachment() above, and the one
> > > > obtained in
> > > > +	 * ffs_dmabuf_attach().
> > > > +	 */
> > > > +	ffs_dmabuf_put(attach);
> > > 
> > > This looks strange, what's stopping userspace from calling detach
> > > multiple
> > > times while a transfer is pending (so that the destruction is
> > > delayed)?
> > > That smells like a refcount underflow.
> > 
> > My idea was that the second ffs_dmabuf_put() would trigger
> > ffs_dmabuf_release(), which calls the list_del(); so a second call
> > to
> > ffs_dmabuf_detach() would fail to find the attachment.
> > 
> > Indeed, if there's an on-going transfer, the refcount is higher,
> > and
> > this breaks miserably.
> 
> Yup that was the scenario I was thinking of.
> 
> > Christian pointed out that it breaks if ffs_dmabuf_detach() is
> > called
> > concurrently, but this is even worse :)
> 
> You need the 2nd detach ioctl call to actually make the refcount
> underflow, but I guess it also means that with just one you destroy
> the mapping/attachment before the hw is finished. Which I missed :-)

No, that shouldn't happen, because an extra reference is obtained in
ffs_dmabuf_find_attachment() called by ffs_dmabuf_transfer(), and is
only released in the ffs_dmabuf_signal_done() function.

> > > You probably need to tie the refcounts you acquire in
> > > ffs_dmabuf_attach to
> > > epfile->dmabufs 1:1 to make sure there's no way userspace can
> > > pull
> > > you
> > > over the table. This is also the reason why I looked for the
> > > locking
> > > of
> > > that list, and didn't find it.
> > 
> > I'll change the code to atomically get the dma_buf_attachment and
> > remove it from the list.
> > 
> > > 
> > > > +	ffs_dmabuf_put(attach);
> > > > +
> > > > +out_dmabuf_put:
> > > > +	dma_buf_put(dmabuf);
> > > > +	return ret;
> > > > +}
> > > > +
> > > > +static int ffs_dmabuf_transfer(struct file *file,
> > > > +			       const struct
> > > > usb_ffs_dmabuf_transfer_req *req)
> > > > +{
> > > > +	bool dma_to_ram, nonblock = file->f_flags &
> > > > O_NONBLOCK;
> > > > +	struct ffs_epfile *epfile = file->private_data;
> > > > +	struct usb_gadget *gadget = epfile->ffs->gadget;
> > > > +	struct dma_buf_attachment *attach;
> > > > +	struct ffs_dmabuf_priv *priv;
> > > > +	enum dma_data_direction dir;
> > > > +	struct ffs_dma_fence *fence;
> > > > +	struct usb_request *usb_req;
> > > > +	struct sg_table *sg_table;
> > > > +	struct dma_buf *dmabuf;
> > > > +	struct ffs_ep *ep;
> > > > +	int ret;
> > > > +
> > > > +	if (req->flags & ~USB_FFS_DMABUF_TRANSFER_MASK)
> > > > +		return -EINVAL;
> > > > +
> > > > +	dmabuf = dma_buf_get(req->fd);
> > > > +	if (IS_ERR(dmabuf))
> > > > +		return PTR_ERR(dmabuf);
> > > > +
> > > > +	if (req->length > dmabuf->size || req->length == 0) {
> > > > +		ret = -EINVAL;
> > > > +		goto err_dmabuf_put;
> > > > +	}
> > > > +
> > > > +	attach = ffs_dmabuf_find_attachment(gadget-
> > > > >dev.parent,
> > > > +					    dmabuf, nonblock);
> > > > +	if (IS_ERR(attach)) {
> > > > +		ret = PTR_ERR(attach);
> > > > +		goto err_dmabuf_put;
> > > > +	}
> > > > +
> > > > +	priv = attach->importer_priv;
> > > > +
> > > > +	if (epfile->in)
> > > > +		dir = DMA_FROM_DEVICE;
> > > > +	else
> > > > +		dir = DMA_TO_DEVICE;
> > > > +
> > > > +	sg_table = dma_buf_map_attachment(attach, dir);
> > > > +	if (IS_ERR(sg_table)) {
> > > > +		ret = PTR_ERR(sg_table);
> > > > +		goto err_attachment_put;
> > > > +	}
> > > > +
> > > > +	ep = ffs_epfile_wait_ep(file);
> > > > +	if (IS_ERR(ep)) {
> > > > +		ret = PTR_ERR(ep);
> > > > +		goto err_unmap_attachment;
> > > > +	}
> > > > +
> > > > +	ret = ffs_dma_resv_lock(dmabuf, nonblock);
> > > > +	if (ret)
> > > > +		goto err_unmap_attachment;
> > > > +
> > > > +	/* Make sure we don't have writers */
> > > > +	if (!dma_resv_test_signaled(dmabuf->resv,
> > > > DMA_RESV_USAGE_WRITE)) {
> > > > +		pr_debug("FFS WRITE fence is not signaled\n");
> > > > +		ret = -EBUSY;
> > > > +		goto err_resv_unlock;
> > > > +	}
> > > > +
> > > > +	dma_to_ram = dir == DMA_FROM_DEVICE;
> > > > +
> > > > +	/* If we're writing to the DMABUF, make sure we don't
> > > > have
> > > > readers */
> > > > +	if (dma_to_ram &&
> > > > +	    !dma_resv_test_signaled(dmabuf->resv,
> > > > DMA_RESV_USAGE_READ)) {
> > > > +		pr_debug("FFS READ fence is not signaled\n");
> > > > +		ret = -EBUSY;
> > > > +		goto err_resv_unlock;
> > > > +	}
> > > > +
> > > > +	ret = dma_resv_reserve_fences(dmabuf->resv, 1);
> > > > +	if (ret)
> > > > +		goto err_resv_unlock;
> > > > +
> > > > +	fence = kmalloc(sizeof(*fence), GFP_KERNEL);
> > > > +	if (!fence) {
> > > > +		ret = -ENOMEM;
> > > > +		goto err_resv_unlock;
> > > > +	}
> > > > +
> > > > +	fence->sgt = sg_table;
> > > > +	fence->dir = dir;
> > > > +	fence->priv = priv;
> > > > +
> > > > +	dma_fence_init(&fence->base, &ffs_dmabuf_fence_ops,
> > > > +		       &priv->lock, priv->context, 0);
> > > 
> > > You need a real seqno here or things break with fence merging. Or
> > > alternatively unordered dma_fence (which are implemented by
> > > allocating a
> > > new context for each fence, maybe we should change that
> > > eventually
> > > ...).
> > 
> > Understood.
> > 
> > > > +
> > > > +	spin_lock_irq(&epfile->ffs->eps_lock);
> > > > +
> > > > +	/* In the meantime, endpoint got disabled or changed.
> > > > */
> > > > +	if (epfile->ep != ep) {
> > > > +		ret = -ESHUTDOWN;
> > > > +		goto err_fence_put;
> > > > +	}
> > > > +
> > > > +	usb_req = usb_ep_alloc_request(ep->ep, GFP_ATOMIC);
> > > > +	if (!usb_req) {
> > > > +		ret = -ENOMEM;
> > > > +		goto err_fence_put;
> > > > +	}
> > > > +
> > > > +	dma_resv_add_fence(dmabuf->resv, &fence->base,
> > > > +			   dma_resv_usage_rw(dma_to_ram));
> > > > +	dma_resv_unlock(dmabuf->resv);
> > > > +
> > > > +	/* Now that the dma_fence is in place, queue the
> > > > transfer.
> > > > */
> > > > +
> > > > +	usb_req->length = req->length;
> > > > +	usb_req->buf = NULL;
> > > > +	usb_req->sg = sg_table->sgl;
> > > > +	usb_req->num_sgs = sg_nents_for_len(sg_table->sgl,
> > > > req-
> > > > > length);
> > > > +	usb_req->sg_was_mapped = true;
> > > > +	usb_req->context  = fence;
> > > > +	usb_req->complete = ffs_epfile_dmabuf_io_complete;
> > > > +
> > > > +	ret = usb_ep_queue(ep->ep, usb_req, GFP_ATOMIC);
> > > > +	if (ret) {
> > > > +		usb_ep_free_request(ep->ep, usb_req);
> > > > +
> > > > +		spin_unlock_irq(&epfile->ffs->eps_lock);
> > > > +
> > > > +		pr_warn("FFS: Failed to queue DMABUF: %d\n",
> > > > ret);
> > > > +		ffs_dmabuf_signal_done(fence, ret);
> > > > +		goto out_dma_buf_put;
> > > > +	}
> > > > +
> > > > +	spin_unlock_irq(&epfile->ffs->eps_lock);
> > > > +
> > > > +out_dma_buf_put:
> > > > +	dma_buf_put(dmabuf);
> > > > +
> > > > +	return ret;
> > > > +
> > > > +err_fence_put:
> > > > +	spin_unlock_irq(&epfile->ffs->eps_lock);
> > > > +	dma_fence_put(&fence->base);
> > > > +err_resv_unlock:
> > > > +	dma_resv_unlock(dmabuf->resv);
> > > > +err_unmap_attachment:
> > > > +	dma_buf_unmap_attachment(attach, sg_table, dir);
> > > > +err_attachment_put:
> > > > +	ffs_dmabuf_put(attach);
> > > > +err_dmabuf_put:
> > > > +	dma_buf_put(dmabuf);
> > > > +
> > > > +	return ret;
> > > > +}
> > > > +
> > > >  static long ffs_epfile_ioctl(struct file *file, unsigned code,
> > > >  			     unsigned long value)
> > > >  {
> > > > @@ -1292,6 +1670,44 @@ static long ffs_epfile_ioctl(struct file
> > > > *file, unsigned code,
> > > >  	if (WARN_ON(epfile->ffs->state != FFS_ACTIVE))
> > > >  		return -ENODEV;
> > > >  
> > > > +	switch (code) {
> > > > +	case FUNCTIONFS_DMABUF_ATTACH:
> > > > +	{
> > > > +		int fd;
> > > > +
> > > > +		if (copy_from_user(&fd, (void __user *)value,
> > > > sizeof(fd))) {
> > > > +			ret = -EFAULT;
> > > > +			break;
> > > > +		}
> > > > +
> > > > +		return ffs_dmabuf_attach(file, fd);
> > > > +	}
> > > > +	case FUNCTIONFS_DMABUF_DETACH:
> > > > +	{
> > > > +		int fd;
> > > > +
> > > > +		if (copy_from_user(&fd, (void __user *)value,
> > > > sizeof(fd))) {
> > > > +			ret = -EFAULT;
> > > > +			break;
> > > > +		}
> > > > +
> > > > +		return ffs_dmabuf_detach(file, fd);
> > > > +	}
> > > > +	case FUNCTIONFS_DMABUF_TRANSFER:
> > > > +	{
> > > > +		struct usb_ffs_dmabuf_transfer_req req;
> > > > +
> > > > +		if (copy_from_user(&req, (void __user *)value,
> > > > sizeof(req))) {
> > > > +			ret = -EFAULT;
> > > > +			break;
> > > > +		}
> > > > +
> > > > +		return ffs_dmabuf_transfer(file, &req);
> > > > +	}
> > > > +	default:
> > > > +		break;
> > > > +	}
> > > > +
> > > >  	/* Wait for endpoint to be enabled */
> > > >  	ep = ffs_epfile_wait_ep(file);
> > > >  	if (IS_ERR(ep))
> > > > @@ -1869,6 +2285,7 @@ static int ffs_epfiles_create(struct
> > > > ffs_data
> > > > *ffs)
> > > >  	for (i = 1; i <= count; ++i, ++epfile) {
> > > >  		epfile->ffs = ffs;
> > > >  		mutex_init(&epfile->mutex);
> > > > +		INIT_LIST_HEAD(&epfile->dmabufs);
> > > >  		if (ffs->user_flags & FUNCTIONFS_VIRTUAL_ADDR)
> > > >  			sprintf(epfile->name, "ep%02x", ffs-
> > > > > eps_addrmap[i]);
> > > >  		else
> > > > diff --git a/include/uapi/linux/usb/functionfs.h
> > > > b/include/uapi/linux/usb/functionfs.h
> > > > index 078098e73fd3..9f88de9c3d66 100644
> > > > --- a/include/uapi/linux/usb/functionfs.h
> > > > +++ b/include/uapi/linux/usb/functionfs.h
> > > > @@ -86,6 +86,22 @@ struct usb_ext_prop_desc {
> > > >  	__le16	wPropertyNameLength;
> > > >  } __attribute__((packed));
> > > >  
> > > > +/* Flags for usb_ffs_dmabuf_transfer_req->flags (none for now)
> > > > */
> > > > +#define USB_FFS_DMABUF_TRANSFER_MASK	0x0
> > > > +
> > > > +/**
> > > > + * struct usb_ffs_dmabuf_transfer_req - Transfer request for a
> > > > DMABUF object
> > > > + * @fd:		file descriptor of the DMABUF object
> > > > + * @flags:	one or more USB_FFS_DMABUF_TRANSFER_* flags
> > > > + * @length:	number of bytes used in this DMABUF for the
> > > > data
> > > > transfer.
> > > > + *		Should generally be set to the DMABUF's size.
> > > > + */
> > > > +struct usb_ffs_dmabuf_transfer_req {
> > > > +	int fd;
> > > > +	__u32 flags;
> > > > +	__u64 length;
> > > > +} __attribute__((packed));
> > > > +
> > > >  #ifndef __KERNEL__
> > > >  
> > > >  /*
> > > > @@ -290,6 +306,31 @@ struct usb_functionfs_event {
> > > >  #define	FUNCTIONFS_ENDPOINT_DESC	_IOR('g', 130,
> > > > \
> > > >  					     struct
> > > > usb_endpoint_descriptor)
> > > >  
> > > > +/*
> > > > + * Attach the DMABUF object, identified by its file
> > > > descriptor, to
> > > > the
> > > > + * data endpoint. Returns zero on success, and a negative
> > > > errno
> > > > value
> > > > + * on error.
> > > > + */
> > > > +#define FUNCTIONFS_DMABUF_ATTACH	_IOW('g', 131, int)
> > > > +
> > > >  
> > > > +/*
> > > > + * Detach the given DMABUF object, identified by its file
> > > > descriptor,
> > > > + * from the data endpoint. Returns zero on success, and a
> > > > negative
> > > > + * errno value on error. Note that closing the endpoint's file
> > > > + * descriptor will automatically detach all attached DMABUFs.
> > > > + */
> > > > +#define FUNCTIONFS_DMABUF_DETACH	_IOW('g', 132, int)
> > > > +
> > > > +/*
> > > > + * Enqueue the previously attached DMABUF to the transfer
> > > > queue.
> > > > + * The argument is a structure that packs the DMABUF's file
> > > > descriptor,
> > > > + * the size in bytes to transfer (which should generally
> > > > correspond to
> > > > + * the size of the DMABUF), and a 'flags' field which is
> > > > unused
> > > > + * for now. Returns zero on success, and a negative errno
> > > > value on
> > > > + * error.
> > > > + */
> > > > +#define FUNCTIONFS_DMABUF_TRANSFER	_IOW('g', 133, \
> > > > +					     struct
> > > > usb_ffs_dmabuf_transfer_req)
> > > >  
> > > >  #endif /* _UAPI__LINUX_FUNCTIONFS_H__ */
> > > 
> > > Only things I've found are (I think at least) bugs in the usb
> > > gadget
> > > logic, not directly in how dma-buf/fence is used. The only thing
> > > I've
> > > noticed is the lack of actual dma_fence seqno (which I think
> > > Christian
> > > already pointed out in an earlier review, looking at archives at
> > > least).
> > > With that addressed:
> > > 
> > > Acked-by: Daniel Vetter <daniel.vetter@ffwll.ch>
> > > 
> > > Cheers, Sima
> > 
> > Thanks for the review!
> 
> btw if you address all the locking/lifetime issues I guess you can
> upgrade
> to r-b: me on the next version.
> 
> Cheers, Daniel

Great! Thanks!

It's a bit dangerous though; the chances that I get things wrong are
non-zero.

Cheers,
-Paul
Daniel Vetter Jan. 8, 2024, 7:19 p.m. UTC | #6
On Mon, Jan 08, 2024 at 05:27:33PM +0100, Paul Cercueil wrote:
> Le lundi 08 janvier 2024 à 16:29 +0100, Daniel Vetter a écrit :
> > On Mon, Jan 08, 2024 at 03:21:21PM +0100, Paul Cercueil wrote:
> > > Hi Daniel (Sima?),
> > > 
> > > Le lundi 08 janvier 2024 à 13:39 +0100, Daniel Vetter a écrit :
> > > > On Mon, Jan 08, 2024 at 01:00:55PM +0100, Paul Cercueil wrote:
> > > > > +static void ffs_dmabuf_signal_done(struct ffs_dma_fence
> > > > > *dma_fence, int ret)
> > > > > +{
> > > > > +	struct ffs_dmabuf_priv *priv = dma_fence->priv;
> > > > > +	struct dma_fence *fence = &dma_fence->base;
> > > > > +
> > > > > +	dma_fence_get(fence);
> > > > > +	fence->error = ret;
> > > > > +	dma_fence_signal(fence);
> > > > > +
> > > > > +	dma_buf_unmap_attachment(priv->attach, dma_fence->sgt,
> > > > > dma_fence->dir);
> > > > > +	dma_fence_put(fence);
> > > > > +	ffs_dmabuf_put(priv->attach);
> > > > 
> > > > So this can in theory take the dma_resv lock, and if the usb
> > > > completion
> > > > isn't an unlimited worker this could hold up completion of future
> > > > dma_fence, resulting in a deadlock.
> > > > 
> > > > It needs to be checked how usb works, and if stalling
> > > > indefinitely in the io_complete callback can hold up the usb
> > > > stack you need to:
> > > > 
> > > > - drop a dma_fence_begin/end_signalling annotations in here
> > > > - pull out the unref stuff into a separate preallocated worker
> > > > (or at
> > > >   least the final unrefs for ffs_dma_buf).
> > > 
> > > Only ffs_dmabuf_put() can attempt to take the dma_resv and would
> > > have
> > > to be in a worker, right? Everything else would be inside the
> > > dma_fence_begin/end_signalling() annotations?
> > 
> > Yup. Also I noticed that unlike the iio patches you don't have the
> > dma_buf_unmap here in the completion path (or I'm blind?), which
> > helps a
> > lot with avoiding trouble.
> 
> They both call dma_buf_unmap_attachment() in the "signal done"
> callback, the only difference I see is that it is called after the
> dma_fence_put() in the iio patches, while it's called before
> dma_fence_put() here.

I was indeed blind ...

So the trouble is this won't work because:
- dma_buf_unmap_attachment() requires dma_resv_lock. This is a somewhat
  recent-ish change from 47e982d5195d ("dma-buf: Move
  dma_buf_map_attachment() to dynamic locking specification"), so maybe
  old kernel or you don't have full lockdep enabled to get the right
  splat.

- dma_fence critical section forbids dma_resv_lock

Which means you need to move this out, but then there's the potential
cache management issue. Current gpu drivers just kinda ignore it
because it doesn't matter for their use-cases: they all cache the
mapping for about as long as the attachment exists. You might want to
do the same, unless that somehow breaks a use-case you have; I have no
idea about that.
If something breaks with unmap_attachment moved out of the fence
handling, then I guess it's high time to add a separate
cache-management-only interface to dma_buf (and that's probably going
to be quite some wiring up; I'm not sure how easy that would be to do,
nor what exactly the interface should look like).
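
Caching the mapping would mean something like this (untested sketch;
assumes new sgt and dir members in struct ffs_dmabuf_priv):

	/* In ffs_dmabuf_attach(), right after dma_buf_attach(): */
	priv->dir = epfile->in ? DMA_FROM_DEVICE : DMA_TO_DEVICE;
	priv->sgt = dma_buf_map_attachment_unlocked(attach, priv->dir);
	if (IS_ERR(priv->sgt)) {
		err = PTR_ERR(priv->sgt);
		goto err_dmabuf_detach;
	}

	/*
	 * Transfers then reuse priv->sgt instead of mapping each time,
	 * and the completion path no longer unmaps anything.
	 */

	/* In ffs_dmabuf_release(), before dma_buf_detach(): */
	dma_buf_unmap_attachment_unlocked(priv->attach, priv->sgt, priv->dir);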

Cheers, Sima
Paul Cercueil Jan. 9, 2024, 11:06 a.m. UTC | #7
Hi Daniel / Sima,

Le lundi 08 janvier 2024 à 20:19 +0100, Daniel Vetter a écrit :
> On Mon, Jan 08, 2024 at 05:27:33PM +0100, Paul Cercueil wrote:
> > Le lundi 08 janvier 2024 à 16:29 +0100, Daniel Vetter a écrit :
> > > On Mon, Jan 08, 2024 at 03:21:21PM +0100, Paul Cercueil wrote:
> > > > Hi Daniel (Sima?),
> > > > 
> > > > Le lundi 08 janvier 2024 à 13:39 +0100, Daniel Vetter a écrit :
> > > > > On Mon, Jan 08, 2024 at 01:00:55PM +0100, Paul Cercueil
> > > > > wrote:
> > > > > > +static void ffs_dmabuf_signal_done(struct ffs_dma_fence
> > > > > > *dma_fence, int ret)
> > > > > > +{
> > > > > > +	struct ffs_dmabuf_priv *priv = dma_fence->priv;
> > > > > > +	struct dma_fence *fence = &dma_fence->base;
> > > > > > +
> > > > > > +	dma_fence_get(fence);
> > > > > > +	fence->error = ret;
> > > > > > +	dma_fence_signal(fence);
> > > > > > +
> > > > > > +	dma_buf_unmap_attachment(priv->attach, dma_fence-
> > > > > > >sgt,
> > > > > > dma_fence->dir);
> > > > > > +	dma_fence_put(fence);
> > > > > > +	ffs_dmabuf_put(priv->attach);
> > > > > 
> > > > > So this can in theory take the dma_resv lock, and if the usb
> > > > > completion
> > > > > isn't an unlimited worker this could hold up completion of
> > > > > future
> > > > > dma_fence, resulting in a deadlock.
> > > > > 
> > > > > Needs to be checked how usb works, and if stalling
> > > > > indefinitely
> > > > > in
> > > > > the
> > > > > io_complete callback can hold up the usb stack you need to:
> > > > > 
> > > > > - drop a dma_fence_begin/end_signalling annotations in here
> > > > > - pull out the unref stuff into a separate preallocated
> > > > > worker
> > > > > (or at
> > > > >   least the final unrefs for ffs_dma_buf).
> > > > 
> > > > Only ffs_dmabuf_put() can attempt to take the dma_resv and
> > > > would
> > > > have
> > > > to be in a worker, right? Everything else would be inside the
> > > > dma_fence_begin/end_signalling() annotations?
> > > 
> > > Yup. Also I noticed that unlike the iio patches you don't have
> > > the
> > > dma_buf_unmap here in the completion path (or I'm blind?), which
> > > helps a
> > > lot with avoiding trouble.
> > 
> > They both call dma_buf_unmap_attachment() in the "signal done"
> > callback, the only difference I see is that it is called after the
> > dma_fence_put() in the iio patches, while it's called before
> > dma_fence_put() here.
> 
> I was indeed blind ...
> 
> So the trouble is this wont work because:
> - dma_buf_unmap_attachment() requires dma_resv_lock. This is a
> somewhat
>   recent-ish change from 47e982d5195d ("dma-buf: Move
>   dma_buf_map_attachment() to dynamic locking specification"), so
> maybe
>   old kernel or you don't have full lockdep enabled to get the right
>   splat.
> 
> - dma_fence critical section forbids dma_resv_lock
> 
> Which means you need to move this out, but then there's the potential
> cache management issue. Which current gpu drivers just kinda ignore
> because it doesn't matter for current use-case, they all cache the
> mapping
> for about as long as the attachment exists. You might want to do the
> same,
> unless that somehow breaks a use-case you have, I have no idea about
> that.
> If something breaks with unmap_attachment moved out of the fence
> handling
> then I guess it's high time to add separate cache-management only to
> dma_buf (and that's probably going to be quite some wiring up, not
> sure
> even how easy that would be to do nor what exactly the interface
> should
> look like).

Ok. Then I'll just cache the mapping for now, I think.
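
Roughly like this, I guess (sketch only; the sgt/dir members on
ffs_dmabuf_priv and the use of the _unlocked helpers are assumptions of
mine at this point):

	/* in ffs_dmabuf_attach(), right after dma_buf_attach(): */
	priv->dir = epfile->in ? DMA_FROM_DEVICE : DMA_TO_DEVICE;
	priv->sgt = dma_buf_map_attachment_unlocked(attach, priv->dir);
	if (IS_ERR(priv->sgt)) {
		err = PTR_ERR(priv->sgt);
		goto err_dmabuf_detach;
	}

	/* ...and in ffs_dmabuf_release(), before dma_buf_detach(): */
	dma_buf_unmap_attachment_unlocked(priv->attach, priv->sgt,
					  priv->dir);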

> Cheers, Sima

Cheers,
-Paul
Daniel Vetter Jan. 9, 2024, 1:01 p.m. UTC | #8
On Tue, Jan 09, 2024 at 12:06:58PM +0100, Paul Cercueil wrote:
> Hi Daniel / Sima,
> 
> Le lundi 08 janvier 2024 à 20:19 +0100, Daniel Vetter a écrit :
> > On Mon, Jan 08, 2024 at 05:27:33PM +0100, Paul Cercueil wrote:
> > > Le lundi 08 janvier 2024 à 16:29 +0100, Daniel Vetter a écrit :
> > > > On Mon, Jan 08, 2024 at 03:21:21PM +0100, Paul Cercueil wrote:
> > > > > Hi Daniel (Sima?),
> > > > > 
> > > > > Le lundi 08 janvier 2024 à 13:39 +0100, Daniel Vetter a écrit :
> > > > > > On Mon, Jan 08, 2024 at 01:00:55PM +0100, Paul Cercueil
> > > > > > wrote:
> > > > > > > +static void ffs_dmabuf_signal_done(struct ffs_dma_fence
> > > > > > > *dma_fence, int ret)
> > > > > > > +{
> > > > > > > +	struct ffs_dmabuf_priv *priv = dma_fence->priv;
> > > > > > > +	struct dma_fence *fence = &dma_fence->base;
> > > > > > > +
> > > > > > > +	dma_fence_get(fence);
> > > > > > > +	fence->error = ret;
> > > > > > > +	dma_fence_signal(fence);
> > > > > > > +
> > > > > > > +	dma_buf_unmap_attachment(priv->attach, dma_fence-
> > > > > > > >sgt,
> > > > > > > dma_fence->dir);
> > > > > > > +	dma_fence_put(fence);
> > > > > > > +	ffs_dmabuf_put(priv->attach);
> > > > > > 
> > > > > > So this can in theory take the dma_resv lock, and if the usb
> > > > > > completion
> > > > > > isn't an unlimited worker this could hold up completion of
> > > > > > future
> > > > > > dma_fence, resulting in a deadlock.
> > > > > > 
> > > > > > Needs to be checked how usb works, and if stalling
> > > > > > indefinitely
> > > > > > in
> > > > > > the
> > > > > > io_complete callback can hold up the usb stack you need to:
> > > > > > 
> > > > > > - drop a dma_fence_begin/end_signalling annotations in here
> > > > > > - pull out the unref stuff into a separate preallocated
> > > > > > worker
> > > > > > (or at
> > > > > >   least the final unrefs for ffs_dma_buf).
> > > > > 
> > > > > Only ffs_dmabuf_put() can attempt to take the dma_resv and
> > > > > would
> > > > > have
> > > > > to be in a worker, right? Everything else would be inside the
> > > > > dma_fence_begin/end_signalling() annotations?
> > > > 
> > > > Yup. Also I noticed that unlike the iio patches you don't have
> > > > the
> > > > dma_buf_unmap here in the completion path (or I'm blind?), which
> > > > helps a
> > > > lot with avoiding trouble.
> > > 
> > > They both call dma_buf_unmap_attachment() in the "signal done"
> > > callback, the only difference I see is that it is called after the
> > > dma_fence_put() in the iio patches, while it's called before
> > > dma_fence_put() here.
> > 
> > I was indeed blind ...
> > 
> > So the trouble is this wont work because:
> > - dma_buf_unmap_attachment() requires dma_resv_lock. This is a
> > somewhat
> >   recent-ish change from 47e982d5195d ("dma-buf: Move
> >   dma_buf_map_attachment() to dynamic locking specification"), so
> > maybe
> >   old kernel or you don't have full lockdep enabled to get the right
> >   splat.
> > 
> > - dma_fence critical section forbids dma_resv_lock
> > 
> > Which means you need to move this out, but then there's the potential
> > cache management issue. Which current gpu drivers just kinda ignore
> > because it doesn't matter for current use-case, they all cache the
> > mapping
> > for about as long as the attachment exists. You might want to do the
> > same,
> > unless that somehow breaks a use-case you have, I have no idea about
> > that.
> > If something breaks with unmap_attachment moved out of the fence
> > handling
> > then I guess it's high time to add separate cache-management only to
> > dma_buf (and that's probably going to be quite some wiring up, not
> > sure
> > even how easy that would be to do nor what exactly the interface
> > should
> > look like).
> 
> Ok. Then I'll just cache the mapping for now, I think.

Yeah I think that's simplest. I did ponder a bit and I don't think it'd be
too much pain to add the cache-management functions for device
attachments/mappings. But it would be quite some typing ...
-Sima
Paul Cercueil Jan. 15, 2024, 12:54 p.m. UTC | #9
Hi Daniel / Sima,

On Tue, Jan 09, 2024 at 14:01 +0100, Daniel Vetter wrote:
> On Tue, Jan 09, 2024 at 12:06:58PM +0100, Paul Cercueil wrote:
> > Hi Daniel / Sima,
> > 
> > Le lundi 08 janvier 2024 à 20:19 +0100, Daniel Vetter a écrit :
> > > On Mon, Jan 08, 2024 at 05:27:33PM +0100, Paul Cercueil wrote:
> > > > Le lundi 08 janvier 2024 à 16:29 +0100, Daniel Vetter a écrit :
> > > > > On Mon, Jan 08, 2024 at 03:21:21PM +0100, Paul Cercueil
> > > > > wrote:
> > > > > > Hi Daniel (Sima?),
> > > > > > 
> > > > > > Le lundi 08 janvier 2024 à 13:39 +0100, Daniel Vetter a
> > > > > > écrit :
> > > > > > > On Mon, Jan 08, 2024 at 01:00:55PM +0100, Paul Cercueil
> > > > > > > wrote:
> > > > > > > > +static void ffs_dmabuf_signal_done(struct
> > > > > > > > ffs_dma_fence
> > > > > > > > *dma_fence, int ret)
> > > > > > > > +{
> > > > > > > > +	struct ffs_dmabuf_priv *priv = dma_fence-
> > > > > > > > >priv;
> > > > > > > > +	struct dma_fence *fence = &dma_fence->base;
> > > > > > > > +
> > > > > > > > +	dma_fence_get(fence);
> > > > > > > > +	fence->error = ret;
> > > > > > > > +	dma_fence_signal(fence);
> > > > > > > > +
> > > > > > > > +	dma_buf_unmap_attachment(priv->attach,
> > > > > > > > dma_fence-
> > > > > > > > > sgt,
> > > > > > > > dma_fence->dir);
> > > > > > > > +	dma_fence_put(fence);
> > > > > > > > +	ffs_dmabuf_put(priv->attach);
> > > > > > > 
> > > > > > > So this can in theory take the dma_resv lock, and if the
> > > > > > > usb
> > > > > > > completion
> > > > > > > isn't an unlimited worker this could hold up completion
> > > > > > > of
> > > > > > > future
> > > > > > > dma_fence, resulting in a deadlock.
> > > > > > > 
> > > > > > > Needs to be checked how usb works, and if stalling
> > > > > > > indefinitely
> > > > > > > in
> > > > > > > the
> > > > > > > io_complete callback can hold up the usb stack you need
> > > > > > > to:
> > > > > > > 
> > > > > > > - drop a dma_fence_begin/end_signalling annotations in
> > > > > > > here
> > > > > > > - pull out the unref stuff into a separate preallocated
> > > > > > > worker
> > > > > > > (or at
> > > > > > >   least the final unrefs for ffs_dma_buf).
> > > > > > 
> > > > > > Only ffs_dmabuf_put() can attempt to take the dma_resv and
> > > > > > would
> > > > > > have
> > > > > > to be in a worker, right? Everything else would be inside
> > > > > > the
> > > > > > dma_fence_begin/end_signalling() annotations?
> > > > > 
> > > > > Yup. Also I noticed that unlike the iio patches you don't
> > > > > have
> > > > > the
> > > > > dma_buf_unmap here in the completion path (or I'm blind?),
> > > > > which
> > > > > helps a
> > > > > lot with avoiding trouble.
> > > > 
> > > > They both call dma_buf_unmap_attachment() in the "signal done"
> > > > callback, the only difference I see is that it is called after
> > > > the
> > > > dma_fence_put() in the iio patches, while it's called before
> > > > dma_fence_put() here.
> > > 
> > > I was indeed blind ...
> > > 
> > > So the trouble is this wont work because:
> > > - dma_buf_unmap_attachment() requires dma_resv_lock. This is a
> > > somewhat
> > >   recent-ish change from 47e982d5195d ("dma-buf: Move
> > >   dma_buf_map_attachment() to dynamic locking specification"), so
> > > maybe
> > >   old kernel or you don't have full lockdep enabled to get the
> > > right
> > >   splat.
> > > 
> > > - dma_fence critical section forbids dma_resv_lock
> > > 
> > > Which means you need to move this out, but then there's the
> > > potential
> > > cache management issue. Which current gpu drivers just kinda
> > > ignore
> > > because it doesn't matter for current use-case, they all cache
> > > the
> > > mapping
> > > for about as long as the attachment exists. You might want to do
> > > the
> > > same,
> > > unless that somehow breaks a use-case you have, I have no idea
> > > about
> > > that.
> > > If something breaks with unmap_attachment moved out of the fence
> > > handling
> > > then I guess it's high time to add separate cache-management only
> > > to
> > > dma_buf (and that's probably going to be quite some wiring up,
> > > not
> > > sure
> > > even how easy that would be to do nor what exactly the interface
> > > should
> > > look like).
> > 
> > Ok. Then I'll just cache the mapping for now, I think.
> 
> Yeah I think that's simplest. I did ponder a bit and I don't think
> it'd be
> too much pain to add the cache-management functions for device
> attachments/mappings. But it would be quite some typing ...
> -Sima

It looks like I actually do have some hardware which requires the cache
management. If I cache the mappings in both my IIO and USB code, it
works fine on my ZedBoard, but it doesn't work on my ZCU102.

(Or maybe it's something else? What I get from USB in that case is a
stream of zeros; I'd expect it to be more like a stream of
garbage/stale data).

So, change of plans; I will now unmap the attachment in the cleanup
worker after the fence is signalled, and add a warning comment before
the end of the fence critical section about the need to do cache
management before the signal.

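Sketch of the idea (the work member on struct ffs_dma_fence would be
new, the rest is as in this patch):

static void ffs_dmabuf_cleanup(struct work_struct *work)
{
	struct ffs_dma_fence *dma_fence =
		container_of(work, struct ffs_dma_fence, work);
	struct ffs_dmabuf_priv *priv = dma_fence->priv;

	/* out of the fence critical section, dma_resv_lock is fine again */
	dma_buf_unmap_attachment_unlocked(priv->attach, dma_fence->sgt,
					  dma_fence->dir);
	dma_fence_put(&dma_fence->base);
	ffs_dmabuf_put(priv->attach);
}

static void ffs_dmabuf_signal_done(struct ffs_dma_fence *dma_fence, int ret)
{
	/* WARNING: any cache management must be done before this signal */
	dma_fence->base.error = ret;
	dma_fence_signal(&dma_fence->base);
	queue_work(system_wq, &dma_fence->work);
}
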
Does that work for you?

Cheers,
-Paul
Daniel Vetter Jan. 18, 2024, 1:56 p.m. UTC | #10
On Mon, Jan 15, 2024 at 01:54:27PM +0100, Paul Cercueil wrote:
> Hi Daniel / Sima,
> 
> Le mardi 09 janvier 2024 à 14:01 +0100, Daniel Vetter a écrit :
> > On Tue, Jan 09, 2024 at 12:06:58PM +0100, Paul Cercueil wrote:
> > > Hi Daniel / Sima,
> > > 
> > > Le lundi 08 janvier 2024 à 20:19 +0100, Daniel Vetter a écrit :
> > > > On Mon, Jan 08, 2024 at 05:27:33PM +0100, Paul Cercueil wrote:
> > > > > Le lundi 08 janvier 2024 à 16:29 +0100, Daniel Vetter a écrit :
> > > > > > On Mon, Jan 08, 2024 at 03:21:21PM +0100, Paul Cercueil
> > > > > > wrote:
> > > > > > > Hi Daniel (Sima?),
> > > > > > > 
> > > > > > > Le lundi 08 janvier 2024 à 13:39 +0100, Daniel Vetter a
> > > > > > > écrit :
> > > > > > > > On Mon, Jan 08, 2024 at 01:00:55PM +0100, Paul Cercueil
> > > > > > > > wrote:
> > > > > > > > > +static void ffs_dmabuf_signal_done(struct
> > > > > > > > > ffs_dma_fence
> > > > > > > > > *dma_fence, int ret)
> > > > > > > > > +{
> > > > > > > > > +	struct ffs_dmabuf_priv *priv = dma_fence-
> > > > > > > > > >priv;
> > > > > > > > > +	struct dma_fence *fence = &dma_fence->base;
> > > > > > > > > +
> > > > > > > > > +	dma_fence_get(fence);
> > > > > > > > > +	fence->error = ret;
> > > > > > > > > +	dma_fence_signal(fence);
> > > > > > > > > +
> > > > > > > > > +	dma_buf_unmap_attachment(priv->attach,
> > > > > > > > > dma_fence-
> > > > > > > > > > sgt,
> > > > > > > > > dma_fence->dir);
> > > > > > > > > +	dma_fence_put(fence);
> > > > > > > > > +	ffs_dmabuf_put(priv->attach);
> > > > > > > > 
> > > > > > > > So this can in theory take the dma_resv lock, and if the
> > > > > > > > usb
> > > > > > > > completion
> > > > > > > > isn't an unlimited worker this could hold up completion
> > > > > > > > of
> > > > > > > > future
> > > > > > > > dma_fence, resulting in a deadlock.
> > > > > > > > 
> > > > > > > > Needs to be checked how usb works, and if stalling
> > > > > > > > indefinitely
> > > > > > > > in
> > > > > > > > the
> > > > > > > > io_complete callback can hold up the usb stack you need
> > > > > > > > to:
> > > > > > > > 
> > > > > > > > - drop a dma_fence_begin/end_signalling annotations in
> > > > > > > > here
> > > > > > > > - pull out the unref stuff into a separate preallocated
> > > > > > > > worker
> > > > > > > > (or at
> > > > > > > >   least the final unrefs for ffs_dma_buf).
> > > > > > > 
> > > > > > > Only ffs_dmabuf_put() can attempt to take the dma_resv and
> > > > > > > would
> > > > > > > have
> > > > > > > to be in a worker, right? Everything else would be inside
> > > > > > > the
> > > > > > > dma_fence_begin/end_signalling() annotations?
> > > > > > 
> > > > > > Yup. Also I noticed that unlike the iio patches you don't
> > > > > > have
> > > > > > the
> > > > > > dma_buf_unmap here in the completion path (or I'm blind?),
> > > > > > which
> > > > > > helps a
> > > > > > lot with avoiding trouble.
> > > > > 
> > > > > They both call dma_buf_unmap_attachment() in the "signal done"
> > > > > callback, the only difference I see is that it is called after
> > > > > the
> > > > > dma_fence_put() in the iio patches, while it's called before
> > > > > dma_fence_put() here.
> > > > 
> > > > I was indeed blind ...
> > > > 
> > > > So the trouble is this wont work because:
> > > > - dma_buf_unmap_attachment() requires dma_resv_lock. This is a
> > > > somewhat
> > > >   recent-ish change from 47e982d5195d ("dma-buf: Move
> > > >   dma_buf_map_attachment() to dynamic locking specification"), so
> > > > maybe
> > > >   old kernel or you don't have full lockdep enabled to get the
> > > > right
> > > >   splat.
> > > > 
> > > > - dma_fence critical section forbids dma_resv_lock
> > > > 
> > > > Which means you need to move this out, but then there's the
> > > > potential
> > > > cache management issue. Which current gpu drivers just kinda
> > > > ignore
> > > > because it doesn't matter for current use-case, they all cache
> > > > the
> > > > mapping
> > > > for about as long as the attachment exists. You might want to do
> > > > the
> > > > same,
> > > > unless that somehow breaks a use-case you have, I have no idea
> > > > about
> > > > that.
> > > > If something breaks with unmap_attachment moved out of the fence
> > > > handling
> > > > then I guess it's high time to add separate cache-management only
> > > > to
> > > > dma_buf (and that's probably going to be quite some wiring up,
> > > > not
> > > > sure
> > > > even how easy that would be to do nor what exactly the interface
> > > > should
> > > > look like).
> > > 
> > > Ok. Then I'll just cache the mapping for now, I think.
> > 
> > Yeah I think that's simplest. I did ponder a bit and I don't think
> > it'd be
> > too much pain to add the cache-management functions for device
> > attachments/mappings. But it would be quite some typing ...
> > -Sima
> 
> It looks like I actually do have some hardware which requires the cache
> management. If I cache the mappings in both my IIO and USB code, it
> works fine on my ZedBoard, but it doesn't work on my ZCU102.
> 
> (Or maybe it's something else? What I get from USB in that case is a
> stream of zeros, I'd expect it to be more like a stream of
> garbage/stale data).
> 
> So, change of plans; I will now unmap the attachment in the cleanup
> worker after the fence is signalled, and add a warning comment before
> the end of the fence critical section about the need to do cache
> management before the signal.
> 
> Does that work for you?

The trouble is, I'm not sure this works for you. If you rely on the
fences, and you have to do cache management in between dma operations,
then doing the unmap sometime later will mostly paper over the issue,
but not consistently.

I think that's really bad, because the bugs this will cause are very hard
to track down and, with the current infrastructure, impossible to fix.

Imo cache the mappings, and then fix the cache management bug properly.

If you want an interim solution that isn't blocked on the dma-buf cache
management api addition, the only thing that works is doing the operations
synchronously in the ioctl call. Then you don't need fences, and you can
guarantee that the unmap has finished before userspace proceeds.

With the dma_fences you can't guarantee that, it's just pure luck.
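
I.e. the synchronous variant would be roughly this, with
ffs_dmabuf_queue() as a made-up helper that maps the attachment, queues
the usb_request and fires the completion from the usb callback:

static int ffs_dmabuf_transfer_sync(struct ffs_epfile *epfile,
				    struct dma_buf_attachment *attach,
				    struct sg_table *sgt,
				    enum dma_data_direction dir)
{
	DECLARE_COMPLETION_ONSTACK(done);
	int ret;

	ret = ffs_dmabuf_queue(epfile, sgt, &done);
	if (ret)
		return ret;

	/* block in the ioctl until the usb completion fired... */
	wait_for_completion(&done);

	/* ...so the unmap is guaranteed to be done before userspace runs */
	dma_buf_unmap_attachment_unlocked(attach, sgt, dir);
	return 0;
}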

Cheers, Sima
Daniel Vetter Jan. 18, 2024, 1:59 p.m. UTC | #11
On Thu, Jan 18, 2024 at 02:56:31PM +0100, Daniel Vetter wrote:
> On Mon, Jan 15, 2024 at 01:54:27PM +0100, Paul Cercueil wrote:
> > Hi Daniel / Sima,
> > 
> > Le mardi 09 janvier 2024 à 14:01 +0100, Daniel Vetter a écrit :
> > > On Tue, Jan 09, 2024 at 12:06:58PM +0100, Paul Cercueil wrote:
> > > > Hi Daniel / Sima,
> > > > 
> > > > Le lundi 08 janvier 2024 à 20:19 +0100, Daniel Vetter a écrit :
> > > > > On Mon, Jan 08, 2024 at 05:27:33PM +0100, Paul Cercueil wrote:
> > > > > > Le lundi 08 janvier 2024 à 16:29 +0100, Daniel Vetter a écrit :
> > > > > > > On Mon, Jan 08, 2024 at 03:21:21PM +0100, Paul Cercueil
> > > > > > > wrote:
> > > > > > > > Hi Daniel (Sima?),
> > > > > > > > 
> > > > > > > > Le lundi 08 janvier 2024 à 13:39 +0100, Daniel Vetter a
> > > > > > > > écrit :
> > > > > > > > > On Mon, Jan 08, 2024 at 01:00:55PM +0100, Paul Cercueil
> > > > > > > > > wrote:
> > > > > > > > > > +static void ffs_dmabuf_signal_done(struct
> > > > > > > > > > ffs_dma_fence
> > > > > > > > > > *dma_fence, int ret)
> > > > > > > > > > +{
> > > > > > > > > > +	struct ffs_dmabuf_priv *priv = dma_fence-
> > > > > > > > > > >priv;
> > > > > > > > > > +	struct dma_fence *fence = &dma_fence->base;
> > > > > > > > > > +
> > > > > > > > > > +	dma_fence_get(fence);
> > > > > > > > > > +	fence->error = ret;
> > > > > > > > > > +	dma_fence_signal(fence);
> > > > > > > > > > +
> > > > > > > > > > +	dma_buf_unmap_attachment(priv->attach,
> > > > > > > > > > dma_fence-
> > > > > > > > > > > sgt,
> > > > > > > > > > dma_fence->dir);
> > > > > > > > > > +	dma_fence_put(fence);
> > > > > > > > > > +	ffs_dmabuf_put(priv->attach);
> > > > > > > > > 
> > > > > > > > > So this can in theory take the dma_resv lock, and if the
> > > > > > > > > usb
> > > > > > > > > completion
> > > > > > > > > isn't an unlimited worker this could hold up completion
> > > > > > > > > of
> > > > > > > > > future
> > > > > > > > > dma_fence, resulting in a deadlock.
> > > > > > > > > 
> > > > > > > > > Needs to be checked how usb works, and if stalling
> > > > > > > > > indefinitely
> > > > > > > > > in
> > > > > > > > > the
> > > > > > > > > io_complete callback can hold up the usb stack you need
> > > > > > > > > to:
> > > > > > > > > 
> > > > > > > > > - drop a dma_fence_begin/end_signalling annotations in
> > > > > > > > > here
> > > > > > > > > - pull out the unref stuff into a separate preallocated
> > > > > > > > > worker
> > > > > > > > > (or at
> > > > > > > > >   least the final unrefs for ffs_dma_buf).
> > > > > > > > 
> > > > > > > > Only ffs_dmabuf_put() can attempt to take the dma_resv and
> > > > > > > > would
> > > > > > > > have
> > > > > > > > to be in a worker, right? Everything else would be inside
> > > > > > > > the
> > > > > > > > dma_fence_begin/end_signalling() annotations?
> > > > > > > 
> > > > > > > Yup. Also I noticed that unlike the iio patches you don't
> > > > > > > have
> > > > > > > the
> > > > > > > dma_buf_unmap here in the completion path (or I'm blind?),
> > > > > > > which
> > > > > > > helps a
> > > > > > > lot with avoiding trouble.
> > > > > > 
> > > > > > They both call dma_buf_unmap_attachment() in the "signal done"
> > > > > > callback, the only difference I see is that it is called after
> > > > > > the
> > > > > > dma_fence_put() in the iio patches, while it's called before
> > > > > > dma_fence_put() here.
> > > > > 
> > > > > I was indeed blind ...
> > > > > 
> > > > > So the trouble is this wont work because:
> > > > > - dma_buf_unmap_attachment() requires dma_resv_lock. This is a
> > > > > somewhat
> > > > >   recent-ish change from 47e982d5195d ("dma-buf: Move
> > > > >   dma_buf_map_attachment() to dynamic locking specification"), so
> > > > > maybe
> > > > >   old kernel or you don't have full lockdep enabled to get the
> > > > > right
> > > > >   splat.
> > > > > 
> > > > > - dma_fence critical section forbids dma_resv_lock
> > > > > 
> > > > > Which means you need to move this out, but then there's the
> > > > > potential
> > > > > cache management issue. Which current gpu drivers just kinda
> > > > > ignore
> > > > > because it doesn't matter for current use-case, they all cache
> > > > > the
> > > > > mapping
> > > > > for about as long as the attachment exists. You might want to do
> > > > > the
> > > > > same,
> > > > > unless that somehow breaks a use-case you have, I have no idea
> > > > > about
> > > > > that.
> > > > > If something breaks with unmap_attachment moved out of the fence
> > > > > handling
> > > > > then I guess it's high time to add separate cache-management only
> > > > > to
> > > > > dma_buf (and that's probably going to be quite some wiring up,
> > > > > not
> > > > > sure
> > > > > even how easy that would be to do nor what exactly the interface
> > > > > should
> > > > > look like).
> > > > 
> > > > Ok. Then I'll just cache the mapping for now, I think.
> > > 
> > > Yeah I think that's simplest. I did ponder a bit and I don't think
> > > it'd be
> > > too much pain to add the cache-management functions for device
> > > attachments/mappings. But it would be quite some typing ...
> > > -Sima
> > 
> > It looks like I actually do have some hardware which requires the cache
> > management. If I cache the mappings in both my IIO and USB code, it
> > works fine on my ZedBoard, but it doesn't work on my ZCU102.
> > 
> > (Or maybe it's something else? What I get from USB in that case is a
> > stream of zeros, I'd expect it to be more like a stream of
> > garbage/stale data).
> > 
> > So, change of plans; I will now unmap the attachment in the cleanup
> > worker after the fence is signalled, and add a warning comment before
> > the end of the fence critical section about the need to do cache
> > management before the signal.
> > 
> > Does that work for you?
> 
> The trouble is, I'm not sure this works for you. If you rely on the
> fences, and you have to do cache management in between dma operations,
> then doing the unmap somewhen later will only mostly paper over the issue,
> but not consistently.
> 
> I think that's really bad because the bugs this will cause are very hard
> to track down and with the current infrastructure impossible to fix.
> 
> Imo cache the mappings, and then fix the cache management bug properly.
> 
> If you want an interim solution that isn't blocked on the dma-buf cache
> management api addition, the only thing that works is doing the operations
> synchronously in the ioctl call. Then you don't need fences, and you can
> guarantee that the unmap has finished before userspace proceeds.
> 
> With the dma_fences you can't guarantee that, it's just pure luck.

Maybe a follow-up: double-check that you really need the cache management
between the dma operations from the 2 different devices, and not for the
cpu access that you then probably do to check the result.

Because if the issue is just cpu access, then protecting the cpu access
needs to use the begin/end_cpu_access dma-functions (or the corresponding
ioctl if you use mmap from userspace) anyway, and that should sort out any
issues you have for cpu access.
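
The ioctl being DMA_BUF_IOCTL_SYNC from <linux/dma-buf.h>, bracketing
the cpu access like so:

	struct dma_buf_sync sync;

	sync.flags = DMA_BUF_SYNC_START | DMA_BUF_SYNC_READ;
	ioctl(dmabuf_fd, DMA_BUF_IOCTL_SYNC, &sync);

	/* ... cpu reads the mmap()ed buffer to check the result ... */

	sync.flags = DMA_BUF_SYNC_END | DMA_BUF_SYNC_READ;
	ioctl(dmabuf_fd, DMA_BUF_IOCTL_SYNC, &sync);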

Just to make sure we're not needlessly trying to fix something that isn't
actually the problem.
-Sima
Paul Cercueil Jan. 18, 2024, 7:39 p.m. UTC | #12
Hi Daniel / Sima,

On Thu, Jan 18, 2024 at 14:59 +0100, Daniel Vetter wrote:
> On Thu, Jan 18, 2024 at 02:56:31PM +0100, Daniel Vetter wrote:
> > On Mon, Jan 15, 2024 at 01:54:27PM +0100, Paul Cercueil wrote:
> > > Hi Daniel / Sima,
> > > 
> > > Le mardi 09 janvier 2024 à 14:01 +0100, Daniel Vetter a écrit :
> > > > On Tue, Jan 09, 2024 at 12:06:58PM +0100, Paul Cercueil wrote:
> > > > > Hi Daniel / Sima,
> > > > > 
> > > > > Le lundi 08 janvier 2024 à 20:19 +0100, Daniel Vetter a
> > > > > écrit :
> > > > > > On Mon, Jan 08, 2024 at 05:27:33PM +0100, Paul Cercueil
> > > > > > wrote:
> > > > > > > Le lundi 08 janvier 2024 à 16:29 +0100, Daniel Vetter a
> > > > > > > écrit :
> > > > > > > > On Mon, Jan 08, 2024 at 03:21:21PM +0100, Paul Cercueil
> > > > > > > > wrote:
> > > > > > > > > Hi Daniel (Sima?),
> > > > > > > > > 
> > > > > > > > > Le lundi 08 janvier 2024 à 13:39 +0100, Daniel Vetter
> > > > > > > > > a
> > > > > > > > > écrit :
> > > > > > > > > > On Mon, Jan 08, 2024 at 01:00:55PM +0100, Paul
> > > > > > > > > > Cercueil
> > > > > > > > > > wrote:
> > > > > > > > > > > +static void ffs_dmabuf_signal_done(struct
> > > > > > > > > > > ffs_dma_fence
> > > > > > > > > > > *dma_fence, int ret)
> > > > > > > > > > > +{
> > > > > > > > > > > +	struct ffs_dmabuf_priv *priv =
> > > > > > > > > > > dma_fence-
> > > > > > > > > > > > priv;
> > > > > > > > > > > +	struct dma_fence *fence = &dma_fence-
> > > > > > > > > > > >base;
> > > > > > > > > > > +
> > > > > > > > > > > +	dma_fence_get(fence);
> > > > > > > > > > > +	fence->error = ret;
> > > > > > > > > > > +	dma_fence_signal(fence);
> > > > > > > > > > > +
> > > > > > > > > > > +	dma_buf_unmap_attachment(priv->attach,
> > > > > > > > > > > dma_fence-
> > > > > > > > > > > > sgt,
> > > > > > > > > > > dma_fence->dir);
> > > > > > > > > > > +	dma_fence_put(fence);
> > > > > > > > > > > +	ffs_dmabuf_put(priv->attach);
> > > > > > > > > > 
> > > > > > > > > > So this can in theory take the dma_resv lock, and
> > > > > > > > > > if the
> > > > > > > > > > usb
> > > > > > > > > > completion
> > > > > > > > > > isn't an unlimited worker this could hold up
> > > > > > > > > > completion
> > > > > > > > > > of
> > > > > > > > > > future
> > > > > > > > > > dma_fence, resulting in a deadlock.
> > > > > > > > > > 
> > > > > > > > > > Needs to be checked how usb works, and if stalling
> > > > > > > > > > indefinitely
> > > > > > > > > > in
> > > > > > > > > > the
> > > > > > > > > > io_complete callback can hold up the usb stack you
> > > > > > > > > > need
> > > > > > > > > > to:
> > > > > > > > > > 
> > > > > > > > > > - drop a dma_fence_begin/end_signalling annotations
> > > > > > > > > > in
> > > > > > > > > > here
> > > > > > > > > > - pull out the unref stuff into a separate
> > > > > > > > > > preallocated
> > > > > > > > > > worker
> > > > > > > > > > (or at
> > > > > > > > > >   least the final unrefs for ffs_dma_buf).
> > > > > > > > > 
> > > > > > > > > Only ffs_dmabuf_put() can attempt to take the
> > > > > > > > > dma_resv and
> > > > > > > > > would
> > > > > > > > > have
> > > > > > > > > to be in a worker, right? Everything else would be
> > > > > > > > > inside
> > > > > > > > > the
> > > > > > > > > dma_fence_begin/end_signalling() annotations?
> > > > > > > > 
> > > > > > > > Yup. Also I noticed that unlike the iio patches you
> > > > > > > > don't
> > > > > > > > have
> > > > > > > > the
> > > > > > > > dma_buf_unmap here in the completion path (or I'm
> > > > > > > > blind?),
> > > > > > > > which
> > > > > > > > helps a
> > > > > > > > lot with avoiding trouble.
> > > > > > > 
> > > > > > > They both call dma_buf_unmap_attachment() in the "signal
> > > > > > > done"
> > > > > > > callback, the only difference I see is that it is called
> > > > > > > after
> > > > > > > the
> > > > > > > dma_fence_put() in the iio patches, while it's called
> > > > > > > before
> > > > > > > dma_fence_put() here.
> > > > > > 
> > > > > > I was indeed blind ...
> > > > > > 
> > > > > > So the trouble is this wont work because:
> > > > > > - dma_buf_unmap_attachment() requires dma_resv_lock. This
> > > > > > is a
> > > > > > somewhat
> > > > > >   recent-ish change from 47e982d5195d ("dma-buf: Move
> > > > > >   dma_buf_map_attachment() to dynamic locking
> > > > > > specification"), so
> > > > > > maybe
> > > > > >   old kernel or you don't have full lockdep enabled to get
> > > > > > the
> > > > > > right
> > > > > >   splat.
> > > > > > 
> > > > > > - dma_fence critical section forbids dma_resv_lock
> > > > > > 
> > > > > > Which means you need to move this out, but then there's the
> > > > > > potential
> > > > > > cache management issue. Which current gpu drivers just
> > > > > > kinda
> > > > > > ignore
> > > > > > because it doesn't matter for current use-case, they all
> > > > > > cache
> > > > > > the
> > > > > > mapping
> > > > > > for about as long as the attachment exists. You might want
> > > > > > to do
> > > > > > the
> > > > > > same,
> > > > > > unless that somehow breaks a use-case you have, I have no
> > > > > > idea
> > > > > > about
> > > > > > that.
> > > > > > If something breaks with unmap_attachment moved out of the
> > > > > > fence
> > > > > > handling
> > > > > > then I guess it's high time to add separate cache-
> > > > > > management only
> > > > > > to
> > > > > > dma_buf (and that's probably going to be quite some wiring
> > > > > > up,
> > > > > > not
> > > > > > sure
> > > > > > even how easy that would be to do nor what exactly the
> > > > > > interface
> > > > > > should
> > > > > > look like).
> > > > > 
> > > > > Ok. Then I'll just cache the mapping for now, I think.
> > > > 
> > > > Yeah I think that's simplest. I did ponder a bit and I don't
> > > > think
> > > > it'd be
> > > > too much pain to add the cache-management functions for device
> > > > attachments/mappings. But it would be quite some typing ...
> > > > -Sima
> > > 
> > > It looks like I actually do have some hardware which requires the
> > > cache
> > > management. If I cache the mappings in both my IIO and USB code,
> > > it
> > > works fine on my ZedBoard, but it doesn't work on my ZCU102.
> > > 
> > > (Or maybe it's something else? What I get from USB in that case
> > > is a
> > > stream of zeros, I'd expect it to be more like a stream of
> > > garbage/stale data).
> > > 
> > > So, change of plans; I will now unmap the attachment in the
> > > cleanup
> > > worker after the fence is signalled, and add a warning comment
> > > before
> > > the end of the fence critical section about the need to do cache
> > > management before the signal.
> > > 
> > > Does that work for you?
> > 
> > The trouble is, I'm not sure this works for you. If you rely on the
> > fences, and you have to do cache management in between dma
> > operations,
> > then doing the unmap somewhen later will only mostly paper over the
> > issue,
> > but not consistently.
> > 
> > I think that's really bad because the bugs this will cause are very
> > hard
> > to track down and with the current infrastructure impossible to
> > fix.
> > 
> > Imo cache the mappings, and then fix the cache management bug
> > properly.
> > 
> > If you want an interim solution that isn't blocked on the dma-buf
> > cache
> > management api addition, the only thing that works is doing the
> > operations
> > synchronously in the ioctl call. Then you don't need fences, and
> > you can
> > guarantee that the unmap has finished before userspace proceeds.
> > 
> > With the dma_fences you can't guarantee that, it's just pure luck.
> 
> Maybe a follow up: Double check you really need the cache management
> between the dma operations from 2 different devices, and not for the
> cpu
> access that you then probably do to check the result.
> 
> Because if the issue is just cpu access, then protecting the cpu
> access
> needs to use the begin/end_cpu_access dma-functions (or the
> corresponding
> ioctl if you use mmap from userspace) anyway, and that should sort
> out any
> issues you have for cpu access.
> 
> Just to make sure we're not needlessly trying to fix something that
> isn't
> actually the problem.

I am not doing any CPU access - I'm just attaching the same DMABUF to
IIO and USB and using the new IOCTLs to transfer data.

Can I just roll my own cache management then, using
dma_sync_sg_for_cpu/device? I did a quick-and-dirty check with it, and
it seems to make things work with cached mappings.
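
What I tried is essentially this, between the two device transfers
(each side syncing its own mapping with its own device):

	/* after the producer's (IIO) DMA has written the buffer: */
	dma_sync_sg_for_cpu(iio_dev, iio_sgt->sgl, iio_sgt->orig_nents,
			    DMA_FROM_DEVICE);

	/* ...then before queueing the consumer's (USB) transfer: */
	dma_sync_sg_for_device(usb_dev, usb_sgt->sgl, usb_sgt->orig_nents,
			       DMA_TO_DEVICE);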

> -Sima

Cheers,
-Paul
Daniel Vetter Jan. 18, 2024, 9:05 p.m. UTC | #13
On Thu, Jan 18, 2024 at 08:39:23PM +0100, Paul Cercueil wrote:
> Hi Daniel / Sima,
> 
> Le jeudi 18 janvier 2024 à 14:59 +0100, Daniel Vetter a écrit :
> > On Thu, Jan 18, 2024 at 02:56:31PM +0100, Daniel Vetter wrote:
> > > On Mon, Jan 15, 2024 at 01:54:27PM +0100, Paul Cercueil wrote:
> > > > Hi Daniel / Sima,
> > > > 
> > > > Le mardi 09 janvier 2024 à 14:01 +0100, Daniel Vetter a écrit :
> > > > > On Tue, Jan 09, 2024 at 12:06:58PM +0100, Paul Cercueil wrote:
> > > > > > Hi Daniel / Sima,
> > > > > > 
> > > > > > Le lundi 08 janvier 2024 à 20:19 +0100, Daniel Vetter a
> > > > > > écrit :
> > > > > > > On Mon, Jan 08, 2024 at 05:27:33PM +0100, Paul Cercueil
> > > > > > > wrote:
> > > > > > > > Le lundi 08 janvier 2024 à 16:29 +0100, Daniel Vetter a
> > > > > > > > écrit :
> > > > > > > > > On Mon, Jan 08, 2024 at 03:21:21PM +0100, Paul Cercueil
> > > > > > > > > wrote:
> > > > > > > > > > Hi Daniel (Sima?),
> > > > > > > > > > 
> > > > > > > > > > Le lundi 08 janvier 2024 à 13:39 +0100, Daniel Vetter
> > > > > > > > > > a
> > > > > > > > > > écrit :
> > > > > > > > > > > On Mon, Jan 08, 2024 at 01:00:55PM +0100, Paul
> > > > > > > > > > > Cercueil
> > > > > > > > > > > wrote:
> > > > > > > > > > > > +static void ffs_dmabuf_signal_done(struct
> > > > > > > > > > > > ffs_dma_fence
> > > > > > > > > > > > *dma_fence, int ret)
> > > > > > > > > > > > +{
> > > > > > > > > > > > +	struct ffs_dmabuf_priv *priv =
> > > > > > > > > > > > dma_fence-
> > > > > > > > > > > > > priv;
> > > > > > > > > > > > +	struct dma_fence *fence = &dma_fence-
> > > > > > > > > > > > >base;
> > > > > > > > > > > > +
> > > > > > > > > > > > +	dma_fence_get(fence);
> > > > > > > > > > > > +	fence->error = ret;
> > > > > > > > > > > > +	dma_fence_signal(fence);
> > > > > > > > > > > > +
> > > > > > > > > > > > +	dma_buf_unmap_attachment(priv->attach,
> > > > > > > > > > > > dma_fence-
> > > > > > > > > > > > > sgt,
> > > > > > > > > > > > dma_fence->dir);
> > > > > > > > > > > > +	dma_fence_put(fence);
> > > > > > > > > > > > +	ffs_dmabuf_put(priv->attach);
> > > > > > > > > > > 
> > > > > > > > > > > So this can in theory take the dma_resv lock, and
> > > > > > > > > > > if the
> > > > > > > > > > > usb
> > > > > > > > > > > completion
> > > > > > > > > > > isn't an unlimited worker this could hold up
> > > > > > > > > > > completion
> > > > > > > > > > > of
> > > > > > > > > > > future
> > > > > > > > > > > dma_fence, resulting in a deadlock.
> > > > > > > > > > > 
> > > > > > > > > > > Needs to be checked how usb works, and if stalling
> > > > > > > > > > > indefinitely
> > > > > > > > > > > in
> > > > > > > > > > > the
> > > > > > > > > > > io_complete callback can hold up the usb stack you
> > > > > > > > > > > need
> > > > > > > > > > > to:
> > > > > > > > > > > 
> > > > > > > > > > > - drop a dma_fence_begin/end_signalling annotations
> > > > > > > > > > > in
> > > > > > > > > > > here
> > > > > > > > > > > - pull out the unref stuff into a separate
> > > > > > > > > > > preallocated
> > > > > > > > > > > worker
> > > > > > > > > > > (or at
> > > > > > > > > > >   least the final unrefs for ffs_dma_buf).
> > > > > > > > > > 
> > > > > > > > > > Only ffs_dmabuf_put() can attempt to take the
> > > > > > > > > > dma_resv and
> > > > > > > > > > would
> > > > > > > > > > have
> > > > > > > > > > to be in a worker, right? Everything else would be
> > > > > > > > > > inside
> > > > > > > > > > the
> > > > > > > > > > dma_fence_begin/end_signalling() annotations?
> > > > > > > > > 
> > > > > > > > > Yup. Also I noticed that unlike the iio patches you
> > > > > > > > > don't
> > > > > > > > > have
> > > > > > > > > the
> > > > > > > > > dma_buf_unmap here in the completion path (or I'm
> > > > > > > > > blind?),
> > > > > > > > > which
> > > > > > > > > helps a
> > > > > > > > > lot with avoiding trouble.
> > > > > > > > 
> > > > > > > > They both call dma_buf_unmap_attachment() in the "signal
> > > > > > > > done"
> > > > > > > > callback, the only difference I see is that it is called
> > > > > > > > after
> > > > > > > > the
> > > > > > > > dma_fence_put() in the iio patches, while it's called
> > > > > > > > before
> > > > > > > > dma_fence_put() here.
> > > > > > > 
> > > > > > > I was indeed blind ...
> > > > > > > 
> > > > > > > So the trouble is this wont work because:
> > > > > > > - dma_buf_unmap_attachment() requires dma_resv_lock. This
> > > > > > > is a
> > > > > > > somewhat
> > > > > > >   recent-ish change from 47e982d5195d ("dma-buf: Move
> > > > > > >   dma_buf_map_attachment() to dynamic locking
> > > > > > > specification"), so
> > > > > > > maybe
> > > > > > >   old kernel or you don't have full lockdep enabled to get
> > > > > > > the
> > > > > > > right
> > > > > > >   splat.
> > > > > > > 
> > > > > > > - dma_fence critical section forbids dma_resv_lock
> > > > > > > 
> > > > > > > Which means you need to move this out, but then there's the
> > > > > > > potential
> > > > > > > cache management issue. Which current gpu drivers just
> > > > > > > kinda
> > > > > > > ignore
> > > > > > > because it doesn't matter for current use-case, they all
> > > > > > > cache
> > > > > > > the
> > > > > > > mapping
> > > > > > > for about as long as the attachment exists. You might want
> > > > > > > to do
> > > > > > > the
> > > > > > > same,
> > > > > > > unless that somehow breaks a use-case you have, I have no
> > > > > > > idea
> > > > > > > about
> > > > > > > that.
> > > > > > > If something breaks with unmap_attachment moved out of the
> > > > > > > fence
> > > > > > > handling
> > > > > > > then I guess it's high time to add separate cache-
> > > > > > > management only
> > > > > > > to
> > > > > > > dma_buf (and that's probably going to be quite some wiring
> > > > > > > up,
> > > > > > > not
> > > > > > > sure
> > > > > > > even how easy that would be to do nor what exactly the
> > > > > > > interface
> > > > > > > should
> > > > > > > look like).
> > > > > > 
> > > > > > Ok. Then I'll just cache the mapping for now, I think.
> > > > > 
> > > > > Yeah I think that's simplest. I did ponder a bit and I don't
> > > > > think
> > > > > it'd be
> > > > > too much pain to add the cache-management functions for device
> > > > > attachments/mappings. But it would be quite some typing ...
> > > > > -Sima
> > > > 
> > > > It looks like I actually do have some hardware which requires the
> > > > cache
> > > > management. If I cache the mappings in both my IIO and USB code,
> > > > it
> > > > works fine on my ZedBoard, but it doesn't work on my ZCU102.
> > > > 
> > > > (Or maybe it's something else? What I get from USB in that case
> > > > is a
> > > > stream of zeros, I'd expect it to be more like a stream of
> > > > garbage/stale data).
> > > > 
> > > > So, change of plans; I will now unmap the attachment in the
> > > > cleanup
> > > > worker after the fence is signalled, and add a warning comment
> > > > before
> > > > the end of the fence critical section about the need to do cache
> > > > management before the signal.
> > > > 
> > > > Does that work for you?
> > > 
> > > The trouble is, I'm not sure this works for you. If you rely on the
> > > fences, and you have to do cache management in between dma
> > > operations,
> > > then doing the unmap somewhen later will only mostly paper over the
> > > issue,
> > > but not consistently.
> > > 
> > > I think that's really bad because the bugs this will cause are very
> > > hard
> > > to track down and with the current infrastructure impossible to
> > > fix.
> > > 
> > > Imo cache the mappings, and then fix the cache management bug
> > > properly.
> > > 
> > > If you want an interim solution that isn't blocked on the dma-buf
> > > cache
> > > management api addition, the only thing that works is doing the
> > > operations
> > > synchronously in the ioctl call. Then you don't need fences, and
> > > you can
> > > guarantee that the unmap has finished before userspace proceeds.
> > > 
> > > With the dma_fences you can't guarantee that, it's just pure luck.
> > 
> > Maybe a follow up: Double check you really need the cache management
> > between the dma operations from 2 different devices, and not for the
> > cpu
> > access that you then probably do to check the result.
> > 
> > Because if the issue is just cpu access, then protecting the cpu
> > access
> > needs to use the begin/end_cpu_access dma-functions (or the
> > corresponding
> > ioctl if you use mmap from userspace) anyway, and that should sort
> > out any
> > issues you have for cpu access.
> > 
> > Just to make sure we're not needlessly trying to fix something that
> > isn't
> > actually the problem.
> 
> I am not doing any CPU access - I'm just attaching the same DMABUF to
> IIO and USB and use the new IOCTLs to transfer data.
> 
> Can I just roll my own cache management then, using
> dma_sync_sg_for_cpu/device? I did a quick-and-dirty check with it, and
> it seems to make things work with cached mappings.

Nope, because you might have an sg list which does not work for these
apis. Only the exporter knows whether it's dma-api allocated (in which
case these are the right apis), whether nothing is needed, or whether
something driver-private is required.

So I'm afraid, we need to wire these through. It shouldn't be too bad
though because we really only need to wire these through for the cases you
need them, not for all dma-buf exporters. The one tricky part would be
what we call the functions, since I guess you need to call _for_cpu()
after you've finished access by the first device, and then _for_device()
before
starting the access on the next one? That's a bit confusing lingo in a
dma-buf context where you only move data ownership from one device to the
other, so I think for dma_buf we want maybe dma_buf_attachment_end_access (for
sync_for_cpu) and dma_buf_attachment_begin_access (for sync_for_device) to
be consistent with the cpu flush functions.

Well, maybe drop the _attachment_ if you want, since other functions like
dma_buf_pin are also not consistent with the naming.

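So, hypothetically, something like this (to be clear, neither function
exists today):

/* importer is done with device access; mirrors dma_sync_sg_for_cpu() */
int dma_buf_attachment_end_access(struct dma_buf_attachment *attach,
				  enum dma_data_direction dir);

/* importer is about to start device access; mirrors
 * dma_sync_sg_for_device() */
int dma_buf_attachment_begin_access(struct dma_buf_attachment *attach,
				    enum dma_data_direction dir);
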
Cheers, Sima

Patch

diff --git a/drivers/usb/gadget/function/f_fs.c b/drivers/usb/gadget/function/f_fs.c
index ed2a6d5fcef7..9df1f5abb0d4 100644
--- a/drivers/usb/gadget/function/f_fs.c
+++ b/drivers/usb/gadget/function/f_fs.c
@@ -15,6 +15,9 @@ 
 /* #define VERBOSE_DEBUG */
 
 #include <linux/blkdev.h>
+#include <linux/dma-buf.h>
+#include <linux/dma-fence.h>
+#include <linux/dma-resv.h>
 #include <linux/pagemap.h>
 #include <linux/export.h>
 #include <linux/fs_parser.h>
@@ -43,6 +46,8 @@ 
 
 #define FUNCTIONFS_MAGIC	0xa647361 /* Chosen by a honest dice roll ;) */
 
+MODULE_IMPORT_NS(DMA_BUF);
+
 /* Reference counter handling */
 static void ffs_data_get(struct ffs_data *ffs);
 static void ffs_data_put(struct ffs_data *ffs);
@@ -124,6 +129,21 @@  struct ffs_ep {
 	u8				num;
 };
 
+struct ffs_dmabuf_priv {
+	struct list_head entry;
+	struct kref ref;
+	struct dma_buf_attachment *attach;
+	spinlock_t lock;
+	u64 context;
+};
+
+struct ffs_dma_fence {
+	struct dma_fence base;
+	struct ffs_dmabuf_priv *priv;
+	struct sg_table *sgt;
+	enum dma_data_direction dir;
+};
+
 struct ffs_epfile {
 	/* Protects ep->ep and ep->req. */
 	struct mutex			mutex;
@@ -197,6 +217,8 @@  struct ffs_epfile {
 	unsigned char			isoc;	/* P: ffs->eps_lock */
 
 	unsigned char			_pad;
+
+	struct list_head		dmabufs;
 };
 
 struct ffs_buffer {
@@ -1271,10 +1293,44 @@  static ssize_t ffs_epfile_read_iter(struct kiocb *kiocb, struct iov_iter *to)
 	return res;
 }
 
+static void ffs_dmabuf_release(struct kref *ref)
+{
+	struct ffs_dmabuf_priv *priv = container_of(ref, struct ffs_dmabuf_priv, ref);
+	struct dma_buf_attachment *attach = priv->attach;
+	struct dma_buf *dmabuf = attach->dmabuf;
+
+	pr_debug("FFS DMABUF release\n");
+	dma_buf_detach(attach->dmabuf, attach);
+	dma_buf_put(dmabuf);
+
+	list_del(&priv->entry);
+	kfree(priv);
+}
+
+static void ffs_dmabuf_get(struct dma_buf_attachment *attach)
+{
+	struct ffs_dmabuf_priv *priv = attach->importer_priv;
+
+	kref_get(&priv->ref);
+}
+
+static void ffs_dmabuf_put(struct dma_buf_attachment *attach)
+{
+	struct ffs_dmabuf_priv *priv = attach->importer_priv;
+
+	kref_put(&priv->ref, ffs_dmabuf_release);
+}
+
 static int
 ffs_epfile_release(struct inode *inode, struct file *file)
 {
 	struct ffs_epfile *epfile = inode->i_private;
+	struct ffs_dmabuf_priv *priv, *tmp;
+
+	/* Close all attached DMABUFs */
+	list_for_each_entry_safe(priv, tmp, &epfile->dmabufs, entry) {
+		ffs_dmabuf_put(priv->attach);
+	}
 
 	__ffs_epfile_read_buffer_free(epfile);
 	ffs_data_closed(epfile->ffs);
@@ -1282,6 +1338,328 @@  ffs_epfile_release(struct inode *inode, struct file *file)
 	return 0;
 }
 
+static void ffs_dmabuf_signal_done(struct ffs_dma_fence *dma_fence, int ret)
+{
+	struct ffs_dmabuf_priv *priv = dma_fence->priv;
+	struct dma_fence *fence = &dma_fence->base;
+
+	dma_fence_get(fence);
+	fence->error = ret;
+	dma_fence_signal(fence);
+
+	dma_buf_unmap_attachment(priv->attach, dma_fence->sgt, dma_fence->dir);
+	dma_fence_put(fence);
+	ffs_dmabuf_put(priv->attach);
+}
+
+static void ffs_epfile_dmabuf_io_complete(struct usb_ep *ep,
+					  struct usb_request *req)
+{
+	pr_debug("FFS: DMABUF transfer complete, status=%d\n", req->status);
+	ffs_dmabuf_signal_done(req->context, req->status);
+	usb_ep_free_request(ep, req);
+}
+
+static const char *ffs_dmabuf_get_driver_name(struct dma_fence *fence)
+{
+	return "functionfs";
+}
+
+static const char *ffs_dmabuf_get_timeline_name(struct dma_fence *fence)
+{
+	return "";
+}
+
+static void ffs_dmabuf_fence_release(struct dma_fence *fence)
+{
+	struct ffs_dma_fence *dma_fence =
+		container_of(fence, struct ffs_dma_fence, base);
+
+	kfree(dma_fence);
+}
+
+static const struct dma_fence_ops ffs_dmabuf_fence_ops = {
+	.get_driver_name	= ffs_dmabuf_get_driver_name,
+	.get_timeline_name	= ffs_dmabuf_get_timeline_name,
+	.release		= ffs_dmabuf_fence_release,
+};
+
+static int ffs_dma_resv_lock(struct dma_buf *dmabuf, bool nonblock)
+{
+	int ret;
+
+	ret = dma_resv_lock_interruptible(dmabuf->resv, NULL);
+	if (ret) {
+		if (ret != -EDEADLK)
+			return ret;
+		if (nonblock)
+			return -EBUSY;
+
+		ret = dma_resv_lock_slow_interruptible(dmabuf->resv, NULL);
+	}
+
+	return ret;
+}
+
+static struct dma_buf_attachment *
+ffs_dmabuf_find_attachment(struct device *dev, struct dma_buf *dmabuf,
+			   bool nonblock)
+{
+	struct dma_buf_attachment *elm, *attach = NULL;
+	int ret;
+
+	ret = ffs_dma_resv_lock(dmabuf, nonblock);
+	if (ret)
+		return ERR_PTR(ret);
+
+	list_for_each_entry(elm, &dmabuf->attachments, node) {
+		if (elm->dev == dev) {
+			attach = elm;
+			break;
+		}
+	}
+
+	if (attach)
+		ffs_dmabuf_get(attach);
+
+	dma_resv_unlock(dmabuf->resv);
+
+	return attach ?: ERR_PTR(-EPERM);
+}
+
+static int ffs_dmabuf_attach(struct file *file, int fd)
+{
+	struct ffs_epfile *epfile = file->private_data;
+	struct usb_gadget *gadget = epfile->ffs->gadget;
+	struct dma_buf_attachment *attach;
+	struct ffs_dmabuf_priv *priv;
+	struct dma_buf *dmabuf;
+	int err;
+
+	if (!gadget || !gadget->sg_supported)
+		return -EPERM;
+
+	dmabuf = dma_buf_get(fd);
+	if (IS_ERR(dmabuf))
+		return PTR_ERR(dmabuf);
+
+	attach = dma_buf_attach(dmabuf, gadget->dev.parent);
+	if (IS_ERR(attach)) {
+		err = PTR_ERR(attach);
+		goto err_dmabuf_put;
+	}
+
+	priv = kzalloc(sizeof(*priv), GFP_KERNEL);
+	if (!priv) {
+		err = -ENOMEM;
+		goto err_dmabuf_detach;
+	}
+
+	attach->importer_priv = priv;
+
+	priv->attach = attach;
+	spin_lock_init(&priv->lock);
+	kref_init(&priv->ref);
+	priv->context = dma_fence_context_alloc(1);
+
+	list_add(&priv->entry, &epfile->dmabufs);
+
+	return 0;
+
+err_dmabuf_detach:
+	dma_buf_detach(dmabuf, attach);
+err_dmabuf_put:
+	dma_buf_put(dmabuf);
+
+	return err;
+}
+
+static int ffs_dmabuf_detach(struct file *file, int fd)
+{
+	struct ffs_epfile *epfile = file->private_data;
+	struct usb_gadget *gadget = epfile->ffs->gadget;
+	bool nonblock = file->f_flags & O_NONBLOCK;
+	struct dma_buf_attachment *attach;
+	struct dma_buf *dmabuf;
+	int ret = 0;
+
+	dmabuf = dma_buf_get(fd);
+	if (IS_ERR(dmabuf))
+		return PTR_ERR(dmabuf);
+
+	attach = ffs_dmabuf_find_attachment(gadget->dev.parent,
+					    dmabuf, nonblock);
+	if (IS_ERR(attach)) {
+		ret = PTR_ERR(attach);
+		goto out_dmabuf_put;
+	}
+
+	/*
+	 * Unref twice to release the reference obtained with
+	 * ffs_dmabuf_find_attachment() above, and the one obtained in
+	 * ffs_dmabuf_attach().
+	 */
+	ffs_dmabuf_put(attach);
+	ffs_dmabuf_put(attach);
+
+out_dmabuf_put:
+	dma_buf_put(dmabuf);
+	return ret;
+}
+
+static int ffs_dmabuf_transfer(struct file *file,
+			       const struct usb_ffs_dmabuf_transfer_req *req)
+{
+	bool dma_to_ram, nonblock = file->f_flags & O_NONBLOCK;
+	struct ffs_epfile *epfile = file->private_data;
+	struct usb_gadget *gadget = epfile->ffs->gadget;
+	struct dma_buf_attachment *attach;
+	struct ffs_dmabuf_priv *priv;
+	enum dma_data_direction dir;
+	struct ffs_dma_fence *fence;
+	struct usb_request *usb_req;
+	struct sg_table *sg_table;
+	struct dma_buf *dmabuf;
+	struct ffs_ep *ep;
+	int ret;
+
+	if (req->flags & ~USB_FFS_DMABUF_TRANSFER_MASK)
+		return -EINVAL;
+
+	dmabuf = dma_buf_get(req->fd);
+	if (IS_ERR(dmabuf))
+		return PTR_ERR(dmabuf);
+
+	if (req->length > dmabuf->size || req->length == 0) {
+		ret = -EINVAL;
+		goto err_dmabuf_put;
+	}
+
+	attach = ffs_dmabuf_find_attachment(gadget->dev.parent,
+					    dmabuf, nonblock);
+	if (IS_ERR(attach)) {
+		ret = PTR_ERR(attach);
+		goto err_dmabuf_put;
+	}
+
+	priv = attach->importer_priv;
+
+	if (epfile->in)
+		dir = DMA_FROM_DEVICE;
+	else
+		dir = DMA_TO_DEVICE;
+
+	sg_table = dma_buf_map_attachment(attach, dir);
+	if (IS_ERR(sg_table)) {
+		ret = PTR_ERR(sg_table);
+		goto err_attachment_put;
+	}
+
+	ep = ffs_epfile_wait_ep(file);
+	if (IS_ERR(ep)) {
+		ret = PTR_ERR(ep);
+		goto err_unmap_attachment;
+	}
+
+	ret = ffs_dma_resv_lock(dmabuf, nonblock);
+	if (ret)
+		goto err_unmap_attachment;
+
+	/* Make sure we don't have writers */
+	if (!dma_resv_test_signaled(dmabuf->resv, DMA_RESV_USAGE_WRITE)) {
+		pr_debug("FFS WRITE fence is not signaled\n");
+		ret = -EBUSY;
+		goto err_resv_unlock;
+	}
+
+	dma_to_ram = dir == DMA_FROM_DEVICE;
+
+	/* If we're writing to the DMABUF, make sure we don't have readers */
+	if (dma_to_ram &&
+	    !dma_resv_test_signaled(dmabuf->resv, DMA_RESV_USAGE_READ)) {
+		pr_debug("FFS READ fence is not signaled\n");
+		ret = -EBUSY;
+		goto err_resv_unlock;
+	}
+
+	ret = dma_resv_reserve_fences(dmabuf->resv, 1);
+	if (ret)
+		goto err_resv_unlock;
+
+	fence = kmalloc(sizeof(*fence), GFP_KERNEL);
+	if (!fence) {
+		ret = -ENOMEM;
+		goto err_resv_unlock;
+	}
+
+	fence->sgt = sg_table;
+	fence->dir = dir;
+	fence->priv = priv;
+
+	dma_fence_init(&fence->base, &ffs_dmabuf_fence_ops,
+		       &priv->lock, priv->context, 0);
+
+	spin_lock_irq(&epfile->ffs->eps_lock);
+
+	/* In the meantime, endpoint got disabled or changed. */
+	if (epfile->ep != ep) {
+		ret = -ESHUTDOWN;
+		goto err_fence_put;
+	}
+
+	usb_req = usb_ep_alloc_request(ep->ep, GFP_ATOMIC);
+	if (!usb_req) {
+		ret = -ENOMEM;
+		goto err_fence_put;
+	}
+
+	dma_resv_add_fence(dmabuf->resv, &fence->base,
+			   dma_resv_usage_rw(dma_to_ram));
+	dma_resv_unlock(dmabuf->resv);
+
+	/* Now that the dma_fence is in place, queue the transfer. */
+
+	usb_req->length = req->length;
+	usb_req->buf = NULL;
+	usb_req->sg = sg_table->sgl;
+	usb_req->num_sgs = sg_nents_for_len(sg_table->sgl, req->length);
+	usb_req->sg_was_mapped = true;
+	usb_req->context  = fence;
+	usb_req->complete = ffs_epfile_dmabuf_io_complete;
+
+	ret = usb_ep_queue(ep->ep, usb_req, GFP_ATOMIC);
+	if (ret) {
+		usb_ep_free_request(ep->ep, usb_req);
+
+		spin_unlock_irq(&epfile->ffs->eps_lock);
+
+		pr_warn("FFS: Failed to queue DMABUF: %d\n", ret);
+		ffs_dmabuf_signal_done(fence, ret);
+		goto out_dma_buf_put;
+	}
+
+	spin_unlock_irq(&epfile->ffs->eps_lock);
+
+out_dma_buf_put:
+	dma_buf_put(dmabuf);
+
+	return ret;
+
+err_fence_put:
+	spin_unlock_irq(&epfile->ffs->eps_lock);
+	dma_fence_put(&fence->base);
+err_resv_unlock:
+	dma_resv_unlock(dmabuf->resv);
+err_unmap_attachment:
+	dma_buf_unmap_attachment(attach, sg_table, dir);
+err_attachment_put:
+	ffs_dmabuf_put(attach);
+err_dmabuf_put:
+	dma_buf_put(dmabuf);
+
+	return ret;
+}
+
 static long ffs_epfile_ioctl(struct file *file, unsigned code,
 			     unsigned long value)
 {
@@ -1292,6 +1670,44 @@  static long ffs_epfile_ioctl(struct file *file, unsigned code,
 	if (WARN_ON(epfile->ffs->state != FFS_ACTIVE))
 		return -ENODEV;
 
+	switch (code) {
+	case FUNCTIONFS_DMABUF_ATTACH:
+	{
+		int fd;
+
+		if (copy_from_user(&fd, (void __user *)value, sizeof(fd))) {
+			ret = -EFAULT;
+			break;
+		}
+
+		return ffs_dmabuf_attach(file, fd);
+	}
+	case FUNCTIONFS_DMABUF_DETACH:
+	{
+		int fd;
+
+		if (copy_from_user(&fd, (void __user *)value, sizeof(fd))) {
+			ret = -EFAULT;
+			break;
+		}
+
+		return ffs_dmabuf_detach(file, fd);
+	}
+	case FUNCTIONFS_DMABUF_TRANSFER:
+	{
+		struct usb_ffs_dmabuf_transfer_req req;
+
+		if (copy_from_user(&req, (void __user *)value, sizeof(req))) {
+			ret = -EFAULT;
+			break;
+		}
+
+		return ffs_dmabuf_transfer(file, &req);
+	}
+	default:
+		break;
+	}
+
 	/* Wait for endpoint to be enabled */
 	ep = ffs_epfile_wait_ep(file);
 	if (IS_ERR(ep))
@@ -1869,6 +2285,7 @@  static int ffs_epfiles_create(struct ffs_data *ffs)
 	for (i = 1; i <= count; ++i, ++epfile) {
 		epfile->ffs = ffs;
 		mutex_init(&epfile->mutex);
+		INIT_LIST_HEAD(&epfile->dmabufs);
 		if (ffs->user_flags & FUNCTIONFS_VIRTUAL_ADDR)
 			sprintf(epfile->name, "ep%02x", ffs->eps_addrmap[i]);
 		else
diff --git a/include/uapi/linux/usb/functionfs.h b/include/uapi/linux/usb/functionfs.h
index 078098e73fd3..9f88de9c3d66 100644
--- a/include/uapi/linux/usb/functionfs.h
+++ b/include/uapi/linux/usb/functionfs.h
@@ -86,6 +86,22 @@  struct usb_ext_prop_desc {
 	__le16	wPropertyNameLength;
 } __attribute__((packed));
 
+/* Flags for usb_ffs_dmabuf_transfer_req->flags (none for now) */
+#define USB_FFS_DMABUF_TRANSFER_MASK	0x0
+
+/**
+ * struct usb_ffs_dmabuf_transfer_req - Transfer request for a DMABUF object
+ * @fd:		file descriptor of the DMABUF object
+ * @flags:	one or more USB_FFS_DMABUF_TRANSFER_* flags
+ * @length:	number of bytes used in this DMABUF for the data transfer.
+ *		Should generally be set to the DMABUF's size.
+ */
+struct usb_ffs_dmabuf_transfer_req {
+	int fd;
+	__u32 flags;
+	__u64 length;
+} __attribute__((packed));
+
 #ifndef __KERNEL__
 
 /*
@@ -290,6 +306,31 @@  struct usb_functionfs_event {
 #define	FUNCTIONFS_ENDPOINT_DESC	_IOR('g', 130, \
 					     struct usb_endpoint_descriptor)
 
+/*
+ * Attach the DMABUF object, identified by its file descriptor, to the
+ * data endpoint. Returns zero on success, and a negative errno value
+ * on error.
+ */
+#define FUNCTIONFS_DMABUF_ATTACH	_IOW('g', 131, int)
+
 
+/*
+ * Detach the given DMABUF object, identified by its file descriptor,
+ * from the data endpoint. Returns zero on success, and a negative
+ * errno value on error. Note that closing the endpoint's file
+ * descriptor will automatically detach all attached DMABUFs.
+ */
+#define FUNCTIONFS_DMABUF_DETACH	_IOW('g', 132, int)
+
+/*
+ * Enqueue the previously attached DMABUF to the transfer queue.
+ * The argument is a structure that packs the DMABUF's file descriptor,
+ * the size in bytes to transfer (which should generally correspond to
+ * the size of the DMABUF), and a 'flags' field which is unused
+ * for now. Returns zero on success, and a negative errno value on
+ * error.
+ */
+#define FUNCTIONFS_DMABUF_TRANSFER	_IOW('g', 133, \
+					     struct usb_ffs_dmabuf_transfer_req)
 
 #endif /* _UAPI__LINUX_FUNCTIONFS_H__ */
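
Not part of the patch: a minimal userspace sequence exercising the new
ioctls, most error handling elided; the endpoint and DMABUF file
descriptors are assumed to be obtained elsewhere.

#include <linux/usb/functionfs.h>
#include <sys/ioctl.h>

static int transfer_dmabuf(int ep_fd, int buf_fd, __u64 buf_size)
{
	struct usb_ffs_dmabuf_transfer_req req = {
		.fd = buf_fd,
		.flags = 0,		/* no flags defined yet */
		.length = buf_size,	/* generally the DMABUF's size */
	};
	int ret;

	ret = ioctl(ep_fd, FUNCTIONFS_DMABUF_ATTACH, &buf_fd);
	if (ret)
		return ret;

	ret = ioctl(ep_fd, FUNCTIONFS_DMABUF_TRANSFER, &req);

	/* optional: closing ep_fd would detach automatically */
	ioctl(ep_fd, FUNCTIONFS_DMABUF_DETACH, &buf_fd);

	return ret;
}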