diff mbox series

[v2,3/3] usb: chipidea: udc: create bounce buffer for problem sglist entries if possible

Message ID 20240923081203.2851768-3-xu.yang_2@nxp.com (mailing list archive)
State New
Headers show
Series [v2,1/3] usb: chipidea: add CI_HDRC_HAS_SHORT_PKT_LIMIT flag | expand

Commit Message

Xu Yang Sept. 23, 2024, 8:12 a.m. UTC
The chipidea controller doesn't fully support sglist, such as it can not
transfer data spanned more dTDs to form a bus packet, so it can only work
on very limited cases.

The limitations as below:
1. the end address of the first sg buffer must be 4KB aligned.
2. the start and end address of the middle sg buffer must be 4KB aligned.
3. the start address of the first sg buffer must be 4KB aligned.

However, not all the use cases violate these limitations. To make the
controller compatible with most of the cases, this will try to bounce the
problem sglist entries which can be found by sglist_get_invalid_entry().
Then a bounced line buffer (the size will roundup to page size) will be
allocated to replace the remaining problem sg entries. The data will be
copied between problem sg entries and bounce buffer according to the
transfer direction. The bounce buffer will be freed when the request
completed.

Acked-by: Peter Chen <peter.chen@kernel.com>
Signed-off-by: Xu Yang <xu.yang_2@nxp.com>

---
Changes in v2:
 - judge ci->has_short_pkt_limit
 - add Ack-by tag
---
 drivers/usb/chipidea/udc.c | 148 +++++++++++++++++++++++++++++++++++++
 drivers/usb/chipidea/udc.h |   2 +
 2 files changed, 150 insertions(+)

Comments

Kees Bakker Oct. 8, 2024, 8:27 p.m. UTC | #1
Op 23-09-2024 om 10:12 schreef Xu Yang:
> The chipidea controller doesn't fully support sglist, such as it can not
> transfer data spanned more dTDs to form a bus packet, so it can only work
> on very limited cases.
>
> The limitations as below:
> 1. the end address of the first sg buffer must be 4KB aligned.
> 2. the start and end address of the middle sg buffer must be 4KB aligned.
> 3. the start address of the first sg buffer must be 4KB aligned.
>
> However, not all the use cases violate these limitations. To make the
> controller compatible with most of the cases, this will try to bounce the
> problem sglist entries which can be found by sglist_get_invalid_entry().
> Then a bounced line buffer (the size will roundup to page size) will be
> allocated to replace the remaining problem sg entries. The data will be
> copied between problem sg entries and bounce buffer according to the
> transfer direction. The bounce buffer will be freed when the request
> completed.
>
> Acked-by: Peter Chen <peter.chen@kernel.com>
> Signed-off-by: Xu Yang <xu.yang_2@nxp.com>
>
> ---
> Changes in v2:
>   - judge ci->has_short_pkt_limit
>   - add Ack-by tag
> ---
>   drivers/usb/chipidea/udc.c | 148 +++++++++++++++++++++++++++++++++++++
>   drivers/usb/chipidea/udc.h |   2 +
>   2 files changed, 150 insertions(+)
> [...]
> @@ -552,6 +673,8 @@ static int _hardware_enqueue(struct ci_hw_ep *hwep, struct ci_hw_req *hwreq)
>   	struct ci_hdrc *ci = hwep->ci;
>   	int ret = 0;
>   	struct td_node *firstnode, *lastnode;
> +	unsigned int bounced_size;
> +	struct scatterlist *sg;
>   
>   	/* don't queue twice */
>   	if (hwreq->req.status == -EALREADY)
> @@ -559,11 +682,29 @@ static int _hardware_enqueue(struct ci_hw_ep *hwep, struct ci_hw_req *hwreq)
>   
>   	hwreq->req.status = -EALREADY;
>   
> +	if (hwreq->req.num_sgs && hwreq->req.length &&
> +		ci->has_short_pkt_limit) {
> +		ret = sglist_get_invalid_entry(ci->dev->parent, hwep->dir,
> +					&hwreq->req);
> +		if (ret < hwreq->req.num_sgs) {
bounced_size is only initialized here
> +			ret = sglist_do_bounce(hwreq, ret, hwep->dir == TX,
> +					&bounced_size);
> +			if (ret)
> +				return ret;
> +		}
> +	}
> +
>   	ret = usb_gadget_map_request_by_dev(ci->dev->parent,
>   					    &hwreq->req, hwep->dir);
>   	if (ret)
>   		return ret;
>   
> +	if (hwreq->sgt.sgl) {
> +		/* We've mapped a bigger buffer, now recover the actual size */
> +		sg = sg_last(hwreq->req.sg, hwreq->req.num_sgs);
bounced_size can be uninitialized at this point, if the earlier if 
condition is false.
> +		sg_dma_len(sg) = min(sg_dma_len(sg), bounced_size);
> +	}
> +
>
Xu Yang Oct. 9, 2024, 1:49 a.m. UTC | #2
Hi Kees,

On Tue, Oct 08, 2024 at 10:27:15PM +0200, Kees Bakker wrote:
> Op 23-09-2024 om 10:12 schreef Xu Yang:
> > The chipidea controller doesn't fully support sglist, such as it can not
> > transfer data spanned more dTDs to form a bus packet, so it can only work
> > on very limited cases.
> > 
> > The limitations as below:
> > 1. the end address of the first sg buffer must be 4KB aligned.
> > 2. the start and end address of the middle sg buffer must be 4KB aligned.
> > 3. the start address of the first sg buffer must be 4KB aligned.
> > 
> > However, not all the use cases violate these limitations. To make the
> > controller compatible with most of the cases, this will try to bounce the
> > problem sglist entries which can be found by sglist_get_invalid_entry().
> > Then a bounced line buffer (the size will roundup to page size) will be
> > allocated to replace the remaining problem sg entries. The data will be
> > copied between problem sg entries and bounce buffer according to the
> > transfer direction. The bounce buffer will be freed when the request
> > completed.
> > 
> > Acked-by: Peter Chen <peter.chen@kernel.com>
> > Signed-off-by: Xu Yang <xu.yang_2@nxp.com>
> > 
> > ---
> > Changes in v2:
> >   - judge ci->has_short_pkt_limit
> >   - add Ack-by tag
> > ---
> >   drivers/usb/chipidea/udc.c | 148 +++++++++++++++++++++++++++++++++++++
> >   drivers/usb/chipidea/udc.h |   2 +
> >   2 files changed, 150 insertions(+)
> > [...]
> > @@ -552,6 +673,8 @@ static int _hardware_enqueue(struct ci_hw_ep *hwep, struct ci_hw_req *hwreq)
> >   	struct ci_hdrc *ci = hwep->ci;
> >   	int ret = 0;
> >   	struct td_node *firstnode, *lastnode;
> > +	unsigned int bounced_size;
> > +	struct scatterlist *sg;
> >   	/* don't queue twice */
> >   	if (hwreq->req.status == -EALREADY)
> > @@ -559,11 +682,29 @@ static int _hardware_enqueue(struct ci_hw_ep *hwep, struct ci_hw_req *hwreq)
> >   	hwreq->req.status = -EALREADY;
> > +	if (hwreq->req.num_sgs && hwreq->req.length &&
> > +		ci->has_short_pkt_limit) {
> > +		ret = sglist_get_invalid_entry(ci->dev->parent, hwep->dir,
> > +					&hwreq->req);
> > +		if (ret < hwreq->req.num_sgs) {
> bounced_size is only initialized here
> > +			ret = sglist_do_bounce(hwreq, ret, hwep->dir == TX,
> > +					&bounced_size);
> > +			if (ret)
> > +				return ret;
> > +		}
> > +	}
> > +
> >   	ret = usb_gadget_map_request_by_dev(ci->dev->parent,
> >   					    &hwreq->req, hwep->dir);
> >   	if (ret)
> >   		return ret;
> > +	if (hwreq->sgt.sgl) {
> > +		/* We've mapped a bigger buffer, now recover the actual size */
> > +		sg = sg_last(hwreq->req.sg, hwreq->req.num_sgs);
> bounced_size can be uninitialized at this point, if the earlier if condition
> is false.

If sglist_do_bounce() isn't called, hwreq->sgt.sgl is NULL too.

Thanks,
Xu Yang
diff mbox series

Patch

diff --git a/drivers/usb/chipidea/udc.c b/drivers/usb/chipidea/udc.c
index 5badb39cb2bf..8a9b31fd5c89 100644
--- a/drivers/usb/chipidea/udc.c
+++ b/drivers/usb/chipidea/udc.c
@@ -10,6 +10,7 @@ 
 #include <linux/delay.h>
 #include <linux/device.h>
 #include <linux/dmapool.h>
+#include <linux/dma-direct.h>
 #include <linux/err.h>
 #include <linux/irqreturn.h>
 #include <linux/kernel.h>
@@ -540,6 +541,126 @@  static int prepare_td_for_sg(struct ci_hw_ep *hwep, struct ci_hw_req *hwreq)
 	return ret;
 }
 
+/*
+ * Verify if the scatterlist is valid by iterating each sg entry.
+ * Return invalid sg entry index which is less than num_sgs.
+ */
+static int sglist_get_invalid_entry(struct device *dma_dev, u8 dir,
+			struct usb_request *req)
+{
+	int i;
+	struct scatterlist *s = req->sg;
+
+	if (req->num_sgs == 1)
+		return 1;
+
+	dir = dir ? DMA_TO_DEVICE : DMA_FROM_DEVICE;
+
+	for (i = 0; i < req->num_sgs; i++, s = sg_next(s)) {
+		/* Only small sg (generally last sg) may be bounced. If
+		 * that happens. we can't ensure the addr is page-aligned
+		 * after dma map.
+		 */
+		if (dma_kmalloc_needs_bounce(dma_dev, s->length, dir))
+			break;
+
+		/* Make sure each sg start address (except first sg) is
+		 * page-aligned and end address (except last sg) is also
+		 * page-aligned.
+		 */
+		if (i == 0) {
+			if (!IS_ALIGNED(s->offset + s->length,
+						CI_HDRC_PAGE_SIZE))
+				break;
+		} else {
+			if (s->offset)
+				break;
+			if (!sg_is_last(s) && !IS_ALIGNED(s->length,
+						CI_HDRC_PAGE_SIZE))
+				break;
+		}
+	}
+
+	return i;
+}
+
+static int sglist_do_bounce(struct ci_hw_req *hwreq, int index,
+			bool copy, unsigned int *bounced)
+{
+	void *buf;
+	int i, ret, nents, num_sgs;
+	unsigned int rest, rounded;
+	struct scatterlist *sg, *src, *dst;
+
+	nents = index + 1;
+	ret = sg_alloc_table(&hwreq->sgt, nents, GFP_KERNEL);
+	if (ret)
+		return ret;
+
+	sg = src = hwreq->req.sg;
+	num_sgs = hwreq->req.num_sgs;
+	rest = hwreq->req.length;
+	dst = hwreq->sgt.sgl;
+
+	for (i = 0; i < index; i++) {
+		memcpy(dst, src, sizeof(*src));
+		rest -= src->length;
+		src = sg_next(src);
+		dst = sg_next(dst);
+	}
+
+	/* create one bounce buffer */
+	rounded = round_up(rest, CI_HDRC_PAGE_SIZE);
+	buf = kmalloc(rounded, GFP_KERNEL);
+	if (!buf) {
+		sg_free_table(&hwreq->sgt);
+		return -ENOMEM;
+	}
+
+	sg_set_buf(dst, buf, rounded);
+
+	hwreq->req.sg = hwreq->sgt.sgl;
+	hwreq->req.num_sgs = nents;
+	hwreq->sgt.sgl = sg;
+	hwreq->sgt.nents = num_sgs;
+
+	if (copy)
+		sg_copy_to_buffer(src, num_sgs - index, buf, rest);
+
+	*bounced = rest;
+
+	return 0;
+}
+
+static void sglist_do_debounce(struct ci_hw_req *hwreq, bool copy)
+{
+	void *buf;
+	int i, nents, num_sgs;
+	struct scatterlist *sg, *src, *dst;
+
+	sg = hwreq->req.sg;
+	num_sgs = hwreq->req.num_sgs;
+	src = sg_last(sg, num_sgs);
+	buf = sg_virt(src);
+
+	if (copy) {
+		dst = hwreq->sgt.sgl;
+		for (i = 0; i < num_sgs - 1; i++)
+			dst = sg_next(dst);
+
+		nents = hwreq->sgt.nents - num_sgs + 1;
+		sg_copy_from_buffer(dst, nents, buf, sg_dma_len(src));
+	}
+
+	hwreq->req.sg = hwreq->sgt.sgl;
+	hwreq->req.num_sgs = hwreq->sgt.nents;
+	hwreq->sgt.sgl = sg;
+	hwreq->sgt.nents = num_sgs;
+
+	kfree(buf);
+	sg_free_table(&hwreq->sgt);
+}
+
 /**
  * _hardware_enqueue: configures a request at hardware level
  * @hwep:   endpoint
@@ -552,6 +673,8 @@  static int _hardware_enqueue(struct ci_hw_ep *hwep, struct ci_hw_req *hwreq)
 	struct ci_hdrc *ci = hwep->ci;
 	int ret = 0;
 	struct td_node *firstnode, *lastnode;
+	unsigned int bounced_size;
+	struct scatterlist *sg;
 
 	/* don't queue twice */
 	if (hwreq->req.status == -EALREADY)
@@ -559,11 +682,29 @@  static int _hardware_enqueue(struct ci_hw_ep *hwep, struct ci_hw_req *hwreq)
 
 	hwreq->req.status = -EALREADY;
 
+	if (hwreq->req.num_sgs && hwreq->req.length &&
+		ci->has_short_pkt_limit) {
+		ret = sglist_get_invalid_entry(ci->dev->parent, hwep->dir,
+					&hwreq->req);
+		if (ret < hwreq->req.num_sgs) {
+			ret = sglist_do_bounce(hwreq, ret, hwep->dir == TX,
+					&bounced_size);
+			if (ret)
+				return ret;
+		}
+	}
+
 	ret = usb_gadget_map_request_by_dev(ci->dev->parent,
 					    &hwreq->req, hwep->dir);
 	if (ret)
 		return ret;
 
+	if (hwreq->sgt.sgl) {
+		/* We've mapped a bigger buffer, now recover the actual size */
+		sg = sg_last(hwreq->req.sg, hwreq->req.num_sgs);
+		sg_dma_len(sg) = min(sg_dma_len(sg), bounced_size);
+	}
+
 	if (hwreq->req.num_mapped_sgs)
 		ret = prepare_td_for_sg(hwep, hwreq);
 	else
@@ -745,6 +886,10 @@  static int _hardware_dequeue(struct ci_hw_ep *hwep, struct ci_hw_req *hwreq)
 	usb_gadget_unmap_request_by_dev(hwep->ci->dev->parent,
 					&hwreq->req, hwep->dir);
 
+	/* sglist bounced */
+	if (hwreq->sgt.sgl)
+		sglist_do_debounce(hwreq, hwep->dir == RX);
+
 	hwreq->req.actual += actual;
 
 	if (hwreq->req.status)
@@ -1592,6 +1737,9 @@  static int ep_dequeue(struct usb_ep *ep, struct usb_request *req)
 
 	usb_gadget_unmap_request(&hwep->ci->gadget, req, hwep->dir);
 
+	if (hwreq->sgt.sgl)
+		sglist_do_debounce(hwreq, false);
+
 	req->status = -ECONNRESET;
 
 	if (hwreq->req.complete != NULL) {
diff --git a/drivers/usb/chipidea/udc.h b/drivers/usb/chipidea/udc.h
index 5193df1e18c7..c8a47389a46b 100644
--- a/drivers/usb/chipidea/udc.h
+++ b/drivers/usb/chipidea/udc.h
@@ -69,11 +69,13 @@  struct td_node {
  * @req: request structure for gadget drivers
  * @queue: link to QH list
  * @tds: link to TD list
+ * @sgt: hold original sglist when bounce sglist
  */
 struct ci_hw_req {
 	struct usb_request	req;
 	struct list_head	queue;
 	struct list_head	tds;
+	struct sg_table		sgt;
 };
 
 #ifdef CONFIG_USB_CHIPIDEA_UDC