diff mbox series

[2/2] usb: cdns3: Optimize DMA request buffer allocation

Message ID 1615267180-9289-2-git-send-email-sparmar@cadence.com (mailing list archive)
State New, archived
Headers show
Series [1/2] usb: cdns3: Use dma_pool_* api to alloc trb pool | expand

Commit Message

Sanket Parmar March 9, 2021, 5:19 a.m. UTC
dma_alloc_coherent() might fail on platforms with a small DMA region.

To avoid such a failure in cdns3_prepare_aligned_request_buf(),
dma_alloc_coherent() is replaced with kmalloc() and the dma_map API to
allocate an aligned request buffer of dynamic length.

Fixes: 7733f6c32e36 ("usb: cdns3: Add Cadence USB3 DRD Driver")
Reported-by: Aswath Govindraju <a-govindraju@ti.com>
Signed-off-by: Sanket Parmar <sparmar@cadence.com>
---
 drivers/usb/cdns3/cdns3-gadget.c |   73 +++++++++++++++++++++++++------------
 drivers/usb/cdns3/cdns3-gadget.h |    2 +
 2 files changed, 51 insertions(+), 24 deletions(-)

Comments

Christoph Hellwig March 9, 2021, 9:28 a.m. UTC | #1
On Tue, Mar 09, 2021 at 06:19:40AM +0100, Sanket Parmar wrote:
> dma_alloc_coherent() might fail on the platform with a small DMA region.
> 
> To avoid such failure in cdns3_prepare_aligned_request_buf(),
> dma_alloc_coherent() is replaced with kmalloc and dma_map API to
> allocate aligned request buffer of dynamic length.

dma_alloc_noncoherent is the proper API instead of using kmalloc, which
can lead to unaddressable memory that might require bounce buffering.
Sanket Parmar March 9, 2021, 10:18 a.m. UTC | #2
> On Tue, Mar 09, 2021 at 06:19:40AM +0100, Sanket Parmar wrote:
> > dma_alloc_coherent() might fail on the platform with a small DMA region.
> >
> > To avoid such failure in cdns3_prepare_aligned_request_buf(),
> > dma_alloc_coherent() is replaced with kmalloc and dma_map API to
> > allocate aligned request buffer of dynamic length.
> 
> dma_alloc_noncoherent is the proper API instead of using kmalloc, which
> can lead to unaddressable memory that might require bounce buffering.

The cdns3 device requires a DMA-coherent buffer to perform operations, so
dma_alloc_noncoherent will not help here.

Also, all gadget classes (except g_ether) use kmalloc to allocate the request
buffer, and the device driver uses usb_gadget_map_request_by_dev to map the
request buffer. A similar approach is used here to allocate the aligned buffer.

Thanks,
Sanket
Christoph Hellwig March 9, 2021, 10:31 a.m. UTC | #3
On Tue, Mar 09, 2021 at 10:18:43AM +0000, Sanket Parmar wrote:
> > On Tue, Mar 09, 2021 at 06:19:40AM +0100, Sanket Parmar wrote:
> > > dma_alloc_coherent() might fail on the platform with a small DMA region.
> > >
> > > To avoid such failure in cdns3_prepare_aligned_request_buf(),
> > > dma_alloc_coherent() is replaced with kmalloc and dma_map API to
> > > allocate aligned request buffer of dynamic length.
> > 
> > dma_alloc_noncoherent is the proper API instead of using kmalloc, which
> > can lead to unaddressable memory that might require bounce buffering.
> 
> cdns3 device required DMA coherent buffer to perform operations. So 
> dma_alloc_noncoherent will not help here.
> 
> Also all gadget classes(except g_ether) use kmalloc to allocated request buffer,
> and device driver uses usb_gadget_map_request_by_dev to map the request
> buffer. Similar approach is used to allocate aligned buffer. 

If you can use kmalloc and dma_map_single, you can use
dma_alloc_noncoherent by definition.
Sanket Parmar March 9, 2021, 10:49 a.m. UTC | #4
> On Tue, Mar 09, 2021 at 10:18:43AM +0000, Sanket Parmar wrote:
> > > On Tue, Mar 09, 2021 at 06:19:40AM +0100, Sanket Parmar wrote:
> > > > dma_alloc_coherent() might fail on the platform with a small DMA
> region.
> > > >
> > > > To avoid such failure in cdns3_prepare_aligned_request_buf(),
> > > > dma_alloc_coherent() is replaced with kmalloc and dma_map API to
> > > > allocate aligned request buffer of dynamic length.
> > >
> > > dma_alloc_noncoherent is the proper API instead of using kmalloc, which
> > > can lead to unaddressable memory that might require bounce buffering.
> >
> > cdns3 device required DMA coherent buffer to perform operations. So
> > dma_alloc_noncoherent will not help here.
> >
> > Also all gadget classes(except g_ether) use kmalloc to allocated request
> buffer,
> > and device driver uses usb_gadget_map_request_by_dev to map the
> request
> > buffer. Similar approach is used to allocate aligned buffer.
> 
> If you can use kmalloc and dma_map_single you can use
> dma_alloc_noncoherent per definition.

Okay. I was not aware of it. I will test it. 
Thank you for your feedback.

--
Sanket
Peter Chen March 14, 2021, 5:10 a.m. UTC | #5
On 21-03-09 06:19:40, Sanket Parmar wrote:
> dma_alloc_coherent() might fail on the platform with a small DMA region.
> 
> To avoid such failure in cdns3_prepare_aligned_request_buf(),
> dma_alloc_coherent() is replaced with kmalloc and dma_map API to
> allocate aligned request buffer of dynamic length.
> 
> Fixes: 7733f6c32e36 ("usb: cdns3: Add Cadence USB3 DRD Driver")

Same comment as on the 1st patch: this is not a bug fix.

> Reported-by: Aswath Govindraju <a-govindraju@ti.com>
> Signed-off-by: Sanket Parmar <sparmar@cadence.com>
> ---
>  drivers/usb/cdns3/cdns3-gadget.c |   73 +++++++++++++++++++++++++------------
>  drivers/usb/cdns3/cdns3-gadget.h |    2 +
>  2 files changed, 51 insertions(+), 24 deletions(-)
> 
> diff --git a/drivers/usb/cdns3/cdns3-gadget.c b/drivers/usb/cdns3/cdns3-gadget.c
> index 5f51215..b4955ce 100644
> --- a/drivers/usb/cdns3/cdns3-gadget.c
> +++ b/drivers/usb/cdns3/cdns3-gadget.c
> @@ -818,10 +818,26 @@ void cdns3_gadget_giveback(struct cdns3_endpoint *priv_ep,
>  	usb_gadget_unmap_request_by_dev(priv_dev->sysdev, request,
>  					priv_ep->dir);
>  
> -	if ((priv_req->flags & REQUEST_UNALIGNED) &&
> -	    priv_ep->dir == USB_DIR_OUT && !request->status)
> -		memcpy(request->buf, priv_req->aligned_buf->buf,
> -		       request->length);
> +	if ((priv_req->flags & REQUEST_UNALIGNED) && priv_req->aligned_buf) {
> +		struct cdns3_aligned_buf *buf;
> +
> +		buf = priv_req->aligned_buf;
> +		dma_unmap_single(priv_dev->sysdev, buf->dma, buf->size,
> +			buf->dir);
> +		priv_req->flags &= ~REQUEST_UNALIGNED;
> +
> +		if (priv_ep->dir == USB_DIR_OUT && !request->status) {
> +			memcpy(request->buf, priv_req->aligned_buf->buf,
> +			       request->length);
> +		}
> +
> +		trace_cdns3_free_aligned_request(priv_req);
> +		priv_req->aligned_buf->in_use = 0;
> +		queue_work(system_freezable_wq,
> +			   &priv_dev->aligned_buf_wq);
> +		priv_req->aligned_buf = NULL;
> +
> +	}
>  
>  	priv_req->flags &= ~(REQUEST_PENDING | REQUEST_UNALIGNED);
>  	/* All TRBs have finished, clear the counter */
> @@ -883,8 +899,7 @@ static void cdns3_free_aligned_request_buf(struct work_struct *work)
>  			 * interrupts.
>  			 */
>  			spin_unlock_irqrestore(&priv_dev->lock, flags);
> -			dma_free_coherent(priv_dev->sysdev, buf->size,
> -					  buf->buf, buf->dma);
> +			kfree(buf->buf);
>  			kfree(buf);
>  			spin_lock_irqsave(&priv_dev->lock, flags);
>  		}
> @@ -910,27 +925,16 @@ static int cdns3_prepare_aligned_request_buf(struct cdns3_request *priv_req)
>  		if (!buf)
>  			return -ENOMEM;
>  
> -		buf->size = priv_req->request.length;
> +		buf->size = usb_endpoint_dir_out(priv_ep->endpoint.desc) ?
> +				usb_ep_align(&(priv_ep->endpoint), priv_req->request.length)
> +				: priv_req->request.length;
>  
> -		buf->buf = dma_alloc_coherent(priv_dev->sysdev,
> -					      buf->size,
> -					      &buf->dma,
> -					      GFP_ATOMIC);
> +		buf->buf = kmalloc(buf->size, GFP_ATOMIC);
>  		if (!buf->buf) {
>  			kfree(buf);
>  			return -ENOMEM;
>  		}
>  
> -		if (priv_req->aligned_buf) {
> -			trace_cdns3_free_aligned_request(priv_req);
> -			priv_req->aligned_buf->in_use = 0;
> -			queue_work(system_freezable_wq,
> -				   &priv_dev->aligned_buf_wq);
> -		}
> -
> -		buf->in_use = 1;
> -		priv_req->aligned_buf = buf;
> -
>  		list_add_tail(&buf->list,
>  			      &priv_dev->aligned_buf_list);
>  	}
> @@ -940,6 +944,27 @@ static int cdns3_prepare_aligned_request_buf(struct cdns3_request *priv_req)
>  		       priv_req->request.length);
>  	}
>  
> +	if (priv_req->aligned_buf) {
> +		trace_cdns3_free_aligned_request(priv_req);
> +		priv_req->aligned_buf->in_use = 0;
> +		queue_work(system_freezable_wq,
> +			   &priv_dev->aligned_buf_wq);

@Pawel, do you remember when this condition is met?

> +	}
> +
> +	buf->dir =  priv_ep->dir ? DMA_TO_DEVICE : DMA_FROM_DEVICE;
> +	buf->in_use = 1;
> +	priv_req->aligned_buf = buf;
> +
> +	buf->dma = dma_map_single(priv_dev->sysdev, buf->buf, buf->size,
> +				buf->dir);
> +
> +	if (dma_mapping_error(priv_dev->sysdev, buf->dma)) {
> +		dev_err(priv_dev->dev, "Failed to map buffer\n");
> +		kfree(buf->buf);
> +		kfree(buf);
> +		return -EFAULT;
> +	}
> +
>  	priv_req->flags |= REQUEST_UNALIGNED;
>  	trace_cdns3_prepare_aligned_request(priv_req);
>  
> @@ -3088,11 +3113,11 @@ static void cdns3_gadget_exit(struct cdns *cdns)
>  		struct cdns3_aligned_buf *buf;
>  
>  		buf = cdns3_next_align_buf(&priv_dev->aligned_buf_list);
> -		dma_free_coherent(priv_dev->sysdev, buf->size,
> -				  buf->buf,
> -				  buf->dma);
> +		dma_unmap_single(priv_dev->sysdev, buf->dma, buf->size,
> +			buf->dir);

The DMA unmap is only needed after DMA has completed. At this point the buf
is no longer in use (otherwise the kfree below would cause an issue).

>  
>  		list_del(&buf->list);
> +		kfree(buf->buf);
>  		kfree(buf);
>  	}
>  
> diff --git a/drivers/usb/cdns3/cdns3-gadget.h b/drivers/usb/cdns3/cdns3-gadget.h
> index ecf9b91..c5660f2 100644
> --- a/drivers/usb/cdns3/cdns3-gadget.h
> +++ b/drivers/usb/cdns3/cdns3-gadget.h
> @@ -12,6 +12,7 @@
>  #ifndef __LINUX_CDNS3_GADGET
>  #define __LINUX_CDNS3_GADGET
>  #include <linux/usb/gadget.h>
> +#include <linux/dma-direction.h>
>  
>  /*
>   * USBSS-DEV register interface.
> @@ -1205,6 +1206,7 @@ struct cdns3_aligned_buf {
>  	void			*buf;
>  	dma_addr_t		dma;
>  	u32			size;
> +	enum dma_data_direction dir;
>  	unsigned		in_use:1;
>  	struct list_head	list;
>  };
> -- 
> 1.7.1
>
Sanket Parmar March 15, 2021, 3:51 p.m. UTC | #6
> 
> On 21-03-09 06:19:40, Sanket Parmar wrote:
> > dma_alloc_coherent() might fail on the platform with a small DMA region.
> >
> > To avoid such failure in cdns3_prepare_aligned_request_buf(),
> > dma_alloc_coherent() is replaced with kmalloc and dma_map API to
> > allocate aligned request buffer of dynamic length.
> >
> > Fixes: 7733f6c32e36 ("usb: cdns3: Add Cadence USB3 DRD Driver")
> 
> The comment with the 1st patch, it is not a bug-fix.

I will remove this. 

> 
> > Reported-by: Aswath Govindraju <a-govindraju@ti.com>
> > Signed-off-by: Sanket Parmar <sparmar@cadence.com>
> > ---
> >  drivers/usb/cdns3/cdns3-gadget.c |   73 +++++++++++++++++++++++++--
> ----------
> >  drivers/usb/cdns3/cdns3-gadget.h |    2 +
> >  2 files changed, 51 insertions(+), 24 deletions(-)
> >
> > diff --git a/drivers/usb/cdns3/cdns3-gadget.c b/drivers/usb/cdns3/cdns3-
> gadget.c
> > index 5f51215..b4955ce 100644
> > --- a/drivers/usb/cdns3/cdns3-gadget.c
> > +++ b/drivers/usb/cdns3/cdns3-gadget.c
> > @@ -818,10 +818,26 @@ void cdns3_gadget_giveback(struct
> cdns3_endpoint *priv_ep,
> >  	usb_gadget_unmap_request_by_dev(priv_dev->sysdev, request,
> >  					priv_ep->dir);
> >
> > -	if ((priv_req->flags & REQUEST_UNALIGNED) &&
> > -	    priv_ep->dir == USB_DIR_OUT && !request->status)
> > -		memcpy(request->buf, priv_req->aligned_buf->buf,
> > -		       request->length);
> > +	if ((priv_req->flags & REQUEST_UNALIGNED) && priv_req-
> >aligned_buf) {
> > +		struct cdns3_aligned_buf *buf;
> > +
> > +		buf = priv_req->aligned_buf;
> > +		dma_unmap_single(priv_dev->sysdev, buf->dma, buf->size,
> > +			buf->dir);
> > +		priv_req->flags &= ~REQUEST_UNALIGNED;
> > +
> > +		if (priv_ep->dir == USB_DIR_OUT && !request->status) {
> > +			memcpy(request->buf, priv_req->aligned_buf->buf,
> > +			       request->length);
> > +		}
> > +
> > +		trace_cdns3_free_aligned_request(priv_req);
> > +		priv_req->aligned_buf->in_use = 0;
> > +		queue_work(system_freezable_wq,
> > +			   &priv_dev->aligned_buf_wq);
> > +		priv_req->aligned_buf = NULL;
> > +
> > +	}
> >
> >  	priv_req->flags &= ~(REQUEST_PENDING | REQUEST_UNALIGNED);
> >  	/* All TRBs have finished, clear the counter */
> > @@ -883,8 +899,7 @@ static void cdns3_free_aligned_request_buf(struct
> work_struct *work)
> >  			 * interrupts.
> >  			 */
> >  			spin_unlock_irqrestore(&priv_dev->lock, flags);
> > -			dma_free_coherent(priv_dev->sysdev, buf->size,
> > -					  buf->buf, buf->dma);
> > +			kfree(buf->buf);
> >  			kfree(buf);
> >  			spin_lock_irqsave(&priv_dev->lock, flags);
> >  		}
> > @@ -910,27 +925,16 @@ static int
> cdns3_prepare_aligned_request_buf(struct cdns3_request *priv_req)
> >  		if (!buf)
> >  			return -ENOMEM;
> >
> > -		buf->size = priv_req->request.length;
> > +		buf->size = usb_endpoint_dir_out(priv_ep->endpoint.desc)
> ?
> > +				usb_ep_align(&(priv_ep->endpoint),
> priv_req->request.length)
> > +				: priv_req->request.length;
> >
> > -		buf->buf = dma_alloc_coherent(priv_dev->sysdev,
> > -					      buf->size,
> > -					      &buf->dma,
> > -					      GFP_ATOMIC);
> > +		buf->buf = kmalloc(buf->size, GFP_ATOMIC);
> >  		if (!buf->buf) {
> >  			kfree(buf);
> >  			return -ENOMEM;
> >  		}
> >
> > -		if (priv_req->aligned_buf) {
> > -			trace_cdns3_free_aligned_request(priv_req);
> > -			priv_req->aligned_buf->in_use = 0;
> > -			queue_work(system_freezable_wq,
> > -				   &priv_dev->aligned_buf_wq);
> > -		}
> > -
> > -		buf->in_use = 1;
> > -		priv_req->aligned_buf = buf;
> > -
> >  		list_add_tail(&buf->list,
> >  			      &priv_dev->aligned_buf_list);
> >  	}
> > @@ -940,6 +944,27 @@ static int
> cdns3_prepare_aligned_request_buf(struct cdns3_request *priv_req)
> >  		       priv_req->request.length);
> >  	}
> >
> > +	if (priv_req->aligned_buf) {
> > +		trace_cdns3_free_aligned_request(priv_req);
> > +		priv_req->aligned_buf->in_use = 0;
> > +		queue_work(system_freezable_wq,
> > +			   &priv_dev->aligned_buf_wq);
> 
> @Pawel, do you remember when this condition is met?
> 
> > +	}
> > +
> > +	buf->dir =  priv_ep->dir ? DMA_TO_DEVICE : DMA_FROM_DEVICE;
> > +	buf->in_use = 1;
> > +	priv_req->aligned_buf = buf;
> > +
> > +	buf->dma = dma_map_single(priv_dev->sysdev, buf->buf, buf-
> >size,
> > +				buf->dir);
> > +
> > +	if (dma_mapping_error(priv_dev->sysdev, buf->dma)) {
> > +		dev_err(priv_dev->dev, "Failed to map buffer\n");
> > +		kfree(buf->buf);
> > +		kfree(buf);
> > +		return -EFAULT;
> > +	}
> > +
> >  	priv_req->flags |= REQUEST_UNALIGNED;
> >  	trace_cdns3_prepare_aligned_request(priv_req);
> >
> > @@ -3088,11 +3113,11 @@ static void cdns3_gadget_exit(struct cdns
> *cdns)
> >  		struct cdns3_aligned_buf *buf;
> >
> >  		buf = cdns3_next_align_buf(&priv_dev->aligned_buf_list);
> > -		dma_free_coherent(priv_dev->sysdev, buf->size,
> > -				  buf->buf,
> > -				  buf->dma);
> > +		dma_unmap_single(priv_dev->sysdev, buf->dma, buf->size,
> > +			buf->dir);
> 
> It only needs to DMA unmap after DMA has completed, this buf will not be
> used, otherwise, the kfree below will cause issue.

This part is not clear to me. The aligned DMA buffer is allocated and mapped in cdns3_prepare_aligned_request_buf()
and put into aligned_buf_list. While unloading the gadget, we need to undo this if aligned_buf_list is not
empty. Am I missing something here?

Also, I will post v2 of this patch, which uses the dma_*_noncoherent APIs suggested by Christoph Hellwig.
 
> 
> >
> >  		list_del(&buf->list);
> > +		kfree(buf->buf);
> >  		kfree(buf);
> >  	}
> >
> > diff --git a/drivers/usb/cdns3/cdns3-gadget.h b/drivers/usb/cdns3/cdns3-
> gadget.h
> > index ecf9b91..c5660f2 100644
> > --- a/drivers/usb/cdns3/cdns3-gadget.h
> > +++ b/drivers/usb/cdns3/cdns3-gadget.h
> > @@ -12,6 +12,7 @@
> >  #ifndef __LINUX_CDNS3_GADGET
> >  #define __LINUX_CDNS3_GADGET
> >  #include <linux/usb/gadget.h>
> > +#include <linux/dma-direction.h>
> >
> >  /*
> >   * USBSS-DEV register interface.
> > @@ -1205,6 +1206,7 @@ struct cdns3_aligned_buf {
> >  	void			*buf;
> >  	dma_addr_t		dma;
> >  	u32			size;
> > +	enum dma_data_direction dir;
> >  	unsigned		in_use:1;
> >  	struct list_head	list;
> >  };
> > --
> > 1.7.1
> >
> 
> --
> 
> Thanks,
> Peter Chen


Thanks,
Sanket
Peter Chen March 16, 2021, 12:33 a.m. UTC | #7
On 21-03-15 15:51:04, Sanket Parmar wrote:
> > > +
> > >  	priv_req->flags |= REQUEST_UNALIGNED;
> > >  	trace_cdns3_prepare_aligned_request(priv_req);
> > >
> > > @@ -3088,11 +3113,11 @@ static void cdns3_gadget_exit(struct cdns
> > *cdns)
> > >  		struct cdns3_aligned_buf *buf;
> > >
> > >  		buf = cdns3_next_align_buf(&priv_dev->aligned_buf_list);
> > > -		dma_free_coherent(priv_dev->sysdev, buf->size,
> > > -				  buf->buf,
> > > -				  buf->dma);
> > > +		dma_unmap_single(priv_dev->sysdev, buf->dma, buf->size,
> > > +			buf->dir);
> > 
> > It only needs to DMA unmap after DMA has completed, this buf will not be
> > used, otherwise, the kfree below will cause issue.
> 
> This part is not clear.  Aligned DMA buffer is allocated and mapped in cdns3_prepare_aligned_request_buf()
> and put into aligned_buf_list. While unloading the gadget, We need to undo the same if aligned_buf_list is not
> empty.  Am I missing something here? 

My point is that this unmap operation is unnecessary, since there is no user
of the aligned buf and kfree is called afterwards. You could also keep it, as
it does no harm.

> 
> Also, I will post v2 of this patch which uses dma_*_noncoherent APIs suggested by Christoph Hellwig.
diff mbox series

Patch

diff --git a/drivers/usb/cdns3/cdns3-gadget.c b/drivers/usb/cdns3/cdns3-gadget.c
index 5f51215..b4955ce 100644
--- a/drivers/usb/cdns3/cdns3-gadget.c
+++ b/drivers/usb/cdns3/cdns3-gadget.c
@@ -818,10 +818,26 @@  void cdns3_gadget_giveback(struct cdns3_endpoint *priv_ep,
 	usb_gadget_unmap_request_by_dev(priv_dev->sysdev, request,
 					priv_ep->dir);
 
-	if ((priv_req->flags & REQUEST_UNALIGNED) &&
-	    priv_ep->dir == USB_DIR_OUT && !request->status)
-		memcpy(request->buf, priv_req->aligned_buf->buf,
-		       request->length);
+	if ((priv_req->flags & REQUEST_UNALIGNED) && priv_req->aligned_buf) {
+		struct cdns3_aligned_buf *buf;
+
+		buf = priv_req->aligned_buf;
+		dma_unmap_single(priv_dev->sysdev, buf->dma, buf->size,
+			buf->dir);
+		priv_req->flags &= ~REQUEST_UNALIGNED;
+
+		if (priv_ep->dir == USB_DIR_OUT && !request->status) {
+			memcpy(request->buf, priv_req->aligned_buf->buf,
+			       request->length);
+		}
+
+		trace_cdns3_free_aligned_request(priv_req);
+		priv_req->aligned_buf->in_use = 0;
+		queue_work(system_freezable_wq,
+			   &priv_dev->aligned_buf_wq);
+		priv_req->aligned_buf = NULL;
+
+	}
 
 	priv_req->flags &= ~(REQUEST_PENDING | REQUEST_UNALIGNED);
 	/* All TRBs have finished, clear the counter */
@@ -883,8 +899,7 @@  static void cdns3_free_aligned_request_buf(struct work_struct *work)
 			 * interrupts.
 			 */
 			spin_unlock_irqrestore(&priv_dev->lock, flags);
-			dma_free_coherent(priv_dev->sysdev, buf->size,
-					  buf->buf, buf->dma);
+			kfree(buf->buf);
 			kfree(buf);
 			spin_lock_irqsave(&priv_dev->lock, flags);
 		}
@@ -910,27 +925,16 @@  static int cdns3_prepare_aligned_request_buf(struct cdns3_request *priv_req)
 		if (!buf)
 			return -ENOMEM;
 
-		buf->size = priv_req->request.length;
+		buf->size = usb_endpoint_dir_out(priv_ep->endpoint.desc) ?
+				usb_ep_align(&(priv_ep->endpoint), priv_req->request.length)
+				: priv_req->request.length;
 
-		buf->buf = dma_alloc_coherent(priv_dev->sysdev,
-					      buf->size,
-					      &buf->dma,
-					      GFP_ATOMIC);
+		buf->buf = kmalloc(buf->size, GFP_ATOMIC);
 		if (!buf->buf) {
 			kfree(buf);
 			return -ENOMEM;
 		}
 
-		if (priv_req->aligned_buf) {
-			trace_cdns3_free_aligned_request(priv_req);
-			priv_req->aligned_buf->in_use = 0;
-			queue_work(system_freezable_wq,
-				   &priv_dev->aligned_buf_wq);
-		}
-
-		buf->in_use = 1;
-		priv_req->aligned_buf = buf;
-
 		list_add_tail(&buf->list,
 			      &priv_dev->aligned_buf_list);
 	}
@@ -940,6 +944,27 @@  static int cdns3_prepare_aligned_request_buf(struct cdns3_request *priv_req)
 		       priv_req->request.length);
 	}
 
+	if (priv_req->aligned_buf) {
+		trace_cdns3_free_aligned_request(priv_req);
+		priv_req->aligned_buf->in_use = 0;
+		queue_work(system_freezable_wq,
+			   &priv_dev->aligned_buf_wq);
+	}
+
+	buf->dir =  priv_ep->dir ? DMA_TO_DEVICE : DMA_FROM_DEVICE;
+	buf->in_use = 1;
+	priv_req->aligned_buf = buf;
+
+	buf->dma = dma_map_single(priv_dev->sysdev, buf->buf, buf->size,
+				buf->dir);
+
+	if (dma_mapping_error(priv_dev->sysdev, buf->dma)) {
+		dev_err(priv_dev->dev, "Failed to map buffer\n");
+		kfree(buf->buf);
+		kfree(buf);
+		return -EFAULT;
+	}
+
 	priv_req->flags |= REQUEST_UNALIGNED;
 	trace_cdns3_prepare_aligned_request(priv_req);
 
@@ -3088,11 +3113,11 @@  static void cdns3_gadget_exit(struct cdns *cdns)
 		struct cdns3_aligned_buf *buf;
 
 		buf = cdns3_next_align_buf(&priv_dev->aligned_buf_list);
-		dma_free_coherent(priv_dev->sysdev, buf->size,
-				  buf->buf,
-				  buf->dma);
+		dma_unmap_single(priv_dev->sysdev, buf->dma, buf->size,
+			buf->dir);
 
 		list_del(&buf->list);
+		kfree(buf->buf);
 		kfree(buf);
 	}
 
diff --git a/drivers/usb/cdns3/cdns3-gadget.h b/drivers/usb/cdns3/cdns3-gadget.h
index ecf9b91..c5660f2 100644
--- a/drivers/usb/cdns3/cdns3-gadget.h
+++ b/drivers/usb/cdns3/cdns3-gadget.h
@@ -12,6 +12,7 @@ 
 #ifndef __LINUX_CDNS3_GADGET
 #define __LINUX_CDNS3_GADGET
 #include <linux/usb/gadget.h>
+#include <linux/dma-direction.h>
 
 /*
  * USBSS-DEV register interface.
@@ -1205,6 +1206,7 @@  struct cdns3_aligned_buf {
 	void			*buf;
 	dma_addr_t		dma;
 	u32			size;
+	enum dma_data_direction dir;
 	unsigned		in_use:1;
 	struct list_head	list;
 };