diff mbox

[v3,1/2] usb: dwc2: alloc dma aligned buffer for isoc split in

Message ID 1525748846-7767-2-git-send-email-william.wu@rock-chips.com (mailing list archive)
State New, archived
Headers show

Commit Message

William Wu May 8, 2018, 3:07 a.m. UTC
The commit 3bc04e28a030 ("usb: dwc2: host: Get aligned DMA in
a more supported way") rips out a lot of code to simply the
allocation of aligned DMA. However, it also introduces a new
issue when use isoc split in transfer.

In my test case, I connect the dwc2 controller with an usb hs
Hub (GL852G-12), and plug an usb fs audio device (Plantronics
headset) into the downstream port of Hub. Then use the usb mic
to record, we can find noise when playback.

It's because that the usb Hub uses an MDATA for the first
transaction and a DATA0 for the second transaction for the isoc
split in transaction. An typical isoc split in transaction sequence
like this:

- SSPLIT IN transaction
- CSPLIT IN transaction
  - MDATA packet
- CSPLIT IN transaction
  - DATA0 packet

The DMA address of MDATA (urb->dma) is always DWORD-aligned, but
the DMA address of DATA0 (urb->dma + qtd->isoc_split_offset) may
not be DWORD-aligned, it depends on the qtd->isoc_split_offset (the
length of MDATA). In my test case, the length of MDATA is usually
unaligned, this cause DATA0 packet transmission error.

This patch use kmem_cache to allocate aligned DMA buf for isoc
split in transaction. Note that according to usb 2.0 spec, the
maximum data payload size is 1023 bytes for each fs isoc ep,
and the maximum allowable interrupt data payload size is 64 bytes
or less for fs interrupt ep. So we set the size of object to be
1024 bytes in the kmem cache.

Signed-off-by: William Wu <william.wu@rock-chips.com>
---
Changes in v3:
- Modify the commit message
- Use Kmem_cache to allocate aligned DMA buf
- Fix coding style

Changes in v2:
- None

 drivers/usb/dwc2/core.h      |  3 ++
 drivers/usb/dwc2/hcd.c       | 85 ++++++++++++++++++++++++++++++++++++++++++--
 drivers/usb/dwc2/hcd.h       | 10 ++++++
 drivers/usb/dwc2/hcd_intr.c  |  8 +++++
 drivers/usb/dwc2/hcd_queue.c |  3 ++
 5 files changed, 106 insertions(+), 3 deletions(-)

Comments

Doug Anderson May 8, 2018, 5:11 a.m. UTC | #1
Hi,

On Mon, May 7, 2018 at 8:07 PM, William Wu <william.wu@rock-chips.com> wrote:
> +static int dwc2_alloc_split_dma_aligned_buf(struct dwc2_hsotg *hsotg,
> +                                           struct dwc2_qh *qh,
> +                                           struct dwc2_host_chan *chan)
> +{
> +       if (!hsotg->unaligned_cache)
> +               return -ENOMEM;
> +
> +       if (!qh->dw_align_buf) {
> +               qh->dw_align_buf = kmem_cache_alloc(hsotg->unaligned_cache,
> +                                                   GFP_ATOMIC | GFP_DMA);
> +               if (!qh->dw_align_buf)
> +                       return -ENOMEM;
> +
> +               qh->dw_align_buf_size = min_t(u32, chan->max_packet,
> +                                             DWC2_KMEM_UNALIGNED_BUF_SIZE);

Rather than using min_t, wouldn't it be better to return -ENOMEM if
"max_packet" > DWC2_KMEM_UNALIGNED_BUF_SIZE?  As it is, you might
allocate less space than you need, right?  That seems like it would be
bad (even though this is probably impossible).


> @@ -2797,6 +2837,32 @@ static int dwc2_assign_and_init_hc(struct dwc2_hsotg *hsotg, struct dwc2_qh *qh)
>         /* Set the transfer attributes */
>         dwc2_hc_init_xfer(hsotg, chan, qtd);
>
> +       /* For non-dword aligned buffers */
> +       if (hsotg->params.host_dma && qh->do_split &&
> +           chan->ep_is_in && (chan->xfer_dma & 0x3)) {
> +               dev_vdbg(hsotg->dev, "Non-aligned buffer\n");
> +               if (dwc2_alloc_split_dma_aligned_buf(hsotg, qh, chan)) {
> +                       dev_err(hsotg->dev,
> +                               "Failed to allocate memory to handle non-aligned buffer\n");
> +                       /* Add channel back to free list */
> +                       chan->align_buf = 0;
> +                       chan->multi_count = 0;
> +                       list_add_tail(&chan->hc_list_entry,
> +                                     &hsotg->free_hc_list);
> +                       qtd->in_process = 0;
> +                       qh->channel = NULL;
> +                       return -ENOMEM;
> +               }
> +       } else {
> +               /*
> +                * We assume that DMA is always aligned in non-split
> +                * case or split out case. Warn if not.
> +                */
> +               WARN_ON_ONCE(hsotg->params.host_dma &&
> +                            (chan->xfer_dma & 0x3));
> +               chan->align_buf = 0;
> +       }
> +
>         if (chan->ep_type == USB_ENDPOINT_XFER_INT ||
>             chan->ep_type == USB_ENDPOINT_XFER_ISOC)
>                 /*
> @@ -5241,6 +5307,17 @@ int dwc2_hcd_init(struct dwc2_hsotg *hsotg)
>                         hsotg->params.dma_desc_enable = false;
>                         hsotg->params.dma_desc_fs_enable = false;
>                 }
> +       } else if (hsotg->params.host_dma) {

Are you sure this is "else if"?  Can't you have descriptor DMA enabled
in the controller and still need to do a normal DMA transfer if you
plug in a hub?  Seems like this should be just "if".


> +               /*
> +                * Create kmem caches to handle non-aligned buffer
> +                * in Buffer DMA mode.
> +                */
> +               hsotg->unaligned_cache = kmem_cache_create("dwc2-unaligned-dma",
> +                                               DWC2_KMEM_UNALIGNED_BUF_SIZE, 4,

Worth using "DWC2_USB_DMA_ALIGN" rather than 4?


> +                                               SLAB_CACHE_DMA, NULL);
> +               if (!hsotg->unaligned_cache)
> +                       dev_err(hsotg->dev,
> +                               "unable to create dwc2 unaligned cache\n");
>         }
>
>         hsotg->otg_port = 1;
> @@ -5279,6 +5356,7 @@ int dwc2_hcd_init(struct dwc2_hsotg *hsotg)
>  error4:
>         kmem_cache_destroy(hsotg->desc_gen_cache);
>         kmem_cache_destroy(hsotg->desc_hsisoc_cache);
> +       kmem_cache_destroy(hsotg->unaligned_cache);

nitty nit: freeing order should be opposite of allocation, so the new
line should be above the other two.
wuliangfeng May 8, 2018, 7:43 a.m. UTC | #2
Dear Doug,

在 2018年05月08日 13:11, Doug Anderson 写道:
> Hi,
>
> On Mon, May 7, 2018 at 8:07 PM, William Wu <william.wu@rock-chips.com> wrote:
>> +static int dwc2_alloc_split_dma_aligned_buf(struct dwc2_hsotg *hsotg,
>> +                                           struct dwc2_qh *qh,
>> +                                           struct dwc2_host_chan *chan)
>> +{
>> +       if (!hsotg->unaligned_cache)
>> +               return -ENOMEM;
>> +
>> +       if (!qh->dw_align_buf) {
>> +               qh->dw_align_buf = kmem_cache_alloc(hsotg->unaligned_cache,
>> +                                                   GFP_ATOMIC | GFP_DMA);
>> +               if (!qh->dw_align_buf)
>> +                       return -ENOMEM;
>> +
>> +               qh->dw_align_buf_size = min_t(u32, chan->max_packet,
>> +                                             DWC2_KMEM_UNALIGNED_BUF_SIZE);
> Rather than using min_t, wouldn't it be better to return -ENOMEM if
> "max_packet" > DWC2_KMEM_UNALIGNED_BUF_SIZE?  As it is, you might
> allocate less space than you need, right?  That seems like it would be
> bad (even though this is probably impossible).
Yes, good idea! So is it good to fix it like this?

if (!qh->dw_align_buf || chan->max_packet >
     DWC2_KMEM_UNALIGNED_BUF_SIZE)
	return -ENOMEM;	

qh->dw_align_buf_size = chan->max_packet;

>
>> @@ -2797,6 +2837,32 @@ static int dwc2_assign_and_init_hc(struct dwc2_hsotg *hsotg, struct dwc2_qh *qh)
>>          /* Set the transfer attributes */
>>          dwc2_hc_init_xfer(hsotg, chan, qtd);
>>
>> +       /* For non-dword aligned buffers */
>> +       if (hsotg->params.host_dma && qh->do_split &&
>> +           chan->ep_is_in && (chan->xfer_dma & 0x3)) {
>> +               dev_vdbg(hsotg->dev, "Non-aligned buffer\n");
>> +               if (dwc2_alloc_split_dma_aligned_buf(hsotg, qh, chan)) {
>> +                       dev_err(hsotg->dev,
>> +                               "Failed to allocate memory to handle non-aligned buffer\n");
>> +                       /* Add channel back to free list */
>> +                       chan->align_buf = 0;
>> +                       chan->multi_count = 0;
>> +                       list_add_tail(&chan->hc_list_entry,
>> +                                     &hsotg->free_hc_list);
>> +                       qtd->in_process = 0;
>> +                       qh->channel = NULL;
>> +                       return -ENOMEM;
>> +               }
>> +       } else {
>> +               /*
>> +                * We assume that DMA is always aligned in non-split
>> +                * case or split out case. Warn if not.
>> +                */
>> +               WARN_ON_ONCE(hsotg->params.host_dma &&
>> +                            (chan->xfer_dma & 0x3));
>> +               chan->align_buf = 0;
>> +       }
>> +
>>          if (chan->ep_type == USB_ENDPOINT_XFER_INT ||
>>              chan->ep_type == USB_ENDPOINT_XFER_ISOC)
>>                  /*
>> @@ -5241,6 +5307,17 @@ int dwc2_hcd_init(struct dwc2_hsotg *hsotg)
>>                          hsotg->params.dma_desc_enable = false;
>>                          hsotg->params.dma_desc_fs_enable = false;
>>                  }
>> +       } else if (hsotg->params.host_dma) {
> Are you sure this is "else if"?  Can't you have descriptor DMA enabled
> in the controller and still need to do a normal DMA transfer if you
> plug in a hub?  Seems like this should be just "if".
Sorry, I don't understand the case "have descriptor DMA enabled in the 
controller and still need to do a normal DMA transfer". But maybe it 
still has another problem if just use "if" here, because it will create 
kmem caches for Slave mode which actually doesn't need aligned DMA buf.
>
>
>> +               /*
>> +                * Create kmem caches to handle non-aligned buffer
>> +                * in Buffer DMA mode.
>> +                */
>> +               hsotg->unaligned_cache = kmem_cache_create("dwc2-unaligned-dma",
>> +                                               DWC2_KMEM_UNALIGNED_BUF_SIZE, 4,
> Worth using "DWC2_USB_DMA_ALIGN" rather than 4?
>
>
>> +                                               SLAB_CACHE_DMA, NULL);
>> +               if (!hsotg->unaligned_cache)
>> +                       dev_err(hsotg->dev,
>> +                               "unable to create dwc2 unaligned cache\n");
>>          }
>>
>>          hsotg->otg_port = 1;
>> @@ -5279,6 +5356,7 @@ int dwc2_hcd_init(struct dwc2_hsotg *hsotg)
>>   error4:
>>          kmem_cache_destroy(hsotg->desc_gen_cache);
>>          kmem_cache_destroy(hsotg->desc_hsisoc_cache);
>> +       kmem_cache_destroy(hsotg->unaligned_cache);
> nitty nit: freeing order should be opposite of allocation, so the new
> line should be above the other two.
Ah, I got it. But note that it's impossible to allocate the 
"unaligned_cache" and "desc *cache" at the same time. Should we still 
fix the free order? If yes, maybe the correct free order is:

kmem_cache_destroy(hsotg->unaligned_cache);
kmem_cache_destroy(hsotg->desc_hsisoc_cache);
kmem_cache_destroy(hsotg->desc_gen_cache);

Right?

And should we also need to fix the same free order in the "dwc2_hcd_remove"?
Best regards,
         wulf
>
>
Doug Anderson May 8, 2018, 3:29 p.m. UTC | #3
Hi,

On Tue, May 8, 2018 at 12:43 AM, wlf <wulf@rock-chips.com> wrote:
> Dear Doug,
>
> 在 2018年05月08日 13:11, Doug Anderson 写道:
>>
>> Hi,
>>
>> On Mon, May 7, 2018 at 8:07 PM, William Wu <william.wu@rock-chips.com>
>> wrote:
>>>
>>> +static int dwc2_alloc_split_dma_aligned_buf(struct dwc2_hsotg *hsotg,
>>> +                                           struct dwc2_qh *qh,
>>> +                                           struct dwc2_host_chan *chan)
>>> +{
>>> +       if (!hsotg->unaligned_cache)
>>> +               return -ENOMEM;
>>> +
>>> +       if (!qh->dw_align_buf) {
>>> +               qh->dw_align_buf =
>>> kmem_cache_alloc(hsotg->unaligned_cache,
>>> +                                                   GFP_ATOMIC |
>>> GFP_DMA);
>>> +               if (!qh->dw_align_buf)
>>> +                       return -ENOMEM;
>>> +
>>> +               qh->dw_align_buf_size = min_t(u32, chan->max_packet,
>>> +
>>> DWC2_KMEM_UNALIGNED_BUF_SIZE);
>>
>> Rather than using min_t, wouldn't it be better to return -ENOMEM if
>> "max_packet" > DWC2_KMEM_UNALIGNED_BUF_SIZE?  As it is, you might
>> allocate less space than you need, right?  That seems like it would be
>> bad (even though this is probably impossible).
>
> Yes, good idea! So is it good to fix it like this?
>
> if (!qh->dw_align_buf || chan->max_packet >
>     DWC2_KMEM_UNALIGNED_BUF_SIZE)
>         return -ENOMEM;
>
> qh->dw_align_buf_size = chan->max_packet;

Won't that orphan memory in the case that the allocation is OK but the
size is wrong?

I would have put it all the way at the top:

if (!hsotg->unaligned_cache ||
    chan->max_packet > DWC2_KMEM_UNALIGNED_BUF_SIZE)
  return -ENOMEM;


It also feels like you should get rid of "qh->dw_align_buf_size".  The
buffer is always DWC2_KMEM_UNALIGNED_BUF_SIZE.

...if you need to keep track of how many bytes you mapped with
dma_map_single() and somehow can't get back to "chan", rename this to
qh->dw_align_buf_mapped_bytes or something.  I haven't followed
through all the code, but I _think_ you'd want to make sure to set
qh->dw_align_buf_mapped_bytes every time through
dwc2_alloc_split_dma_aligned_buf(), even if dw_align_buf was already
allocated.  Specifically, my worry is in the case where you enter
dwc2_alloc_split_dma_aligned_buf() and qh->dw_align_buf is non-NULL.
Could "max_packet" be different in this case compared to what it was
when dw_align_buf was first allocated?



>>> @@ -2797,6 +2837,32 @@ static int dwc2_assign_and_init_hc(struct
>>> dwc2_hsotg *hsotg, struct dwc2_qh *qh)
>>>          /* Set the transfer attributes */
>>>          dwc2_hc_init_xfer(hsotg, chan, qtd);
>>>
>>> +       /* For non-dword aligned buffers */
>>> +       if (hsotg->params.host_dma && qh->do_split &&
>>> +           chan->ep_is_in && (chan->xfer_dma & 0x3)) {
>>> +               dev_vdbg(hsotg->dev, "Non-aligned buffer\n");
>>> +               if (dwc2_alloc_split_dma_aligned_buf(hsotg, qh, chan)) {
>>> +                       dev_err(hsotg->dev,
>>> +                               "Failed to allocate memory to handle
>>> non-aligned buffer\n");
>>> +                       /* Add channel back to free list */
>>> +                       chan->align_buf = 0;
>>> +                       chan->multi_count = 0;
>>> +                       list_add_tail(&chan->hc_list_entry,
>>> +                                     &hsotg->free_hc_list);
>>> +                       qtd->in_process = 0;
>>> +                       qh->channel = NULL;
>>> +                       return -ENOMEM;
>>> +               }
>>> +       } else {
>>> +               /*
>>> +                * We assume that DMA is always aligned in non-split
>>> +                * case or split out case. Warn if not.
>>> +                */
>>> +               WARN_ON_ONCE(hsotg->params.host_dma &&
>>> +                            (chan->xfer_dma & 0x3));
>>> +               chan->align_buf = 0;
>>> +       }
>>> +
>>>          if (chan->ep_type == USB_ENDPOINT_XFER_INT ||
>>>              chan->ep_type == USB_ENDPOINT_XFER_ISOC)
>>>                  /*
>>> @@ -5241,6 +5307,17 @@ int dwc2_hcd_init(struct dwc2_hsotg *hsotg)
>>>                          hsotg->params.dma_desc_enable = false;
>>>                          hsotg->params.dma_desc_fs_enable = false;
>>>                  }
>>> +       } else if (hsotg->params.host_dma) {
>>
>> Are you sure this is "else if"?  Can't you have descriptor DMA enabled
>> in the controller and still need to do a normal DMA transfer if you
>> plug in a hub?  Seems like this should be just "if".
>
> Sorry, I don't understand the case "have descriptor DMA enabled in the
> controller and still need to do a normal DMA transfer". But maybe it still
> has another problem if just use "if" here, because it will create kmem
> caches for Slave mode which actually doesn't need aligned DMA buf.

So right now your code looks like:

if (hsotg->params.dma_desc_enable ||
    hsotg->params.dma_desc_fs_enable) {
    ...
    ...
} else if (hsotg->params.host_dma) {
   ...
}


I've never played much with "descriptor DMA" on dwc2 because every
time I enabled it (last I tried was years ago) a whole bunch of
peripherals stopped working and I didn't dig (I just left it off).
Thus, I'm no expert on descriptor DMA.  ...that being said, my
understanding is that if you enable descriptor DMA in the controller
then it will enable a smarter DMA mode for some of the transfers.
IIRC Descriptor DMA mode specifically can't handle splits.  Is my
memory correct there?  Presumably that means that when you enable
descriptor DMA in the controller the driver will automatically fall
back to normal Address DMA mode if things get too complicated.  When
it falls back to Address DMA mode, presumably dwc2_hcd_init() won't
get called again.  That means that any data structures that are needed
for Address DMA need to be allocated in dwc2_hcd_init() even if
descriptor DMA is enabled because we might need to fall back to
Address DMA.

Thus, I'm suggesting changing the code to:

if (hsotg->params.dma_desc_enable ||
    hsotg->params.dma_desc_fs_enable) {
    ...
    ...
}

if (hsotg->params.host_dma) {
   ...
}


...as I said, I'm no expert and I could just be confused.  If
something I say seems wrong please correct me.


>>> +                                               SLAB_CACHE_DMA, NULL);
>>> +               if (!hsotg->unaligned_cache)
>>> +                       dev_err(hsotg->dev,
>>> +                               "unable to create dwc2 unaligned
>>> cache\n");
>>>          }
>>>
>>>          hsotg->otg_port = 1;
>>> @@ -5279,6 +5356,7 @@ int dwc2_hcd_init(struct dwc2_hsotg *hsotg)
>>>   error4:
>>>          kmem_cache_destroy(hsotg->desc_gen_cache);
>>>          kmem_cache_destroy(hsotg->desc_hsisoc_cache);
>>> +       kmem_cache_destroy(hsotg->unaligned_cache);
>>
>> nitty nit: freeing order should be opposite of allocation, so the new
>> line should be above the other two.
>
> Ah, I got it. But note that it's impossible to allocate the
> "unaligned_cache" and "desc *cache" at the same time. Should we still fix
> the free order? If yes, maybe the correct free order is:
>
> kmem_cache_destroy(hsotg->unaligned_cache);
> kmem_cache_destroy(hsotg->desc_hsisoc_cache);
> kmem_cache_destroy(hsotg->desc_gen_cache);
>
> Right?
>
> And should we also need to fix the same free order in the "dwc2_hcd_remove"?

Right.  Yes, it totally doesn't matter which is why I tagged it with
"nitty nit" (or, another way of saying it is: I'm just being totally
obsessive here).  It's just that, for me, I always expect frees to be
in the opposite order of allocations so it makes it easier for me to
parse the code if this is consistent.


-Doug
wuliangfeng May 9, 2018, 8:55 a.m. UTC | #4
Dear Doug,

在 2018年05月08日 23:29, Doug Anderson 写道:
> Hi,
>
> On Tue, May 8, 2018 at 12:43 AM, wlf <wulf@rock-chips.com> wrote:
>> Dear Doug,
>>
>> 在 2018年05月08日 13:11, Doug Anderson 写道:
>>> Hi,
>>>
>>> On Mon, May 7, 2018 at 8:07 PM, William Wu <william.wu@rock-chips.com>
>>> wrote:
>>>> +static int dwc2_alloc_split_dma_aligned_buf(struct dwc2_hsotg *hsotg,
>>>> +                                           struct dwc2_qh *qh,
>>>> +                                           struct dwc2_host_chan *chan)
>>>> +{
>>>> +       if (!hsotg->unaligned_cache)
>>>> +               return -ENOMEM;
>>>> +
>>>> +       if (!qh->dw_align_buf) {
>>>> +               qh->dw_align_buf =
>>>> kmem_cache_alloc(hsotg->unaligned_cache,
>>>> +                                                   GFP_ATOMIC |
>>>> GFP_DMA);
>>>> +               if (!qh->dw_align_buf)
>>>> +                       return -ENOMEM;
>>>> +
>>>> +               qh->dw_align_buf_size = min_t(u32, chan->max_packet,
>>>> +
>>>> DWC2_KMEM_UNALIGNED_BUF_SIZE);
>>> Rather than using min_t, wouldn't it be better to return -ENOMEM if
>>> "max_packet" > DWC2_KMEM_UNALIGNED_BUF_SIZE?  As it is, you might
>>> allocate less space than you need, right?  That seems like it would be
>>> bad (even though this is probably impossible).
>> Yes, good idea! So is it good to fix it like this?
>>
>> if (!qh->dw_align_buf || chan->max_packet >
>>      DWC2_KMEM_UNALIGNED_BUF_SIZE)
>>          return -ENOMEM;
>>
>> qh->dw_align_buf_size = chan->max_packet;
> Won't that orphan memory in the case that the allocation is OK but the
> size is wrong?
>
> I would have put it all the way at the top:
>
> if (!hsotg->unaligned_cache ||
>      chan->max_packet > DWC2_KMEM_UNALIGNED_BUF_SIZE)
>    return -ENOMEM;
Ah, yes, you're right! Thank you for correcting me.
>
> It also feels like you should get rid of "qh->dw_align_buf_size".  The
> buffer is always DWC2_KMEM_UNALIGNED_BUF_SIZE.
>
> ...if you need to keep track of how many bytes you mapped with
> dma_map_single() and somehow can't get back to "chan", rename this to
> qh->dw_align_buf_mapped_bytes or something.  I haven't followed
> through all the code, but I _think_ you'd want to make sure to set
> qh->dw_align_buf_mapped_bytes every time through
> dwc2_alloc_split_dma_aligned_buf(), even if dw_align_buf was already
> allocated.  Specifically, my worry is in the case where you enter
> dwc2_alloc_split_dma_aligned_buf() and qh->dw_align_buf is non-NULL.
> Could "max_packet" be different in this case compared to what it was
> when dw_align_buf was first allocated?
Sorry to make you feel confused. It's really not suitable to use 
"qh->dw_align_buf_size" for the dma mapped size. And I think the 
"max_packet" should be  always be the same. However, just in case, maybe 
I can get rid of "qh->dw_align_buf_size" and just use 
DWC2_KMEM_UNALIGNED_BUF_SIZE to do dma_map_single().
>
>
>>>> @@ -2797,6 +2837,32 @@ static int dwc2_assign_and_init_hc(struct
>>>> dwc2_hsotg *hsotg, struct dwc2_qh *qh)
>>>>           /* Set the transfer attributes */
>>>>           dwc2_hc_init_xfer(hsotg, chan, qtd);
>>>>
>>>> +       /* For non-dword aligned buffers */
>>>> +       if (hsotg->params.host_dma && qh->do_split &&
>>>> +           chan->ep_is_in && (chan->xfer_dma & 0x3)) {
>>>> +               dev_vdbg(hsotg->dev, "Non-aligned buffer\n");
>>>> +               if (dwc2_alloc_split_dma_aligned_buf(hsotg, qh, chan)) {
>>>> +                       dev_err(hsotg->dev,
>>>> +                               "Failed to allocate memory to handle
>>>> non-aligned buffer\n");
>>>> +                       /* Add channel back to free list */
>>>> +                       chan->align_buf = 0;
>>>> +                       chan->multi_count = 0;
>>>> +                       list_add_tail(&chan->hc_list_entry,
>>>> +                                     &hsotg->free_hc_list);
>>>> +                       qtd->in_process = 0;
>>>> +                       qh->channel = NULL;
>>>> +                       return -ENOMEM;
>>>> +               }
>>>> +       } else {
>>>> +               /*
>>>> +                * We assume that DMA is always aligned in non-split
>>>> +                * case or split out case. Warn if not.
>>>> +                */
>>>> +               WARN_ON_ONCE(hsotg->params.host_dma &&
>>>> +                            (chan->xfer_dma & 0x3));
>>>> +               chan->align_buf = 0;
>>>> +       }
>>>> +
>>>>           if (chan->ep_type == USB_ENDPOINT_XFER_INT ||
>>>>               chan->ep_type == USB_ENDPOINT_XFER_ISOC)
>>>>                   /*
>>>> @@ -5241,6 +5307,17 @@ int dwc2_hcd_init(struct dwc2_hsotg *hsotg)
>>>>                           hsotg->params.dma_desc_enable = false;
>>>>                           hsotg->params.dma_desc_fs_enable = false;
>>>>                   }
>>>> +       } else if (hsotg->params.host_dma) {
>>> Are you sure this is "else if"?  Can't you have descriptor DMA enabled
>>> in the controller and still need to do a normal DMA transfer if you
>>> plug in a hub?  Seems like this should be just "if".
>> Sorry, I don't understand the case "have descriptor DMA enabled in the
>> controller and still need to do a normal DMA transfer". But maybe it still
>> has another problem if just use "if" here, because it will create kmem
>> caches for Slave mode which actually doesn't need aligned DMA buf.
> So right now your code looks like:
>
> if (hsotg->params.dma_desc_enable ||
>      hsotg->params.dma_desc_fs_enable) {
>      ...
>      ...
> } else if (hsotg->params.host_dma) {
>     ...
> }
>
>
> I've never played much with "descriptor DMA" on dwc2 because every
> time I enabled it (last I tried was years ago) a whole bunch of
> peripherals stopped working and I didn't dig (I just left it off).
> Thus, I'm no expert on descriptor DMA.  ...that being said, my
> understanding is that if you enable descriptor DMA in the controller
> then it will enable a smarter DMA mode for some of the transfers.
> IIRC Descriptor DMA mode specifically can't handle splits.  Is my
> memory correct there?  Presumably that means that when you enable
> descriptor DMA in the controller the driver will automatically fall
> back to normal Address DMA mode if things get too complicated.  When
> it falls back to Address DMA mode, presumably dwc2_hcd_init() won't
> get called again.  That means that any data structures that are needed
> for Address DMA need to be allocated in dwc2_hcd_init() even if
> descriptor DMA is enabled because we might need to fall back to
> Address DMA.
>
> Thus, I'm suggesting changing the code to:
>
> if (hsotg->params.dma_desc_enable ||
>      hsotg->params.dma_desc_fs_enable) {
>      ...
>      ...
> }
>
> if (hsotg->params.host_dma) {
>     ...
> }
>
>
> ...as I said, I'm no expert and I could just be confused.  If
> something I say seems wrong please correct me.
Ah, I got it. Thanks for your detailed explanation. Although I don't 
know if it's possible that dwc2 driver automatically fall back to normal 
Address DMA mode when desc DMA work abnormally with split, I agree with 
your suggestion. I'll fix it in V4 patch.
>
>>>> +                                               SLAB_CACHE_DMA, NULL);
>>>> +               if (!hsotg->unaligned_cache)
>>>> +                       dev_err(hsotg->dev,
>>>> +                               "unable to create dwc2 unaligned
>>>> cache\n");
>>>>           }
>>>>
>>>>           hsotg->otg_port = 1;
>>>> @@ -5279,6 +5356,7 @@ int dwc2_hcd_init(struct dwc2_hsotg *hsotg)
>>>>    error4:
>>>>           kmem_cache_destroy(hsotg->desc_gen_cache);
>>>>           kmem_cache_destroy(hsotg->desc_hsisoc_cache);
>>>> +       kmem_cache_destroy(hsotg->unaligned_cache);
>>> nitty nit: freeing order should be opposite of allocation, so the new
>>> line should be above the other two.
>> Ah, I got it. But note that it's impossible to allocate the
>> "unaligned_cache" and "desc *cache" at the same time. Should we still fix
>> the free order? If yes, maybe the correct free order is:
>>
>> kmem_cache_destroy(hsotg->unaligned_cache);
>> kmem_cache_destroy(hsotg->desc_hsisoc_cache);
>> kmem_cache_destroy(hsotg->desc_gen_cache);
>>
>> Right?
>>
>> And should we also need to fix the same free order in the "dwc2_hcd_remove"?
> Right.  Yes, it totally doesn't matter which is why I tagged it with
> "nitty nit" (or, another way of saying it is: I'm just being totally
> obsessive here).  It's just that, for me, I always expect frees to be
> in the opposite order of allocations so it makes it easier for me to
> parse the code if this is consistent.
It seems like a good idea to me. I'll fix it.
>
Best regards,
         wulf
>
>
>
Doug Anderson May 10, 2018, 8:59 p.m. UTC | #5
Hi,

On Wed, May 9, 2018 at 1:55 AM, wlf <wulf@rock-chips.com> wrote:
>>>>> +       } else if (hsotg->params.host_dma) {
>>>>
>>>> Are you sure this is "else if"?  Can't you have descriptor DMA enabled
>>>> in the controller and still need to do a normal DMA transfer if you
>>>> plug in a hub?  Seems like this should be just "if".
>>>
>>> Sorry, I don't understand the case "have descriptor DMA enabled in the
>>> controller and still need to do a normal DMA transfer". But maybe it
>>> still
>>> has another problem if just use "if" here, because it will create kmem
>>> caches for Slave mode which actually doesn't need aligned DMA buf.
>>
>> So right now your code looks like:
>>
>> if (hsotg->params.dma_desc_enable ||
>>      hsotg->params.dma_desc_fs_enable) {
>>      ...
>>      ...
>> } else if (hsotg->params.host_dma) {
>>     ...
>> }
>>
>>
>> I've never played much with "descriptor DMA" on dwc2 because every
>> time I enabled it (last I tried was years ago) a whole bunch of
>> peripherals stopped working and I didn't dig (I just left it off).
>> Thus, I'm no expert on descriptor DMA.  ...that being said, my
>> understanding is that if you enable descriptor DMA in the controller
>> then it will enable a smarter DMA mode for some of the transfers.
>> IIRC Descriptor DMA mode specifically can't handle splits.  Is my
>> memory correct there?  Presumably that means that when you enable
>> descriptor DMA in the controller the driver will automatically fall
>> back to normal Address DMA mode if things get too complicated.  When
>> it falls back to Address DMA mode, presumably dwc2_hcd_init() won't
>> get called again.  That means that any data structures that are needed
>> for Address DMA need to be allocated in dwc2_hcd_init() even if
>> descriptor DMA is enabled because we might need to fall back to
>> Address DMA.
>>
>> Thus, I'm suggesting changing the code to:
>>
>> if (hsotg->params.dma_desc_enable ||
>>      hsotg->params.dma_desc_fs_enable) {
>>      ...
>>      ...
>> }
>>
>> if (hsotg->params.host_dma) {
>>     ...
>> }
>>
>>
>> ...as I said, I'm no expert and I could just be confused.  If
>> something I say seems wrong please correct me.
>
> Ah, I got it. Thanks for your detailed explanation. Although I don't know if
> it's possible that dwc2 driver automatically fall back to normal Address DMA
> mode when desc DMA work abnormally with split, I agree with your suggestion.
> I'll fix it in V4 patch.

Hmm, I guess you're right.  I did a quick search and I can't find any
evidence of a fallback like this.  Maybe I dreamed that.  I found some
old comment in the commit history that said:

/*
 * Disable descriptor dma mode by default as the HW can support
 * it, but does not support it for SPLIT transactions.
 * Disable it for FS devices as well.
 */

...and it looks there's currently nobody using descriptor DMA anyway
(assuming I read the code correctly).  It does seem like people were
ever going to turn it on then it would have to be dynamic as I was
dreaming it was...

-Doug
wuliangfeng May 11, 2018, 9:26 a.m. UTC | #6
Dear Doug,


在 2018年05月11日 04:59, Doug Anderson 写道:
> Hi,
>
> On Wed, May 9, 2018 at 1:55 AM, wlf <wulf@rock-chips.com> wrote:
>>>>>> +       } else if (hsotg->params.host_dma) {
>>>>> Are you sure this is "else if"?  Can't you have descriptor DMA enabled
>>>>> in the controller and still need to do a normal DMA transfer if you
>>>>> plug in a hub?  Seems like this should be just "if".
>>>> Sorry, I don't understand the case "have descriptor DMA enabled in the
>>>> controller and still need to do a normal DMA transfer". But maybe it
>>>> still
>>>> has another problem if just use "if" here, because it will create kmem
>>>> caches for Slave mode which actually doesn't need aligned DMA buf.
>>> So right now your code looks like:
>>>
>>> if (hsotg->params.dma_desc_enable ||
>>>       hsotg->params.dma_desc_fs_enable) {
>>>       ...
>>>       ...
>>> } else if (hsotg->params.host_dma) {
>>>      ...
>>> }
>>>
>>>
>>> I've never played much with "descriptor DMA" on dwc2 because every
>>> time I enabled it (last I tried was years ago) a whole bunch of
>>> peripherals stopped working and I didn't dig (I just left it off).
>>> Thus, I'm no expert on descriptor DMA.  ...that being said, my
>>> understanding is that if you enable descriptor DMA in the controller
>>> then it will enable a smarter DMA mode for some of the transfers.
>>> IIRC Descriptor DMA mode specifically can't handle splits.  Is my
>>> memory correct there?  Presumably that means that when you enable
>>> descriptor DMA in the controller the driver will automatically fall
>>> back to normal Address DMA mode if things get too complicated.  When
>>> it falls back to Address DMA mode, presumably dwc2_hcd_init() won't
>>> get called again.  That means that any data structures that are needed
>>> for Address DMA need to be allocated in dwc2_hcd_init() even if
>>> descriptor DMA is enabled because we might need to fall back to
>>> Address DMA.
>>>
>>> Thus, I'm suggesting changing the code to:
>>>
>>> if (hsotg->params.dma_desc_enable ||
>>>       hsotg->params.dma_desc_fs_enable) {
>>>       ...
>>>       ...
>>> }
>>>
>>> if (hsotg->params.host_dma) {
>>>      ...
>>> }
>>>
>>>
>>> ...as I said, I'm no expert and I could just be confused.  If
>>> something I say seems wrong please correct me.
>> Ah, I got it. Thanks for your detailed explanation. Although I don't know if
>> it's possible that dwc2 driver automatically fall back to normal Address DMA
>> mode when desc DMA work abnormally with split, I agree with your suggestion.
>> I'll fix it in V4 patch.
> Hmm, I guess you're right.  I did a quick search and I can't find any
> evidence of a fallback like this.  Maybe I dreamed that.  I found some
> old comment in the commit history that said:
I think you're right. I find that it's possible to fall back to Address 
DMA mode if desc DMA initialization failure. The error case is:in 
dwc2_hcd_init(),  if it fails to create dwc2 generic desc cache or dwc2 
hs isoc desc cache, then it will disable desc DMA and fall back to 
Address DMA mode.
>
> /*
>   * Disable descriptor dma mode by default as the HW can support
>   * it, but does not support it for SPLIT transactions.
>   * Disable it for FS devices as well.
>   */
>
> ...and it looks there's currently nobody using descriptor DMA anyway
> (assuming I read the code correctly).  It does seem like people were
> ever going to turn it on then it would have to be dynamic as I was
> dreaming it was...
Yes, the dwc2 desc DMA mode can't support split transaction. So few 
people use desc DMA mode for OTG Host mode. BTW, I find that the latest 
code enable desc DMA mode by default for the OTG peripheral mode if the 
dwc2 hardware support desc DMA.
>
diff mbox

Patch

diff --git a/drivers/usb/dwc2/core.h b/drivers/usb/dwc2/core.h
index d83be56..c1983f8 100644
--- a/drivers/usb/dwc2/core.h
+++ b/drivers/usb/dwc2/core.h
@@ -915,6 +915,7 @@  struct dwc2_hregs_backup {
  * @frame_list_sz:      Frame list size
  * @desc_gen_cache:     Kmem cache for generic descriptors
  * @desc_hsisoc_cache:  Kmem cache for hs isochronous descriptors
+ * @unaligned_cache:    Kmem cache for DMA mode to handle non-aligned buf
  *
  * These are for peripheral mode:
  *
@@ -1059,6 +1060,8 @@  struct dwc2_hsotg {
 	u32 frame_list_sz;
 	struct kmem_cache *desc_gen_cache;
 	struct kmem_cache *desc_hsisoc_cache;
+	struct kmem_cache *unaligned_cache;
+#define DWC2_KMEM_UNALIGNED_BUF_SIZE 1024
 
 #endif /* CONFIG_USB_DWC2_HOST || CONFIG_USB_DWC2_DUAL_ROLE */
 
diff --git a/drivers/usb/dwc2/hcd.c b/drivers/usb/dwc2/hcd.c
index 190f959..6f22dee 100644
--- a/drivers/usb/dwc2/hcd.c
+++ b/drivers/usb/dwc2/hcd.c
@@ -1562,11 +1562,20 @@  static void dwc2_hc_start_transfer(struct dwc2_hsotg *hsotg,
 	}
 
 	if (hsotg->params.host_dma) {
-		dwc2_writel((u32)chan->xfer_dma,
-			    hsotg->regs + HCDMA(chan->hc_num));
+		dma_addr_t dma_addr;
+
+		if (chan->align_buf) {
+			if (dbg_hc(chan))
+				dev_vdbg(hsotg->dev, "align_buf\n");
+			dma_addr = chan->align_buf;
+		} else {
+			dma_addr = chan->xfer_dma;
+		}
+		dwc2_writel((u32)dma_addr, hsotg->regs + HCDMA(chan->hc_num));
+
 		if (dbg_hc(chan))
 			dev_vdbg(hsotg->dev, "Wrote %08lx to HCDMA(%d)\n",
-				 (unsigned long)chan->xfer_dma, chan->hc_num);
+				 (unsigned long)dma_addr, chan->hc_num);
 	}
 
 	/* Start the split */
@@ -2620,6 +2629,37 @@  static void dwc2_hc_init_xfer(struct dwc2_hsotg *hsotg,
 	}
 }
 
+static int dwc2_alloc_split_dma_aligned_buf(struct dwc2_hsotg *hsotg,
+					    struct dwc2_qh *qh,
+					    struct dwc2_host_chan *chan)
+{
+	if (!hsotg->unaligned_cache)
+		return -ENOMEM;
+
+	if (!qh->dw_align_buf) {
+		qh->dw_align_buf = kmem_cache_alloc(hsotg->unaligned_cache,
+						    GFP_ATOMIC | GFP_DMA);
+		if (!qh->dw_align_buf)
+			return -ENOMEM;
+
+		qh->dw_align_buf_size = min_t(u32, chan->max_packet,
+					      DWC2_KMEM_UNALIGNED_BUF_SIZE);
+	}
+
+	qh->dw_align_buf_dma = dma_map_single(hsotg->dev, qh->dw_align_buf,
+					      qh->dw_align_buf_size,
+					      DMA_FROM_DEVICE);
+
+	if (dma_mapping_error(hsotg->dev, qh->dw_align_buf_dma)) {
+		dev_err(hsotg->dev, "can't map align_buf\n");
+		chan->align_buf = 0;
+		return -EINVAL;
+	}
+
+	chan->align_buf = qh->dw_align_buf_dma;
+	return 0;
+}
+
 #define DWC2_USB_DMA_ALIGN 4
 
 struct dma_aligned_buffer {
@@ -2797,6 +2837,32 @@  static int dwc2_assign_and_init_hc(struct dwc2_hsotg *hsotg, struct dwc2_qh *qh)
 	/* Set the transfer attributes */
 	dwc2_hc_init_xfer(hsotg, chan, qtd);
 
+	/* For non-dword aligned buffers */
+	if (hsotg->params.host_dma && qh->do_split &&
+	    chan->ep_is_in && (chan->xfer_dma & 0x3)) {
+		dev_vdbg(hsotg->dev, "Non-aligned buffer\n");
+		if (dwc2_alloc_split_dma_aligned_buf(hsotg, qh, chan)) {
+			dev_err(hsotg->dev,
+				"Failed to allocate memory to handle non-aligned buffer\n");
+			/* Add channel back to free list */
+			chan->align_buf = 0;
+			chan->multi_count = 0;
+			list_add_tail(&chan->hc_list_entry,
+				      &hsotg->free_hc_list);
+			qtd->in_process = 0;
+			qh->channel = NULL;
+			return -ENOMEM;
+		}
+	} else {
+		/*
+		 * We assume that DMA is always aligned in non-split
+		 * case or split out case. Warn if not.
+		 */
+		WARN_ON_ONCE(hsotg->params.host_dma &&
+			     (chan->xfer_dma & 0x3));
+		chan->align_buf = 0;
+	}
+
 	if (chan->ep_type == USB_ENDPOINT_XFER_INT ||
 	    chan->ep_type == USB_ENDPOINT_XFER_ISOC)
 		/*
@@ -5241,6 +5307,17 @@  int dwc2_hcd_init(struct dwc2_hsotg *hsotg)
 			hsotg->params.dma_desc_enable = false;
 			hsotg->params.dma_desc_fs_enable = false;
 		}
+	} else if (hsotg->params.host_dma) {
+		/*
+		 * Create kmem caches to handle non-aligned buffer
+		 * in Buffer DMA mode.
+		 */
+		hsotg->unaligned_cache = kmem_cache_create("dwc2-unaligned-dma",
+						DWC2_KMEM_UNALIGNED_BUF_SIZE, 4,
+						SLAB_CACHE_DMA, NULL);
+		if (!hsotg->unaligned_cache)
+			dev_err(hsotg->dev,
+				"unable to create dwc2 unaligned cache\n");
 	}
 
 	hsotg->otg_port = 1;
@@ -5279,6 +5356,7 @@  int dwc2_hcd_init(struct dwc2_hsotg *hsotg)
 error4:
 	kmem_cache_destroy(hsotg->desc_gen_cache);
 	kmem_cache_destroy(hsotg->desc_hsisoc_cache);
+	kmem_cache_destroy(hsotg->unaligned_cache);
 error3:
 	dwc2_hcd_release(hsotg);
 error2:
@@ -5321,6 +5399,7 @@  void dwc2_hcd_remove(struct dwc2_hsotg *hsotg)
 
 	kmem_cache_destroy(hsotg->desc_gen_cache);
 	kmem_cache_destroy(hsotg->desc_hsisoc_cache);
+	kmem_cache_destroy(hsotg->unaligned_cache);
 
 	dwc2_hcd_release(hsotg);
 	usb_put_hcd(hcd);
diff --git a/drivers/usb/dwc2/hcd.h b/drivers/usb/dwc2/hcd.h
index 96a9da5..854cef5 100644
--- a/drivers/usb/dwc2/hcd.h
+++ b/drivers/usb/dwc2/hcd.h
@@ -76,6 +76,8 @@  struct dwc2_qh;
  *                      (micro)frame
  * @xfer_buf:           Pointer to current transfer buffer position
  * @xfer_dma:           DMA address of xfer_buf
+ * @align_buf:          In Buffer DMA mode this will be used if xfer_buf is not
+ *                      DWORD aligned
  * @xfer_len:           Total number of bytes to transfer
  * @xfer_count:         Number of bytes transferred so far
  * @start_pkt_count:    Packet count at start of transfer
@@ -133,6 +135,7 @@  struct dwc2_host_chan {
 
 	u8 *xfer_buf;
 	dma_addr_t xfer_dma;
+	dma_addr_t align_buf;
 	u32 xfer_len;
 	u32 xfer_count;
 	u16 start_pkt_count;
@@ -303,6 +306,10 @@  struct dwc2_hs_transfer_time {
  *                           is tightly packed.
  * @ls_duration_us:     Duration on the low speed bus schedule.
  * @ntd:                Actual number of transfer descriptors in a list
+ * @dw_align_buf:       Used instead of original buffer if its physical address
+ *                      is not dword-aligned
+ * @dw_align_buf_size:  Size of dw_align_buf_dma
+ * @dw_align_buf_dma:   DMA address for dw_align_buf
  * @qtd_list:           List of QTDs for this QH
  * @channel:            Host channel currently processing transfers for this QH
  * @qh_list_entry:      Entry for QH in either the periodic or non-periodic
@@ -350,6 +357,9 @@  struct dwc2_qh {
 	struct dwc2_hs_transfer_time hs_transfers[DWC2_HS_SCHEDULE_UFRAMES];
 	u32 ls_start_schedule_slice;
 	u16 ntd;
+	u8 *dw_align_buf;
+	int dw_align_buf_size;
+	dma_addr_t dw_align_buf_dma;
 	struct list_head qtd_list;
 	struct dwc2_host_chan *channel;
 	struct list_head qh_list_entry;
diff --git a/drivers/usb/dwc2/hcd_intr.c b/drivers/usb/dwc2/hcd_intr.c
index a5dfd9d..ba6fd852 100644
--- a/drivers/usb/dwc2/hcd_intr.c
+++ b/drivers/usb/dwc2/hcd_intr.c
@@ -938,6 +938,14 @@  static int dwc2_xfercomp_isoc_split_in(struct dwc2_hsotg *hsotg,
 
 	frame_desc->actual_length += len;
 
+	if (chan->align_buf) {
+		dev_vdbg(hsotg->dev, "non-aligned buffer\n");
+		dma_unmap_single(hsotg->dev, chan->qh->dw_align_buf_dma,
+				 chan->qh->dw_align_buf_size, DMA_FROM_DEVICE);
+		memcpy(qtd->urb->buf + (chan->xfer_dma - qtd->urb->dma),
+		       chan->qh->dw_align_buf, len);
+	}
+
 	qtd->isoc_split_offset += len;
 
 	hctsiz = dwc2_readl(hsotg->regs + HCTSIZ(chnum));
diff --git a/drivers/usb/dwc2/hcd_queue.c b/drivers/usb/dwc2/hcd_queue.c
index e34ad5e..56464cbd 100644
--- a/drivers/usb/dwc2/hcd_queue.c
+++ b/drivers/usb/dwc2/hcd_queue.c
@@ -1695,6 +1695,9 @@  void dwc2_hcd_qh_free(struct dwc2_hsotg *hsotg, struct dwc2_qh *qh)
 
 	if (qh->desc_list)
 		dwc2_hcd_qh_free_ddma(hsotg, qh);
+	else if (hsotg->unaligned_cache && qh->dw_align_buf)
+		kmem_cache_free(hsotg->unaligned_cache, qh->dw_align_buf);
+
 	kfree(qh);
 }