diff mbox series

[RFC,v2,4/4] usb: xhci: Use temporary buffer to consolidate SG

Message ID 969b5c9f31807635785ecc74b2ae2559ddc3bbeb.1587461220.git.joglekar@synopsys.com (mailing list archive)
State Superseded
Headers show
Series Add logic to consolidate TRBs for Synopsys xHC | expand

Commit Message

Tejas Joglekar April 21, 2020, 9:49 a.m. UTC
The Synopsys xHC has an internal TRB cache of size TRB_CACHE_SIZE for
each endpoint. The default value for TRB_CACHE_SIZE is 16 for SS and 8
for HS. The controller loads and updates the TRB cache from the transfer
ring in system memory whenever the driver issues a start transfer or
update transfer command.

For chained TRBs, the Synopsys xHC requires that the total amount of
bytes for all TRBs loaded in the TRB cache be greater than or equal to 1
MPS. Or the chain ends within the TRB cache (with a last TRB).

If this requirement is not met, the controller will not be able to send
or receive a packet and it will hang causing a driver timeout and error.

This can be a problem if a class driver queues SG requests with many
small-buffer entries. The XHCI driver will create a chained TRB for each
entry which may trigger this issue.

This patch adds logic to the XHCI driver to detect and prevent this from
happening.

For every (TRB_CACHE_SIZE - 2), we check the total buffer size of
the SG list and if the last window of (TRB_CACHE_SIZE - 2) SG list length
and we don't make up at least 1 MPS, we create a temporary buffer to
consolidate full SG list into the buffer.

We check at (TRB_CACHE_SIZE - 2) window because it is possible that there
would be a link and/or event data TRB that take up to 2 of the cache
entries.

We discovered this issue with devices on other platforms but have not
yet come across any device that triggers this on Linux. But it could be
a real problem now or in the future. All it takes is N number of small
chained TRBs. And other instances of the Synopsys IP may have smaller
values for the TRB_CACHE_SIZE which would exacerbate the problem.

Signed-off-by: Tejas Joglekar <joglekar@synopsys.com>
---
 Changes in v2:
 - Removed redundant debug messages
 - Modified logic to remove unnecessary changes in hcd.c
 - Rename the quirk

 drivers/usb/host/xhci-ring.c |   2 +-
 drivers/usb/host/xhci.c      | 125 +++++++++++++++++++++++++++++++++++++++++++
 drivers/usb/host/xhci.h      |   4 ++
 3 files changed, 130 insertions(+), 1 deletion(-)

Comments

Tejas Joglekar May 18, 2020, 8:38 a.m. UTC | #1
Hi Mathias,
 Can you please review and comment?
On 4/21/2020 3:19 PM, Tejas Joglekar wrote:
> The Synopsys xHC has an internal TRB cache of size TRB_CACHE_SIZE for
> each endpoint. The default value for TRB_CACHE_SIZE is 16 for SS and 8
> for HS. The controller loads and updates the TRB cache from the transfer
> ring in system memory whenever the driver issues a start transfer or
> update transfer command.
> 
> For chained TRBs, the Synopsys xHC requires that the total amount of
> bytes for all TRBs loaded in the TRB cache be greater than or equal to 1
> MPS. Or the chain ends within the TRB cache (with a last TRB).
> 
> If this requirement is not met, the controller will not be able to send
> or receive a packet and it will hang causing a driver timeout and error.
> 
> This can be a problem if a class driver queues SG requests with many
> small-buffer entries. The XHCI driver will create a chained TRB for each
> entry which may trigger this issue.
> 
> This patch adds logic to the XHCI driver to detect and prevent this from
> happening.
> 
> For every (TRB_CACHE_SIZE - 2), we check the total buffer size of
> the SG list and if the last window of (TRB_CACHE_SIZE - 2) SG list length
> and we don't make up at least 1 MPS, we create a temporary buffer to
> consolidate full SG list into the buffer.
> 
> We check at (TRB_CACHE_SIZE - 2) window because it is possible that there
> would be a link and/or event data TRB that take up to 2 of the cache
> entries.
> 
> We discovered this issue with devices on other platforms but have not
> yet come across any device that triggers this on Linux. But it could be
> a real problem now or in the future. All it takes is N number of small
> chained TRBs. And other instances of the Synopsys IP may have smaller
> values for the TRB_CACHE_SIZE which would exacerbate the problem.
> 
> Signed-off-by: Tejas Joglekar <joglekar@synopsys.com>
> ---
>  Changes in v2:
>  - Removed redundant debug messages
>  - Modified logic to remove unnecessary changes in hcd.c
>  - Rename the quirk
> 
>  drivers/usb/host/xhci-ring.c |   2 +-
>  drivers/usb/host/xhci.c      | 125 +++++++++++++++++++++++++++++++++++++++++++
>  drivers/usb/host/xhci.h      |   4 ++
>  3 files changed, 130 insertions(+), 1 deletion(-)
> 
> diff --git a/drivers/usb/host/xhci-ring.c b/drivers/usb/host/xhci-ring.c
> index a78787bb5133..2fad9474912a 100644
> --- a/drivers/usb/host/xhci-ring.c
> +++ b/drivers/usb/host/xhci-ring.c
> @@ -3291,7 +3291,7 @@ int xhci_queue_bulk_tx(struct xhci_hcd *xhci, gfp_t mem_flags,
>  
>  	full_len = urb->transfer_buffer_length;
>  	/* If we have scatter/gather list, we use it. */
> -	if (urb->num_sgs) {
> +	if (urb->num_sgs && !(urb->transfer_flags & URB_DMA_MAP_SINGLE)) {
>  		num_sgs = urb->num_mapped_sgs;
>  		sg = urb->sg;
>  		addr = (u64) sg_dma_address(sg);
> diff --git a/drivers/usb/host/xhci.c b/drivers/usb/host/xhci.c
> index fe38275363e0..15f06bc6b1ad 100644
> --- a/drivers/usb/host/xhci.c
> +++ b/drivers/usb/host/xhci.c
> @@ -1256,6 +1256,106 @@ EXPORT_SYMBOL_GPL(xhci_resume);
>  
>  /*-------------------------------------------------------------------------*/
>  
> +static int xhci_map_temp_buffer(struct usb_hcd *hcd, struct urb *urb)
> +{
> +	void *temp;
> +	int ret = 0;
> +	unsigned int len;
> +	unsigned int buf_len;
> +	enum dma_data_direction dir;
> +	struct xhci_hcd *xhci;
> +
> +	xhci = hcd_to_xhci(hcd);
> +	dir = usb_urb_dir_in(urb) ? DMA_FROM_DEVICE : DMA_TO_DEVICE;
> +	buf_len = urb->transfer_buffer_length;
> +
> +	temp = kzalloc_node(buf_len, GFP_ATOMIC,
> +			    dev_to_node(hcd->self.sysdev));
> +
> +	if (usb_urb_dir_out(urb))
> +		len = sg_pcopy_to_buffer(urb->sg, urb->num_sgs,
> +					 temp, buf_len, 0);
> +
> +	urb->transfer_buffer = temp;
> +	urb->transfer_dma = dma_map_single(hcd->self.sysdev,
> +					   urb->transfer_buffer,
> +					   urb->transfer_buffer_length,
> +					   dir);
> +
> +	if (dma_mapping_error(hcd->self.sysdev,
> +			      urb->transfer_dma)) {
> +		ret = -EAGAIN;
> +		kfree(temp);
> +	} else {
> +		urb->transfer_flags |= URB_DMA_MAP_SINGLE;
> +	}
> +
> +	return ret;
> +}
> +
> +static bool xhci_urb_temp_buffer_required(struct usb_hcd *hcd,
> +					  struct urb *urb)
> +{
> +	bool ret = false;
> +	unsigned int i;
> +	unsigned int len = 0;
> +	unsigned int buf_len;
> +	unsigned int trb_size;
> +	unsigned int max_pkt;
> +	struct scatterlist *sg;
> +	struct scatterlist *tail_sg;
> +
> +	sg = urb->sg;
> +	tail_sg = urb->sg;
> +	buf_len = urb->transfer_buffer_length;
> +	max_pkt = usb_endpoint_maxp(&urb->ep->desc);
> +
> +	if (!urb->num_sgs)
> +		return ret;
> +
> +	if (urb->dev->speed >= USB_SPEED_SUPER)
> +		trb_size = TRB_CACHE_SIZE_SS;
> +	else
> +		trb_size = TRB_CACHE_SIZE_HS;
> +
> +	if (urb->transfer_buffer_length != 0 &&
> +	    !(urb->transfer_flags & URB_NO_TRANSFER_DMA_MAP)) {
> +		for_each_sg(urb->sg, sg, urb->num_sgs, i) {
> +			len = len + sg->length;
> +			if (i > trb_size - 2) {
> +				len = len - tail_sg->length;
> +				if (len < max_pkt) {
> +					ret = true;
> +					break;
> +				}
> +
> +				tail_sg = sg_next(tail_sg);
> +			}
> +		}
> +	}
> +	return ret;
> +}
> +
> +static void xhci_unmap_temp_buf(struct urb *urb)
> +{
> +	struct scatterlist *sg;
> +	unsigned int len;
> +	unsigned int buf_len;
> +
> +	sg = urb->sg;
> +	buf_len = urb->transfer_buffer_length;
> +
> +	if (usb_urb_dir_in(urb)) {
> +		len = sg_pcopy_from_buffer(urb->sg, urb->num_sgs,
> +					   urb->transfer_buffer,
> +					   buf_len,
> +					   0);
> +	}
> +
> +	kfree(urb->transfer_buffer);
> +	urb->transfer_buffer = NULL;
> +}
> +
>  /*
>   * Bypass the DMA mapping if URB is suitable for Immediate Transfer (IDT),
>   * we'll copy the actual data into the TRB address register. This is limited to
> @@ -1265,12 +1365,36 @@ EXPORT_SYMBOL_GPL(xhci_resume);
>  static int xhci_map_urb_for_dma(struct usb_hcd *hcd, struct urb *urb,
>  				gfp_t mem_flags)
>  {
> +	struct xhci_hcd *xhci;
> +
> +	xhci = hcd_to_xhci(hcd);
> +
>  	if (xhci_urb_suitable_for_idt(urb))
>  		return 0;
>  
> +	if (xhci->quirks & XHCI_SG_TRB_CACHE_SIZE_QUIRK) {
> +		if (xhci_urb_temp_buffer_required(hcd, urb))
> +			return xhci_map_temp_buffer(hcd, urb);
> +	}
>  	return usb_hcd_map_urb_for_dma(hcd, urb, mem_flags);
>  }
>  
> +static void xhci_unmap_urb_for_dma(struct usb_hcd *hcd, struct urb *urb)
> +{
> +	struct xhci_hcd *xhci;
> +	bool unmap_temp_buf = false;
> +
> +	xhci = hcd_to_xhci(hcd);
> +
> +	if (urb->num_sgs && (urb->transfer_flags & URB_DMA_MAP_SINGLE))
> +		unmap_temp_buf = true;
> +
> +	usb_hcd_unmap_urb_for_dma(hcd, urb);
> +
> +	if ((xhci->quirks & XHCI_SG_TRB_CACHE_SIZE_QUIRK) && unmap_temp_buf)
> +		xhci_unmap_temp_buf(urb);
> +}
> +
>  /**
>   * xhci_get_endpoint_index - Used for passing endpoint bitmasks between the core and
>   * HCDs.  Find the index for an endpoint given its descriptor.  Use the return
> @@ -5315,6 +5439,7 @@ static const struct hc_driver xhci_hc_driver = {
>  	 * managing i/o requests and associated device resources
>  	 */
>  	.map_urb_for_dma =      xhci_map_urb_for_dma,
> +	.unmap_urb_for_dma =    xhci_unmap_urb_for_dma,
>  	.urb_enqueue =		xhci_urb_enqueue,
>  	.urb_dequeue =		xhci_urb_dequeue,
>  	.alloc_dev =		xhci_alloc_dev,
> diff --git a/drivers/usb/host/xhci.h b/drivers/usb/host/xhci.h
> index 4db825459b40..14600214188f 100644
> --- a/drivers/usb/host/xhci.h
> +++ b/drivers/usb/host/xhci.h
> @@ -1330,6 +1330,10 @@ enum xhci_setup_dev {
>  #define TRB_SIA			(1<<31)
>  #define TRB_FRAME_ID(p)		(((p) & 0x7ff) << 20)
>  
> +/* TRB cache size for xHC with TRB cache */
> +#define TRB_CACHE_SIZE_HS	8
> +#define TRB_CACHE_SIZE_SS	16
> +
>  struct xhci_generic_trb {
>  	__le32 field[4];
>  };
> 

Thanks & Regards,
 Tejas Joglekar
Mathias Nyman May 18, 2020, 4:21 p.m. UTC | #2
On 21.4.2020 12.49, Tejas Joglekar wrote:
> The Synopsys xHC has an internal TRB cache of size TRB_CACHE_SIZE for
> each endpoint. The default value for TRB_CACHE_SIZE is 16 for SS and 8
> for HS. The controller loads and updates the TRB cache from the transfer
> ring in system memory whenever the driver issues a start transfer or
> update transfer command.
> 
> For chained TRBs, the Synopsys xHC requires that the total amount of
> bytes for all TRBs loaded in the TRB cache be greater than or equal to 1
> MPS. Or the chain ends within the TRB cache (with a last TRB).
> 
> If this requirement is not met, the controller will not be able to send
> or receive a packet and it will hang causing a driver timeout and error.
> 
> This can be a problem if a class driver queues SG requests with many
> small-buffer entries. The XHCI driver will create a chained TRB for each
> entry which may trigger this issue.
> 
> This patch adds logic to the XHCI driver to detect and prevent this from
> happening.
> 
> For every (TRB_CACHE_SIZE - 2), we check the total buffer size of
> the SG list and if the last window of (TRB_CACHE_SIZE - 2) SG list length
> and we don't make up at least 1 MPS, we create a temporary buffer to
> consolidate full SG list into the buffer.
> 
> We check at (TRB_CACHE_SIZE - 2) window because it is possible that there
> would be a link and/or event data TRB that take up to 2 of the cache
> entries.
> 
> We discovered this issue with devices on other platforms but have not
> yet come across any device that triggers this on Linux. But it could be
> a real problem now or in the future. All it takes is N number of small
> chained TRBs. And other instances of the Synopsys IP may have smaller
> values for the TRB_CACHE_SIZE which would exacerbate the problem.
> 
> Signed-off-by: Tejas Joglekar <joglekar@synopsys.com>
> ---
>  Changes in v2:
>  - Removed redundant debug messages
>  - Modified logic to remove unnecessary changes in hcd.c
>  - Rename the quirk
> 
>  drivers/usb/host/xhci-ring.c |   2 +-
>  drivers/usb/host/xhci.c      | 125 +++++++++++++++++++++++++++++++++++++++++++
>  drivers/usb/host/xhci.h      |   4 ++
>  3 files changed, 130 insertions(+), 1 deletion(-)
> 
> diff --git a/drivers/usb/host/xhci-ring.c b/drivers/usb/host/xhci-ring.c
> index a78787bb5133..2fad9474912a 100644
> --- a/drivers/usb/host/xhci-ring.c
> +++ b/drivers/usb/host/xhci-ring.c
> @@ -3291,7 +3291,7 @@ int xhci_queue_bulk_tx(struct xhci_hcd *xhci, gfp_t mem_flags,
>  
>  	full_len = urb->transfer_buffer_length;
>  	/* If we have scatter/gather list, we use it. */
> -	if (urb->num_sgs) {
> +	if (urb->num_sgs && !(urb->transfer_flags & URB_DMA_MAP_SINGLE)) {
>  		num_sgs = urb->num_mapped_sgs;
>  		sg = urb->sg;
>  		addr = (u64) sg_dma_address(sg);
> diff --git a/drivers/usb/host/xhci.c b/drivers/usb/host/xhci.c
> index fe38275363e0..15f06bc6b1ad 100644
> --- a/drivers/usb/host/xhci.c
> +++ b/drivers/usb/host/xhci.c
> @@ -1256,6 +1256,106 @@ EXPORT_SYMBOL_GPL(xhci_resume);
>  
>  /*-------------------------------------------------------------------------*/
>  
> +static int xhci_map_temp_buffer(struct usb_hcd *hcd, struct urb *urb)
> +{
> +	void *temp;
> +	int ret = 0;
> +	unsigned int len;
> +	unsigned int buf_len;
> +	enum dma_data_direction dir;
> +	struct xhci_hcd *xhci;
> +
> +	xhci = hcd_to_xhci(hcd);
> +	dir = usb_urb_dir_in(urb) ? DMA_FROM_DEVICE : DMA_TO_DEVICE;
> +	buf_len = urb->transfer_buffer_length;
> +
> +	temp = kzalloc_node(buf_len, GFP_ATOMIC,
> +			    dev_to_node(hcd->self.sysdev));
> +
> +	if (usb_urb_dir_out(urb))
> +		len = sg_pcopy_to_buffer(urb->sg, urb->num_sgs,
> +					 temp, buf_len, 0);
> +
> +	urb->transfer_buffer = temp;
> +	urb->transfer_dma = dma_map_single(hcd->self.sysdev,
> +					   urb->transfer_buffer,
> +					   urb->transfer_buffer_length,
> +					   dir);
> +
> +	if (dma_mapping_error(hcd->self.sysdev,
> +			      urb->transfer_dma)) {
> +		ret = -EAGAIN;
> +		kfree(temp);
> +	} else {
> +		urb->transfer_flags |= URB_DMA_MAP_SINGLE;

Not sure if using URB_DMA_MAP_SINGLE to flag that this urb is boucebuffered is
appropriate.

If Greg or Alan don't object then it's fine for me as well.



> +	}
> +
> +	return ret;
> +}
> +
> +static bool xhci_urb_temp_buffer_required(struct usb_hcd *hcd,
> +					  struct urb *urb)
> +{
> +	bool ret = false;
> +	unsigned int i;
> +	unsigned int len = 0;
> +	unsigned int buf_len;
> +	unsigned int trb_size;
> +	unsigned int max_pkt;
> +	struct scatterlist *sg;
> +	struct scatterlist *tail_sg;
> +
> +	sg = urb->sg;
> +	tail_sg = urb->sg;
> +	buf_len = urb->transfer_buffer_length;
> +	max_pkt = usb_endpoint_maxp(&urb->ep->desc);
> +
> +	if (!urb->num_sgs)
> +		return ret;
> +
> +	if (urb->dev->speed >= USB_SPEED_SUPER)
> +		trb_size = TRB_CACHE_SIZE_SS;
> +	else
> +		trb_size = TRB_CACHE_SIZE_HS;
> +
> +	if (urb->transfer_buffer_length != 0 &&
> +	    !(urb->transfer_flags & URB_NO_TRANSFER_DMA_MAP)) {
> +		for_each_sg(urb->sg, sg, urb->num_sgs, i) {
> +			len = len + sg->length;
> +			if (i > trb_size - 2) {
> +				len = len - tail_sg->length;
> +				if (len < max_pkt) {
> +					ret = true;
> +					break;
> +				}
> +
> +				tail_sg = sg_next(tail_sg);
> +			}
> +		}
> +	}
> +	return ret;
> +}
> +
> +static void xhci_unmap_temp_buf(struct urb *urb)
> +{
> +	struct scatterlist *sg;
> +	unsigned int len;
> +	unsigned int buf_len;
> +
> +	sg = urb->sg;
> +	buf_len = urb->transfer_buffer_length;
> +
> +	if (usb_urb_dir_in(urb)) {
> +		len = sg_pcopy_from_buffer(urb->sg, urb->num_sgs,
> +					   urb->transfer_buffer,
> +					   buf_len,
> +					   0);
> +	}
> +
> +	kfree(urb->transfer_buffer);
> +	urb->transfer_buffer = NULL;

clear URB_DMA_MAP_SINGLE from urb->transfer_flags?

Everything else looks good do me.
-Mathias
Tejas Joglekar May 20, 2020, 6:41 a.m. UTC | #3
Hi,
On 5/18/2020 9:51 PM, Mathias Nyman wrote:
> On 21.4.2020 12.49, Tejas Joglekar wrote:
>> The Synopsys xHC has an internal TRB cache of size TRB_CACHE_SIZE for
>> each endpoint. The default value for TRB_CACHE_SIZE is 16 for SS and 8
>> for HS. The controller loads and updates the TRB cache from the transfer
>> ring in system memory whenever the driver issues a start transfer or
>> update transfer command.
>>
>> For chained TRBs, the Synopsys xHC requires that the total amount of
>> bytes for all TRBs loaded in the TRB cache be greater than or equal to 1
>> MPS. Or the chain ends within the TRB cache (with a last TRB).
>>
>> If this requirement is not met, the controller will not be able to send
>> or receive a packet and it will hang causing a driver timeout and error.
>>
>> This can be a problem if a class driver queues SG requests with many
>> small-buffer entries. The XHCI driver will create a chained TRB for each
>> entry which may trigger this issue.
>>
>> This patch adds logic to the XHCI driver to detect and prevent this from
>> happening.
>>
>> For every (TRB_CACHE_SIZE - 2), we check the total buffer size of
>> the SG list and if the last window of (TRB_CACHE_SIZE - 2) SG list length
>> and we don't make up at least 1 MPS, we create a temporary buffer to
>> consolidate full SG list into the buffer.
>>
>> We check at (TRB_CACHE_SIZE - 2) window because it is possible that there
>> would be a link and/or event data TRB that take up to 2 of the cache
>> entries.
>>
>> We discovered this issue with devices on other platforms but have not
>> yet come across any device that triggers this on Linux. But it could be
>> a real problem now or in the future. All it takes is N number of small
>> chained TRBs. And other instances of the Synopsys IP may have smaller
>> values for the TRB_CACHE_SIZE which would exacerbate the problem.
>>
>> Signed-off-by: Tejas Joglekar <joglekar@synopsys.com>
>> ---
>>  Changes in v2:
>>  - Removed redundant debug messages
>>  - Modified logic to remove unnecessary changes in hcd.c
>>  - Rename the quirk
>>
>>  drivers/usb/host/xhci-ring.c |   2 +-
>>  drivers/usb/host/xhci.c      | 125 +++++++++++++++++++++++++++++++++++++++++++
>>  drivers/usb/host/xhci.h      |   4 ++
>>  3 files changed, 130 insertions(+), 1 deletion(-)
>>
>> diff --git a/drivers/usb/host/xhci-ring.c b/drivers/usb/host/xhci-ring.c
>> index a78787bb5133..2fad9474912a 100644
>> --- a/drivers/usb/host/xhci-ring.c
>> +++ b/drivers/usb/host/xhci-ring.c
>> @@ -3291,7 +3291,7 @@ int xhci_queue_bulk_tx(struct xhci_hcd *xhci, gfp_t mem_flags,
>>  
>>  	full_len = urb->transfer_buffer_length;
>>  	/* If we have scatter/gather list, we use it. */
>> -	if (urb->num_sgs) {
>> +	if (urb->num_sgs && !(urb->transfer_flags & URB_DMA_MAP_SINGLE)) {
>>  		num_sgs = urb->num_mapped_sgs;
>>  		sg = urb->sg;
>>  		addr = (u64) sg_dma_address(sg);
>> diff --git a/drivers/usb/host/xhci.c b/drivers/usb/host/xhci.c
>> index fe38275363e0..15f06bc6b1ad 100644
>> --- a/drivers/usb/host/xhci.c
>> +++ b/drivers/usb/host/xhci.c
>> @@ -1256,6 +1256,106 @@ EXPORT_SYMBOL_GPL(xhci_resume);
>>  
>>  /*-------------------------------------------------------------------------*/
>>  
>> +static int xhci_map_temp_buffer(struct usb_hcd *hcd, struct urb *urb)
>> +{
>> +	void *temp;
>> +	int ret = 0;
>> +	unsigned int len;
>> +	unsigned int buf_len;
>> +	enum dma_data_direction dir;
>> +	struct xhci_hcd *xhci;
>> +
>> +	xhci = hcd_to_xhci(hcd);
>> +	dir = usb_urb_dir_in(urb) ? DMA_FROM_DEVICE : DMA_TO_DEVICE;
>> +	buf_len = urb->transfer_buffer_length;
>> +
>> +	temp = kzalloc_node(buf_len, GFP_ATOMIC,
>> +			    dev_to_node(hcd->self.sysdev));
>> +
>> +	if (usb_urb_dir_out(urb))
>> +		len = sg_pcopy_to_buffer(urb->sg, urb->num_sgs,
>> +					 temp, buf_len, 0);
>> +
>> +	urb->transfer_buffer = temp;
>> +	urb->transfer_dma = dma_map_single(hcd->self.sysdev,
>> +					   urb->transfer_buffer,
>> +					   urb->transfer_buffer_length,
>> +					   dir);
>> +
>> +	if (dma_mapping_error(hcd->self.sysdev,
>> +			      urb->transfer_dma)) {
>> +		ret = -EAGAIN;
>> +		kfree(temp);
>> +	} else {
>> +		urb->transfer_flags |= URB_DMA_MAP_SINGLE;
> 
> Not sure if using URB_DMA_MAP_SINGLE to flag that this urb is boucebuffered is
> appropriate.
> 
> If Greg or Alan don't object then it's fine for me as well.
> 
> 
> 
@Greg/Alan do you suggest any change for the flag here?
>> +	}
>> +
>> +	return ret;
>> +}
>> +
>> +static bool xhci_urb_temp_buffer_required(struct usb_hcd *hcd,
>> +					  struct urb *urb)
>> +{
>> +	bool ret = false;
>> +	unsigned int i;
>> +	unsigned int len = 0;
>> +	unsigned int buf_len;
>> +	unsigned int trb_size;
>> +	unsigned int max_pkt;
>> +	struct scatterlist *sg;
>> +	struct scatterlist *tail_sg;
>> +
>> +	sg = urb->sg;
>> +	tail_sg = urb->sg;
>> +	buf_len = urb->transfer_buffer_length;
>> +	max_pkt = usb_endpoint_maxp(&urb->ep->desc);
>> +
>> +	if (!urb->num_sgs)
>> +		return ret;
>> +
>> +	if (urb->dev->speed >= USB_SPEED_SUPER)
>> +		trb_size = TRB_CACHE_SIZE_SS;
>> +	else
>> +		trb_size = TRB_CACHE_SIZE_HS;
>> +
>> +	if (urb->transfer_buffer_length != 0 &&
>> +	    !(urb->transfer_flags & URB_NO_TRANSFER_DMA_MAP)) {
>> +		for_each_sg(urb->sg, sg, urb->num_sgs, i) {
>> +			len = len + sg->length;
>> +			if (i > trb_size - 2) {
>> +				len = len - tail_sg->length;
>> +				if (len < max_pkt) {
>> +					ret = true;
>> +					break;
>> +				}
>> +
>> +				tail_sg = sg_next(tail_sg);
>> +			}
>> +		}
>> +	}
>> +	return ret;
>> +}
>> +
>> +static void xhci_unmap_temp_buf(struct urb *urb)
>> +{
>> +	struct scatterlist *sg;
>> +	unsigned int len;
>> +	unsigned int buf_len;
>> +
>> +	sg = urb->sg;
>> +	buf_len = urb->transfer_buffer_length;
>> +
>> +	if (usb_urb_dir_in(urb)) {
>> +		len = sg_pcopy_from_buffer(urb->sg, urb->num_sgs,
>> +					   urb->transfer_buffer,
>> +					   buf_len,
>> +					   0);
>> +	}
>> +
>> +	kfree(urb->transfer_buffer);
>> +	urb->transfer_buffer = NULL;
> 
> clear URB_DMA_MAP_SINGLE from urb->transfer_flags?
>
In current implmentation the dma_unmap is done with existing 
function usb_hcd_unmap_urb_for_dma() and then buffer is copied
in the xhci_unmap_temp_buf(), so URB_DMA_MAP_SINGLE flag gets
cleared with usb_hcd_unmap_urb_for_dma().
I think in next patch set I will add logic for dma_unmap 
in xhci_unmap_temp_buf() function.  

 
> Everything else looks good do me.
> -Mathias
> 

Thanks & Regards,
 Tejas Joglekar
Alan Stern May 20, 2020, 4:50 p.m. UTC | #4
On Wed, May 20, 2020 at 06:41:16AM +0000, Tejas Joglekar wrote:
> >> +	urb->transfer_buffer = temp;
> >> +	urb->transfer_dma = dma_map_single(hcd->self.sysdev,
> >> +					   urb->transfer_buffer,
> >> +					   urb->transfer_buffer_length,
> >> +					   dir);
> >> +
> >> +	if (dma_mapping_error(hcd->self.sysdev,
> >> +			      urb->transfer_dma)) {
> >> +		ret = -EAGAIN;
> >> +		kfree(temp);
> >> +	} else {
> >> +		urb->transfer_flags |= URB_DMA_MAP_SINGLE;
> > 
> > Not sure if using URB_DMA_MAP_SINGLE to flag that this urb is boucebuffered is
> > appropriate.
> > 
> > If Greg or Alan don't object then it's fine for me as well.
> > 
> > 
> > 
> @Greg/Alan do you suggest any change for the flag here?

This requires care, because the core will already have set other flags 
for this URB.  I don't remember the details and I don't have time to 
check them now.  But it wouldn't be at all surprising if the core 
doesn't expect an URB both to use SG and to use DMA_MAP_SINGLE.

Alan Stern
Tejas Joglekar May 20, 2020, 5 p.m. UTC | #5
Hi,
On 5/20/2020 10:20 PM, Alan Stern wrote:
> On Wed, May 20, 2020 at 06:41:16AM +0000, Tejas Joglekar wrote:
>>>> +	urb->transfer_buffer = temp;
>>>> +	urb->transfer_dma = dma_map_single(hcd->self.sysdev,
>>>> +					   urb->transfer_buffer,
>>>> +					   urb->transfer_buffer_length,
>>>> +					   dir);
>>>> +
>>>> +	if (dma_mapping_error(hcd->self.sysdev,
>>>> +			      urb->transfer_dma)) {
>>>> +		ret = -EAGAIN;
>>>> +		kfree(temp);
>>>> +	} else {
>>>> +		urb->transfer_flags |= URB_DMA_MAP_SINGLE;
>>>
>>> Not sure if using URB_DMA_MAP_SINGLE to flag that this urb is boucebuffered is
>>> appropriate.
>>>
>>> If Greg or Alan don't object then it's fine for me as well.
>>>
>>>
>>>
>> @Greg/Alan do you suggest any change for the flag here?
> 
> This requires care, because the core will already have set other flags 
> for this URB.  I don't remember the details and I don't have time to 
> check them now.  But it wouldn't be at all surprising if the core 
> doesn't expect an URB both to use SG and to use DMA_MAP_SINGLE.
> 
When this quirk is enabled and it is required to consolidate SG list to 
single buffer no other flags are set for the URB. Only question here is 
if it is good to use DMA_MAP_SINGLE flag for the consolidated SG list 
into a single temporary buffer.

> Alan Stern
> 
Thanks & Regards,
 Tejas Joglekar
Alan Stern May 20, 2020, 5:44 p.m. UTC | #6
On Wed, May 20, 2020 at 05:00:50PM +0000, Tejas Joglekar wrote:
> Hi,
> On 5/20/2020 10:20 PM, Alan Stern wrote:
> > On Wed, May 20, 2020 at 06:41:16AM +0000, Tejas Joglekar wrote:
> >>>> +	urb->transfer_buffer = temp;
> >>>> +	urb->transfer_dma = dma_map_single(hcd->self.sysdev,
> >>>> +					   urb->transfer_buffer,
> >>>> +					   urb->transfer_buffer_length,
> >>>> +					   dir);
> >>>> +
> >>>> +	if (dma_mapping_error(hcd->self.sysdev,
> >>>> +			      urb->transfer_dma)) {
> >>>> +		ret = -EAGAIN;
> >>>> +		kfree(temp);
> >>>> +	} else {
> >>>> +		urb->transfer_flags |= URB_DMA_MAP_SINGLE;
> >>>
> >>> Not sure if using URB_DMA_MAP_SINGLE to flag that this urb is boucebuffered is
> >>> appropriate.
> >>>
> >>> If Greg or Alan don't object then it's fine for me as well.
> >>>
> >>>
> >>>
> >> @Greg/Alan do you suggest any change for the flag here?
> > 
> > This requires care, because the core will already have set other flags 
> > for this URB.  I don't remember the details and I don't have time to 
> > check them now.  But it wouldn't be at all surprising if the core 
> > doesn't expect an URB both to use SG and to use DMA_MAP_SINGLE.
> > 
> When this quirk is enabled and it is required to consolidate SG list to 
> single buffer no other flags are set for the URB.

I don't believe that.  What about URB_DMA_MAP_SG or 
URB_DMA_MAP_PAGE?  The USB core certainly would have set one of them 
already.  Not only would you need to clear that flag, you want also need 
to undo the mapping.

> Only question here is 
> if it is good to use DMA_MAP_SINGLE flag for the consolidated SG list 
> into a single temporary buffer.

If you call dma_map_single() then USB_DMA_MAP_SINGLE is the appropriate 
flag to sest.

Alan Stern
Tejas Joglekar May 22, 2020, 1:58 a.m. UTC | #7
Hi,
On 5/20/2020 11:14 PM, Alan Stern wrote:
> On Wed, May 20, 2020 at 05:00:50PM +0000, Tejas Joglekar wrote:
>> Hi,
>> On 5/20/2020 10:20 PM, Alan Stern wrote:
>>> On Wed, May 20, 2020 at 06:41:16AM +0000, Tejas Joglekar wrote:
>>>>>> +	urb->transfer_buffer = temp;
>>>>>> +	urb->transfer_dma = dma_map_single(hcd->self.sysdev,
>>>>>> +					   urb->transfer_buffer,
>>>>>> +					   urb->transfer_buffer_length,
>>>>>> +					   dir);
>>>>>> +
>>>>>> +	if (dma_mapping_error(hcd->self.sysdev,
>>>>>> +			      urb->transfer_dma)) {
>>>>>> +		ret = -EAGAIN;
>>>>>> +		kfree(temp);
>>>>>> +	} else {
>>>>>> +		urb->transfer_flags |= URB_DMA_MAP_SINGLE;
>>>>>
>>>>> Not sure if using URB_DMA_MAP_SINGLE to flag that this urb is boucebuffered is
>>>>> appropriate.
>>>>>
>>>>> If Greg or Alan don't object then it's fine for me as well.
>>>>>
>>>>>
>>>>>
>>>> @Greg/Alan do you suggest any change for the flag here?
>>>
>>> This requires care, because the core will already have set other flags 
>>> for this URB.  I don't remember the details and I don't have time to 
>>> check them now.  But it wouldn't be at all surprising if the core 
>>> doesn't expect an URB both to use SG and to use DMA_MAP_SINGLE.
>>>
>> When this quirk is enabled and it is required to consolidate SG list to 
>> single buffer no other flags are set for the URB.
> 
> I don't believe that.  What about URB_DMA_MAP_SG or 
> URB_DMA_MAP_PAGE?  The USB core certainly would have set one of them 
> already.  Not only would you need to clear that flag, you want also need 
> to undo the mapping.
>
URB_DMA_MAP_SG or URB_DMA_MAP_PAGE flags are set for an URB in core within
function usb_hcd_map_urb_for_dma(), with my patch the xhci_map_urb_for_dma()
does not call usb_hcd_map_urb_for_dma() when SG list consolidation is required
but it calls the newly added function xhci_map_temp_buffer() within that function
only dma_map_single() is called to map consolidated SG list.
 
>> Only question here is 
>> if it is good to use DMA_MAP_SINGLE flag for the consolidated SG list 
>> into a single temporary buffer.
> 
> If you call dma_map_single() then USB_DMA_MAP_SINGLE is the appropriate 
> flag to sest.
> 
Yes, within xhci_map_temp_buffer(), after consolidating the SG list in a temp
buffer I call dma_map_single() to map the temp buffer.

> Alan Stern
> 

Thanks & Regards,
 Tejas Joglekar
Alan Stern May 22, 2020, 2:21 p.m. UTC | #8
On Fri, May 22, 2020 at 01:58:43AM +0000, Tejas Joglekar wrote:
> Hi,
> On 5/20/2020 11:14 PM, Alan Stern wrote:
> > On Wed, May 20, 2020 at 05:00:50PM +0000, Tejas Joglekar wrote:
> >> Hi,
> >> On 5/20/2020 10:20 PM, Alan Stern wrote:
> >>> On Wed, May 20, 2020 at 06:41:16AM +0000, Tejas Joglekar wrote:
> >>>>>> +	urb->transfer_buffer = temp;
> >>>>>> +	urb->transfer_dma = dma_map_single(hcd->self.sysdev,
> >>>>>> +					   urb->transfer_buffer,
> >>>>>> +					   urb->transfer_buffer_length,
> >>>>>> +					   dir);
> >>>>>> +
> >>>>>> +	if (dma_mapping_error(hcd->self.sysdev,
> >>>>>> +			      urb->transfer_dma)) {
> >>>>>> +		ret = -EAGAIN;
> >>>>>> +		kfree(temp);
> >>>>>> +	} else {
> >>>>>> +		urb->transfer_flags |= URB_DMA_MAP_SINGLE;
> >>>>>
> >>>>> Not sure if using URB_DMA_MAP_SINGLE to flag that this urb is boucebuffered is
> >>>>> appropriate.
> >>>>>
> >>>>> If Greg or Alan don't object then it's fine for me as well.
> >>>>>
> >>>>>
> >>>>>
> >>>> @Greg/Alan do you suggest any change for the flag here?
> >>>
> >>> This requires care, because the core will already have set other flags 
> >>> for this URB.  I don't remember the details and I don't have time to 
> >>> check them now.  But it wouldn't be at all surprising if the core 
> >>> doesn't expect an URB both to use SG and to use DMA_MAP_SINGLE.
> >>>
> >> When this quirk is enabled and it is required to consolidate SG list to 
> >> single buffer no other flags are set for the URB.
> > 
> > I don't believe that.  What about URB_DMA_MAP_SG or 
> > URB_DMA_MAP_PAGE?  The USB core certainly would have set one of them 
> > already.  Not only would you need to clear that flag, you want also need 
> > to undo the mapping.
> >
> URB_DMA_MAP_SG or URB_DMA_MAP_PAGE flags are set for an URB in core within
> function usb_hcd_map_urb_for_dma(), with my patch the xhci_map_urb_for_dma()
> does not call usb_hcd_map_urb_for_dma() when SG list consolidation is required
> but it calls the newly added function xhci_map_temp_buffer() within that function
> only dma_map_single() is called to map consolidated SG list.

Ah, in your patch xhci-hcd's map_urb_for_dma() routine bypasses 
usb_hcd_map_urb_for_dma().  I hadn't noticed that.

Then yes, your use of the URB flags is okay.

Alan Stern

> >> Only question here is 
> >> if it is good to use DMA_MAP_SINGLE flag for the consolidated SG list 
> >> into a single temporary buffer.
> > 
> > If you call dma_map_single() then USB_DMA_MAP_SINGLE is the appropriate 
> > flag to sest.
> > 
> Yes, within xhci_map_temp_buffer(), after consolidating the SG list in a temp
> buffer I call dma_map_single() to map the temp buffer.
> 
> > Alan Stern
> > 
> 
> Thanks & Regards,
>  Tejas Joglekar
Tejas Joglekar May 22, 2020, 5:22 p.m. UTC | #9
Hi,
On 5/22/2020 7:51 PM, Alan Stern wrote:
> On Fri, May 22, 2020 at 01:58:43AM +0000, Tejas Joglekar wrote:
>> Hi,
>> On 5/20/2020 11:14 PM, Alan Stern wrote:
>>> On Wed, May 20, 2020 at 05:00:50PM +0000, Tejas Joglekar wrote:
>>>> Hi,
>>>> On 5/20/2020 10:20 PM, Alan Stern wrote:
>>>>> On Wed, May 20, 2020 at 06:41:16AM +0000, Tejas Joglekar wrote:
>>>>>>>> +	urb->transfer_buffer = temp;
>>>>>>>> +	urb->transfer_dma = dma_map_single(hcd->self.sysdev,
>>>>>>>> +					   urb->transfer_buffer,
>>>>>>>> +					   urb->transfer_buffer_length,
>>>>>>>> +					   dir);
>>>>>>>> +
>>>>>>>> +	if (dma_mapping_error(hcd->self.sysdev,
>>>>>>>> +			      urb->transfer_dma)) {
>>>>>>>> +		ret = -EAGAIN;
>>>>>>>> +		kfree(temp);
>>>>>>>> +	} else {
>>>>>>>> +		urb->transfer_flags |= URB_DMA_MAP_SINGLE;
>>>>>>>
>>>>>>> Not sure if using URB_DMA_MAP_SINGLE to flag that this urb is boucebuffered is
>>>>>>> appropriate.
>>>>>>>
>>>>>>> If Greg or Alan don't object then it's fine for me as well.
>>>>>>>
>>>>>>>
>>>>>>>
>>>>>> @Greg/Alan do you suggest any change for the flag here?
>>>>>
>>>>> This requires care, because the core will already have set other flags 
>>>>> for this URB.  I don't remember the details and I don't have time to 
>>>>> check them now.  But it wouldn't be at all surprising if the core 
>>>>> doesn't expect an URB both to use SG and to use DMA_MAP_SINGLE.
>>>>>
>>>> When this quirk is enabled and it is required to consolidate SG list to 
>>>> single buffer no other flags are set for the URB.
>>>
>>> I don't believe that.  What about URB_DMA_MAP_SG or 
>>> URB_DMA_MAP_PAGE?  The USB core certainly would have set one of them 
>>> already.  Not only would you need to clear that flag, you want also need 
>>> to undo the mapping.
>>>
>> URB_DMA_MAP_SG or URB_DMA_MAP_PAGE flags are set for an URB in core within
>> function usb_hcd_map_urb_for_dma(), with my patch the xhci_map_urb_for_dma()
>> does not call usb_hcd_map_urb_for_dma() when SG list consolidation is required
>> but it calls the newly added function xhci_map_temp_buffer() within that function
>> only dma_map_single() is called to map consolidated SG list.
> 
> Ah, in your patch xhci-hcd's map_urb_for_dma() routine bypasses 
> usb_hcd_map_urb_for_dma().  I hadn't noticed that.
> 
> Then yes, your use of the URB flags is okay.
> 
> Alan Stern
> 
Thanks for the review Alan
@Mathias: I will send v3 after updating the unmap function.

Thanks & Regards,
Tejas Joglekar
diff mbox series

Patch

diff --git a/drivers/usb/host/xhci-ring.c b/drivers/usb/host/xhci-ring.c
index a78787bb5133..2fad9474912a 100644
--- a/drivers/usb/host/xhci-ring.c
+++ b/drivers/usb/host/xhci-ring.c
@@ -3291,7 +3291,7 @@  int xhci_queue_bulk_tx(struct xhci_hcd *xhci, gfp_t mem_flags,
 
 	full_len = urb->transfer_buffer_length;
 	/* If we have scatter/gather list, we use it. */
-	if (urb->num_sgs) {
+	if (urb->num_sgs && !(urb->transfer_flags & URB_DMA_MAP_SINGLE)) {
 		num_sgs = urb->num_mapped_sgs;
 		sg = urb->sg;
 		addr = (u64) sg_dma_address(sg);
diff --git a/drivers/usb/host/xhci.c b/drivers/usb/host/xhci.c
index fe38275363e0..15f06bc6b1ad 100644
--- a/drivers/usb/host/xhci.c
+++ b/drivers/usb/host/xhci.c
@@ -1256,6 +1256,106 @@  EXPORT_SYMBOL_GPL(xhci_resume);
 
 /*-------------------------------------------------------------------------*/
 
+static int xhci_map_temp_buffer(struct usb_hcd *hcd, struct urb *urb)
+{
+	void *temp;
+	int ret = 0;
+	unsigned int len;
+	unsigned int buf_len;
+	enum dma_data_direction dir;
+	struct xhci_hcd *xhci;
+
+	xhci = hcd_to_xhci(hcd);
+	dir = usb_urb_dir_in(urb) ? DMA_FROM_DEVICE : DMA_TO_DEVICE;
+	buf_len = urb->transfer_buffer_length;
+
+	temp = kzalloc_node(buf_len, GFP_ATOMIC,
+			    dev_to_node(hcd->self.sysdev));
+
+	if (usb_urb_dir_out(urb))
+		len = sg_pcopy_to_buffer(urb->sg, urb->num_sgs,
+					 temp, buf_len, 0);
+
+	urb->transfer_buffer = temp;
+	urb->transfer_dma = dma_map_single(hcd->self.sysdev,
+					   urb->transfer_buffer,
+					   urb->transfer_buffer_length,
+					   dir);
+
+	if (dma_mapping_error(hcd->self.sysdev,
+			      urb->transfer_dma)) {
+		ret = -EAGAIN;
+		kfree(temp);
+	} else {
+		urb->transfer_flags |= URB_DMA_MAP_SINGLE;
+	}
+
+	return ret;
+}
+
+static bool xhci_urb_temp_buffer_required(struct usb_hcd *hcd,
+					  struct urb *urb)
+{
+	bool ret = false;
+	unsigned int i;
+	unsigned int len = 0;
+	unsigned int buf_len;
+	unsigned int trb_size;
+	unsigned int max_pkt;
+	struct scatterlist *sg;
+	struct scatterlist *tail_sg;
+
+	sg = urb->sg;
+	tail_sg = urb->sg;
+	buf_len = urb->transfer_buffer_length;
+	max_pkt = usb_endpoint_maxp(&urb->ep->desc);
+
+	if (!urb->num_sgs)
+		return ret;
+
+	if (urb->dev->speed >= USB_SPEED_SUPER)
+		trb_size = TRB_CACHE_SIZE_SS;
+	else
+		trb_size = TRB_CACHE_SIZE_HS;
+
+	if (urb->transfer_buffer_length != 0 &&
+	    !(urb->transfer_flags & URB_NO_TRANSFER_DMA_MAP)) {
+		for_each_sg(urb->sg, sg, urb->num_sgs, i) {
+			len = len + sg->length;
+			if (i > trb_size - 2) {
+				len = len - tail_sg->length;
+				if (len < max_pkt) {
+					ret = true;
+					break;
+				}
+
+				tail_sg = sg_next(tail_sg);
+			}
+		}
+	}
+	return ret;
+}
+
+static void xhci_unmap_temp_buf(struct urb *urb)
+{
+	struct scatterlist *sg;
+	unsigned int len;
+	unsigned int buf_len;
+
+	sg = urb->sg;
+	buf_len = urb->transfer_buffer_length;
+
+	if (usb_urb_dir_in(urb)) {
+		len = sg_pcopy_from_buffer(urb->sg, urb->num_sgs,
+					   urb->transfer_buffer,
+					   buf_len,
+					   0);
+	}
+
+	kfree(urb->transfer_buffer);
+	urb->transfer_buffer = NULL;
+}
+
 /*
  * Bypass the DMA mapping if URB is suitable for Immediate Transfer (IDT),
  * we'll copy the actual data into the TRB address register. This is limited to
@@ -1265,12 +1365,36 @@  EXPORT_SYMBOL_GPL(xhci_resume);
 static int xhci_map_urb_for_dma(struct usb_hcd *hcd, struct urb *urb,
 				gfp_t mem_flags)
 {
+	struct xhci_hcd *xhci;
+
+	xhci = hcd_to_xhci(hcd);
+
 	if (xhci_urb_suitable_for_idt(urb))
 		return 0;
 
+	if (xhci->quirks & XHCI_SG_TRB_CACHE_SIZE_QUIRK) {
+		if (xhci_urb_temp_buffer_required(hcd, urb))
+			return xhci_map_temp_buffer(hcd, urb);
+	}
 	return usb_hcd_map_urb_for_dma(hcd, urb, mem_flags);
 }
 
+static void xhci_unmap_urb_for_dma(struct usb_hcd *hcd, struct urb *urb)
+{
+	struct xhci_hcd *xhci;
+	bool unmap_temp_buf = false;
+
+	xhci = hcd_to_xhci(hcd);
+
+	if (urb->num_sgs && (urb->transfer_flags & URB_DMA_MAP_SINGLE))
+		unmap_temp_buf = true;
+
+	usb_hcd_unmap_urb_for_dma(hcd, urb);
+
+	if ((xhci->quirks & XHCI_SG_TRB_CACHE_SIZE_QUIRK) && unmap_temp_buf)
+		xhci_unmap_temp_buf(urb);
+}
+
 /**
  * xhci_get_endpoint_index - Used for passing endpoint bitmasks between the core and
  * HCDs.  Find the index for an endpoint given its descriptor.  Use the return
@@ -5315,6 +5439,7 @@  static const struct hc_driver xhci_hc_driver = {
 	 * managing i/o requests and associated device resources
 	 */
 	.map_urb_for_dma =      xhci_map_urb_for_dma,
+	.unmap_urb_for_dma =    xhci_unmap_urb_for_dma,
 	.urb_enqueue =		xhci_urb_enqueue,
 	.urb_dequeue =		xhci_urb_dequeue,
 	.alloc_dev =		xhci_alloc_dev,
diff --git a/drivers/usb/host/xhci.h b/drivers/usb/host/xhci.h
index 4db825459b40..14600214188f 100644
--- a/drivers/usb/host/xhci.h
+++ b/drivers/usb/host/xhci.h
@@ -1330,6 +1330,10 @@  enum xhci_setup_dev {
 #define TRB_SIA			(1<<31)
 #define TRB_FRAME_ID(p)		(((p) & 0x7ff) << 20)
 
+/* TRB cache size for xHC with TRB cache */
+#define TRB_CACHE_SIZE_HS	8
+#define TRB_CACHE_SIZE_SS	16
+
 struct xhci_generic_trb {
 	__le32 field[4];
 };