diff mbox series

[net,v4,1/2] net/ps3_gelic_net: Fix RX sk_buff length

Message ID 4150b1589ed367e18855c16762ff160e9d73a42f.1675632296.git.geoff@infradead.org (mailing list archive)
State Superseded
Delegated to: Netdev Maintainers
Headers show
Series net/ps3_gelic_net: DMA related fixes | expand

Checks

Context Check Description
netdev/tree_selection success Clearly marked for net
netdev/fixes_present success Fixes tag present in non-next series
netdev/subject_prefix success Link
netdev/cover_letter success Series has a cover letter
netdev/patch_count success Link
netdev/header_inline success No static functions without inline keyword in header files
netdev/build_32bit success Errors and warnings before: 0 this patch: 0
netdev/cc_maintainers fail 2 blamed authors not CCed: jeff@garzik.org mokuno@sm.sony.co.jp; 8 maintainers not CCed: pabeni@redhat.com npiggin@gmail.com christophe.leroy@csgroup.eu linuxppc-dev@lists.ozlabs.org edumazet@google.com jeff@garzik.org mpe@ellerman.id.au mokuno@sm.sony.co.jp
netdev/build_clang success Errors and warnings before: 0 this patch: 0
netdev/module_param success Was 0 now: 0
netdev/verify_signedoff success Signed-off-by tag matches author and committer
netdev/check_selftest success No net selftest shell script
netdev/verify_fixes success Fixes tag looks correct
netdev/build_allmodconfig_warn success Errors and warnings before: 0 this patch: 0
netdev/checkpatch warning CHECK: Alignment should match open parenthesis WARNING: Please use correct Fixes: style 'Fixes: <12 chars of sha1> ("<title line>")' - ie: 'Fixes: 02c1889166b4 ("ps3: gigabit ethernet driver for PS3, take3")'
netdev/kdoc success Errors and warnings before: 3 this patch: 3
netdev/source_inline success Was 0 now: 0

Commit Message

Geoff Levand Feb. 5, 2023, 10:10 p.m. UTC
The Gelic Ethernet device needs to have the RX sk_buffs aligned to
GELIC_NET_RXBUF_ALIGN and the length of the RX sk_buffs must be a multiple of
GELIC_NET_RXBUF_ALIGN.

The current Gelic Ethernet driver was not allocating sk_buffs large enough to
allow for this alignment.

Fixes various randomly occurring runtime network errors.

Fixes: 02c1889166b4 (ps3: gigabit ethernet driver for PS3, take3)
Signed-off-by: Geoff Levand <geoff@infradead.org>
---
 drivers/net/ethernet/toshiba/ps3_gelic_net.c | 56 ++++++++++++--------
 1 file changed, 34 insertions(+), 22 deletions(-)

Comments

Alexander Duyck Feb. 6, 2023, 4:25 p.m. UTC | #1
On Sun, 2023-02-05 at 22:10 +0000, Geoff Levand wrote:
> The Gelic Ethernet device needs to have the RX sk_buffs aligned to
> GELIC_NET_RXBUF_ALIGN and the length of the RX sk_buffs must be a multiple of
> GELIC_NET_RXBUF_ALIGN.
> 
> The current Gelic Ethernet driver was not allocating sk_buffs large enough to
> allow for this alignment.
> 
> Fixes various randomly occurring runtime network errors.
> 
> Fixes: 02c1889166b4 (ps3: gigabit ethernet driver for PS3, take3)
> Signed-off-by: Geoff Levand <geoff@infradead.org>
> ---
>  drivers/net/ethernet/toshiba/ps3_gelic_net.c | 56 ++++++++++++--------
>  1 file changed, 34 insertions(+), 22 deletions(-)
> 
> diff --git a/drivers/net/ethernet/toshiba/ps3_gelic_net.c b/drivers/net/ethernet/toshiba/ps3_gelic_net.c
> index cf8de8a7a8a1..7a8b5e1e77a6 100644
> --- a/drivers/net/ethernet/toshiba/ps3_gelic_net.c
> +++ b/drivers/net/ethernet/toshiba/ps3_gelic_net.c
> @@ -365,51 +365,63 @@ static int gelic_card_init_chain(struct gelic_card *card,
>   *
>   * allocates a new rx skb, iommu-maps it and attaches it to the descriptor.
>   * Activate the descriptor state-wise
> + *
> + * Gelic RX sk_buffs must be aligned to GELIC_NET_RXBUF_ALIGN and the length
> + * must be a multiple of GELIC_NET_RXBUF_ALIGN.
>   */
>  static int gelic_descr_prepare_rx(struct gelic_card *card,
>  				  struct gelic_descr *descr)
>  {
> -	int offset;
> -	unsigned int bufsize;
> +	struct device *dev = ctodev(card);
> +	struct {
> +		unsigned int total_bytes;
> +		unsigned int offset;
> +	} aligned_buf;
> +	dma_addr_t cpu_addr;
>  
>  	if (gelic_descr_get_status(descr) !=  GELIC_DESCR_DMA_NOT_IN_USE)
>  		dev_info(ctodev(card), "%s: ERROR status\n", __func__);
> -	/* we need to round up the buffer size to a multiple of 128 */
> -	bufsize = ALIGN(GELIC_NET_MAX_MTU, GELIC_NET_RXBUF_ALIGN);
>  
> -	/* and we need to have it 128 byte aligned, therefore we allocate a
> -	 * bit more */
> -	descr->skb = dev_alloc_skb(bufsize + GELIC_NET_RXBUF_ALIGN - 1);
> +	aligned_buf.total_bytes = (GELIC_NET_RXBUF_ALIGN - 1) +
> +		GELIC_NET_MAX_MTU + (GELIC_NET_RXBUF_ALIGN - 1);
> +

This value isn't aligned to anything as there have been no steps taken
to align it. In fact it is guaranteed to be off by 2. Did you maybe
mean to use an "&" somewhere?

> +	descr->skb = dev_alloc_skb(aligned_buf.total_bytes);
> +
>  	if (!descr->skb) {
> -		descr->buf_addr = 0; /* tell DMAC don't touch memory */
> +		descr->buf_addr = 0;
>  		return -ENOMEM;

Why remove this comment?

>  	}
> -	descr->buf_size = cpu_to_be32(bufsize);
> +
> +	aligned_buf.offset =
> +		PTR_ALIGN(descr->skb->data, GELIC_NET_RXBUF_ALIGN) -
> +			descr->skb->data;
> +
> +	descr->buf_size = ALIGN(GELIC_NET_MAX_MTU, GELIC_NET_RXBUF_ALIGN);

Originally this was being written using cpu_to_be32. WIth this you are
writing it raw w/ the cpu endianness. Is there a byte ordering issue
here?

>  	descr->dmac_cmd_status = 0;
>  	descr->result_size = 0;
>  	descr->valid_size = 0;
>  	descr->data_error = 0;
>  
> -	offset = ((unsigned long)descr->skb->data) &
> -		(GELIC_NET_RXBUF_ALIGN - 1);
> -	if (offset)
> -		skb_reserve(descr->skb, GELIC_NET_RXBUF_ALIGN - offset);

Rather than messing with all this it might be easier to just drop
offset in favor of NET_SKB_PAD since that should be offset in all cases
where dev_alloc_skb is being used. With that the reserve could just be
a constant.

> -	/* io-mmu-map the skb */
> -	descr->buf_addr = cpu_to_be32(dma_map_single(ctodev(card),
> -						     descr->skb->data,
> -						     GELIC_NET_MAX_MTU,
> -						     DMA_FROM_DEVICE));
> +	skb_reserve(descr->skb, aligned_buf.offset);
> +
> +	cpu_addr = dma_map_single(dev, descr->skb->data, descr->buf_size,
> +		DMA_FROM_DEVICE);
> +
> +	descr->buf_addr = cpu_to_be32(cpu_addr);
> +
>  	if (!descr->buf_addr) {

This check should be for dma_mapping_error based on "cpu_addr". There
are some configs that don't return NULL to indicate a mapping error.

>  		dev_kfree_skb_any(descr->skb);
> +		descr->buf_addr = 0;
> +		descr->buf_size = 0;
>  		descr->skb = NULL;
> -		dev_info(ctodev(card),
> +		dev_info(dev,
>  			 "%s:Could not iommu-map rx buffer\n", __func__);
>  		gelic_descr_set_status(descr, GELIC_DESCR_DMA_NOT_IN_USE);
>  		return -ENOMEM;
> -	} else {
> -		gelic_descr_set_status(descr, GELIC_DESCR_DMA_CARDOWNED);
> -		return 0;
>  	}
> +
> +	gelic_descr_set_status(descr, GELIC_DESCR_DMA_CARDOWNED);
> +	return 0;
>  }
>  
>  /**
Geoff Levand Feb. 12, 2023, 5:06 p.m. UTC | #2
Hi Alexander,

Thanks for the review.

On 2/6/23 08:25, Alexander H Duyck wrote:
> On Sun, 2023-02-05 at 22:10 +0000, Geoff Levand wrote:
>> The Gelic Ethernet device needs to have the RX sk_buffs aligned to
>> GELIC_NET_RXBUF_ALIGN and the length of the RX sk_buffs must be a multiple of
>> GELIC_NET_RXBUF_ALIGN.
>>

>>  static int gelic_descr_prepare_rx(struct gelic_card *card,
>>  				  struct gelic_descr *descr)
>>  {
>> -	int offset;
>> -	unsigned int bufsize;
>> +	struct device *dev = ctodev(card);
>> +	struct {
>> +		unsigned int total_bytes;
>> +		unsigned int offset;
>> +	} aligned_buf;
>> +	dma_addr_t cpu_addr;
>>  
>>  	if (gelic_descr_get_status(descr) !=  GELIC_DESCR_DMA_NOT_IN_USE)
>>  		dev_info(ctodev(card), "%s: ERROR status\n", __func__);
>> -	/* we need to round up the buffer size to a multiple of 128 */
>> -	bufsize = ALIGN(GELIC_NET_MAX_MTU, GELIC_NET_RXBUF_ALIGN);
>>  
>> -	/* and we need to have it 128 byte aligned, therefore we allocate a
>> -	 * bit more */
>> -	descr->skb = dev_alloc_skb(bufsize + GELIC_NET_RXBUF_ALIGN - 1);
>> +	aligned_buf.total_bytes = (GELIC_NET_RXBUF_ALIGN - 1) +
>> +		GELIC_NET_MAX_MTU + (GELIC_NET_RXBUF_ALIGN - 1);
>> +
> 
> This value isn't aligned to anything as there have been no steps taken
> to align it. In fact it is guaranteed to be off by 2. Did you maybe
> mean to use an "&" somewhere?

total_bytes here means the total number of bytes to allocate that will
allow for the desired alignment.  This value a bit too much though since
we really just need it to end on a GELIC_NET_RXBUF_ALIGN boundary, so 
adding ALIGN(GELIC_NET_MAX_MTU, GELIC_NET_RXBUF_ALIGN) should be enough.
I'll fix that in the next patch version.

>> +	descr->skb = dev_alloc_skb(aligned_buf.total_bytes);
>> +
>>  	if (!descr->skb) {
>> -		descr->buf_addr = 0; /* tell DMAC don't touch memory */
>> +		descr->buf_addr = 0;
>>  		return -ENOMEM;
> 
> Why remove this comment?

If we return -ENOMEM this descriptor shouldn't be used.

>>  	}
>> -	descr->buf_size = cpu_to_be32(bufsize);
>> +
>> +	aligned_buf.offset =
>> +		PTR_ALIGN(descr->skb->data, GELIC_NET_RXBUF_ALIGN) -
>> +			descr->skb->data;
>> +
>> +	descr->buf_size = ALIGN(GELIC_NET_MAX_MTU, GELIC_NET_RXBUF_ALIGN);
> 
> Originally this was being written using cpu_to_be32. WIth this you are
> writing it raw w/ the cpu endianness. Is there a byte ordering issue
> here?

No. The PS3 has a big endian CPU, so we really don't need any
of the endian conversions.

> 
>>  	descr->dmac_cmd_status = 0;
>>  	descr->result_size = 0;
>>  	descr->valid_size = 0;
>>  	descr->data_error = 0;
>>  
>> -	offset = ((unsigned long)descr->skb->data) &
>> -		(GELIC_NET_RXBUF_ALIGN - 1);
>> -	if (offset)
>> -		skb_reserve(descr->skb, GELIC_NET_RXBUF_ALIGN - offset);
> 
> Rather than messing with all this it might be easier to just drop
> offset in favor of NET_SKB_PAD since that should be offset in all cases
> where dev_alloc_skb is being used. With that the reserve could just be
> a constant.

GELIC_NET_RXBUF_ALIGN is a property of the gelic hardware device.  I
would think if NET_SKB_PAD would work it would just be by coincidence.

>> -	/* io-mmu-map the skb */
>> -	descr->buf_addr = cpu_to_be32(dma_map_single(ctodev(card),
>> -						     descr->skb->data,
>> -						     GELIC_NET_MAX_MTU,
>> -						     DMA_FROM_DEVICE));
>> +	skb_reserve(descr->skb, aligned_buf.offset);
>> +
>> +	cpu_addr = dma_map_single(dev, descr->skb->data, descr->buf_size,
>> +		DMA_FROM_DEVICE);
>> +
>> +	descr->buf_addr = cpu_to_be32(cpu_addr);
>> +
>>  	if (!descr->buf_addr) {
> 
> This check should be for dma_mapping_error based on "cpu_addr". There
> are some configs that don't return NULL to indicate a mapping error.

As was requested, I have put those corrections into the second patch
of this series.

-Geoff
Alexander Duyck Feb. 13, 2023, 3:14 p.m. UTC | #3
On Sun, Feb 12, 2023 at 9:06 AM Geoff Levand <geoff@infradead.org> wrote:
>
> Hi Alexander,
>
> Thanks for the review.
>
> On 2/6/23 08:25, Alexander H Duyck wrote:
> > On Sun, 2023-02-05 at 22:10 +0000, Geoff Levand wrote:
> >> The Gelic Ethernet device needs to have the RX sk_buffs aligned to
> >> GELIC_NET_RXBUF_ALIGN and the length of the RX sk_buffs must be a multiple of
> >> GELIC_NET_RXBUF_ALIGN.
> >>
>
> >>  static int gelic_descr_prepare_rx(struct gelic_card *card,
> >>                                struct gelic_descr *descr)
> >>  {
> >> -    int offset;
> >> -    unsigned int bufsize;
> >> +    struct device *dev = ctodev(card);
> >> +    struct {
> >> +            unsigned int total_bytes;
> >> +            unsigned int offset;
> >> +    } aligned_buf;
> >> +    dma_addr_t cpu_addr;
> >>
> >>      if (gelic_descr_get_status(descr) !=  GELIC_DESCR_DMA_NOT_IN_USE)
> >>              dev_info(ctodev(card), "%s: ERROR status\n", __func__);
> >> -    /* we need to round up the buffer size to a multiple of 128 */
> >> -    bufsize = ALIGN(GELIC_NET_MAX_MTU, GELIC_NET_RXBUF_ALIGN);
> >>
> >> -    /* and we need to have it 128 byte aligned, therefore we allocate a
> >> -     * bit more */
> >> -    descr->skb = dev_alloc_skb(bufsize + GELIC_NET_RXBUF_ALIGN - 1);
> >> +    aligned_buf.total_bytes = (GELIC_NET_RXBUF_ALIGN - 1) +
> >> +            GELIC_NET_MAX_MTU + (GELIC_NET_RXBUF_ALIGN - 1);
> >> +
> >
> > This value isn't aligned to anything as there have been no steps taken
> > to align it. In fact it is guaranteed to be off by 2. Did you maybe
> > mean to use an "&" somewhere?
>
> total_bytes here means the total number of bytes to allocate that will
> allow for the desired alignment.  This value a bit too much though since
> we really just need it to end on a GELIC_NET_RXBUF_ALIGN boundary, so
> adding ALIGN(GELIC_NET_MAX_MTU, GELIC_NET_RXBUF_ALIGN) should be enough.
> I'll fix that in the next patch version.
>
> >> +    descr->skb = dev_alloc_skb(aligned_buf.total_bytes);
> >> +
> >>      if (!descr->skb) {
> >> -            descr->buf_addr = 0; /* tell DMAC don't touch memory */
> >> +            descr->buf_addr = 0;
> >>              return -ENOMEM;
> >
> > Why remove this comment?
>
> If we return -ENOMEM this descriptor shouldn't be used.

Right. But the comment is essentially saying that. The question is why
remove the documentation?

> >>      }
> >> -    descr->buf_size = cpu_to_be32(bufsize);
> >> +
> >> +    aligned_buf.offset =
> >> +            PTR_ALIGN(descr->skb->data, GELIC_NET_RXBUF_ALIGN) -
> >> +                    descr->skb->data;
> >> +
> >> +    descr->buf_size = ALIGN(GELIC_NET_MAX_MTU, GELIC_NET_RXBUF_ALIGN);
> >
> > Originally this was being written using cpu_to_be32. WIth this you are
> > writing it raw w/ the cpu endianness. Is there a byte ordering issue
> > here?
>
> No. The PS3 has a big endian CPU, so we really don't need any
> of the endian conversions.

This doesn't introduce an ordering error, but it introduces a type
error. The bufsize variable was defined as a CPU ordered variable
whereas buf_size is a __be32. If you enable sparse checking it should
return an error.

I would recommend keeping the cpu_to_be32. If your architecture is big
endian it will do nothing and add no overhead. If at some point in the
future someone were to plug this IP into another architecture it would
take care of sorting out the byte ordering for you.

> >
> >>      descr->dmac_cmd_status = 0;
> >>      descr->result_size = 0;
> >>      descr->valid_size = 0;
> >>      descr->data_error = 0;
> >>
> >> -    offset = ((unsigned long)descr->skb->data) &
> >> -            (GELIC_NET_RXBUF_ALIGN - 1);
> >> -    if (offset)
> >> -            skb_reserve(descr->skb, GELIC_NET_RXBUF_ALIGN - offset);
> >
> > Rather than messing with all this it might be easier to just drop
> > offset in favor of NET_SKB_PAD since that should be offset in all cases
> > where dev_alloc_skb is being used. With that the reserve could just be
> > a constant.
>
> GELIC_NET_RXBUF_ALIGN is a property of the gelic hardware device.  I
> would think if NET_SKB_PAD would work it would just be by coincidence.

NET_SKB_PAD is the alignment generated when you use dev_alloc_skb.
What I was getting at is that "offset" could probably be removed in
favor of just using NET_SKB_PAD.
diff mbox series

Patch

diff --git a/drivers/net/ethernet/toshiba/ps3_gelic_net.c b/drivers/net/ethernet/toshiba/ps3_gelic_net.c
index cf8de8a7a8a1..7a8b5e1e77a6 100644
--- a/drivers/net/ethernet/toshiba/ps3_gelic_net.c
+++ b/drivers/net/ethernet/toshiba/ps3_gelic_net.c
@@ -365,51 +365,63 @@  static int gelic_card_init_chain(struct gelic_card *card,
  *
  * allocates a new rx skb, iommu-maps it and attaches it to the descriptor.
  * Activate the descriptor state-wise
+ *
+ * Gelic RX sk_buffs must be aligned to GELIC_NET_RXBUF_ALIGN and the length
+ * must be a multiple of GELIC_NET_RXBUF_ALIGN.
  */
 static int gelic_descr_prepare_rx(struct gelic_card *card,
 				  struct gelic_descr *descr)
 {
-	int offset;
-	unsigned int bufsize;
+	struct device *dev = ctodev(card);
+	struct {
+		unsigned int total_bytes;
+		unsigned int offset;
+	} aligned_buf;
+	dma_addr_t cpu_addr;
 
 	if (gelic_descr_get_status(descr) !=  GELIC_DESCR_DMA_NOT_IN_USE)
 		dev_info(ctodev(card), "%s: ERROR status\n", __func__);
-	/* we need to round up the buffer size to a multiple of 128 */
-	bufsize = ALIGN(GELIC_NET_MAX_MTU, GELIC_NET_RXBUF_ALIGN);
 
-	/* and we need to have it 128 byte aligned, therefore we allocate a
-	 * bit more */
-	descr->skb = dev_alloc_skb(bufsize + GELIC_NET_RXBUF_ALIGN - 1);
+	aligned_buf.total_bytes = (GELIC_NET_RXBUF_ALIGN - 1) +
+		GELIC_NET_MAX_MTU + (GELIC_NET_RXBUF_ALIGN - 1);
+
+	descr->skb = dev_alloc_skb(aligned_buf.total_bytes);
+
 	if (!descr->skb) {
-		descr->buf_addr = 0; /* tell DMAC don't touch memory */
+		descr->buf_addr = 0;
 		return -ENOMEM;
 	}
-	descr->buf_size = cpu_to_be32(bufsize);
+
+	aligned_buf.offset =
+		PTR_ALIGN(descr->skb->data, GELIC_NET_RXBUF_ALIGN) -
+			descr->skb->data;
+
+	descr->buf_size = ALIGN(GELIC_NET_MAX_MTU, GELIC_NET_RXBUF_ALIGN);
 	descr->dmac_cmd_status = 0;
 	descr->result_size = 0;
 	descr->valid_size = 0;
 	descr->data_error = 0;
 
-	offset = ((unsigned long)descr->skb->data) &
-		(GELIC_NET_RXBUF_ALIGN - 1);
-	if (offset)
-		skb_reserve(descr->skb, GELIC_NET_RXBUF_ALIGN - offset);
-	/* io-mmu-map the skb */
-	descr->buf_addr = cpu_to_be32(dma_map_single(ctodev(card),
-						     descr->skb->data,
-						     GELIC_NET_MAX_MTU,
-						     DMA_FROM_DEVICE));
+	skb_reserve(descr->skb, aligned_buf.offset);
+
+	cpu_addr = dma_map_single(dev, descr->skb->data, descr->buf_size,
+		DMA_FROM_DEVICE);
+
+	descr->buf_addr = cpu_to_be32(cpu_addr);
+
 	if (!descr->buf_addr) {
 		dev_kfree_skb_any(descr->skb);
+		descr->buf_addr = 0;
+		descr->buf_size = 0;
 		descr->skb = NULL;
-		dev_info(ctodev(card),
+		dev_info(dev,
 			 "%s:Could not iommu-map rx buffer\n", __func__);
 		gelic_descr_set_status(descr, GELIC_DESCR_DMA_NOT_IN_USE);
 		return -ENOMEM;
-	} else {
-		gelic_descr_set_status(descr, GELIC_DESCR_DMA_CARDOWNED);
-		return 0;
 	}
+
+	gelic_descr_set_status(descr, GELIC_DESCR_DMA_CARDOWNED);
+	return 0;
 }
 
 /**