[net-next,v6,1/2] net: axienet: Be more careful about updating tx_bd_tail

Message ID 20220511184432.1131256-2-robert.hancock@calian.com (mailing list archive)
State New, archived
Series axienet NAPI improvements

Commit Message

Robert Hancock May 11, 2022, 6:44 p.m. UTC
The axienet_start_xmit function was updating the tx_bd_tail variable
multiple times, with potential rollbacks on error or invalid
intermediate positions, even though this variable is also used in the
TX completion path. Use READ_ONCE and WRITE_ONCE to make this update
more atomic, and move the write before the MMIO write to start the
transfer, so it is protected by that implicit write barrier.

Signed-off-by: Robert Hancock <robert.hancock@calian.com>
---
 .../net/ethernet/xilinx/xilinx_axienet_main.c | 23 +++++++++++--------
 1 file changed, 13 insertions(+), 10 deletions(-)
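
In outline, the pattern the patch applies is the one sketched below. This is
a simplified illustration with a hypothetical ring structure, helper
functions, and register name, not the driver's actual code:

/* Build descriptors against a private copy of the tail index and
 * publish it exactly once, just before ringing the doorbell.  The MMIO
 * accessor implies a write barrier, so both the descriptor stores and
 * the index store are visible before the hardware (and therefore any
 * completion interrupt) can act on them.
 */
static netdev_tx_t ring_xmit(struct ring *r, struct sk_buff *skb)
{
	u32 tail = r->tx_tail;	/* private snapshot; no writer races with us */

	if (ring_fill_descriptors(r, skb, &tail))	/* may wrap tail */
		return NETDEV_TX_OK;	/* error: drop, never publish, no rollback */

	WRITE_ONCE(r->tx_tail, tail);	/* single atomic publish */
	iowrite32(ring_desc_dma(r, tail), r->regs + RING_TX_TAIL_REG);
	return NETDEV_TX_OK;
}

Because the shared index is only written once, the error path (like the
patch's) simply never publishes, and the old rollback assignment goes away.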

Comments

Jakub Kicinski May 11, 2022, 8 p.m. UTC | #1
On Wed, 11 May 2022 12:44:31 -0600 Robert Hancock wrote:
> The axienet_start_xmit function was updating the tx_bd_tail variable
> multiple times, with potential rollbacks on error or invalid
> intermediate positions, even though this variable is also used in the
> TX completion path. Use READ_ONCE and WRITE_ONCE to make this update
> more atomic, and move the write before the MMIO write to start the
> transfer, so it is protected by that implicit write barrier.
> 
> Signed-off-by: Robert Hancock <robert.hancock@calian.com>
> ---
>  .../net/ethernet/xilinx/xilinx_axienet_main.c | 23 +++++++++++--------
>  1 file changed, 13 insertions(+), 10 deletions(-)
> 
> diff --git a/drivers/net/ethernet/xilinx/xilinx_axienet_main.c b/drivers/net/ethernet/xilinx/xilinx_axienet_main.c
> index d6fc3f7acdf0..2f39eb4de249 100644
> --- a/drivers/net/ethernet/xilinx/xilinx_axienet_main.c
> +++ b/drivers/net/ethernet/xilinx/xilinx_axienet_main.c
> @@ -807,12 +807,15 @@ axienet_start_xmit(struct sk_buff *skb, struct net_device *ndev)
>  	u32 csum_index_off;
>  	skb_frag_t *frag;
>  	dma_addr_t tail_p, phys;
> +	u32 orig_tail_ptr, new_tail_ptr;
>  	struct axienet_local *lp = netdev_priv(ndev);
>  	struct axidma_bd *cur_p;
> -	u32 orig_tail_ptr = lp->tx_bd_tail;
> +
> +	orig_tail_ptr = READ_ONCE(lp->tx_bd_tail);

This one does not need READ_ONCE().

We only need to protect reads and writes which may race with each other.
This read can't race with any write. We need WRITE_ONCE() in
axienet_start_xmit() and READ_ONCE() in axienet_check_tx_bd_space().

BTW I'm slightly murky on what the rmb() in axienet_check_tx_bd_space()
does. Memory barrier is a fence, not a flush, I don't see what two
accesses that rmb() is separating.

> +	new_tail_ptr = orig_tail_ptr;
>  
>  	num_frag = skb_shinfo(skb)->nr_frags;
> -	cur_p = &lp->tx_bd_v[lp->tx_bd_tail];
> +	cur_p = &lp->tx_bd_v[orig_tail_ptr];
>  
>  	if (axienet_check_tx_bd_space(lp, num_frag + 1)) {
>  		/* Should not happen as last start_xmit call should have
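
The rule Jakub states above, rendered as a sketch with hypothetical names:
annotate only the accesses that can actually race.

/* xmit path: sole writer of tx_tail and serialized by the netdev TX
 * lock, so reading the current value needs no annotation; only the
 * store does, because the completion path may load it concurrently.
 */
static void ring_publish_tail(struct ring *r, u32 new_tail)
{
	WRITE_ONCE(r->tx_tail, new_tail);	/* concurrent readers exist */
}

/* completion path: runs concurrently with xmit, so this load must be
 * a READ_ONCE() pairing with the WRITE_ONCE() above.
 */
static u32 ring_snapshot_tail(const struct ring *r)
{
	return READ_ONCE(r->tx_tail);
}
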
Robert Hancock May 11, 2022, 8:40 p.m. UTC | #2
On Wed, 2022-05-11 at 13:00 -0700, Jakub Kicinski wrote:
> On Wed, 11 May 2022 12:44:31 -0600 Robert Hancock wrote:
> > The axienet_start_xmit function was updating the tx_bd_tail variable
> > multiple times, with potential rollbacks on error or invalid
> > intermediate positions, even though this variable is also used in the
> > TX completion path. Use READ_ONCE and WRITE_ONCE to make this update
> > more atomic, and move the write before the MMIO write to start the
> > transfer, so it is protected by that implicit write barrier.
> > 
> > Signed-off-by: Robert Hancock <robert.hancock@calian.com>
> > ---
> >  .../net/ethernet/xilinx/xilinx_axienet_main.c | 23 +++++++++++--------
> >  1 file changed, 13 insertions(+), 10 deletions(-)
> > 
> > diff --git a/drivers/net/ethernet/xilinx/xilinx_axienet_main.c b/drivers/net/ethernet/xilinx/xilinx_axienet_main.c
> > index d6fc3f7acdf0..2f39eb4de249 100644
> > --- a/drivers/net/ethernet/xilinx/xilinx_axienet_main.c
> > +++ b/drivers/net/ethernet/xilinx/xilinx_axienet_main.c
> > @@ -807,12 +807,15 @@ axienet_start_xmit(struct sk_buff *skb, struct net_device *ndev)
> >  	u32 csum_index_off;
> >  	skb_frag_t *frag;
> >  	dma_addr_t tail_p, phys;
> > +	u32 orig_tail_ptr, new_tail_ptr;
> >  	struct axienet_local *lp = netdev_priv(ndev);
> >  	struct axidma_bd *cur_p;
> > -	u32 orig_tail_ptr = lp->tx_bd_tail;
> > +
> > +	orig_tail_ptr = READ_ONCE(lp->tx_bd_tail);
> 
> This one does not need READ_ONCE().
> 
> We only need to protect reads and writes which may race with each other.
> This read can't race with any write. We need WRITE_ONCE() in
> axienet_start_xmit() and READ_ONCE() in axienet_check_tx_bd_space().

Makes sense, can fix that up.

> 
> BTW I'm slightly murky on what the rmb() in axienet_check_tx_bd_space()
> does. Memory barrier is a fence, not a flush, I don't see what two
> accesses that rmb() is separating.

I believe the idea is to ensure that we're seeing a complete descriptor update
from the hardware (i.e. what dma_rmb does), and also that the last write to
tx_bd_tail will be visible (basically pairing with the implicit write barrier
from the IO write in axienet_start_xmit)?

> 
> > +	new_tail_ptr = orig_tail_ptr;
> >  
> >  	num_frag = skb_shinfo(skb)->nr_frags;
> > -	cur_p = &lp->tx_bd_v[lp->tx_bd_tail];
> > +	cur_p = &lp->tx_bd_v[orig_tail_ptr];
> >  
> >  	if (axienet_check_tx_bd_space(lp, num_frag + 1)) {
> >  		/* Should not happen as last start_xmit call should have
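
Read as code, that interpretation would make the space check look roughly
like the sketch below (hypothetical names again; whether a full rmb() is
needed here rather than dma_rmb() is exactly the question left open above):

static int ring_check_tx_space(struct ring *r, int num_frag)
{
	struct ring_bd *cur_p;

	/* A fence, not a flush: it orders this path's reads of the
	 * descriptor and of tx_tail after the updates published before
	 * it, pairing with the device's descriptor writeback and with
	 * the implicit write barrier of the doorbell MMIO in xmit.
	 */
	rmb();
	cur_p = &r->bd_v[(READ_ONCE(r->tx_tail) + num_frag) % r->num_bds];
	if (cur_p->cntrl)	/* descriptor still in flight */
		return NETDEV_TX_BUSY;
	return 0;
}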

Patch

diff --git a/drivers/net/ethernet/xilinx/xilinx_axienet_main.c b/drivers/net/ethernet/xilinx/xilinx_axienet_main.c
index d6fc3f7acdf0..2f39eb4de249 100644
--- a/drivers/net/ethernet/xilinx/xilinx_axienet_main.c
+++ b/drivers/net/ethernet/xilinx/xilinx_axienet_main.c
@@ -807,12 +807,15 @@  axienet_start_xmit(struct sk_buff *skb, struct net_device *ndev)
 	u32 csum_index_off;
 	skb_frag_t *frag;
 	dma_addr_t tail_p, phys;
+	u32 orig_tail_ptr, new_tail_ptr;
 	struct axienet_local *lp = netdev_priv(ndev);
 	struct axidma_bd *cur_p;
-	u32 orig_tail_ptr = lp->tx_bd_tail;
+
+	orig_tail_ptr = READ_ONCE(lp->tx_bd_tail);
+	new_tail_ptr = orig_tail_ptr;
 
 	num_frag = skb_shinfo(skb)->nr_frags;
-	cur_p = &lp->tx_bd_v[lp->tx_bd_tail];
+	cur_p = &lp->tx_bd_v[orig_tail_ptr];
 
 	if (axienet_check_tx_bd_space(lp, num_frag + 1)) {
 		/* Should not happen as last start_xmit call should have
@@ -852,9 +855,9 @@  axienet_start_xmit(struct sk_buff *skb, struct net_device *ndev)
 	cur_p->cntrl = skb_headlen(skb) | XAXIDMA_BD_CTRL_TXSOF_MASK;
 
 	for (ii = 0; ii < num_frag; ii++) {
-		if (++lp->tx_bd_tail >= lp->tx_bd_num)
-			lp->tx_bd_tail = 0;
-		cur_p = &lp->tx_bd_v[lp->tx_bd_tail];
+		if (++new_tail_ptr >= lp->tx_bd_num)
+			new_tail_ptr = 0;
+		cur_p = &lp->tx_bd_v[new_tail_ptr];
 		frag = &skb_shinfo(skb)->frags[ii];
 		phys = dma_map_single(lp->dev,
 				      skb_frag_address(frag),
@@ -866,8 +869,6 @@  axienet_start_xmit(struct sk_buff *skb, struct net_device *ndev)
 			ndev->stats.tx_dropped++;
 			axienet_free_tx_chain(ndev, orig_tail_ptr, ii + 1,
 					      NULL);
-			lp->tx_bd_tail = orig_tail_ptr;
-
 			return NETDEV_TX_OK;
 		}
 		desc_set_phys_addr(lp, phys, cur_p);
@@ -877,11 +878,13 @@  axienet_start_xmit(struct sk_buff *skb, struct net_device *ndev)
 	cur_p->cntrl |= XAXIDMA_BD_CTRL_TXEOF_MASK;
 	cur_p->skb = skb;
 
-	tail_p = lp->tx_bd_p + sizeof(*lp->tx_bd_v) * lp->tx_bd_tail;
+	tail_p = lp->tx_bd_p + sizeof(*lp->tx_bd_v) * new_tail_ptr;
+	if (++new_tail_ptr >= lp->tx_bd_num)
+		new_tail_ptr = 0;
+	WRITE_ONCE(lp->tx_bd_tail, new_tail_ptr);
+
 	/* Start the transfer */
 	axienet_dma_out_addr(lp, XAXIDMA_TX_TDESC_OFFSET, tail_p);
-	if (++lp->tx_bd_tail >= lp->tx_bd_num)
-		lp->tx_bd_tail = 0;
 
 	/* Stop queue if next transmit may not have space */
 	if (axienet_check_tx_bd_space(lp, MAX_SKB_FRAGS + 1)) {