diff mbox series

[net] net: renesas: rswitch: rework ts tags management

Message ID 20241212062558.436455-1-nikita.yoush@cogentembedded.com (mailing list archive)
State Accepted
Delegated to: Geert Uytterhoeven
Headers show
Series [net] net: renesas: rswitch: rework ts tags management | expand

Commit Message

Nikita Yushchenko Dec. 12, 2024, 6:25 a.m. UTC
The existing linked list based implementation of how ts tags are
assigned and managed is unsafe against concurrency and corner cases:
- element addition in tx processing can race against element removal
  in ts queue completion,
- element removal in ts queue completion can race against element
  removal in device close,
- if a large number of frames gets added to tx queue without ts queue
  completions in between, elements with duplicate tag values can get
  added.

Use a different implementation, based on per-port used tags bitmaps and
saved skb arrays.

Safety for addition in tx processing vs removal in ts completion is
provided by:

    tag = find_first_zero_bit(...);
    smp_mb();
    <write rdev->ts_skb[tag]>
    set_bit(...);

  vs

    <read rdev->ts_skb[tag]>
    smp_mb();
    clear_bit(...);

Safety for removal in ts completion vs removal in device close is
provided by using atomic read-and-clear for rdev->ts_skb[tag]:

    ts_skb = xchg(&rdev->ts_skb[tag], NULL);
    if (ts_skb)
        <handle it>

Fixes: 33f5d733b589 ("net: renesas: rswitch: Improve TX timestamp accuracy")
Signed-off-by: Nikita Yushchenko <nikita.yoush@cogentembedded.com>
---
 drivers/net/ethernet/renesas/rswitch.c | 74 ++++++++++++++------------
 drivers/net/ethernet/renesas/rswitch.h | 13 ++---
 2 files changed, 42 insertions(+), 45 deletions(-)

Comments

patchwork-bot+netdevbpf@kernel.org Dec. 15, 2024, 11:10 p.m. UTC | #1
Hello:

This patch was applied to netdev/net.git (main)
by Jakub Kicinski <kuba@kernel.org>:

On Thu, 12 Dec 2024 11:25:58 +0500 you wrote:
> The existing linked list based implementation of how ts tags are
> assigned and managed is unsafe against concurrency and corner cases:
> - element addition in tx processing can race against element removal
>   in ts queue completion,
> - element removal in ts queue completion can race against element
>   removal in device close,
> - if a large number of frames gets added to tx queue without ts queue
>   completions in between, elements with duplicate tag values can get
>   added.
> 
> [...]

Here is the summary with links:
  - [net] net: renesas: rswitch: rework ts tags management
    https://git.kernel.org/netdev/net/c/922b4b955a03

You are awesome, thank you!
Yoshihiro Shimoda Dec. 16, 2024, 11:25 a.m. UTC | #2
Hello Nikita-san,

> From: Nikita Yushchenko, Sent: Thursday, December 12, 2024 3:26 PM
> 
> The existing linked list based implementation of how ts tags are
> assigned and managed is unsafe against concurrency and corner cases:
> - element addition in tx processing can race against element removal
>   in ts queue completion,
> - element removal in ts queue completion can race against element
>   removal in device close,
> - if a large number of frames gets added to tx queue without ts queue
>   completions in between, elements with duplicate tag values can get
>   added.
> 
> Use a different implementation, based on per-port used tags bitmaps and
> saved skb arrays.
> 
> Safety for addition in tx processing vs removal in ts completion is
> provided by:
> 
>     tag = find_first_zero_bit(...);
>     smp_mb();
>     <write rdev->ts_skb[tag]>
>     set_bit(...);
> 
>   vs
> 
>     <read rdev->ts_skb[tag]>
>     smp_mb();
>     clear_bit(...);
> 
> Safety for removal in ts completion vs removal in device close is
> provided by using atomic read-and-clear for rdev->ts_skb[tag]:
> 
>     ts_skb = xchg(&rdev->ts_skb[tag], NULL);
>     if (ts_skb)
>         <handle it>
> 
> Fixes: 33f5d733b589 ("net: renesas: rswitch: Improve TX timestamp accuracy")
> Signed-off-by: Nikita Yushchenko <nikita.yoush@cogentembedded.com>

Thank you for the patch!

Reviewed-by: Yoshihiro Shimoda <yoshihiro.shimoda.uh@renesas.com>

Also, I tested on my environment with modified cmsg_sender.c which enabled
HW TX timestamping. The following command [1] didn't work on the previous code.
However, after I applied this patch, it could work correctly. So,

Tested-by: Yoshihiro Shimoda <yoshihiro.shimoda.uh@renesas.com>

[1]
cmsg_sender -p u -t -4 -n 128 -S 1000 <ipaddr> <port>

Best regards,
Yoshihiro Shimoda

> ---
>  drivers/net/ethernet/renesas/rswitch.c | 74 ++++++++++++++------------
>  drivers/net/ethernet/renesas/rswitch.h | 13 ++---
>  2 files changed, 42 insertions(+), 45 deletions(-)
> 
> diff --git a/drivers/net/ethernet/renesas/rswitch.c b/drivers/net/ethernet/renesas/rswitch.c
> index dbbbf024e7ab..9ac6e2aad18f 100644
> --- a/drivers/net/ethernet/renesas/rswitch.c
> +++ b/drivers/net/ethernet/renesas/rswitch.c
> @@ -547,7 +547,6 @@ static int rswitch_gwca_ts_queue_alloc(struct rswitch_private *priv)
>  	desc = &gq->ts_ring[gq->ring_size];
>  	desc->desc.die_dt = DT_LINKFIX;
>  	rswitch_desc_set_dptr(&desc->desc, gq->ring_dma);
> -	INIT_LIST_HEAD(&priv->gwca.ts_info_list);
> 
>  	return 0;
>  }
> @@ -1003,9 +1002,10 @@ static int rswitch_gwca_request_irqs(struct rswitch_private *priv)
>  static void rswitch_ts(struct rswitch_private *priv)
>  {
>  	struct rswitch_gwca_queue *gq = &priv->gwca.ts_queue;
> -	struct rswitch_gwca_ts_info *ts_info, *ts_info2;
>  	struct skb_shared_hwtstamps shhwtstamps;
>  	struct rswitch_ts_desc *desc;
> +	struct rswitch_device *rdev;
> +	struct sk_buff *ts_skb;
>  	struct timespec64 ts;
>  	unsigned int num;
>  	u32 tag, port;
> @@ -1015,23 +1015,28 @@ static void rswitch_ts(struct rswitch_private *priv)
>  		dma_rmb();
> 
>  		port = TS_DESC_DPN(__le32_to_cpu(desc->desc.dptrl));
> -		tag = TS_DESC_TSUN(__le32_to_cpu(desc->desc.dptrl));
> -
> -		list_for_each_entry_safe(ts_info, ts_info2, &priv->gwca.ts_info_list, list) {
> -			if (!(ts_info->port == port && ts_info->tag == tag))
> -				continue;
> -
> -			memset(&shhwtstamps, 0, sizeof(shhwtstamps));
> -			ts.tv_sec = __le32_to_cpu(desc->ts_sec);
> -			ts.tv_nsec = __le32_to_cpu(desc->ts_nsec & cpu_to_le32(0x3fffffff));
> -			shhwtstamps.hwtstamp = timespec64_to_ktime(ts);
> -			skb_tstamp_tx(ts_info->skb, &shhwtstamps);
> -			dev_consume_skb_irq(ts_info->skb);
> -			list_del(&ts_info->list);
> -			kfree(ts_info);
> -			break;
> -		}
> +		if (unlikely(port >= RSWITCH_NUM_PORTS))
> +			goto next;
> +		rdev = priv->rdev[port];
> 
> +		tag = TS_DESC_TSUN(__le32_to_cpu(desc->desc.dptrl));
> +		if (unlikely(tag >= TS_TAGS_PER_PORT))
> +			goto next;
> +		ts_skb = xchg(&rdev->ts_skb[tag], NULL);
> +		smp_mb(); /* order rdev->ts_skb[] read before bitmap update */
> +		clear_bit(tag, rdev->ts_skb_used);
> +
> +		if (unlikely(!ts_skb))
> +			goto next;
> +
> +		memset(&shhwtstamps, 0, sizeof(shhwtstamps));
> +		ts.tv_sec = __le32_to_cpu(desc->ts_sec);
> +		ts.tv_nsec = __le32_to_cpu(desc->ts_nsec & cpu_to_le32(0x3fffffff));
> +		shhwtstamps.hwtstamp = timespec64_to_ktime(ts);
> +		skb_tstamp_tx(ts_skb, &shhwtstamps);
> +		dev_consume_skb_irq(ts_skb);
> +
> +next:
>  		gq->cur = rswitch_next_queue_index(gq, true, 1);
>  		desc = &gq->ts_ring[gq->cur];
>  	}
> @@ -1576,8 +1581,9 @@ static int rswitch_open(struct net_device *ndev)
>  static int rswitch_stop(struct net_device *ndev)
>  {
>  	struct rswitch_device *rdev = netdev_priv(ndev);
> -	struct rswitch_gwca_ts_info *ts_info, *ts_info2;
> +	struct sk_buff *ts_skb;
>  	unsigned long flags;
> +	unsigned int tag;
> 
>  	netif_tx_stop_all_queues(ndev);
> 
> @@ -1594,12 +1600,13 @@ static int rswitch_stop(struct net_device *ndev)
>  	if (bitmap_empty(rdev->priv->opened_ports, RSWITCH_NUM_PORTS))
>  		iowrite32(GWCA_TS_IRQ_BIT, rdev->priv->addr + GWTSDID);
> 
> -	list_for_each_entry_safe(ts_info, ts_info2, &rdev->priv->gwca.ts_info_list, list) {
> -		if (ts_info->port != rdev->port)
> -			continue;
> -		dev_kfree_skb_irq(ts_info->skb);
> -		list_del(&ts_info->list);
> -		kfree(ts_info);
> +	for (tag = find_first_bit(rdev->ts_skb_used, TS_TAGS_PER_PORT);
> +	     tag < TS_TAGS_PER_PORT;
> +	     tag = find_next_bit(rdev->ts_skb_used, TS_TAGS_PER_PORT, tag + 1)) {
> +		ts_skb = xchg(&rdev->ts_skb[tag], NULL);
> +		clear_bit(tag, rdev->ts_skb_used);
> +		if (ts_skb)
> +			dev_kfree_skb(ts_skb);
>  	}
> 
>  	return 0;
> @@ -1612,20 +1619,17 @@ static bool rswitch_ext_desc_set_info1(struct rswitch_device *rdev,
>  	desc->info1 = cpu_to_le64(INFO1_DV(BIT(rdev->etha->index)) |
>  				  INFO1_IPV(GWCA_IPV_NUM) | INFO1_FMT);
>  	if (skb_shinfo(skb)->tx_flags & SKBTX_HW_TSTAMP) {
> -		struct rswitch_gwca_ts_info *ts_info;
> +		unsigned int tag;
> 
> -		ts_info = kzalloc(sizeof(*ts_info), GFP_ATOMIC);
> -		if (!ts_info)
> +		tag = find_first_zero_bit(rdev->ts_skb_used, TS_TAGS_PER_PORT);
> +		if (tag == TS_TAGS_PER_PORT)
>  			return false;
> +		smp_mb(); /* order bitmap read before rdev->ts_skb[] write */
> +		rdev->ts_skb[tag] = skb_get(skb);
> +		set_bit(tag, rdev->ts_skb_used);
> 
>  		skb_shinfo(skb)->tx_flags |= SKBTX_IN_PROGRESS;
> -		rdev->ts_tag++;
> -		desc->info1 |= cpu_to_le64(INFO1_TSUN(rdev->ts_tag) | INFO1_TXC);
> -
> -		ts_info->skb = skb_get(skb);
> -		ts_info->port = rdev->port;
> -		ts_info->tag = rdev->ts_tag;
> -		list_add_tail(&ts_info->list, &rdev->priv->gwca.ts_info_list);
> +		desc->info1 |= cpu_to_le64(INFO1_TSUN(tag) | INFO1_TXC);
> 
>  		skb_tx_timestamp(skb);
>  	}
> diff --git a/drivers/net/ethernet/renesas/rswitch.h b/drivers/net/ethernet/renesas/rswitch.h
> index e020800dcc57..d8d4ed7d7f8b 100644
> --- a/drivers/net/ethernet/renesas/rswitch.h
> +++ b/drivers/net/ethernet/renesas/rswitch.h
> @@ -972,14 +972,6 @@ struct rswitch_gwca_queue {
>  	};
>  };
> 
> -struct rswitch_gwca_ts_info {
> -	struct sk_buff *skb;
> -	struct list_head list;
> -
> -	int port;
> -	u8 tag;
> -};
> -
>  #define RSWITCH_NUM_IRQ_REGS	(RSWITCH_MAX_NUM_QUEUES / BITS_PER_TYPE(u32))
>  struct rswitch_gwca {
>  	unsigned int index;
> @@ -989,7 +981,6 @@ struct rswitch_gwca {
>  	struct rswitch_gwca_queue *queues;
>  	int num_queues;
>  	struct rswitch_gwca_queue ts_queue;
> -	struct list_head ts_info_list;
>  	DECLARE_BITMAP(used, RSWITCH_MAX_NUM_QUEUES);
>  	u32 tx_irq_bits[RSWITCH_NUM_IRQ_REGS];
>  	u32 rx_irq_bits[RSWITCH_NUM_IRQ_REGS];
> @@ -997,6 +988,7 @@ struct rswitch_gwca {
>  };
> 
>  #define NUM_QUEUES_PER_NDEV	2
> +#define TS_TAGS_PER_PORT	256
>  struct rswitch_device {
>  	struct rswitch_private *priv;
>  	struct net_device *ndev;
> @@ -1004,7 +996,8 @@ struct rswitch_device {
>  	void __iomem *addr;
>  	struct rswitch_gwca_queue *tx_queue;
>  	struct rswitch_gwca_queue *rx_queue;
> -	u8 ts_tag;
> +	struct sk_buff *ts_skb[TS_TAGS_PER_PORT];
> +	DECLARE_BITMAP(ts_skb_used, TS_TAGS_PER_PORT);
>  	bool disabled;
> 
>  	int port;
> --
> 2.39.5
diff mbox series

Patch

diff --git a/drivers/net/ethernet/renesas/rswitch.c b/drivers/net/ethernet/renesas/rswitch.c
index dbbbf024e7ab..9ac6e2aad18f 100644
--- a/drivers/net/ethernet/renesas/rswitch.c
+++ b/drivers/net/ethernet/renesas/rswitch.c
@@ -547,7 +547,6 @@  static int rswitch_gwca_ts_queue_alloc(struct rswitch_private *priv)
 	desc = &gq->ts_ring[gq->ring_size];
 	desc->desc.die_dt = DT_LINKFIX;
 	rswitch_desc_set_dptr(&desc->desc, gq->ring_dma);
-	INIT_LIST_HEAD(&priv->gwca.ts_info_list);
 
 	return 0;
 }
@@ -1003,9 +1002,10 @@  static int rswitch_gwca_request_irqs(struct rswitch_private *priv)
 static void rswitch_ts(struct rswitch_private *priv)
 {
 	struct rswitch_gwca_queue *gq = &priv->gwca.ts_queue;
-	struct rswitch_gwca_ts_info *ts_info, *ts_info2;
 	struct skb_shared_hwtstamps shhwtstamps;
 	struct rswitch_ts_desc *desc;
+	struct rswitch_device *rdev;
+	struct sk_buff *ts_skb;
 	struct timespec64 ts;
 	unsigned int num;
 	u32 tag, port;
@@ -1015,23 +1015,28 @@  static void rswitch_ts(struct rswitch_private *priv)
 		dma_rmb();
 
 		port = TS_DESC_DPN(__le32_to_cpu(desc->desc.dptrl));
-		tag = TS_DESC_TSUN(__le32_to_cpu(desc->desc.dptrl));
-
-		list_for_each_entry_safe(ts_info, ts_info2, &priv->gwca.ts_info_list, list) {
-			if (!(ts_info->port == port && ts_info->tag == tag))
-				continue;
-
-			memset(&shhwtstamps, 0, sizeof(shhwtstamps));
-			ts.tv_sec = __le32_to_cpu(desc->ts_sec);
-			ts.tv_nsec = __le32_to_cpu(desc->ts_nsec & cpu_to_le32(0x3fffffff));
-			shhwtstamps.hwtstamp = timespec64_to_ktime(ts);
-			skb_tstamp_tx(ts_info->skb, &shhwtstamps);
-			dev_consume_skb_irq(ts_info->skb);
-			list_del(&ts_info->list);
-			kfree(ts_info);
-			break;
-		}
+		if (unlikely(port >= RSWITCH_NUM_PORTS))
+			goto next;
+		rdev = priv->rdev[port];
 
+		tag = TS_DESC_TSUN(__le32_to_cpu(desc->desc.dptrl));
+		if (unlikely(tag >= TS_TAGS_PER_PORT))
+			goto next;
+		ts_skb = xchg(&rdev->ts_skb[tag], NULL);
+		smp_mb(); /* order rdev->ts_skb[] read before bitmap update */
+		clear_bit(tag, rdev->ts_skb_used);
+
+		if (unlikely(!ts_skb))
+			goto next;
+
+		memset(&shhwtstamps, 0, sizeof(shhwtstamps));
+		ts.tv_sec = __le32_to_cpu(desc->ts_sec);
+		ts.tv_nsec = __le32_to_cpu(desc->ts_nsec & cpu_to_le32(0x3fffffff));
+		shhwtstamps.hwtstamp = timespec64_to_ktime(ts);
+		skb_tstamp_tx(ts_skb, &shhwtstamps);
+		dev_consume_skb_irq(ts_skb);
+
+next:
 		gq->cur = rswitch_next_queue_index(gq, true, 1);
 		desc = &gq->ts_ring[gq->cur];
 	}
@@ -1576,8 +1581,9 @@  static int rswitch_open(struct net_device *ndev)
 static int rswitch_stop(struct net_device *ndev)
 {
 	struct rswitch_device *rdev = netdev_priv(ndev);
-	struct rswitch_gwca_ts_info *ts_info, *ts_info2;
+	struct sk_buff *ts_skb;
 	unsigned long flags;
+	unsigned int tag;
 
 	netif_tx_stop_all_queues(ndev);
 
@@ -1594,12 +1600,13 @@  static int rswitch_stop(struct net_device *ndev)
 	if (bitmap_empty(rdev->priv->opened_ports, RSWITCH_NUM_PORTS))
 		iowrite32(GWCA_TS_IRQ_BIT, rdev->priv->addr + GWTSDID);
 
-	list_for_each_entry_safe(ts_info, ts_info2, &rdev->priv->gwca.ts_info_list, list) {
-		if (ts_info->port != rdev->port)
-			continue;
-		dev_kfree_skb_irq(ts_info->skb);
-		list_del(&ts_info->list);
-		kfree(ts_info);
+	for (tag = find_first_bit(rdev->ts_skb_used, TS_TAGS_PER_PORT);
+	     tag < TS_TAGS_PER_PORT;
+	     tag = find_next_bit(rdev->ts_skb_used, TS_TAGS_PER_PORT, tag + 1)) {
+		ts_skb = xchg(&rdev->ts_skb[tag], NULL);
+		clear_bit(tag, rdev->ts_skb_used);
+		if (ts_skb)
+			dev_kfree_skb(ts_skb);
 	}
 
 	return 0;
@@ -1612,20 +1619,17 @@  static bool rswitch_ext_desc_set_info1(struct rswitch_device *rdev,
 	desc->info1 = cpu_to_le64(INFO1_DV(BIT(rdev->etha->index)) |
 				  INFO1_IPV(GWCA_IPV_NUM) | INFO1_FMT);
 	if (skb_shinfo(skb)->tx_flags & SKBTX_HW_TSTAMP) {
-		struct rswitch_gwca_ts_info *ts_info;
+		unsigned int tag;
 
-		ts_info = kzalloc(sizeof(*ts_info), GFP_ATOMIC);
-		if (!ts_info)
+		tag = find_first_zero_bit(rdev->ts_skb_used, TS_TAGS_PER_PORT);
+		if (tag == TS_TAGS_PER_PORT)
 			return false;
+		smp_mb(); /* order bitmap read before rdev->ts_skb[] write */
+		rdev->ts_skb[tag] = skb_get(skb);
+		set_bit(tag, rdev->ts_skb_used);
 
 		skb_shinfo(skb)->tx_flags |= SKBTX_IN_PROGRESS;
-		rdev->ts_tag++;
-		desc->info1 |= cpu_to_le64(INFO1_TSUN(rdev->ts_tag) | INFO1_TXC);
-
-		ts_info->skb = skb_get(skb);
-		ts_info->port = rdev->port;
-		ts_info->tag = rdev->ts_tag;
-		list_add_tail(&ts_info->list, &rdev->priv->gwca.ts_info_list);
+		desc->info1 |= cpu_to_le64(INFO1_TSUN(tag) | INFO1_TXC);
 
 		skb_tx_timestamp(skb);
 	}
diff --git a/drivers/net/ethernet/renesas/rswitch.h b/drivers/net/ethernet/renesas/rswitch.h
index e020800dcc57..d8d4ed7d7f8b 100644
--- a/drivers/net/ethernet/renesas/rswitch.h
+++ b/drivers/net/ethernet/renesas/rswitch.h
@@ -972,14 +972,6 @@  struct rswitch_gwca_queue {
 	};
 };
 
-struct rswitch_gwca_ts_info {
-	struct sk_buff *skb;
-	struct list_head list;
-
-	int port;
-	u8 tag;
-};
-
 #define RSWITCH_NUM_IRQ_REGS	(RSWITCH_MAX_NUM_QUEUES / BITS_PER_TYPE(u32))
 struct rswitch_gwca {
 	unsigned int index;
@@ -989,7 +981,6 @@  struct rswitch_gwca {
 	struct rswitch_gwca_queue *queues;
 	int num_queues;
 	struct rswitch_gwca_queue ts_queue;
-	struct list_head ts_info_list;
 	DECLARE_BITMAP(used, RSWITCH_MAX_NUM_QUEUES);
 	u32 tx_irq_bits[RSWITCH_NUM_IRQ_REGS];
 	u32 rx_irq_bits[RSWITCH_NUM_IRQ_REGS];
@@ -997,6 +988,7 @@  struct rswitch_gwca {
 };
 
 #define NUM_QUEUES_PER_NDEV	2
+#define TS_TAGS_PER_PORT	256
 struct rswitch_device {
 	struct rswitch_private *priv;
 	struct net_device *ndev;
@@ -1004,7 +996,8 @@  struct rswitch_device {
 	void __iomem *addr;
 	struct rswitch_gwca_queue *tx_queue;
 	struct rswitch_gwca_queue *rx_queue;
-	u8 ts_tag;
+	struct sk_buff *ts_skb[TS_TAGS_PER_PORT];
+	DECLARE_BITMAP(ts_skb_used, TS_TAGS_PER_PORT);
 	bool disabled;
 
 	int port;