Message ID | 20240328170309.2172584-7-edumazet@google.com (mailing list archive) |
---|---|
State | Superseded |
Delegated to: | Netdev Maintainers |
Headers | show |
Series | net: rps: misc changes | expand |
Hi Eric, kernel test robot noticed the following build errors: [auto build test ERROR on net-next/main] url: https://github.com/intel-lab-lkp/linux/commits/Eric-Dumazet/net-move-kick_defer_list_purge-to-net-core-dev-h/20240329-011413 base: net-next/main patch link: https://lore.kernel.org/r/20240328170309.2172584-7-edumazet%40google.com patch subject: [PATCH net-next 6/8] net: rps: change input_queue_tail_incr_save() config: sh-allmodconfig (https://download.01.org/0day-ci/archive/20240329/202403291901.lqImStGD-lkp@intel.com/config) compiler: sh4-linux-gcc (GCC) 13.2.0 reproduce (this is a W=1 build): (https://download.01.org/0day-ci/archive/20240329/202403291901.lqImStGD-lkp@intel.com/reproduce) If you fix the issue in a separate patch/commit (i.e. not just a new version of the same patch/commit), kindly add following tags | Reported-by: kernel test robot <lkp@intel.com> | Closes: https://lore.kernel.org/oe-kbuild-all/202403291901.lqImStGD-lkp@intel.com/ All errors (new ones prefixed by >>): net/core/dev.c: In function 'enqueue_to_backlog': >> net/core/dev.c:4819:24: error: implicit declaration of function 'rps_input_queue_tail_incr' [-Werror=implicit-function-declaration] 4819 | tail = rps_input_queue_tail_incr(sd); | ^~~~~~~~~~~~~~~~~~~~~~~~~ >> net/core/dev.c:4823:17: error: implicit declaration of function 'rps_input_queue_tail_save' [-Werror=implicit-function-declaration] 4823 | rps_input_queue_tail_save(qtail, tail); | ^~~~~~~~~~~~~~~~~~~~~~~~~ net/core/dev.c: In function 'flush_backlog': >> net/core/dev.c:5901:25: error: implicit declaration of function 'rps_input_queue_head_incr' [-Werror=implicit-function-declaration] 5901 | rps_input_queue_head_incr(sd); | ^~~~~~~~~~~~~~~~~~~~~~~~~ cc1: some warnings being treated as errors vim +/rps_input_queue_tail_incr +4819 net/core/dev.c 4781 4782 /* 4783 * enqueue_to_backlog is called to queue an skb to a per CPU backlog 4784 * queue (may be a remote CPU queue). 4785 */ 4786 static int enqueue_to_backlog(struct sk_buff *skb, int cpu, 4787 unsigned int *qtail) 4788 { 4789 enum skb_drop_reason reason; 4790 struct softnet_data *sd; 4791 unsigned long flags; 4792 unsigned int qlen; 4793 int max_backlog; 4794 u32 tail; 4795 4796 reason = SKB_DROP_REASON_DEV_READY; 4797 if (!netif_running(skb->dev)) 4798 goto bad_dev; 4799 4800 reason = SKB_DROP_REASON_CPU_BACKLOG; 4801 sd = &per_cpu(softnet_data, cpu); 4802 4803 qlen = skb_queue_len_lockless(&sd->input_pkt_queue); 4804 max_backlog = READ_ONCE(net_hotdata.max_backlog); 4805 if (unlikely(qlen > max_backlog)) 4806 goto cpu_backlog_drop; 4807 backlog_lock_irq_save(sd, &flags); 4808 qlen = skb_queue_len(&sd->input_pkt_queue); 4809 if (qlen <= max_backlog && !skb_flow_limit(skb, qlen)) { 4810 if (!qlen) { 4811 /* Schedule NAPI for backlog device. We can use 4812 * non atomic operation as we own the queue lock. 4813 */ 4814 if (!__test_and_set_bit(NAPI_STATE_SCHED, 4815 &sd->backlog.state)) 4816 napi_schedule_rps(sd); 4817 } 4818 __skb_queue_tail(&sd->input_pkt_queue, skb); > 4819 tail = rps_input_queue_tail_incr(sd); 4820 backlog_unlock_irq_restore(sd, &flags); 4821 4822 /* save the tail outside of the critical section */ > 4823 rps_input_queue_tail_save(qtail, tail); 4824 return NET_RX_SUCCESS; 4825 } 4826 4827 backlog_unlock_irq_restore(sd, &flags); 4828 4829 cpu_backlog_drop: 4830 atomic_inc(&sd->dropped); 4831 bad_dev: 4832 dev_core_stats_rx_dropped_inc(skb->dev); 4833 kfree_skb_reason(skb, reason); 4834 return NET_RX_DROP; 4835 } 4836
Hi Eric, kernel test robot noticed the following build errors: [auto build test ERROR on net-next/main] url: https://github.com/intel-lab-lkp/linux/commits/Eric-Dumazet/net-move-kick_defer_list_purge-to-net-core-dev-h/20240329-011413 base: net-next/main patch link: https://lore.kernel.org/r/20240328170309.2172584-7-edumazet%40google.com patch subject: [PATCH net-next 6/8] net: rps: change input_queue_tail_incr_save() config: um-x86_64_defconfig (https://download.01.org/0day-ci/archive/20240329/202403292036.FloftiJL-lkp@intel.com/config) compiler: clang version 15.0.7 (https://github.com/llvm/llvm-project.git 8dfdcc7b7bf66834a761bd8de445840ef68e4d1a) reproduce (this is a W=1 build): (https://download.01.org/0day-ci/archive/20240329/202403292036.FloftiJL-lkp@intel.com/reproduce) If you fix the issue in a separate patch/commit (i.e. not just a new version of the same patch/commit), kindly add following tags | Reported-by: kernel test robot <lkp@intel.com> | Closes: https://lore.kernel.org/oe-kbuild-all/202403292036.FloftiJL-lkp@intel.com/ All errors (new ones prefixed by >>): In file included from net/core/dev.c:89: In file included from include/linux/interrupt.h:11: In file included from include/linux/hardirq.h:11: In file included from arch/um/include/asm/hardirq.h:5: In file included from include/asm-generic/hardirq.h:17: In file included from include/linux/irq.h:20: In file included from include/linux/io.h:13: In file included from arch/um/include/asm/io.h:24: include/asm-generic/io.h:547:31: warning: performing pointer arithmetic on a null pointer has undefined behavior [-Wnull-pointer-arithmetic] val = __raw_readb(PCI_IOBASE + addr); ~~~~~~~~~~ ^ include/asm-generic/io.h:560:61: warning: performing pointer arithmetic on a null pointer has undefined behavior [-Wnull-pointer-arithmetic] val = __le16_to_cpu((__le16 __force)__raw_readw(PCI_IOBASE + addr)); ~~~~~~~~~~ ^ include/uapi/linux/byteorder/little_endian.h:37:51: note: expanded from macro '__le16_to_cpu' #define __le16_to_cpu(x) ((__force __u16)(__le16)(x)) ^ In file included from net/core/dev.c:89: In file included from include/linux/interrupt.h:11: In file included from include/linux/hardirq.h:11: In file included from arch/um/include/asm/hardirq.h:5: In file included from include/asm-generic/hardirq.h:17: In file included from include/linux/irq.h:20: In file included from include/linux/io.h:13: In file included from arch/um/include/asm/io.h:24: include/asm-generic/io.h:573:61: warning: performing pointer arithmetic on a null pointer has undefined behavior [-Wnull-pointer-arithmetic] val = __le32_to_cpu((__le32 __force)__raw_readl(PCI_IOBASE + addr)); ~~~~~~~~~~ ^ include/uapi/linux/byteorder/little_endian.h:35:51: note: expanded from macro '__le32_to_cpu' #define __le32_to_cpu(x) ((__force __u32)(__le32)(x)) ^ In file included from net/core/dev.c:89: In file included from include/linux/interrupt.h:11: In file included from include/linux/hardirq.h:11: In file included from arch/um/include/asm/hardirq.h:5: In file included from include/asm-generic/hardirq.h:17: In file included from include/linux/irq.h:20: In file included from include/linux/io.h:13: In file included from arch/um/include/asm/io.h:24: include/asm-generic/io.h:584:33: warning: performing pointer arithmetic on a null pointer has undefined behavior [-Wnull-pointer-arithmetic] __raw_writeb(value, PCI_IOBASE + addr); ~~~~~~~~~~ ^ include/asm-generic/io.h:594:59: warning: performing pointer arithmetic on a null pointer has undefined behavior [-Wnull-pointer-arithmetic] __raw_writew((u16 __force)cpu_to_le16(value), PCI_IOBASE + addr); ~~~~~~~~~~ ^ include/asm-generic/io.h:604:59: warning: performing pointer arithmetic on a null pointer has undefined behavior [-Wnull-pointer-arithmetic] __raw_writel((u32 __force)cpu_to_le32(value), PCI_IOBASE + addr); ~~~~~~~~~~ ^ include/asm-generic/io.h:692:20: warning: performing pointer arithmetic on a null pointer has undefined behavior [-Wnull-pointer-arithmetic] readsb(PCI_IOBASE + addr, buffer, count); ~~~~~~~~~~ ^ include/asm-generic/io.h:700:20: warning: performing pointer arithmetic on a null pointer has undefined behavior [-Wnull-pointer-arithmetic] readsw(PCI_IOBASE + addr, buffer, count); ~~~~~~~~~~ ^ include/asm-generic/io.h:708:20: warning: performing pointer arithmetic on a null pointer has undefined behavior [-Wnull-pointer-arithmetic] readsl(PCI_IOBASE + addr, buffer, count); ~~~~~~~~~~ ^ include/asm-generic/io.h:717:21: warning: performing pointer arithmetic on a null pointer has undefined behavior [-Wnull-pointer-arithmetic] writesb(PCI_IOBASE + addr, buffer, count); ~~~~~~~~~~ ^ include/asm-generic/io.h:726:21: warning: performing pointer arithmetic on a null pointer has undefined behavior [-Wnull-pointer-arithmetic] writesw(PCI_IOBASE + addr, buffer, count); ~~~~~~~~~~ ^ include/asm-generic/io.h:735:21: warning: performing pointer arithmetic on a null pointer has undefined behavior [-Wnull-pointer-arithmetic] writesl(PCI_IOBASE + addr, buffer, count); ~~~~~~~~~~ ^ >> net/core/dev.c:4819:10: error: call to undeclared function 'rps_input_queue_tail_incr'; ISO C99 and later do not support implicit function declarations [-Werror,-Wimplicit-function-declaration] tail = rps_input_queue_tail_incr(sd); ^ >> net/core/dev.c:4823:3: error: call to undeclared function 'rps_input_queue_tail_save'; ISO C99 and later do not support implicit function declarations [-Werror,-Wimplicit-function-declaration] rps_input_queue_tail_save(qtail, tail); ^ >> net/core/dev.c:5901:4: error: call to undeclared function 'rps_input_queue_head_incr'; ISO C99 and later do not support implicit function declarations [-Werror,-Wimplicit-function-declaration] rps_input_queue_head_incr(sd); ^ net/core/dev.c:5910:4: error: call to undeclared function 'rps_input_queue_head_incr'; ISO C99 and later do not support implicit function declarations [-Werror,-Wimplicit-function-declaration] rps_input_queue_head_incr(sd); ^ net/core/dev.c:6038:4: error: call to undeclared function 'rps_input_queue_head_incr'; ISO C99 and later do not support implicit function declarations [-Werror,-Wimplicit-function-declaration] rps_input_queue_head_incr(sd); ^ net/core/dev.c:11452:3: error: call to undeclared function 'rps_input_queue_head_incr'; ISO C99 and later do not support implicit function declarations [-Werror,-Wimplicit-function-declaration] rps_input_queue_head_incr(oldsd); ^ net/core/dev.c:11456:3: error: call to undeclared function 'rps_input_queue_head_incr'; ISO C99 and later do not support implicit function declarations [-Werror,-Wimplicit-function-declaration] rps_input_queue_head_incr(oldsd); ^ 12 warnings and 7 errors generated. vim +/rps_input_queue_tail_incr +4819 net/core/dev.c 4781 4782 /* 4783 * enqueue_to_backlog is called to queue an skb to a per CPU backlog 4784 * queue (may be a remote CPU queue). 4785 */ 4786 static int enqueue_to_backlog(struct sk_buff *skb, int cpu, 4787 unsigned int *qtail) 4788 { 4789 enum skb_drop_reason reason; 4790 struct softnet_data *sd; 4791 unsigned long flags; 4792 unsigned int qlen; 4793 int max_backlog; 4794 u32 tail; 4795 4796 reason = SKB_DROP_REASON_DEV_READY; 4797 if (!netif_running(skb->dev)) 4798 goto bad_dev; 4799 4800 reason = SKB_DROP_REASON_CPU_BACKLOG; 4801 sd = &per_cpu(softnet_data, cpu); 4802 4803 qlen = skb_queue_len_lockless(&sd->input_pkt_queue); 4804 max_backlog = READ_ONCE(net_hotdata.max_backlog); 4805 if (unlikely(qlen > max_backlog)) 4806 goto cpu_backlog_drop; 4807 backlog_lock_irq_save(sd, &flags); 4808 qlen = skb_queue_len(&sd->input_pkt_queue); 4809 if (qlen <= max_backlog && !skb_flow_limit(skb, qlen)) { 4810 if (!qlen) { 4811 /* Schedule NAPI for backlog device. We can use 4812 * non atomic operation as we own the queue lock. 4813 */ 4814 if (!__test_and_set_bit(NAPI_STATE_SCHED, 4815 &sd->backlog.state)) 4816 napi_schedule_rps(sd); 4817 } 4818 __skb_queue_tail(&sd->input_pkt_queue, skb); > 4819 tail = rps_input_queue_tail_incr(sd); 4820 backlog_unlock_irq_restore(sd, &flags); 4821 4822 /* save the tail outside of the critical section */ > 4823 rps_input_queue_tail_save(qtail, tail); 4824 return NET_RX_SUCCESS; 4825 } 4826 4827 backlog_unlock_irq_restore(sd, &flags); 4828 4829 cpu_backlog_drop: 4830 atomic_inc(&sd->dropped); 4831 bad_dev: 4832 dev_core_stats_rx_dropped_inc(skb->dev); 4833 kfree_skb_reason(skb, reason); 4834 return NET_RX_DROP; 4835 } 4836
diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h index 1c31cd2691d32064613836141fbdeeebc831b21f..14f19cc2616452d7e6afbbaa52f8ad3e61a419e9 100644 --- a/include/linux/netdevice.h +++ b/include/linux/netdevice.h @@ -3249,21 +3249,6 @@ struct softnet_data { call_single_data_t defer_csd; }; -static inline void input_queue_head_incr(struct softnet_data *sd) -{ -#ifdef CONFIG_RPS - sd->input_queue_head++; -#endif -} - -static inline void input_queue_tail_incr_save(struct softnet_data *sd, - unsigned int *qtail) -{ -#ifdef CONFIG_RPS - *qtail = ++sd->input_queue_tail; -#endif -} - DECLARE_PER_CPU_ALIGNED(struct softnet_data, softnet_data); static inline int dev_recursion_level(void) diff --git a/include/net/rps.h b/include/net/rps.h index 7660243e905b92651a41292e04caf72c5f12f26e..c13f829b8556fda63e76544c332f2c089f0d6ea4 100644 --- a/include/net/rps.h +++ b/include/net/rps.h @@ -35,6 +35,29 @@ struct rps_dev_flow { }; #define RPS_NO_FILTER 0xffff +static inline u32 rps_input_queue_tail_incr(struct softnet_data *sd) +{ +#ifdef CONFIG_RPS + return ++sd->input_queue_tail; +#else + return 0; +#endif +} + +static inline void rps_input_queue_tail_save(u32 *dest, u32 tail) +{ +#ifdef CONFIG_RPS + WRITE_ONCE(*dest, tail); +#endif +} + +static inline void rps_input_queue_head_incr(struct softnet_data *sd) +{ +#ifdef CONFIG_RPS + sd->input_queue_head++; +#endif +} + /* * The rps_dev_flow_table structure contains a table of flow mappings. */ diff --git a/net/core/dev.c b/net/core/dev.c index 4e52745f23412bac6d3ff1b9f4d9f2ce4a2eb666..1fe7c6b10793d45a03461ee581d240d2442f9e17 100644 --- a/net/core/dev.c +++ b/net/core/dev.c @@ -4601,7 +4601,7 @@ static int get_rps_cpu(struct net_device *dev, struct sk_buff *skb, if (unlikely(tcpu != next_cpu) && (tcpu >= nr_cpu_ids || !cpu_online(tcpu) || ((int)(per_cpu(softnet_data, tcpu).input_queue_head - - rflow->last_qtail)) >= 0)) { + READ_ONCE(rflow->last_qtail))) >= 0)) { tcpu = next_cpu; rflow = set_rps_cpu(dev, skb, rflow, next_cpu); } @@ -4656,7 +4656,7 @@ bool rps_may_expire_flow(struct net_device *dev, u16 rxq_index, cpu = READ_ONCE(rflow->cpu); if (rflow->filter == filter_id && cpu < nr_cpu_ids && ((int)(per_cpu(softnet_data, cpu).input_queue_head - - rflow->last_qtail) < + READ_ONCE(rflow->last_qtail)) < (int)(10 * flow_table->mask))) expire = false; } @@ -4791,6 +4791,7 @@ static int enqueue_to_backlog(struct sk_buff *skb, int cpu, unsigned long flags; unsigned int qlen; int max_backlog; + u32 tail; reason = SKB_DROP_REASON_DEV_READY; if (!netif_running(skb->dev)) @@ -4815,8 +4816,11 @@ static int enqueue_to_backlog(struct sk_buff *skb, int cpu, napi_schedule_rps(sd); } __skb_queue_tail(&sd->input_pkt_queue, skb); - input_queue_tail_incr_save(sd, qtail); + tail = rps_input_queue_tail_incr(sd); backlog_unlock_irq_restore(sd, &flags); + + /* save the tail outside of the critical section */ + rps_input_queue_tail_save(qtail, tail); return NET_RX_SUCCESS; } @@ -5894,7 +5898,7 @@ static void flush_backlog(struct work_struct *work) if (skb->dev->reg_state == NETREG_UNREGISTERING) { __skb_unlink(skb, &sd->input_pkt_queue); dev_kfree_skb_irq(skb); - input_queue_head_incr(sd); + rps_input_queue_head_incr(sd); } } backlog_unlock_irq_enable(sd); @@ -5903,7 +5907,7 @@ static void flush_backlog(struct work_struct *work) if (skb->dev->reg_state == NETREG_UNREGISTERING) { __skb_unlink(skb, &sd->process_queue); kfree_skb(skb); - input_queue_head_incr(sd); + rps_input_queue_head_incr(sd); } } local_bh_enable(); @@ -6031,7 +6035,7 @@ static int process_backlog(struct napi_struct *napi, int quota) rcu_read_lock(); __netif_receive_skb(skb); rcu_read_unlock(); - input_queue_head_incr(sd); + rps_input_queue_head_incr(sd); if (++work >= quota) return work; @@ -11445,11 +11449,11 @@ static int dev_cpu_dead(unsigned int oldcpu) /* Process offline CPU's input_pkt_queue */ while ((skb = __skb_dequeue(&oldsd->process_queue))) { netif_rx(skb); - input_queue_head_incr(oldsd); + rps_input_queue_head_incr(oldsd); } while ((skb = skb_dequeue(&oldsd->input_pkt_queue))) { netif_rx(skb); - input_queue_head_incr(oldsd); + rps_input_queue_head_incr(oldsd); } return 0;
input_queue_tail_incr_save() is incrementing the sd queue_tail and save it in the flow last_qtail. Two issues here : - no lock protects the write on last_qtail, we should use appropriate annotations. - We can perform this write after releasing the per-cpu backlog lock, to decrease this lock hold duration (move away the cache line miss) Also move input_queue_head_incr() and rps helpers to include/net/rps.h, while adding rps_ prefix to better reflect their role. Signed-off-by: Eric Dumazet <edumazet@google.com> --- include/linux/netdevice.h | 15 --------------- include/net/rps.h | 23 +++++++++++++++++++++++ net/core/dev.c | 20 ++++++++++++-------- 3 files changed, 35 insertions(+), 23 deletions(-)