Message ID | 20230925020528.777578-2-yury.norov@gmail.com (mailing list archive) |
---|---|
State | Not Applicable |
Delegated to: | Netdev Maintainers |
Headers | show |
Series | sched: drop for_each_numa_hop_mask() | expand |
On 9/24/2023 7:05 PM, Yury Norov wrote: > The function duplicates existing cpumask_local_spread(), and it's O(N), > while cpumask_local_spread() implementation is based on bsearch, and > thus is O(log n), so drop mlx5_cpumask_default_spread() and use generic > cpumask_local_spread(). > > Signed-off-by: Yury Norov <yury.norov@gmail.com> > Signed-off-by: Yury Norov <ynorov@nvidia.com> > --- > drivers/net/ethernet/mellanox/mlx5/core/eq.c | 28 ++------------------ > 1 file changed, 2 insertions(+), 26 deletions(-) > > diff --git a/drivers/net/ethernet/mellanox/mlx5/core/eq.c b/drivers/net/ethernet/mellanox/mlx5/core/eq.c > index ea0405e0a43f..bd9f857cc52d 100644 > --- a/drivers/net/ethernet/mellanox/mlx5/core/eq.c > +++ b/drivers/net/ethernet/mellanox/mlx5/core/eq.c > @@ -828,30 +828,6 @@ static void comp_irq_release_pci(struct mlx5_core_dev *dev, u16 vecidx) > mlx5_irq_release_vector(irq); > } > Reviewed-by: Jacob Keller <jacob.e.keller@intel.com> > -static int mlx5_cpumask_default_spread(int numa_node, int index) > -{ > - const struct cpumask *prev = cpu_none_mask; > - const struct cpumask *mask; > - int found_cpu = 0; > - int i = 0; > - int cpu; > - > - rcu_read_lock(); > - for_each_numa_hop_mask(mask, numa_node) { > - for_each_cpu_andnot(cpu, mask, prev) { > - if (i++ == index) { > - found_cpu = cpu; > - goto spread_done; > - } > - } > - prev = mask; > - } > - > -spread_done: > - rcu_read_unlock(); > - return found_cpu; > -} > - > static struct cpu_rmap *mlx5_eq_table_get_pci_rmap(struct mlx5_core_dev *dev) > { > #ifdef CONFIG_RFS_ACCEL > @@ -873,7 +849,7 @@ static int comp_irq_request_pci(struct mlx5_core_dev *dev, u16 vecidx) > int cpu; > > rmap = mlx5_eq_table_get_pci_rmap(dev); > - cpu = mlx5_cpumask_default_spread(dev->priv.numa_node, vecidx); > + cpu = cpumask_local_spread(vecidx, dev->priv.numa_node); > irq = mlx5_irq_request_vector(dev, cpu, vecidx, &rmap); > if (IS_ERR(irq)) > return PTR_ERR(irq); > @@ -1125,7 +1101,7 @@ int mlx5_comp_vector_get_cpu(struct mlx5_core_dev *dev, int vector) > if (mask) > cpu = cpumask_first(mask); > else > - cpu = mlx5_cpumask_default_spread(dev->priv.numa_node, vector); > + cpu = cpumask_local_spread(vector, dev->priv.numa_node); > > return cpu; > }
On Sun, 2023-09-24 at 19:05 -0700, Yury Norov wrote: > The function duplicates existing cpumask_local_spread(), and it's O(N), > while cpumask_local_spread() implementation is based on bsearch, and > thus is O(log n), so drop mlx5_cpumask_default_spread() and use generic > cpumask_local_spread(). > > Signed-off-by: Yury Norov <yury.norov@gmail.com> > Signed-off-by: Yury Norov <ynorov@nvidia.com> > --- > drivers/net/ethernet/mellanox/mlx5/core/eq.c | 28 ++------------------ > 1 file changed, 2 insertions(+), 26 deletions(-) > > diff --git a/drivers/net/ethernet/mellanox/mlx5/core/eq.c b/drivers/net/ethernet/mellanox/mlx5/core/eq.c > index ea0405e0a43f..bd9f857cc52d 100644 > --- a/drivers/net/ethernet/mellanox/mlx5/core/eq.c > +++ b/drivers/net/ethernet/mellanox/mlx5/core/eq.c > @@ -828,30 +828,6 @@ static void comp_irq_release_pci(struct mlx5_core_dev *dev, u16 vecidx) > mlx5_irq_release_vector(irq); > } > > -static int mlx5_cpumask_default_spread(int numa_node, int index) > -{ > - const struct cpumask *prev = cpu_none_mask; > - const struct cpumask *mask; > - int found_cpu = 0; > - int i = 0; > - int cpu; > - > - rcu_read_lock(); > - for_each_numa_hop_mask(mask, numa_node) { > - for_each_cpu_andnot(cpu, mask, prev) { > - if (i++ == index) { > - found_cpu = cpu; > - goto spread_done; > - } > - } > - prev = mask; > - } > - > -spread_done: > - rcu_read_unlock(); > - return found_cpu; > -} > - > static struct cpu_rmap *mlx5_eq_table_get_pci_rmap(struct mlx5_core_dev *dev) > { > #ifdef CONFIG_RFS_ACCEL > @@ -873,7 +849,7 @@ static int comp_irq_request_pci(struct mlx5_core_dev *dev, u16 vecidx) > int cpu; > > rmap = mlx5_eq_table_get_pci_rmap(dev); > - cpu = mlx5_cpumask_default_spread(dev->priv.numa_node, vecidx); > + cpu = cpumask_local_spread(vecidx, dev->priv.numa_node); > irq = mlx5_irq_request_vector(dev, cpu, vecidx, &rmap); > if (IS_ERR(irq)) > return PTR_ERR(irq); > @@ -1125,7 +1101,7 @@ int mlx5_comp_vector_get_cpu(struct mlx5_core_dev *dev, int vector) > if (mask) > cpu = cpumask_first(mask); > else > - cpu = mlx5_cpumask_default_spread(dev->priv.numa_node, vector); > + cpu = cpumask_local_spread(vector, dev->priv.numa_node); > > return cpu; > } It looks like this series is going to cause some later conflicts regardless of the target tree. I think the whole series could go via the net-next tree, am I missing any relevant point? Thanks! Paolo
On Tue, Oct 03, 2023 at 12:04:01PM +0200, Paolo Abeni wrote: > On Sun, 2023-09-24 at 19:05 -0700, Yury Norov wrote: > > The function duplicates existing cpumask_local_spread(), and it's O(N), > > while cpumask_local_spread() implementation is based on bsearch, and > > thus is O(log n), so drop mlx5_cpumask_default_spread() and use generic > > cpumask_local_spread(). > > > > Signed-off-by: Yury Norov <yury.norov@gmail.com> > > Signed-off-by: Yury Norov <ynorov@nvidia.com> > > --- > > drivers/net/ethernet/mellanox/mlx5/core/eq.c | 28 ++------------------ > > 1 file changed, 2 insertions(+), 26 deletions(-) > > > > diff --git a/drivers/net/ethernet/mellanox/mlx5/core/eq.c b/drivers/net/ethernet/mellanox/mlx5/core/eq.c > > index ea0405e0a43f..bd9f857cc52d 100644 > > --- a/drivers/net/ethernet/mellanox/mlx5/core/eq.c > > +++ b/drivers/net/ethernet/mellanox/mlx5/core/eq.c > > @@ -828,30 +828,6 @@ static void comp_irq_release_pci(struct mlx5_core_dev *dev, u16 vecidx) > > mlx5_irq_release_vector(irq); > > } > > > > -static int mlx5_cpumask_default_spread(int numa_node, int index) > > -{ > > - const struct cpumask *prev = cpu_none_mask; > > - const struct cpumask *mask; > > - int found_cpu = 0; > > - int i = 0; > > - int cpu; > > - > > - rcu_read_lock(); > > - for_each_numa_hop_mask(mask, numa_node) { > > - for_each_cpu_andnot(cpu, mask, prev) { > > - if (i++ == index) { > > - found_cpu = cpu; > > - goto spread_done; > > - } > > - } > > - prev = mask; > > - } > > - > > -spread_done: > > - rcu_read_unlock(); > > - return found_cpu; > > -} > > - > > static struct cpu_rmap *mlx5_eq_table_get_pci_rmap(struct mlx5_core_dev *dev) > > { > > #ifdef CONFIG_RFS_ACCEL > > @@ -873,7 +849,7 @@ static int comp_irq_request_pci(struct mlx5_core_dev *dev, u16 vecidx) > > int cpu; > > > > rmap = mlx5_eq_table_get_pci_rmap(dev); > > - cpu = mlx5_cpumask_default_spread(dev->priv.numa_node, vecidx); > > + cpu = cpumask_local_spread(vecidx, dev->priv.numa_node); > > irq = mlx5_irq_request_vector(dev, cpu, vecidx, &rmap); > > if (IS_ERR(irq)) > > return PTR_ERR(irq); > > @@ -1125,7 +1101,7 @@ int mlx5_comp_vector_get_cpu(struct mlx5_core_dev *dev, int vector) > > if (mask) > > cpu = cpumask_first(mask); > > else > > - cpu = mlx5_cpumask_default_spread(dev->priv.numa_node, vector); > > + cpu = cpumask_local_spread(vector, dev->priv.numa_node); > > > > return cpu; > > } > > It looks like this series is going to cause some later conflicts > regardless of the target tree. I think the whole series could go via > the net-next tree, am I missing any relevant point? Hi Paolo, Can you elaborate on the conflicts you see? For me it applies cleanly on current master, and with some 3-way merging on latest -next... Thanks, Yury
On Tue, 3 Oct 2023 06:46:17 -0700 Yury Norov wrote: > Can you elaborate on the conflicts you see? For me it applies cleanly > on current master, and with some 3-way merging on latest -next... We're half way thru the release cycle the conflicts can still come in. There's no dependency for the first patch. The most normal way to handle this would be to send patch 1 to the networking tree and send the rest in the subsequent merge window.
On Tue, Oct 03, 2023 at 03:20:30PM -0700, Jakub Kicinski wrote: > On Tue, 3 Oct 2023 06:46:17 -0700 Yury Norov wrote: > > Can you elaborate on the conflicts you see? For me it applies cleanly > > on current master, and with some 3-way merging on latest -next... > > We're half way thru the release cycle the conflicts can still come in. > > There's no dependency for the first patch. The most normal way to > handle this would be to send patch 1 to the networking tree and send > the rest in the subsequent merge window. Ah, I understand now. I didn't plan to move it in current merge window. In fact, I'll be more comfortable to keep it in -next for longer and merge it in v6.7. But it's up to you. If you think it's better, I can resend 1st patch separately. Thanks, Yury
On Tue, 3 Oct 2023 18:31:28 -0700 Yury Norov wrote: > > We're half way thru the release cycle the conflicts can still come in. > > > > There's no dependency for the first patch. The most normal way to > > handle this would be to send patch 1 to the networking tree and send > > the rest in the subsequent merge window. > > Ah, I understand now. I didn't plan to move it in current merge > window. In fact, I'll be more comfortable to keep it in -next for > longer and merge it in v6.7. > > But it's up to you. If you think it's better, I can resend 1st patch > separately. Let's see if Saeed can help us. Saeed, could you pick up patch 1 from this series for the mlx5 tree?
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/eq.c b/drivers/net/ethernet/mellanox/mlx5/core/eq.c index ea0405e0a43f..bd9f857cc52d 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/eq.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/eq.c @@ -828,30 +828,6 @@ static void comp_irq_release_pci(struct mlx5_core_dev *dev, u16 vecidx) mlx5_irq_release_vector(irq); } -static int mlx5_cpumask_default_spread(int numa_node, int index) -{ - const struct cpumask *prev = cpu_none_mask; - const struct cpumask *mask; - int found_cpu = 0; - int i = 0; - int cpu; - - rcu_read_lock(); - for_each_numa_hop_mask(mask, numa_node) { - for_each_cpu_andnot(cpu, mask, prev) { - if (i++ == index) { - found_cpu = cpu; - goto spread_done; - } - } - prev = mask; - } - -spread_done: - rcu_read_unlock(); - return found_cpu; -} - static struct cpu_rmap *mlx5_eq_table_get_pci_rmap(struct mlx5_core_dev *dev) { #ifdef CONFIG_RFS_ACCEL @@ -873,7 +849,7 @@ static int comp_irq_request_pci(struct mlx5_core_dev *dev, u16 vecidx) int cpu; rmap = mlx5_eq_table_get_pci_rmap(dev); - cpu = mlx5_cpumask_default_spread(dev->priv.numa_node, vecidx); + cpu = cpumask_local_spread(vecidx, dev->priv.numa_node); irq = mlx5_irq_request_vector(dev, cpu, vecidx, &rmap); if (IS_ERR(irq)) return PTR_ERR(irq); @@ -1125,7 +1101,7 @@ int mlx5_comp_vector_get_cpu(struct mlx5_core_dev *dev, int vector) if (mask) cpu = cpumask_first(mask); else - cpu = mlx5_cpumask_default_spread(dev->priv.numa_node, vector); + cpu = cpumask_local_spread(vector, dev->priv.numa_node); return cpu; }