diff mbox

[v2] net: dsa: drop some VLAs in switch.c

Message ID 1525706596-13601-1-git-send-email-s.mesoraca16@gmail.com (mailing list archive)
State Mainlined
Headers show

Commit Message

Salvatore Mesoraca May 7, 2018, 3:23 p.m. UTC
We avoid 2 VLAs by using a pre-allocated field in dsa_switch.
We also try to avoid dynamic allocation whenever possible.

Link: http://lkml.kernel.org/r/CA+55aFzCG-zNmZwX4A2FQpadafLfEzK6CC=qPXydAacU1RqZWA@mail.gmail.com
Link: http://lkml.kernel.org/r/20180505185145.GB32630@lunn.ch

Signed-off-by: Salvatore Mesoraca <s.mesoraca16@gmail.com>
---
 include/net/dsa.h |  3 +++
 net/dsa/dsa2.c    | 14 ++++++++++++++
 net/dsa/switch.c  | 22 ++++++++++------------
 3 files changed, 27 insertions(+), 12 deletions(-)

Comments

Florian Fainelli May 7, 2018, 6:14 p.m. UTC | #1
On 05/07/2018 08:23 AM, Salvatore Mesoraca wrote:
> We avoid 2 VLAs by using a pre-allocated field in dsa_switch.
> We also try to avoid dynamic allocation whenever possible.
> 
> Link: http://lkml.kernel.org/r/CA+55aFzCG-zNmZwX4A2FQpadafLfEzK6CC=qPXydAacU1RqZWA@mail.gmail.com
> Link: http://lkml.kernel.org/r/20180505185145.GB32630@lunn.ch
> 
> Signed-off-by: Salvatore Mesoraca <s.mesoraca16@gmail.com>
> ---
>  include/net/dsa.h |  3 +++
>  net/dsa/dsa2.c    | 14 ++++++++++++++
>  net/dsa/switch.c  | 22 ++++++++++------------
>  3 files changed, 27 insertions(+), 12 deletions(-)
> 
> diff --git a/include/net/dsa.h b/include/net/dsa.h
> index 60fb4ec..576791d 100644
> --- a/include/net/dsa.h
> +++ b/include/net/dsa.h
> @@ -256,6 +256,9 @@ struct dsa_switch {
>  	/* Number of switch port queues */
>  	unsigned int		num_tx_queues;
>  
> +	unsigned long		*bitmap;
> +	unsigned long		_bitmap;
> +
>  	/* Dynamically allocated ports, keep last */
>  	size_t num_ports;
>  	struct dsa_port ports[];
> diff --git a/net/dsa/dsa2.c b/net/dsa/dsa2.c
> index adf50fb..cebf35f0 100644
> --- a/net/dsa/dsa2.c
> +++ b/net/dsa/dsa2.c
> @@ -748,6 +748,20 @@ struct dsa_switch *dsa_switch_alloc(struct device *dev, size_t n)
>  	if (!ds)
>  		return NULL;
>  
> +	/* We avoid allocating memory outside dsa_switch
> +	 * if it is not needed.
> +	 */
> +	if (n <= sizeof(ds->_bitmap) * 8) {
> +		ds->bitmap = &ds->_bitmap;

Should not this be / BITS_PER_BYTE? If the sizeof(unsigned long) is <=
8, then you don't need to allocate it, otherwise, you have to.

I would actually just always dynamically allocate the bitmap, optimizing
for the case where we have fewer than or 8 ports is not worth IMHO.
Salvatore Mesoraca May 7, 2018, 7:02 p.m. UTC | #2
2018-05-07 20:14 GMT+02:00 Florian Fainelli <f.fainelli@gmail.com>:
> On 05/07/2018 08:23 AM, Salvatore Mesoraca wrote:
>> We avoid 2 VLAs by using a pre-allocated field in dsa_switch.
>> We also try to avoid dynamic allocation whenever possible.
>>
>> Link: http://lkml.kernel.org/r/CA+55aFzCG-zNmZwX4A2FQpadafLfEzK6CC=qPXydAacU1RqZWA@mail.gmail.com
>> Link: http://lkml.kernel.org/r/20180505185145.GB32630@lunn.ch
>>
>> Signed-off-by: Salvatore Mesoraca <s.mesoraca16@gmail.com>
>> ---
>>  include/net/dsa.h |  3 +++
>>  net/dsa/dsa2.c    | 14 ++++++++++++++
>>  net/dsa/switch.c  | 22 ++++++++++------------
>>  3 files changed, 27 insertions(+), 12 deletions(-)
>>
>> diff --git a/include/net/dsa.h b/include/net/dsa.h
>> index 60fb4ec..576791d 100644
>> --- a/include/net/dsa.h
>> +++ b/include/net/dsa.h
>> @@ -256,6 +256,9 @@ struct dsa_switch {
>>       /* Number of switch port queues */
>>       unsigned int            num_tx_queues;
>>
>> +     unsigned long           *bitmap;
>> +     unsigned long           _bitmap;
>> +
>>       /* Dynamically allocated ports, keep last */
>>       size_t num_ports;
>>       struct dsa_port ports[];
>> diff --git a/net/dsa/dsa2.c b/net/dsa/dsa2.c
>> index adf50fb..cebf35f0 100644
>> --- a/net/dsa/dsa2.c
>> +++ b/net/dsa/dsa2.c
>> @@ -748,6 +748,20 @@ struct dsa_switch *dsa_switch_alloc(struct device *dev, size_t n)
>>       if (!ds)
>>               return NULL;
>>
>> +     /* We avoid allocating memory outside dsa_switch
>> +      * if it is not needed.
>> +      */
>> +     if (n <= sizeof(ds->_bitmap) * 8) {
>> +             ds->bitmap = &ds->_bitmap;
>
> Should not this be / BITS_PER_BYTE? If the sizeof(unsigned long) is <=
> 8, then you don't need to allocate it, otherwise, you have to.

No.
We need 1 bit per port; sizeof() returns the size in bytes,
hence the multiplication to get the number of bits.
I could multiply by BITS_PER_BYTE instead of 8, but I doubt that
Linux supports implementations where a byte is not an octet.

> I would actually just always dynamically allocate the bitmap, optimizing
> for the case where we have fewer than or 8 ports is not worth IMHO.

This optimization will save us an allocation when number of ports is
less than 32 or 64 (depending on arch).
IMHO it's useful, if you consider that, right now, DSA works only with
12-ports switches.

Thank you for your time,

Salvatore
Andrew Lunn May 7, 2018, 7:26 p.m. UTC | #3
> >> +++ b/include/net/dsa.h
> >> @@ -256,6 +256,9 @@ struct dsa_switch {
> >>       /* Number of switch port queues */
> >>       unsigned int            num_tx_queues;
> >>
> >> +     unsigned long           *bitmap;
> >> +     unsigned long           _bitmap;
> >> +
> >>       /* Dynamically allocated ports, keep last */
> >>       size_t num_ports;
> >>       struct dsa_port ports[];
> >> diff --git a/net/dsa/dsa2.c b/net/dsa/dsa2.c
> >> index adf50fb..cebf35f0 100644
> >> --- a/net/dsa/dsa2.c
> >> +++ b/net/dsa/dsa2.c
> >> @@ -748,6 +748,20 @@ struct dsa_switch *dsa_switch_alloc(struct device *dev, size_t n)
> >>       if (!ds)
> >>               return NULL;
> >>
> >> +     /* We avoid allocating memory outside dsa_switch
> >> +      * if it is not needed.
> >> +      */
> >> +     if (n <= sizeof(ds->_bitmap) * 8) {
> >> +             ds->bitmap = &ds->_bitmap;
> >
> > Should not this be / BITS_PER_BYTE? If the sizeof(unsigned long) is <=
> > 8, then you don't need to allocate it, otherwise, you have to.

> This optimization will save us an allocation when number of ports is
> less than 32 or 64 (depending on arch).
> IMHO it's useful, if you consider that, right now, DSA works only with
> 12-ports switches.

Do you have a feeling for the savings? I don't see it being very
large, and given the extra code, it might actually be negative.

    Andrew
David Laight May 8, 2018, 9:39 a.m. UTC | #4
From: Salvatore Mesoraca

> Sent: 07 May 2018 20:03
...
> This optimization will save us an allocation when number of ports is
> less than 32 or 64 (depending on arch).
> IMHO it's useful, if you consider that, right now, DSA works only with
> 12-ports switches.


Why not just error out if the number of ports is greater than the compile-time
limit?

Worry about dynamic allocation if you need a lot more than 64 ports.

	David
Salvatore Mesoraca May 8, 2018, 10:27 a.m. UTC | #5
2018-05-07 21:26 GMT+02:00 Andrew Lunn <andrew@lunn.ch>:
>> >> +++ b/include/net/dsa.h
>> >> @@ -256,6 +256,9 @@ struct dsa_switch {
>> >>       /* Number of switch port queues */
>> >>       unsigned int            num_tx_queues;
>> >>
>> >> +     unsigned long           *bitmap;
>> >> +     unsigned long           _bitmap;
>> >> +
>> >>       /* Dynamically allocated ports, keep last */
>> >>       size_t num_ports;
>> >>       struct dsa_port ports[];
>> >> diff --git a/net/dsa/dsa2.c b/net/dsa/dsa2.c
>> >> index adf50fb..cebf35f0 100644
>> >> --- a/net/dsa/dsa2.c
>> >> +++ b/net/dsa/dsa2.c
>> >> @@ -748,6 +748,20 @@ struct dsa_switch *dsa_switch_alloc(struct device *dev, size_t n)
>> >>       if (!ds)
>> >>               return NULL;
>> >>
>> >> +     /* We avoid allocating memory outside dsa_switch
>> >> +      * if it is not needed.
>> >> +      */
>> >> +     if (n <= sizeof(ds->_bitmap) * 8) {
>> >> +             ds->bitmap = &ds->_bitmap;
>> >
>> > Should not this be / BITS_PER_BYTE? If the sizeof(unsigned long) is <=
>> > 8, then you don't need to allocate it, otherwise, you have to.
>
>> This optimization will save us an allocation when number of ports is
>> less than 32 or 64 (depending on arch).
>> IMHO it's useful, if you consider that, right now, DSA works only with
>> 12-ports switches.
>
> Do you have a feeling for the savings? I don't see it being very
> large, and given the extra code, it might actually be negative.

I think that a "compare" and a "jump" cost nothing compared to
devm_kmalloc, its eventual free, and, *maybe*, the cache miss you
get every time you access the bitmask.
This is not necessarily relevant if this code is invoked rarely,
but, IMHO, it seems strange to always go for dynamic allocation for
something that will be, almost always, as big as a pointer.

Salvatore
Salvatore Mesoraca May 8, 2018, 10:32 a.m. UTC | #6
2018-05-08 11:39 GMT+02:00 David Laight <David.Laight@aculab.com>:
> From: Salvatore Mesoraca
>> Sent: 07 May 2018 20:03
> ...
>> This optimization will save us an allocation when number of ports is
>> less than 32 or 64 (depending on arch).
>> IMHO it's useful, if you consider that, right now, DSA works only with
>> 12-ports switches.
>
> Why not just error out if the number of ports is greater than the compile-time
> limit?
>
> Worry about dynamic allocation if you need a lot more than 64 ports.

v1 has been NAK-ed by maintainers because they don't want limits on how
many ports a switch can have.

Salvatore
Kees Cook June 20, 2018, 4:43 a.m. UTC | #7
On Mon, May 7, 2018 at 8:23 AM, Salvatore Mesoraca
<s.mesoraca16@gmail.com> wrote:
> We avoid 2 VLAs by using a pre-allocated field in dsa_switch.
> We also try to avoid dynamic allocation whenever possible.
>
> Link: http://lkml.kernel.org/r/CA+55aFzCG-zNmZwX4A2FQpadafLfEzK6CC=qPXydAacU1RqZWA@mail.gmail.com
> Link: http://lkml.kernel.org/r/20180505185145.GB32630@lunn.ch
>
> Signed-off-by: Salvatore Mesoraca <s.mesoraca16@gmail.com>

Friendly ping. What's needed to take this into the tree? It looks like
all the issues in v1 were addressed here.

Thanks!

-Kees

> ---
>  include/net/dsa.h |  3 +++
>  net/dsa/dsa2.c    | 14 ++++++++++++++
>  net/dsa/switch.c  | 22 ++++++++++------------
>  3 files changed, 27 insertions(+), 12 deletions(-)
>
> diff --git a/include/net/dsa.h b/include/net/dsa.h
> index 60fb4ec..576791d 100644
> --- a/include/net/dsa.h
> +++ b/include/net/dsa.h
> @@ -256,6 +256,9 @@ struct dsa_switch {
>         /* Number of switch port queues */
>         unsigned int            num_tx_queues;
>
> +       unsigned long           *bitmap;
> +       unsigned long           _bitmap;
> +
>         /* Dynamically allocated ports, keep last */
>         size_t num_ports;
>         struct dsa_port ports[];
> diff --git a/net/dsa/dsa2.c b/net/dsa/dsa2.c
> index adf50fb..cebf35f0 100644
> --- a/net/dsa/dsa2.c
> +++ b/net/dsa/dsa2.c
> @@ -748,6 +748,20 @@ struct dsa_switch *dsa_switch_alloc(struct device *dev, size_t n)
>         if (!ds)
>                 return NULL;
>
> +       /* We avoid allocating memory outside dsa_switch
> +        * if it is not needed.
> +        */
> +       if (n <= sizeof(ds->_bitmap) * 8) {
> +               ds->bitmap = &ds->_bitmap;
> +       } else {
> +               ds->bitmap = devm_kzalloc(dev,
> +                                         BITS_TO_LONGS(n) *
> +                                               sizeof(unsigned long),
> +                                         GFP_KERNEL);
> +               if (unlikely(!ds->bitmap))
> +                       return NULL;
> +       }
> +
>         ds->dev = dev;
>         ds->num_ports = n;
>
> diff --git a/net/dsa/switch.c b/net/dsa/switch.c
> index b935117..142b294 100644
> --- a/net/dsa/switch.c
> +++ b/net/dsa/switch.c
> @@ -136,21 +136,20 @@ static int dsa_switch_mdb_add(struct dsa_switch *ds,
>  {
>         const struct switchdev_obj_port_mdb *mdb = info->mdb;
>         struct switchdev_trans *trans = info->trans;
> -       DECLARE_BITMAP(group, ds->num_ports);
>         int port;
>
>         /* Build a mask of Multicast group members */
> -       bitmap_zero(group, ds->num_ports);
> +       bitmap_zero(ds->bitmap, ds->num_ports);
>         if (ds->index == info->sw_index)
> -               set_bit(info->port, group);
> +               set_bit(info->port, ds->bitmap);
>         for (port = 0; port < ds->num_ports; port++)
>                 if (dsa_is_dsa_port(ds, port))
> -                       set_bit(port, group);
> +                       set_bit(port, ds->bitmap);
>
>         if (switchdev_trans_ph_prepare(trans))
> -               return dsa_switch_mdb_prepare_bitmap(ds, mdb, group);
> +               return dsa_switch_mdb_prepare_bitmap(ds, mdb, ds->bitmap);
>
> -       dsa_switch_mdb_add_bitmap(ds, mdb, group);
> +       dsa_switch_mdb_add_bitmap(ds, mdb, ds->bitmap);
>
>         return 0;
>  }
> @@ -204,21 +203,20 @@ static int dsa_switch_vlan_add(struct dsa_switch *ds,
>  {
>         const struct switchdev_obj_port_vlan *vlan = info->vlan;
>         struct switchdev_trans *trans = info->trans;
> -       DECLARE_BITMAP(members, ds->num_ports);
>         int port;
>
>         /* Build a mask of VLAN members */
> -       bitmap_zero(members, ds->num_ports);
> +       bitmap_zero(ds->bitmap, ds->num_ports);
>         if (ds->index == info->sw_index)
> -               set_bit(info->port, members);
> +               set_bit(info->port, ds->bitmap);
>         for (port = 0; port < ds->num_ports; port++)
>                 if (dsa_is_cpu_port(ds, port) || dsa_is_dsa_port(ds, port))
> -                       set_bit(port, members);
> +                       set_bit(port, ds->bitmap);
>
>         if (switchdev_trans_ph_prepare(trans))
> -               return dsa_switch_vlan_prepare_bitmap(ds, vlan, members);
> +               return dsa_switch_vlan_prepare_bitmap(ds, vlan, ds->bitmap);
>
> -       dsa_switch_vlan_add_bitmap(ds, vlan, members);
> +       dsa_switch_vlan_add_bitmap(ds, vlan, ds->bitmap);
>
>         return 0;
>  }
> --
> 1.9.1
>
diff mbox

Patch

diff --git a/include/net/dsa.h b/include/net/dsa.h
index 60fb4ec..576791d 100644
--- a/include/net/dsa.h
+++ b/include/net/dsa.h
@@ -256,6 +256,9 @@  struct dsa_switch {
 	/* Number of switch port queues */
 	unsigned int		num_tx_queues;
 
+	unsigned long		*bitmap;
+	unsigned long		_bitmap;
+
 	/* Dynamically allocated ports, keep last */
 	size_t num_ports;
 	struct dsa_port ports[];
diff --git a/net/dsa/dsa2.c b/net/dsa/dsa2.c
index adf50fb..cebf35f0 100644
--- a/net/dsa/dsa2.c
+++ b/net/dsa/dsa2.c
@@ -748,6 +748,20 @@  struct dsa_switch *dsa_switch_alloc(struct device *dev, size_t n)
 	if (!ds)
 		return NULL;
 
+	/* We avoid allocating memory outside dsa_switch
+	 * if it is not needed.
+	 */
+	if (n <= sizeof(ds->_bitmap) * 8) {
+		ds->bitmap = &ds->_bitmap;
+	} else {
+		ds->bitmap = devm_kzalloc(dev,
+					  BITS_TO_LONGS(n) *
+						sizeof(unsigned long),
+					  GFP_KERNEL);
+		if (unlikely(!ds->bitmap))
+			return NULL;
+	}
+
 	ds->dev = dev;
 	ds->num_ports = n;
 
diff --git a/net/dsa/switch.c b/net/dsa/switch.c
index b935117..142b294 100644
--- a/net/dsa/switch.c
+++ b/net/dsa/switch.c
@@ -136,21 +136,20 @@  static int dsa_switch_mdb_add(struct dsa_switch *ds,
 {
 	const struct switchdev_obj_port_mdb *mdb = info->mdb;
 	struct switchdev_trans *trans = info->trans;
-	DECLARE_BITMAP(group, ds->num_ports);
 	int port;
 
 	/* Build a mask of Multicast group members */
-	bitmap_zero(group, ds->num_ports);
+	bitmap_zero(ds->bitmap, ds->num_ports);
 	if (ds->index == info->sw_index)
-		set_bit(info->port, group);
+		set_bit(info->port, ds->bitmap);
 	for (port = 0; port < ds->num_ports; port++)
 		if (dsa_is_dsa_port(ds, port))
-			set_bit(port, group);
+			set_bit(port, ds->bitmap);
 
 	if (switchdev_trans_ph_prepare(trans))
-		return dsa_switch_mdb_prepare_bitmap(ds, mdb, group);
+		return dsa_switch_mdb_prepare_bitmap(ds, mdb, ds->bitmap);
 
-	dsa_switch_mdb_add_bitmap(ds, mdb, group);
+	dsa_switch_mdb_add_bitmap(ds, mdb, ds->bitmap);
 
 	return 0;
 }
@@ -204,21 +203,20 @@  static int dsa_switch_vlan_add(struct dsa_switch *ds,
 {
 	const struct switchdev_obj_port_vlan *vlan = info->vlan;
 	struct switchdev_trans *trans = info->trans;
-	DECLARE_BITMAP(members, ds->num_ports);
 	int port;
 
 	/* Build a mask of VLAN members */
-	bitmap_zero(members, ds->num_ports);
+	bitmap_zero(ds->bitmap, ds->num_ports);
 	if (ds->index == info->sw_index)
-		set_bit(info->port, members);
+		set_bit(info->port, ds->bitmap);
 	for (port = 0; port < ds->num_ports; port++)
 		if (dsa_is_cpu_port(ds, port) || dsa_is_dsa_port(ds, port))
-			set_bit(port, members);
+			set_bit(port, ds->bitmap);
 
 	if (switchdev_trans_ph_prepare(trans))
-		return dsa_switch_vlan_prepare_bitmap(ds, vlan, members);
+		return dsa_switch_vlan_prepare_bitmap(ds, vlan, ds->bitmap);
 
-	dsa_switch_vlan_add_bitmap(ds, vlan, members);
+	dsa_switch_vlan_add_bitmap(ds, vlan, ds->bitmap);
 
 	return 0;
 }