diff mbox series

[net-next,v2,1/6] net: dcb: add new pcp selector to app object

Message ID 20220929185207.2183473-2-daniel.machon@microchip.com (mailing list archive)
State New, archived
Headers show
Series Add new PCP and APPTRUST attributes to dcbnl | expand

Commit Message

Daniel Machon Sept. 29, 2022, 6:52 p.m. UTC
Add new PCP selector for the 8021Qaz APP managed object.

As the PCP selector is not part of the 8021Qaz standard, a new non-std
extension attribute DCB_ATTR_DCB_APP has been introduced. Also two
helper functions to translate between selector and app attribute type
has been added.

The purpose of adding the PCP selector, is to be able to offload
PCP-based queue classification to the 8021Q Priority Code Point table,
see 6.9.3 of IEEE Std 802.1Q-2018.

PCP and DEI is encoded in the protocol field as 8*dei+pcp, so that a
mapping of PCP 2 and DEI 1 to priority 3 is encoded as {255, 10, 3}.

Signed-off-by: Daniel Machon <daniel.machon@microchip.com>
---
 include/uapi/linux/dcbnl.h |  6 +++++
 net/dcb/dcbnl.c            | 49 ++++++++++++++++++++++++++++++++++----
 2 files changed, 51 insertions(+), 4 deletions(-)

--
2.34.1

Comments

Petr Machata Sept. 30, 2022, 12:20 p.m. UTC | #1
Daniel Machon <daniel.machon@microchip.com> writes:

> Add new PCP selector for the 8021Qaz APP managed object.
>
> As the PCP selector is not part of the 8021Qaz standard, a new non-std
> extension attribute DCB_ATTR_DCB_APP has been introduced. Also two
> helper functions to translate between selector and app attribute type
> has been added.
>
> The purpose of adding the PCP selector, is to be able to offload
> PCP-based queue classification to the 8021Q Priority Code Point table,
> see 6.9.3 of IEEE Std 802.1Q-2018.

Just a note: the "dcb app" block deals with packet prioritization.
Classification is handled through "dcb ets prio-tc", or offloaded egress
qdiscs or whatever, regardless of how the priority was derived.

> PCP and DEI is encoded in the protocol field as 8*dei+pcp, so that a
> mapping of PCP 2 and DEI 1 to priority 3 is encoded as {255, 10, 3}.

It would be good to shout out that the new selector value is 255.
Also it would be good to be explicit about how the same struct dcb_app
is used for both standard and non-standard attributes, because there
currently is no overlap between the two namespaces.

> Signed-off-by: Daniel Machon <daniel.machon@microchip.com>
> ---
>  include/uapi/linux/dcbnl.h |  6 +++++
>  net/dcb/dcbnl.c            | 49 ++++++++++++++++++++++++++++++++++----
>  2 files changed, 51 insertions(+), 4 deletions(-)
>
> diff --git a/include/uapi/linux/dcbnl.h b/include/uapi/linux/dcbnl.h
> index a791a94013a6..9f68dc501cc1 100644
> --- a/include/uapi/linux/dcbnl.h
> +++ b/include/uapi/linux/dcbnl.h
> @@ -218,6 +218,9 @@ struct cee_pfc {
>  #define IEEE_8021QAZ_APP_SEL_ANY	4
>  #define IEEE_8021QAZ_APP_SEL_DSCP       5
>
> +/* Non-std selector values */
> +#define DCB_APP_SEL_PCP		24
> +
>  /* This structure contains the IEEE 802.1Qaz APP managed object. This
>   * object is also used for the CEE std as well.
>   *
> @@ -247,6 +250,8 @@ struct dcb_app {
>  	__u16	protocol;
>  };
>
> +#define IEEE_8021QAZ_APP_SEL_MAX 255

This is only necessary for the trust table code, so I would punt this
into the next patch.

> +
>  /**
>   * struct dcb_peer_app_info - APP feature information sent by the peer
>   *
> @@ -425,6 +430,7 @@ enum ieee_attrs {
>  enum ieee_attrs_app {
>  	DCB_ATTR_IEEE_APP_UNSPEC,
>  	DCB_ATTR_IEEE_APP,
> +	DCB_ATTR_DCB_APP,
>  	__DCB_ATTR_IEEE_APP_MAX
>  };
>  #define DCB_ATTR_IEEE_APP_MAX (__DCB_ATTR_IEEE_APP_MAX - 1)
> diff --git a/net/dcb/dcbnl.c b/net/dcb/dcbnl.c
> index dc4fb699b56c..580d26acfc84 100644
> --- a/net/dcb/dcbnl.c
> +++ b/net/dcb/dcbnl.c
> @@ -179,6 +179,46 @@ static const struct nla_policy dcbnl_featcfg_nest[DCB_FEATCFG_ATTR_MAX + 1] = {
>  static LIST_HEAD(dcb_app_list);
>  static DEFINE_SPINLOCK(dcb_lock);
>
> +static int dcbnl_app_attr_type_get(u8 selector)

The return value can be directly enum ieee_attrs_app;

> +{
> +	enum ieee_attrs_app type;
> +
> +	switch (selector) {
> +	case IEEE_8021QAZ_APP_SEL_ETHERTYPE:
> +	case IEEE_8021QAZ_APP_SEL_STREAM:
> +	case IEEE_8021QAZ_APP_SEL_DGRAM:
> +	case IEEE_8021QAZ_APP_SEL_ANY:
> +	case IEEE_8021QAZ_APP_SEL_DSCP:
> +		type = DCB_ATTR_IEEE_APP;
> +		break;

Just return DCB_ATTR_IEEE_APP? Similarly below.

> +	case DCB_APP_SEL_PCP:
> +		type = DCB_ATTR_DCB_APP;
> +		break;
> +	default:
> +		type = DCB_ATTR_IEEE_APP_UNSPEC;
> +		break;
> +	}
> +
> +	return type;
> +}
> +
> +static int dcbnl_app_attr_type_validate(enum ieee_attrs_app type)
> +{
> +	bool ret;
> +
> +	switch (type) {
> +	case DCB_ATTR_IEEE_APP:
> +	case DCB_ATTR_DCB_APP:
> +		ret = true;
> +		break;
> +	default:
> +		ret = false;
> +		break;
> +	}
> +
> +	return ret;
> +}
> +
>  static struct sk_buff *dcbnl_newmsg(int type, u8 cmd, u32 port, u32 seq,
>  				    u32 flags, struct nlmsghdr **nlhp)
>  {
> @@ -1116,8 +1156,9 @@ static int dcbnl_ieee_fill(struct sk_buff *skb, struct net_device *netdev)
>  	spin_lock_bh(&dcb_lock);
>  	list_for_each_entry(itr, &dcb_app_list, list) {
>  		if (itr->ifindex == netdev->ifindex) {
> -			err = nla_put(skb, DCB_ATTR_IEEE_APP, sizeof(itr->app),
> -					 &itr->app);
> +			enum ieee_attrs_app type =
> +				dcbnl_app_attr_type_get(itr->app.selector);
> +			err = nla_put(skb, type, sizeof(itr->app), &itr->app);
>  			if (err) {
>  				spin_unlock_bh(&dcb_lock);
>  				return -EMSGSIZE;
> @@ -1495,7 +1536,7 @@ static int dcbnl_ieee_set(struct net_device *netdev, struct nlmsghdr *nlh,
>  		nla_for_each_nested(attr, ieee[DCB_ATTR_IEEE_APP_TABLE], rem) {
>  			struct dcb_app *app_data;
>
> -			if (nla_type(attr) != DCB_ATTR_IEEE_APP)
> +			if (!dcbnl_app_attr_type_validate(nla_type(attr)))

Oh no! It wasn't validating the DCB_ATTR_IEEE_APP_TABLE nest against a
policy! Instead it was just skipping whatever is not DCB_ATTR_IEEE_APP.

So userspace was permitted to shove random crap down here, and it would
just quietly be ignored. We can't start reinterpreting some of that crap
as information. We also can't start bouncing it.

This needs to be done differently.

One API "hole" that I see is that payload with size < struct dcb_app
gets bounced.

We can pack the new stuff into a smaller payload. The inner attribute
would not be DCB_ATTR_DCB_APP, but say DCB_ATTR_DCB_PCP, which would
imply the selector. The payload can be struct { u8 prio; u16 proto; }.
This would have been bounced by the old UAPI, so we know no userspace
makes use of that.

We can treat the output similarly.

>  				continue;
>
>  			if (nla_len(attr) < sizeof(struct dcb_app)) {
> @@ -1556,7 +1597,7 @@ static int dcbnl_ieee_del(struct net_device *netdev, struct nlmsghdr *nlh,
>  		nla_for_each_nested(attr, ieee[DCB_ATTR_IEEE_APP_TABLE], rem) {
>  			struct dcb_app *app_data;
>
> -			if (nla_type(attr) != DCB_ATTR_IEEE_APP)
> +			if (!dcbnl_app_attr_type_validate(nla_type(attr)))

Likewise here, unfortunately.

>  				continue;
>  			app_data = nla_data(attr);
>  			if (ops->ieee_delapp)
Petr Machata Sept. 30, 2022, 3:41 p.m. UTC | #2
Petr Machata <petrm@nvidia.com> writes:

> We can pack the new stuff into a smaller payload. The inner attribute
> would not be DCB_ATTR_DCB_APP, but say DCB_ATTR_DCB_PCP, which would
> imply the selector. The payload can be struct { u8 prio; u16 proto; }.
> This would have been bounced by the old UAPI, so we know no userspace
> makes use of that.

Except this will end up having size of 4 anyway due to alignment. We'll
have to make it a struct { ... } __attribute__((__packed__));
Jakub Kicinski Oct. 1, 2022, 12:54 a.m. UTC | #3
On Fri, 30 Sep 2022 14:20:50 +0200 Petr Machata wrote:
> > @@ -1495,7 +1536,7 @@ static int dcbnl_ieee_set(struct net_device *netdev, struct nlmsghdr *nlh,
> >  		nla_for_each_nested(attr, ieee[DCB_ATTR_IEEE_APP_TABLE], rem) {
> >  			struct dcb_app *app_data;
> >
> > -			if (nla_type(attr) != DCB_ATTR_IEEE_APP)
> > +			if (!dcbnl_app_attr_type_validate(nla_type(attr)))  
> 
> Oh no! It wasn't validating the DCB_ATTR_IEEE_APP_TABLE nest against a
> policy! Instead it was just skipping whatever is not DCB_ATTR_IEEE_APP.
> 
> So userspace was permitted to shove random crap down here, and it would
> just quietly be ignored. We can't start reinterpreting some of that crap
> as information. We also can't start bouncing it.

Are you saying that we can't start interpreting new attr types?

"Traditionally" netlink ignored new attr types so from that perspective
starting to interpret new types is pretty "run of the mill" for netlink.
IOW *_deprecated() parsing routines do not use NL_VALIDATE_MAXTYPE.

That does put netlink in a bit of a special category when it comes to
input validation, but really putting in a random but valid attr is much
harder than not initializing a struct member. Is there user space which
does that?

Sorry if I'm misinterpreting the situation.

> This needs to be done differently.
> 
> One API "hole" that I see is that payload with size < struct dcb_app
> gets bounced.
> 
> We can pack the new stuff into a smaller payload. The inner attribute
> would not be DCB_ATTR_DCB_APP, but say DCB_ATTR_DCB_PCP, which would
> imply the selector. The payload can be struct { u8 prio; u16 proto; }.
> This would have been bounced by the old UAPI, so we know no userspace
> makes use of that.
> 
> We can treat the output similarly.
Daniel Machon Oct. 3, 2022, 6:48 a.m. UTC | #4
> > Add new PCP selector for the 8021Qaz APP managed object.
> >
> > As the PCP selector is not part of the 8021Qaz standard, a new non-std
> > extension attribute DCB_ATTR_DCB_APP has been introduced. Also two
> > helper functions to translate between selector and app attribute type
> > has been added.
> >
> > The purpose of adding the PCP selector, is to be able to offload
> > PCP-based queue classification to the 8021Q Priority Code Point table,
> > see 6.9.3 of IEEE Std 802.1Q-2018.
> 
> Just a note: the "dcb app" block deals with packet prioritization.
> Classification is handled through "dcb ets prio-tc", or offloaded egress
> qdiscs or whatever, regardless of how the priority was derived.
> 
> > PCP and DEI is encoded in the protocol field as 8*dei+pcp, so that a
> > mapping of PCP 2 and DEI 1 to priority 3 is encoded as {255, 10, 3}.
> 
> It would be good to shout out that the new selector value is 255.
> Also it would be good to be explicit about how the same struct dcb_app
> is used for both standard and non-standard attributes, because there
> currently is no overlap between the two namespaces.
> 
> > Signed-off-by: Daniel Machon <daniel.machon@microchip.com>
> > ---
> >  include/uapi/linux/dcbnl.h |  6 +++++
> >  net/dcb/dcbnl.c            | 49 ++++++++++++++++++++++++++++++++++----
> >  2 files changed, 51 insertions(+), 4 deletions(-)
> >
> > diff --git a/include/uapi/linux/dcbnl.h b/include/uapi/linux/dcbnl.h
> > index a791a94013a6..9f68dc501cc1 100644
> > --- a/include/uapi/linux/dcbnl.h
> > +++ b/include/uapi/linux/dcbnl.h
> > @@ -218,6 +218,9 @@ struct cee_pfc {
> >  #define IEEE_8021QAZ_APP_SEL_ANY     4
> >  #define IEEE_8021QAZ_APP_SEL_DSCP       5
> >
> > +/* Non-std selector values */
> > +#define DCB_APP_SEL_PCP              24
> > +
> >  /* This structure contains the IEEE 802.1Qaz APP managed object. This
> >   * object is also used for the CEE std as well.
> >   *
> > @@ -247,6 +250,8 @@ struct dcb_app {
> >       __u16   protocol;
> >  };
> >
> > +#define IEEE_8021QAZ_APP_SEL_MAX 255
> 
> This is only necessary for the trust table code, so I would punt this
> into the next patch.

Will be fixed in next v.

> 
> > +
> >  /**
> >   * struct dcb_peer_app_info - APP feature information sent by the peer
> >   *
> > @@ -425,6 +430,7 @@ enum ieee_attrs {
> >  enum ieee_attrs_app {
> >       DCB_ATTR_IEEE_APP_UNSPEC,
> >       DCB_ATTR_IEEE_APP,
> > +     DCB_ATTR_DCB_APP,
> >       __DCB_ATTR_IEEE_APP_MAX
> >  };
> >  #define DCB_ATTR_IEEE_APP_MAX (__DCB_ATTR_IEEE_APP_MAX - 1)
> > diff --git a/net/dcb/dcbnl.c b/net/dcb/dcbnl.c
> > index dc4fb699b56c..580d26acfc84 100644
> > --- a/net/dcb/dcbnl.c
> > +++ b/net/dcb/dcbnl.c
> > @@ -179,6 +179,46 @@ static const struct nla_policy dcbnl_featcfg_nest[DCB_FEATCFG_ATTR_MAX + 1] = {
> >  static LIST_HEAD(dcb_app_list);
> >  static DEFINE_SPINLOCK(dcb_lock);
> >
> > +static int dcbnl_app_attr_type_get(u8 selector)
> 
> The return value can be directly enum ieee_attrs_app;

Will be fixed in next v.

> 
> > +{
> > +     enum ieee_attrs_app type;
> > +
> > +     switch (selector) {
> > +     case IEEE_8021QAZ_APP_SEL_ETHERTYPE:
> > +     case IEEE_8021QAZ_APP_SEL_STREAM:
> > +     case IEEE_8021QAZ_APP_SEL_DGRAM:
> > +     case IEEE_8021QAZ_APP_SEL_ANY:
> > +     case IEEE_8021QAZ_APP_SEL_DSCP:
> > +             type = DCB_ATTR_IEEE_APP;
> > +             break;
> 
> Just return DCB_ATTR_IEEE_APP? Similarly below.

That's fine.

> 
> > +     case DCB_APP_SEL_PCP:
> > +             type = DCB_ATTR_DCB_APP;
> > +             break;
> > +     default:
> > +             type = DCB_ATTR_IEEE_APP_UNSPEC;
> > +             break;
> > +     }
> > +
> > +     return type;
> > +}
> > +
> > +static int dcbnl_app_attr_type_validate(enum ieee_attrs_app type)
> > +{
> > +     bool ret;
> > +
> > +     switch (type) {
> > +     case DCB_ATTR_IEEE_APP:
> > +     case DCB_ATTR_DCB_APP:
> > +             ret = true;
> > +             break;
> > +     default:
> > +             ret = false;
> > +             break;
> > +     }
> > +
> > +     return ret;
> > +}
> > +
> >  static struct sk_buff *dcbnl_newmsg(int type, u8 cmd, u32 port, u32 seq,
> >                                   u32 flags, struct nlmsghdr **nlhp)
> >  {
> > @@ -1116,8 +1156,9 @@ static int dcbnl_ieee_fill(struct sk_buff *skb, struct net_device *netdev)
> >       spin_lock_bh(&dcb_lock);
> >       list_for_each_entry(itr, &dcb_app_list, list) {
> >               if (itr->ifindex == netdev->ifindex) {
> > -                     err = nla_put(skb, DCB_ATTR_IEEE_APP, sizeof(itr->app),
> > -                                      &itr->app);
> > +                     enum ieee_attrs_app type =
> > +                             dcbnl_app_attr_type_get(itr->app.selector);
> > +                     err = nla_put(skb, type, sizeof(itr->app), &itr->app);
> >                       if (err) {
> >                               spin_unlock_bh(&dcb_lock);
> >                               return -EMSGSIZE;
> > @@ -1495,7 +1536,7 @@ static int dcbnl_ieee_set(struct net_device *netdev, struct nlmsghdr *nlh,
> >               nla_for_each_nested(attr, ieee[DCB_ATTR_IEEE_APP_TABLE], rem) {
> >                       struct dcb_app *app_data;
> >
> > -                     if (nla_type(attr) != DCB_ATTR_IEEE_APP)
> > +                     if (!dcbnl_app_attr_type_validate(nla_type(attr)))
> 
> Oh no! It wasn't validating the DCB_ATTR_IEEE_APP_TABLE nest against a
> policy! Instead it was just skipping whatever is not DCB_ATTR_IEEE_APP.
> 
> So userspace was permitted to shove random crap down here, and it would
> just quietly be ignored. We can't start reinterpreting some of that crap
> as information. We also can't start bouncing it.
> 
> This needs to be done differently.
> 
> One API "hole" that I see is that payload with size < struct dcb_app
> gets bounced.
> 
> We can pack the new stuff into a smaller payload. The inner attribute
> would not be DCB_ATTR_DCB_APP, but say DCB_ATTR_DCB_PCP, which would
> imply the selector. The payload can be struct { u8 prio; u16 proto; }.
> This would have been bounced by the old UAPI, so we know no userspace
> makes use of that.

Right, I see your point. But. First thought; this starts to look a little
hackish.

Looking through the 802.1Q-2018 std again, sel bits 0, 6 and 7 are 
reserved (implicit for future standard implementation?). Do we know of
any cases, where a new standard version would introduce new values beyond
what was reserved in the first place for future use? I dont know myself.

I am just trying to raise a question of whether using the std APP attr
with a new high (255) selector, really could be preferred over this new
non-std APP attr with new packed payload.
Petr Machata Oct. 3, 2022, 7:52 a.m. UTC | #5
Jakub Kicinski <kuba@kernel.org> writes:

> On Fri, 30 Sep 2022 14:20:50 +0200 Petr Machata wrote:
>> > @@ -1495,7 +1536,7 @@ static int dcbnl_ieee_set(struct net_device *netdev, struct nlmsghdr *nlh,
>> >  		nla_for_each_nested(attr, ieee[DCB_ATTR_IEEE_APP_TABLE], rem) {
>> >  			struct dcb_app *app_data;
>> >
>> > -			if (nla_type(attr) != DCB_ATTR_IEEE_APP)
>> > +			if (!dcbnl_app_attr_type_validate(nla_type(attr)))  
>> 
>> Oh no! It wasn't validating the DCB_ATTR_IEEE_APP_TABLE nest against a
>> policy! Instead it was just skipping whatever is not DCB_ATTR_IEEE_APP.
>> 
>> So userspace was permitted to shove random crap down here, and it would
>> just quietly be ignored. We can't start reinterpreting some of that crap
>> as information. We also can't start bouncing it.
>
> Are you saying that we can't start interpreting new attr types?
>
> "Traditionally" netlink ignored new attr types so from that perspective
> starting to interpret new types is pretty "run of the mill" for netlink.
> IOW *_deprecated() parsing routines do not use NL_VALIDATE_MAXTYPE.
>
> That does put netlink in a bit of a special category when it comes to
> input validation, but really putting in a random but valid attr is much
> harder than not initializing a struct member. Is there user space which
> does that?
>
> Sorry if I'm misinterpreting the situation.

I assumed the policy is much more strict with changes like this. If you
think it's OK, I'm fine with it as well.

The userspace (lldpad in particular) is doing the opposite thing BTW:
assuming everything in the nest is a DCB_ATTR_IEEE_APP. When we start
emitting the new attribute, it will get confused.
Petr Machata Oct. 3, 2022, 8:22 a.m. UTC | #6
<Daniel.Machon@microchip.com> writes:

> Right, I see your point. But. First thought; this starts to look a little
> hackish.

So it is. That's what poking backward compatible holes in an existing
API gets you. Look at modern C++ syntax for an extreme example :)

But read Jakub's email. It looks like we don't actually need to worry
about this.

> Looking through the 802.1Q-2018 std again, sel bits 0, 6 and 7 are 
> reserved (implicit for future standard implementation?). Do we know of
> any cases, where a new standard version would introduce new values beyond
> what was reserved in the first place for future use? I dont know myself.
>
> I am just trying to raise a question of whether using the std APP attr
> with a new high (255) selector, really could be preferred over this new
> non-std APP attr with new packed payload.

Yeah. We'll need to patch lldpad anyway. We can basically choose which
way we patch it. And BTW, using the too-short attribute payload of
course breaks it _as well_, because they don't do any payload size
validation.
Daniel Machon Oct. 3, 2022, 9:33 a.m. UTC | #7
Den Mon, Oct 03, 2022 at 10:22:48AM +0200 skrev Petr Machata:
> EXTERNAL EMAIL: Do not click links or open attachments unless you know the content is safe
> 
> <Daniel.Machon@microchip.com> writes:
> 
> > Right, I see your point. But. First thought; this starts to look a little
> > hackish.
> 
> So it is. That's what poking backward compatible holes in an existing
> API gets you. Look at modern C++ syntax for an extreme example :)
> 
> But read Jakub's email. It looks like we don't actually need to worry
> about this.
> 
> > Looking through the 802.1Q-2018 std again, sel bits 0, 6 and 7 are
> > reserved (implicit for future standard implementation?). Do we know of
> > any cases, where a new standard version would introduce new values beyond
> > what was reserved in the first place for future use? I dont know myself.
> >
> > I am just trying to raise a question of whether using the std APP attr
> > with a new high (255) selector, really could be preferred over this new
> > non-std APP attr with new packed payload.
> 
> Yeah. We'll need to patch lldpad anyway. We can basically choose which
> way we patch it. And BTW, using the too-short attribute payload of
> course breaks it _as well_, because they don't do any payload size
> validation.

Right, unless we reconstruct std app entry payload from the "too-short"
attribute payload, before adding it the the app list or passing it to the 
driver.

Anyway. Considering Jakub's mail. I think this patch version with a non-std
attribute to do non-std app table contributions separates non-std from std
stuff nicely and is preffered over just adding the new selector. So if we can 
agree on this, I will prepare a new v. with the other changes suggested.

Wrt. lldpad we can then patch it to react on attrs or selectors > 7.
Jakub Kicinski Oct. 3, 2022, 4:25 p.m. UTC | #8
On Mon, 3 Oct 2022 09:52:59 +0200 Petr Machata wrote:
> I assumed the policy is much more strict with changes like this. If you
> think it's OK, I'm fine with it as well.
> 
> The userspace (lldpad in particular) is doing the opposite thing BTW:
> assuming everything in the nest is a DCB_ATTR_IEEE_APP. When we start
> emitting the new attribute, it will get confused.

Can you add an attribute or a flag to the request which would turn
emitting the new attrs on?
Daniel Machon Oct. 3, 2022, 9:59 p.m. UTC | #9
> On Mon, 3 Oct 2022 09:52:59 +0200 Petr Machata wrote:
> > I assumed the policy is much more strict with changes like this. If you
> > think it's OK, I'm fine with it as well.
> >
> > The userspace (lldpad in particular) is doing the opposite thing BTW:
> > assuming everything in the nest is a DCB_ATTR_IEEE_APP. When we start
> > emitting the new attribute, it will get confused.
> 
> Can you add an attribute or a flag to the request which would turn
> emitting the new attrs on?

As for this sparx5 impl. the new attrs or any other app attr, will not be
emitted by lldpad, since APP tx cannot be enabled when set/get_dcbx is not 
supported. IIUIC.

If lldpad was idd able to emit the new pcp app entries, they would be
emitted as invalid TLV's (assuming 255 or 24 selector value), because the
selector would be either zero or seven, which is currently not used for
any selector by the std. We then have time to patch lldpad to do whatever
with the new attr. Wouldn't this be acceptable?

As for iproute2/dcb, the new attr will just get ignored with a warning.
Jakub Kicinski Oct. 3, 2022, 11:34 p.m. UTC | #10
On Mon, 3 Oct 2022 21:59:49 +0000 Daniel.Machon@microchip.com wrote:
> If lldpad was idd able to emit the new pcp app entries, they would be

idd?

> emitted as invalid TLV's (assuming 255 or 24 selector value), because the
> selector would be either zero or seven, which is currently not used for
> any selector by the std. We then have time to patch lldpad to do whatever
> with the new attr. Wouldn't this be acceptable?

I'm not sure I can provide sensible advice given I don't really know
how the information flow looks in case of DCB.

First off - we're talking about netlink TLVs not LLDP / DCB wire message
TLVs?

What I was saying is that if lldpad dumps the information from the
kernel and gets confused by certain TLVs - we can add an opt-in
attribute to whatever Netlink request lldpad uses, and only add the new
attrs if that opt-in attribute is present. Normal GET or DUMP requests
can both take input attributes.

Old lldpad will not send this attribute to the kernel - the kernel will
not respond with confusing attrs. The new lldpad can be patched to send
the attribute and will get all the attrs (if it actually cares).
Petr Machata Oct. 4, 2022, 10:20 a.m. UTC | #11
Jakub Kicinski <kuba@kernel.org> writes:

> On Mon, 3 Oct 2022 09:52:59 +0200 Petr Machata wrote:
>> I assumed the policy is much more strict with changes like this. If you
>> think it's OK, I'm fine with it as well.
>> 
>> The userspace (lldpad in particular) is doing the opposite thing BTW:
>> assuming everything in the nest is a DCB_ATTR_IEEE_APP. When we start
>> emitting the new attribute, it will get confused.
>
> Can you add an attribute or a flag to the request which would turn
> emitting the new attrs on?

The question is whether it's better to do it anyway. My opinion is that
if a userspace decides to make assumptions about the contents of a TLV,
and neglects to validate the actual TLV type, it's on them, and I'm not
obligated to keep them working. We know about this case, but really any
attribute addition at all could potentially trip some userspace if they
expected something else at this offset.
Petr Machata Oct. 4, 2022, 10:52 a.m. UTC | #12
Petr Machata <petrm@nvidia.com> writes:

> Jakub Kicinski <kuba@kernel.org> writes:
>
>> On Mon, 3 Oct 2022 09:52:59 +0200 Petr Machata wrote:
>>> I assumed the policy is much more strict with changes like this. If you
>>> think it's OK, I'm fine with it as well.
>>> 
>>> The userspace (lldpad in particular) is doing the opposite thing BTW:
>>> assuming everything in the nest is a DCB_ATTR_IEEE_APP. When we start
>>> emitting the new attribute, it will get confused.
>>
>> Can you add an attribute or a flag to the request which would turn
>> emitting the new attrs on?
>
> The question is whether it's better to do it anyway. My opinion is that
> if a userspace decides to make assumptions about the contents of a TLV,
> and neglects to validate the actual TLV type, it's on them, and I'm not
> obligated to keep them working. We know about this case, but really any
> attribute addition at all could potentially trip some userspace if they
> expected something else at this offset.

And re the flag: I think struct dcbmsg.dcb_pad was meant to be the place
to keep flags when the need arises, but it is not validated anywhere, so
we cannot use it. It could be a new command, but I'm not a fan. So if we
need to discriminate userspaces, I think it should be a new attribute.
Petr Machata Oct. 4, 2022, 10:56 a.m. UTC | #13
Jakub Kicinski <kuba@kernel.org> writes:

> On Mon, 3 Oct 2022 21:59:49 +0000 Daniel.Machon@microchip.com wrote:
>> If lldpad was idd able to emit the new pcp app entries, they would be
>
> idd?
>
>> emitted as invalid TLV's (assuming 255 or 24 selector value), because the
>> selector would be either zero or seven, which is currently not used for
>> any selector by the std. We then have time to patch lldpad to do whatever
>> with the new attr. Wouldn't this be acceptable?
>
> I'm not sure I can provide sensible advice given I don't really know
> how the information flow looks in case of DCB.
>
> First off - we're talking about netlink TLVs not LLDP / DCB wire message
> TLVs?

DCB wire message in this case.

> What I was saying is that if lldpad dumps the information from the
> kernel and gets confused by certain TLVs - we can add an opt-in
> attribute to whatever Netlink request lldpad uses, and only add the new
> attrs if that opt-in attribute is present. Normal GET or DUMP requests
> can both take input attributes.
>
> Old lldpad will not send this attribute to the kernel - the kernel will
> not respond with confusing attrs. The new lldpad can be patched to send
> the attribute and will get all the attrs (if it actually cares).

Another aspect is that lldpad will never create these entries on its
own, until it gets support for it, at which point these issues would
presumably get fixed as well. The only scenario in which it breaks is
when an admin messes with the APP entries through iproute2, but then
uses lldpad. Which doesn't make sense to me as a use case.
Jakub Kicinski Oct. 4, 2022, 7:51 p.m. UTC | #14
On Tue, 4 Oct 2022 12:52:35 +0200 Petr Machata wrote:
> > The question is whether it's better to do it anyway. My opinion is that
> > if a userspace decides to make assumptions about the contents of a TLV,
> > and neglects to validate the actual TLV type, it's on them, and I'm not
> > obligated to keep them working. We know about this case, but really any
> > attribute addition at all could potentially trip some userspace if they
> > expected something else at this offset.  
> 
> And re the flag: I think struct dcbmsg.dcb_pad was meant to be the place
> to keep flags when the need arises, but it is not validated anywhere, so
> we cannot use it. It could be a new command, but I'm not a fan. So if we
> need to discriminate userspaces, I think it should be a new attribute.

All good points. I'm fine with not gating the attributes if that's your
preference. Your call.
Petr Machata Oct. 5, 2022, 10:09 a.m. UTC | #15
<Daniel.Machon@microchip.com> writes:

> Den Mon, Oct 03, 2022 at 10:22:48AM +0200 skrev Petr Machata:
>> EXTERNAL EMAIL: Do not click links or open attachments unless you know the content is safe
>> 
>> <Daniel.Machon@microchip.com> writes:
>> 
>> > Right, I see your point. But. First thought; this starts to look a little
>> > hackish.
>> 
>> So it is. That's what poking backward compatible holes in an existing
>> API gets you. Look at modern C++ syntax for an extreme example :)
>> 
>> But read Jakub's email. It looks like we don't actually need to worry
>> about this.
>> 
>> > Looking through the 802.1Q-2018 std again, sel bits 0, 6 and 7 are
>> > reserved (implicit for future standard implementation?). Do we know of
>> > any cases, where a new standard version would introduce new values beyond
>> > what was reserved in the first place for future use? I dont know myself.
>> >
>> > I am just trying to raise a question of whether using the std APP attr
>> > with a new high (255) selector, really could be preferred over this new
>> > non-std APP attr with new packed payload.
>> 
>> Yeah. We'll need to patch lldpad anyway. We can basically choose which
>> way we patch it. And BTW, using the too-short attribute payload of
>> course breaks it _as well_, because they don't do any payload size
>> validation.
>
> Right, unless we reconstruct std app entry payload from the "too-short"
> attribute payload, before adding it the the app list or passing it to the 
> driver.

The issue is not in drivers, but in lldpad itself. They just iterate the
attribute stream, assume that everything is an APP, and treat is as
such, not even validating the length (I mean, why would they, it's
supposed to be an APP after all...).

So we trip lldpad however we set the API up.

So let's trip it in a way that makes for a reasonable API.

> Anyway. Considering Jakub's mail. I think this patch version with a non-std
> attribute to do non-std app table contributions separates non-std from std
> stuff nicely and is preffered over just adding the new selector. So if we can 
> agree on this, I will prepare a new v. with the other changes suggested.
>
> Wrt. lldpad we can then patch it to react on attrs or selectors > 7.

Yep, agreed.
diff mbox series

Patch

diff --git a/include/uapi/linux/dcbnl.h b/include/uapi/linux/dcbnl.h
index a791a94013a6..9f68dc501cc1 100644
--- a/include/uapi/linux/dcbnl.h
+++ b/include/uapi/linux/dcbnl.h
@@ -218,6 +218,9 @@  struct cee_pfc {
 #define IEEE_8021QAZ_APP_SEL_ANY	4
 #define IEEE_8021QAZ_APP_SEL_DSCP       5

+/* Non-std selector values */
+#define DCB_APP_SEL_PCP		24
+
 /* This structure contains the IEEE 802.1Qaz APP managed object. This
  * object is also used for the CEE std as well.
  *
@@ -247,6 +250,8 @@  struct dcb_app {
 	__u16	protocol;
 };

+#define IEEE_8021QAZ_APP_SEL_MAX 255
+
 /**
  * struct dcb_peer_app_info - APP feature information sent by the peer
  *
@@ -425,6 +430,7 @@  enum ieee_attrs {
 enum ieee_attrs_app {
 	DCB_ATTR_IEEE_APP_UNSPEC,
 	DCB_ATTR_IEEE_APP,
+	DCB_ATTR_DCB_APP,
 	__DCB_ATTR_IEEE_APP_MAX
 };
 #define DCB_ATTR_IEEE_APP_MAX (__DCB_ATTR_IEEE_APP_MAX - 1)
diff --git a/net/dcb/dcbnl.c b/net/dcb/dcbnl.c
index dc4fb699b56c..580d26acfc84 100644
--- a/net/dcb/dcbnl.c
+++ b/net/dcb/dcbnl.c
@@ -179,6 +179,46 @@  static const struct nla_policy dcbnl_featcfg_nest[DCB_FEATCFG_ATTR_MAX + 1] = {
 static LIST_HEAD(dcb_app_list);
 static DEFINE_SPINLOCK(dcb_lock);

+static int dcbnl_app_attr_type_get(u8 selector)
+{
+	enum ieee_attrs_app type;
+
+	switch (selector) {
+	case IEEE_8021QAZ_APP_SEL_ETHERTYPE:
+	case IEEE_8021QAZ_APP_SEL_STREAM:
+	case IEEE_8021QAZ_APP_SEL_DGRAM:
+	case IEEE_8021QAZ_APP_SEL_ANY:
+	case IEEE_8021QAZ_APP_SEL_DSCP:
+		type = DCB_ATTR_IEEE_APP;
+		break;
+	case DCB_APP_SEL_PCP:
+		type = DCB_ATTR_DCB_APP;
+		break;
+	default:
+		type = DCB_ATTR_IEEE_APP_UNSPEC;
+		break;
+	}
+
+	return type;
+}
+
+static int dcbnl_app_attr_type_validate(enum ieee_attrs_app type)
+{
+	bool ret;
+
+	switch (type) {
+	case DCB_ATTR_IEEE_APP:
+	case DCB_ATTR_DCB_APP:
+		ret = true;
+		break;
+	default:
+		ret = false;
+		break;
+	}
+
+	return ret;
+}
+
 static struct sk_buff *dcbnl_newmsg(int type, u8 cmd, u32 port, u32 seq,
 				    u32 flags, struct nlmsghdr **nlhp)
 {
@@ -1116,8 +1156,9 @@  static int dcbnl_ieee_fill(struct sk_buff *skb, struct net_device *netdev)
 	spin_lock_bh(&dcb_lock);
 	list_for_each_entry(itr, &dcb_app_list, list) {
 		if (itr->ifindex == netdev->ifindex) {
-			err = nla_put(skb, DCB_ATTR_IEEE_APP, sizeof(itr->app),
-					 &itr->app);
+			enum ieee_attrs_app type =
+				dcbnl_app_attr_type_get(itr->app.selector);
+			err = nla_put(skb, type, sizeof(itr->app), &itr->app);
 			if (err) {
 				spin_unlock_bh(&dcb_lock);
 				return -EMSGSIZE;
@@ -1495,7 +1536,7 @@  static int dcbnl_ieee_set(struct net_device *netdev, struct nlmsghdr *nlh,
 		nla_for_each_nested(attr, ieee[DCB_ATTR_IEEE_APP_TABLE], rem) {
 			struct dcb_app *app_data;

-			if (nla_type(attr) != DCB_ATTR_IEEE_APP)
+			if (!dcbnl_app_attr_type_validate(nla_type(attr)))
 				continue;

 			if (nla_len(attr) < sizeof(struct dcb_app)) {
@@ -1556,7 +1597,7 @@  static int dcbnl_ieee_del(struct net_device *netdev, struct nlmsghdr *nlh,
 		nla_for_each_nested(attr, ieee[DCB_ATTR_IEEE_APP_TABLE], rem) {
 			struct dcb_app *app_data;

-			if (nla_type(attr) != DCB_ATTR_IEEE_APP)
+			if (!dcbnl_app_attr_type_validate(nla_type(attr)))
 				continue;
 			app_data = nla_data(attr);
 			if (ops->ieee_delapp)