diff mbox

[1/5] netlink: extended ACK reporting

Message ID 20170408174900.12820-2-johannes@sipsolutions.net (mailing list archive)
State Superseded
Delegated to: Johannes Berg
Headers show

Commit Message

Johannes Berg April 8, 2017, 5:48 p.m. UTC
From: Johannes Berg <johannes.berg@intel.com>

Add the base infrastructure and UAPI for netlink
extended ACK reporting. All "manual" calls to
netlink_ack() pass NULL for now and thus don't
get extended ACK reporting.

Signed-off-by: Johannes Berg <johannes.berg@intel.com>
---
 crypto/crypto_user.c              |  3 +-
 drivers/infiniband/core/netlink.c |  5 +--
 drivers/scsi/scsi_netlink.c       |  2 +-
 include/linux/netlink.h           | 32 ++++++++++++++++-
 include/net/netlink.h             |  3 +-
 include/uapi/linux/netlink.h      | 24 +++++++++++++
 kernel/audit.c                    |  2 +-
 net/core/rtnetlink.c              |  3 +-
 net/core/sock_diag.c              |  3 +-
 net/hsr/hsr_netlink.c             |  4 +--
 net/netfilter/ipset/ip_set_core.c |  2 +-
 net/netfilter/nfnetlink.c         | 22 ++++++------
 net/netlink/af_netlink.c          | 72 ++++++++++++++++++++++++++++++++++-----
 net/netlink/af_netlink.h          |  1 +
 net/netlink/genetlink.c           |  3 +-
 net/xfrm/xfrm_user.c              |  3 +-
 16 files changed, 151 insertions(+), 33 deletions(-)

Comments

David Ahern April 8, 2017, 6:26 p.m. UTC | #1
On 4/8/17 1:48 PM, Johannes Berg wrote:
> @@ -2267,21 +2284,37 @@ int __netlink_dump_start(struct sock *ssk, struct sk_buff *skb,
>  }
>  EXPORT_SYMBOL(__netlink_dump_start);
>  
> -void netlink_ack(struct sk_buff *in_skb, struct nlmsghdr *nlh, int err)
> +void netlink_ack(struct sk_buff *in_skb, struct nlmsghdr *nlh, int err,
> +		 const struct netlink_ext_ack *extack)
>  {
>  	struct sk_buff *skb;
>  	struct nlmsghdr *rep;
>  	struct nlmsgerr *errmsg;
>  	size_t payload = sizeof(*errmsg);
> +	size_t acksize = sizeof(*errmsg);
>  	struct netlink_sock *nlk = nlk_sk(NETLINK_CB(in_skb).sk);
>  
>  	/* Error messages get the original request appened, unless the user
> -	 * requests to cap the error message.
> +	 * requests to cap the error message, and get extra error data if
> +	 * requested.
>  	 */
> -	if (!(nlk->flags & NETLINK_F_CAP_ACK) && err)
> -		payload += nlmsg_len(nlh);
> +	if (err) {
> +		if (!(nlk->flags & NETLINK_F_CAP_ACK))
> +			payload += nlmsg_len(nlh);
> +		acksize = payload;
> +		if (nlk->flags & NETLINK_F_EXT_ACK) {
> +			if (extack && extack->_msg)
> +				acksize +=
> +					nla_total_size(strlen(extack->_msg) + 1);
> +			if (extack && extack->bad_attr)
> +				acksize += nla_total_size(sizeof(u32));
> +			if (extack &&
> +			    (extack->missing_attr || extack->bad_attr))
> +				acksize += nla_total_size(sizeof(u16));

If you create a v3, the extack check can by pulled up to the (flags &
NETLINK_F_EXT_ACK) check.


> +		}
> +	}
>  
> -	skb = nlmsg_new(payload, GFP_KERNEL);
> +	skb = nlmsg_new(acksize, GFP_KERNEL);
>  	if (!skb) {
>  		struct sock *sk;
>  
> @@ -2300,14 +2333,35 @@ void netlink_ack(struct sk_buff *in_skb, struct nlmsghdr *nlh, int err)
>  			  NLMSG_ERROR, payload, 0);
>  	errmsg = nlmsg_data(rep);
>  	errmsg->error = err;
> -	memcpy(&errmsg->msg, nlh, payload > sizeof(*errmsg) ? nlh->nlmsg_len : sizeof(*nlh));
> +	memcpy(&errmsg->msg, nlh,
> +	       !(nlk->flags & NETLINK_F_CAP_ACK) ? nlh->nlmsg_len
> +						 : sizeof(*nlh));
> +
> +	if (err && nlk->flags & NETLINK_F_EXT_ACK) {
> +		if (extack && extack->_msg)
> +			WARN_ON(nla_put_string(skb, NLMSGERR_ATTR_MSG,
> +					       extack->_msg));
> +		if (extack && extack->bad_attr &&
> +		    !WARN_ON((u8 *)extack->bad_attr < in_skb->data ||
> +			     (u8 *)extack->bad_attr >= in_skb->data +
> +						       in_skb->len))
> +			WARN_ON(nla_put_u32(skb, NLMSGERR_ATTR_OFFS,
> +					    (u8 *)extack->bad_attr -
> +					    in_skb->data));
> +		if (extack && extack->missing_attr)
> +			WARN_ON(nla_put_u16(skb, NLMSGERR_ATTR_ATTR,
> +					    extack->missing_attr));

same here
Jiri Pirko April 8, 2017, 6:34 p.m. UTC | #2
Sat, Apr 08, 2017 at 07:48:56PM CEST, johannes@sipsolutions.net wrote:
>From: Johannes Berg <johannes.berg@intel.com>
>
>Add the base infrastructure and UAPI for netlink
>extended ACK reporting. All "manual" calls to
>netlink_ack() pass NULL for now and thus don't
>get extended ACK reporting.

Why so narrow? :)


>
>Signed-off-by: Johannes Berg <johannes.berg@intel.com>
>---

[...]


>diff --git a/include/linux/netlink.h b/include/linux/netlink.h
>index da14ab61f363..47562e940e9c 100644
>--- a/include/linux/netlink.h
>+++ b/include/linux/netlink.h
>@@ -62,11 +62,41 @@ netlink_kernel_create(struct net *net, int unit, struct netlink_kernel_cfg *cfg)
> 	return __netlink_kernel_create(net, unit, THIS_MODULE, cfg);
> }
> 
>+/**
>+ * struct netlink_ext_ack - netlink extended ACK report struct
>+ * @_msg: message string to report - don't access directly, use
>+ *	%NL_SET_ERR_MSG
>+ * @bad_attr: attribute with error
>+ * @missing_attr: number of missing attr (or 0)
>+ * @cookie: cookie data to return to userspace (for success)
>+ * @cookie_len: actual cookie data length
>+ */
>+struct netlink_ext_ack {
>+	const char *_msg;
>+	const struct nlattr *bad_attr;
>+	u16 missing_attr;
>+	u8 cookie[NETLINK_MAX_COOKIE_LEN];
>+	u8 cookie_len;
>+};
>+
>+/* Always use this macro, this allows later putting the
>+ * message into a separate section or such for things
>+ * like translation or listing all possible messages.
>+ * Currently string formatting is not supported (due
>+ * to the lack of an output buffer.)

Please use 80 cols.

[...]


>@@ -2267,21 +2284,37 @@ int __netlink_dump_start(struct sock *ssk, struct sk_buff *skb,
> }
> EXPORT_SYMBOL(__netlink_dump_start);
> 
>-void netlink_ack(struct sk_buff *in_skb, struct nlmsghdr *nlh, int err)
>+void netlink_ack(struct sk_buff *in_skb, struct nlmsghdr *nlh, int err,
>+		 const struct netlink_ext_ack *extack)
> {
> 	struct sk_buff *skb;
> 	struct nlmsghdr *rep;
> 	struct nlmsgerr *errmsg;
> 	size_t payload = sizeof(*errmsg);
>+	size_t acksize = sizeof(*errmsg);
> 	struct netlink_sock *nlk = nlk_sk(NETLINK_CB(in_skb).sk);
> 
> 	/* Error messages get the original request appened, unless the user
>-	 * requests to cap the error message.
>+	 * requests to cap the error message, and get extra error data if
>+	 * requested.
> 	 */
>-	if (!(nlk->flags & NETLINK_F_CAP_ACK) && err)
>-		payload += nlmsg_len(nlh);
>+	if (err) {
>+		if (!(nlk->flags & NETLINK_F_CAP_ACK))
>+			payload += nlmsg_len(nlh);
>+		acksize = payload;
>+		if (nlk->flags & NETLINK_F_EXT_ACK) {
>+			if (extack && extack->_msg)
>+				acksize +=
>+					nla_total_size(strlen(extack->_msg) + 1);
>+			if (extack && extack->bad_attr)
>+				acksize += nla_total_size(sizeof(u32));
>+			if (extack &&
>+			    (extack->missing_attr || extack->bad_attr))

Attr could be 0, right? I know that on the most of the places 0 is
UNSPEC, but I'm pretty sure not everywhere.


>+				acksize += nla_total_size(sizeof(u16));
>+		}
>+	}
> 
>-	skb = nlmsg_new(payload, GFP_KERNEL);
>+	skb = nlmsg_new(acksize, GFP_KERNEL);
> 	if (!skb) {
> 		struct sock *sk;
> 

[...]

Look very good. Thanks for taking care of this!
David Ahern April 8, 2017, 6:36 p.m. UTC | #3
On 4/8/17 1:48 PM, Johannes Berg wrote:
> diff --git a/include/linux/netlink.h b/include/linux/netlink.h
> index da14ab61f363..47562e940e9c 100644
> --- a/include/linux/netlink.h
> +++ b/include/linux/netlink.h
> @@ -62,11 +62,41 @@ netlink_kernel_create(struct net *net, int unit, struct netlink_kernel_cfg *cfg)
>  	return __netlink_kernel_create(net, unit, THIS_MODULE, cfg);
>  }
>  
> +/**
> + * struct netlink_ext_ack - netlink extended ACK report struct
> + * @_msg: message string to report - don't access directly, use
> + *	%NL_SET_ERR_MSG
> + * @bad_attr: attribute with error
> + * @missing_attr: number of missing attr (or 0)
> + * @cookie: cookie data to return to userspace (for success)
> + * @cookie_len: actual cookie data length
> + */
> +struct netlink_ext_ack {
> +	const char *_msg;
> +	const struct nlattr *bad_attr;
> +	u16 missing_attr;
> +	u8 cookie[NETLINK_MAX_COOKIE_LEN];
> +	u8 cookie_len;
> +};
> +

I think v3 is in your future ...

/home/dsa/kernel-4.git/include/linux/netlink.h:78:12: error:
‘NETLINK_MAX_COOKIE_LEN’ undeclared here (not in a function)
  u8 cookie[NETLINK_MAX_COOKIE_LEN];
            ^

it's defined in patch 3; needed in patch 1
Johannes Berg April 8, 2017, 6:37 p.m. UTC | #4
On Sat, 2017-04-08 at 20:34 +0200, Jiri Pirko wrote:
> nla_total_size(sizeof(u32));
> > +			if (extack &&
> > +			    (extack->missing_attr || extack-
> > >bad_attr))
> 
> Attr could be 0, right? I know that on the most of the places 0 is
> UNSPEC, but I'm pretty sure not everywhere.

Yeah, I guess we can't show a missing attribute of 0 now - bad_attr is
a pointer so no problem there.

I think I'll leave it like this - if anyone really wants to say
"attribute 0 is missing" then we can add a flag later... The UAPI does
take this into account by not including the attribute at all if the
data is invalid, so 0 in the userspace API can be done

johannes
Johannes Berg April 8, 2017, 6:37 p.m. UTC | #5
On Sat, 2017-04-08 at 14:36 -0400, David Ahern wrote:
> 
> I think v3 is in your future ...
> 
> /home/dsa/kernel-4.git/include/linux/netlink.h:78:12: error:
> ‘NETLINK_MAX_COOKIE_LEN’ undeclared here (not in a function)
>   u8 cookie[NETLINK_MAX_COOKIE_LEN];
>             ^
> 
> it's defined in patch 3; needed in patch 1

Damn. Ok, that'll have to wait until Monday, unless the airport has
useful wifi.

johannes
Jiri Pirko April 8, 2017, 6:40 p.m. UTC | #6
Sat, Apr 08, 2017 at 08:37:01PM CEST, johannes@sipsolutions.net wrote:
>On Sat, 2017-04-08 at 20:34 +0200, Jiri Pirko wrote:
>> nla_total_size(sizeof(u32));
>> > +			if (extack &&
>> > +			    (extack->missing_attr || extack-
>> > >bad_attr))
>> 
>> Attr could be 0, right? I know that on the most of the places 0 is
>> UNSPEC, but I'm pretty sure not everywhere.
>
>Yeah, I guess we can't show a missing attribute of 0 now - bad_attr is
>a pointer so no problem there.
>
>I think I'll leave it like this - if anyone really wants to say
>"attribute 0 is missing" then we can add a flag later... The UAPI does
>take this into account by not including the attribute at all if the
>data is invalid, so 0 in the userspace API can be done

It a known issue, should be fixed right now. We are in no hurry. This
waited +15 years to be done, no harm in couple more days.


Also, could you please attach a patch to iproute2 for example which
would make use of this. I just want to make sure it clicks.

Thanks!
Johannes Berg April 8, 2017, 8:13 p.m. UTC | #7
On Sat, 2017-04-08 at 20:40 +0200, Jiri Pirko wrote:

> > I think I'll leave it like this - if anyone really wants to say
> > "attribute 0 is missing" then we can add a flag later... The UAPI
> > does
> > take this into account by not including the attribute at all if the
> > data is invalid, so 0 in the userspace API can be done
> 
> It a known issue, should be fixed right now. We are in no hurry. This
> waited +15 years to be done, no harm in couple more days.

It's not about any timing or anything - I simply think the likelihood
that this will be needed is zero, because almost no netlink family uses
attribute zero, those that do use it are older ones unlikely to be
updated to start with, and *then* needing to indicate that attribute 0
is missing? Not going to happen.

The extra code needed to handle this is therefore wasted.

> Also, could you please attach a patch to iproute2 for example which
> would make use of this. I just want to make sure it clicks.

No, I'm not going to do that. If you want it, please do it yourself.
I've done the testing on a slightly modified iw, but even there haven't
done any pretty-printing or parsing, just made sure the attributes show
up properly (by dumping them in hex).

johannes
David Ahern April 8, 2017, 8:14 p.m. UTC | #8
On 4/8/17 1:48 PM, Johannes Berg wrote:
> diff --git a/include/uapi/linux/netlink.h b/include/uapi/linux/netlink.h
> index f3946a27bd07..d1564557d645 100644
> --- a/include/uapi/linux/netlink.h
> +++ b/include/uapi/linux/netlink.h
> @@ -101,6 +101,29 @@ struct nlmsghdr {
>  struct nlmsgerr {
>  	int		error;
>  	struct nlmsghdr msg;
> +	/*
> +	 * followed by the message contents unless NETLINK_CAP_ACK was set,
> +	 * message length is aligned with NLMSG_ALIGN()
> +	 */
> +	/*
> +	 * followed by TLVs defined in enum nlmsgerr_attrs
> +	 * if NETLINK_EXT_ACK was set
> +	 */
> +};
> +
> +/**
> + * enum nlmsgerr_attrs - netlink error message attributes
> + * @NLMSGERR_ATTR_UNUSED: unused
> + * @NLMSGERR_ATTR_MSG: error message string (string)
> + * @NLMSGERR_ATTR_OFFS: error offset in the original message (u32)
> + * @NLMSGERR_ATTR_ATTR: top-level attribute that caused the error
> + *	(or is missing, u16)
> + */
> +enum nlmsgerr_attrs {
> +	NLMSGERR_ATTR_UNUSED,
> +	NLMSGERR_ATTR_MSG,
> +	NLMSGERR_ATTR_OFFS,
> +	NLMSGERR_ATTR_ATTR,


can you add __NLMSGERR_ATTR_MAX and
#define NLMSGERR_ATTR_MAX (__NLMSGERR_ATTR_MAX - 1)
David Ahern April 9, 2017, 5:43 p.m. UTC | #9
On 4/8/17 2:40 PM, Jiri Pirko wrote:
> Sat, Apr 08, 2017 at 08:37:01PM CEST, johannes@sipsolutions.net wrote:
>> On Sat, 2017-04-08 at 20:34 +0200, Jiri Pirko wrote:
>>> nla_total_size(sizeof(u32));
>>>> +			if (extack &&
>>>> +			    (extack->missing_attr || extack-
>>>>> bad_attr))
>>>
>>> Attr could be 0, right? I know that on the most of the places 0 is
>>> UNSPEC, but I'm pretty sure not everywhere.

perhaps I misunderstand something, but nla_parse suggests attribute type
can not be 0:

int nla_parse(struct nlattr **tb, int maxtype, const struct nlattr *head,
              int len, const struct nla_policy *policy,
              struct netlink_ext_ack *extack)
{
	...

        nla_for_each_attr(nla, head, len, rem) {
                u16 type = nla_type(nla);

                if (type > 0 && type <= maxtype) {
			...
                        tb[type] = (struct nlattr *)nla;
                }
        }


> Also, could you please attach a patch to iproute2 for example which
> would make use of this. I just want to make sure it clicks.

I have follow on patches to Johannes' set that plumbs extack for
rtnetlink doit function and iproute2. will send later.
Johannes Berg April 10, 2017, 6:18 a.m. UTC | #10
> perhaps I misunderstand something, but nla_parse suggests attribute
> type can not be 0:
[...]

Yes, some - very few - families still insist on using attribute 0,
perhaps parsing by hand or so. Like you say though, the entire
infrastructure makes that hard and undesirable, so I don't really see
why we need to invest the extra code/work into making it work *here*,
especially since it's such a corner case as I described in my other
email.

johannes
Nicolas Dichtel April 13, 2017, 1:27 p.m. UTC | #11
Le 10/04/2017 à 08:18, Johannes Berg a écrit :
> 
>> perhaps I misunderstand something, but nla_parse suggests attribute
>> type can not be 0:
> [...]
> 
> Yes, some - very few - families still insist on using attribute 0,
> perhaps parsing by hand or so. Like you say though, the entire
> infrastructure makes that hard and undesirable, so I don't really see
> why we need to invest the extra code/work into making it work *here*,
> especially since it's such a corner case as I described in my other
> email.

Here is an example:
https://git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git/commit/?id=31e20bad8d58

I also see one in openvswitch (I will send a similar patch), but there are
probably some others.


Regards,
Nicolas
Johannes Berg April 13, 2017, 1:29 p.m. UTC | #12
On Thu, 2017-04-13 at 15:27 +0200, Nicolas Dichtel wrote:
> 
> > Yes, some - very few - families still insist on using attribute 0,
> > perhaps parsing by hand or so. Like you say though, the entire
> > infrastructure makes that hard and undesirable, so I don't really
> > see
> > why we need to invest the extra code/work into making it work
> > *here*,
> > especially since it's such a corner case as I described in my other
> > email.
> 
> Here is an example:
> https://git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git/co
> mmit/?id=31e20bad8d58
> 
> I also see one in openvswitch (I will send a similar patch), but
> there are probably some others.

Yeah. I'm not really sure what the point of such a patch is though -
the API is set now, and can't really be changed.

Anyway, the ones you point out are only used for *output* by the
kernel, so wouldn't be affected by any "missing attribute" reporting
anyway.

johannes
Nicolas Dichtel April 13, 2017, 2:05 p.m. UTC | #13
Le 13/04/2017 à 15:29, Johannes Berg a écrit :
> On Thu, 2017-04-13 at 15:27 +0200, Nicolas Dichtel wrote:
>>
>>> Yes, some - very few - families still insist on using attribute 0,
>>> perhaps parsing by hand or so. Like you say though, the entire
>>> infrastructure makes that hard and undesirable, so I don't really
>>> see
>>> why we need to invest the extra code/work into making it work
>>> *here*,
>>> especially since it's such a corner case as I described in my other
>>> email.
>>
>> Here is an example:
>> https://git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git/co
>> mmit/?id=31e20bad8d58
>>
>> I also see one in openvswitch (I will send a similar patch), but
>> there are probably some others.
> 
> Yeah. I'm not really sure what the point of such a patch is though -
> the API is set now, and can't really be changed.
The goal is to avoid copy and paste error, like it was done in diag subsystem.

> 
> Anyway, the ones you point out are only used for *output* by the
> kernel, so wouldn't be affected by any "missing attribute" reporting
> anyway.
Sure. It was just to mention that attribute 0 exists somewhere.
The other 0 attribute is OVS_TUNNEL_KEY_ATTR_ID. But I agree with you that it
remains a corner case.

Nicolas
Johannes Berg April 13, 2017, 7:24 p.m. UTC | #14
On Thu, 2017-04-13 at 16:05 +0200, Nicolas Dichtel wrote:

> Sure. It was just to mention that attribute 0 exists somewhere.
> The other 0 attribute is OVS_TUNNEL_KEY_ATTR_ID.

That looks like some really awkward hand-grown parsing - with all these
"struct ovs_len_tbl" looking almost like a policy, but not using that
code?

Seems like something somebody should take a hard look at and see if it
can't use more standard infrastructure.

johannes
Jamal Hadi Salim April 16, 2017, 2:40 p.m. UTC | #15
Johannes or anybody else working on it?
i.e one which sends the extack to the bowels of callees
so we can return meaningful messages
example: rtnetlink::doit() or equivalent seems to be a candidate for
taking it as a param
For things that call netlink_ack it may be easier if they create
the extack.
For dumps it makes it tricky since their lifetime is much
longer..

cheers,
jamal
David Ahern April 16, 2017, 2:45 p.m. UTC | #16
On 4/16/17 8:40 AM, Jamal Hadi Salim wrote:
> 
> Johannes or anybody else working on it?
> i.e one which sends the extack to the bowels of callees
> so we can return meaningful messages
> example: rtnetlink::doit() or equivalent seems to be a candidate for
> taking it as a param

I have a patch for the first pass on rtnetlink doit path. plumbs the
extack parameter and covers the nlmsg_parse case.  I'll send it once
Johannes' patches go in.
Jamal Hadi Salim April 16, 2017, 2:48 p.m. UTC | #17
On 17-04-16 10:45 AM, David Ahern wrote:
> On 4/16/17 8:40 AM, Jamal Hadi Salim wrote:
>>
>> Johannes or anybody else working on it?
>> i.e one which sends the extack to the bowels of callees
>> so we can return meaningful messages
>> example: rtnetlink::doit() or equivalent seems to be a candidate for
>> taking it as a param
>
> I have a patch for the first pass on rtnetlink doit path. plumbs the
> extack parameter and covers the nlmsg_parse case.  I'll send it once
> Johannes' patches go in.
>

Excellent. His patches seem to be in. Seems
commit ce07183282975026716107d36fd3f5f93de76668
is a good point?

cheers,
jamal
David Ahern April 16, 2017, 2:50 p.m. UTC | #18
On 4/16/17 8:48 AM, Jamal Hadi Salim wrote:
> Excellent. His patches seem to be in. Seems
> commit ce07183282975026716107d36fd3f5f93de76668
> is a good point?

yes. will send later today.
David Ahern April 16, 2017, 4:55 p.m. UTC | #19
On 4/16/17 8:40 AM, Jamal Hadi Salim wrote:
> 
> Johannes or anybody else working on it?
> i.e one which sends the extack to the bowels of callees
> so we can return meaningful messages
> example: rtnetlink::doit() or equivalent seems to be a candidate for
> taking it as a param
> For things that call netlink_ack it may be easier if they create
> the extack.
> For dumps it makes it tricky since their lifetime is much
> longer..


I have a start to a patch for iproute2 as well. It needs some
improvements before I can send it. Should be able to get to it Monday or
Tuesday.
Johannes Berg April 18, 2017, 9:41 a.m. UTC | #20
On Thu, 2017-04-13 at 14:44 -0700, Joe Stringer wrote
(something that never made it to the list, due to HTML formatting)
> 
> I think that OVS was doing some more elaborate validation than most
> users, so over time we picked up a bunch of extra parsing code that
> layers on top of nla_parse(). I took a look at trying to broaden this
> and make it useful to other users a while ago, but when I posted
> there wasn't much interest from others on it so I just moved on.
> Maybe it's about time to pick that back up.

Ah, ok. I didn't realize it was actually on top of nla_parse(). Some of
this does seem rather useful though, and having more expressive policy
would seem very useful too - I'd love to be able to express nesting
better, for example.

Also, I think we should - at least with the strict checking that Jiri
is proposing - think about checking the actual size, not just against a
minimum.

johannes
Joe Stringer April 18, 2017, 11:46 p.m. UTC | #21
On 18 April 2017 at 02:41, Johannes Berg <johannes@sipsolutions.net> wrote:
> On Thu, 2017-04-13 at 14:44 -0700, Joe Stringer wrote
> (something that never made it to the list, due to HTML formatting)
>>
>> I think that OVS was doing some more elaborate validation than most
>> users, so over time we picked up a bunch of extra parsing code that
>> layers on top of nla_parse(). I took a look at trying to broaden this
>> and make it useful to other users a while ago, but when I posted
>> there wasn't much interest from others on it so I just moved on.
>> Maybe it's about time to pick that back up.
>
> Ah, ok. I didn't realize it was actually on top of nla_parse(). Some of
> this does seem rather useful though, and having more expressive policy
> would seem very useful too - I'd love to be able to express nesting
> better, for example.

Ah, correction - nla_parse() is used in some parts but not all of it.
More expressive policy sounds useful for OVS cases too.
diff mbox

Patch

diff --git a/crypto/crypto_user.c b/crypto/crypto_user.c
index a90404a0c5ff..4a44830741c1 100644
--- a/crypto/crypto_user.c
+++ b/crypto/crypto_user.c
@@ -483,7 +483,8 @@  static const struct crypto_link {
 	[CRYPTO_MSG_DELRNG	- CRYPTO_MSG_BASE] = { .doit = crypto_del_rng },
 };
 
-static int crypto_user_rcv_msg(struct sk_buff *skb, struct nlmsghdr *nlh)
+static int crypto_user_rcv_msg(struct sk_buff *skb, struct nlmsghdr *nlh,
+			       struct netlink_ext_ack *extack)
 {
 	struct nlattr *attrs[CRYPTOCFGA_MAX+1];
 	const struct crypto_link *link;
diff --git a/drivers/infiniband/core/netlink.c b/drivers/infiniband/core/netlink.c
index 10469b0088b5..b784055423c8 100644
--- a/drivers/infiniband/core/netlink.c
+++ b/drivers/infiniband/core/netlink.c
@@ -146,7 +146,8 @@  int ibnl_put_attr(struct sk_buff *skb, struct nlmsghdr *nlh,
 }
 EXPORT_SYMBOL(ibnl_put_attr);
 
-static int ibnl_rcv_msg(struct sk_buff *skb, struct nlmsghdr *nlh)
+static int ibnl_rcv_msg(struct sk_buff *skb, struct nlmsghdr *nlh,
+			struct netlink_ext_ack *extack)
 {
 	struct ibnl_client *client;
 	int type = nlh->nlmsg_type;
@@ -209,7 +210,7 @@  static void ibnl_rcv_reply_skb(struct sk_buff *skb)
 		if (nlh->nlmsg_flags & NLM_F_REQUEST)
 			return;
 
-		ibnl_rcv_msg(skb, nlh);
+		ibnl_rcv_msg(skb, nlh, NULL);
 
 		msglen = NLMSG_ALIGN(nlh->nlmsg_len);
 		if (msglen > skb->len)
diff --git a/drivers/scsi/scsi_netlink.c b/drivers/scsi/scsi_netlink.c
index 109802f776ed..50e624fb8307 100644
--- a/drivers/scsi/scsi_netlink.c
+++ b/drivers/scsi/scsi_netlink.c
@@ -111,7 +111,7 @@  scsi_nl_rcv_msg(struct sk_buff *skb)
 
 next_msg:
 		if ((err) || (nlh->nlmsg_flags & NLM_F_ACK))
-			netlink_ack(skb, nlh, err);
+			netlink_ack(skb, nlh, err, NULL);
 
 		skb_pull(skb, rlen);
 	}
diff --git a/include/linux/netlink.h b/include/linux/netlink.h
index da14ab61f363..47562e940e9c 100644
--- a/include/linux/netlink.h
+++ b/include/linux/netlink.h
@@ -62,11 +62,41 @@  netlink_kernel_create(struct net *net, int unit, struct netlink_kernel_cfg *cfg)
 	return __netlink_kernel_create(net, unit, THIS_MODULE, cfg);
 }
 
+/**
+ * struct netlink_ext_ack - netlink extended ACK report struct
+ * @_msg: message string to report - don't access directly, use
+ *	%NL_SET_ERR_MSG
+ * @bad_attr: attribute with error
+ * @missing_attr: number of missing attr (or 0)
+ * @cookie: cookie data to return to userspace (for success)
+ * @cookie_len: actual cookie data length
+ */
+struct netlink_ext_ack {
+	const char *_msg;
+	const struct nlattr *bad_attr;
+	u16 missing_attr;
+	u8 cookie[NETLINK_MAX_COOKIE_LEN];
+	u8 cookie_len;
+};
+
+/* Always use this macro, this allows later putting the
+ * message into a separate section or such for things
+ * like translation or listing all possible messages.
+ * Currently string formatting is not supported (due
+ * to the lack of an output buffer.)
+ */
+#define NL_SET_ERR_MSG(extack, msg) do {	\
+	static const char *_msg = (msg);	\
+						\
+	(extack)->_msg = _msg;			\
+} while (0)
+
 extern void netlink_kernel_release(struct sock *sk);
 extern int __netlink_change_ngroups(struct sock *sk, unsigned int groups);
 extern int netlink_change_ngroups(struct sock *sk, unsigned int groups);
 extern void __netlink_clear_multicast_users(struct sock *sk, unsigned int group);
-extern void netlink_ack(struct sk_buff *in_skb, struct nlmsghdr *nlh, int err);
+extern void netlink_ack(struct sk_buff *in_skb, struct nlmsghdr *nlh, int err,
+			const struct netlink_ext_ack *extack);
 extern int netlink_has_listeners(struct sock *sk, unsigned int group);
 
 extern int netlink_unicast(struct sock *ssk, struct sk_buff *skb, __u32 portid, int nonblock);
diff --git a/include/net/netlink.h b/include/net/netlink.h
index b239fcd33d80..a064ec3e2ee1 100644
--- a/include/net/netlink.h
+++ b/include/net/netlink.h
@@ -233,7 +233,8 @@  struct nl_info {
 };
 
 int netlink_rcv_skb(struct sk_buff *skb,
-		    int (*cb)(struct sk_buff *, struct nlmsghdr *));
+		    int (*cb)(struct sk_buff *, struct nlmsghdr *,
+			      struct netlink_ext_ack *));
 int nlmsg_notify(struct sock *sk, struct sk_buff *skb, u32 portid,
 		 unsigned int group, int report, gfp_t flags);
 
diff --git a/include/uapi/linux/netlink.h b/include/uapi/linux/netlink.h
index f3946a27bd07..d1564557d645 100644
--- a/include/uapi/linux/netlink.h
+++ b/include/uapi/linux/netlink.h
@@ -101,6 +101,29 @@  struct nlmsghdr {
 struct nlmsgerr {
 	int		error;
 	struct nlmsghdr msg;
+	/*
+	 * followed by the message contents unless NETLINK_CAP_ACK was set,
+	 * message length is aligned with NLMSG_ALIGN()
+	 */
+	/*
+	 * followed by TLVs defined in enum nlmsgerr_attrs
+	 * if NETLINK_EXT_ACK was set
+	 */
+};
+
+/**
+ * enum nlmsgerr_attrs - netlink error message attributes
+ * @NLMSGERR_ATTR_UNUSED: unused
+ * @NLMSGERR_ATTR_MSG: error message string (string)
+ * @NLMSGERR_ATTR_OFFS: error offset in the original message (u32)
+ * @NLMSGERR_ATTR_ATTR: top-level attribute that caused the error
+ *	(or is missing, u16)
+ */
+enum nlmsgerr_attrs {
+	NLMSGERR_ATTR_UNUSED,
+	NLMSGERR_ATTR_MSG,
+	NLMSGERR_ATTR_OFFS,
+	NLMSGERR_ATTR_ATTR,
 };
 
 #define NETLINK_ADD_MEMBERSHIP		1
@@ -115,6 +138,7 @@  struct nlmsgerr {
 #define NETLINK_LISTEN_ALL_NSID		8
 #define NETLINK_LIST_MEMBERSHIPS	9
 #define NETLINK_CAP_ACK			10
+#define NETLINK_EXT_ACK			11
 
 struct nl_pktinfo {
 	__u32	group;
diff --git a/kernel/audit.c b/kernel/audit.c
index 2f4964cfde0b..d54bf5932374 100644
--- a/kernel/audit.c
+++ b/kernel/audit.c
@@ -1402,7 +1402,7 @@  static void audit_receive_skb(struct sk_buff *skb)
 		err = audit_receive_msg(skb, nlh);
 		/* if err or if this message says it wants a response */
 		if (err || (nlh->nlmsg_flags & NLM_F_ACK))
-			netlink_ack(skb, nlh, err);
+			netlink_ack(skb, nlh, err, NULL);
 
 		nlh = nlmsg_next(nlh, &len);
 	}
diff --git a/net/core/rtnetlink.c b/net/core/rtnetlink.c
index b2bd4c9ee860..9788147241f4 100644
--- a/net/core/rtnetlink.c
+++ b/net/core/rtnetlink.c
@@ -4120,7 +4120,8 @@  static int rtnl_stats_dump(struct sk_buff *skb, struct netlink_callback *cb)
 
 /* Process one rtnetlink message. */
 
-static int rtnetlink_rcv_msg(struct sk_buff *skb, struct nlmsghdr *nlh)
+static int rtnetlink_rcv_msg(struct sk_buff *skb, struct nlmsghdr *nlh,
+			     struct netlink_ext_ack *extack)
 {
 	struct net *net = sock_net(skb->sk);
 	rtnl_doit_func doit;
diff --git a/net/core/sock_diag.c b/net/core/sock_diag.c
index fb9d0e2fd148..217f4e3b82f6 100644
--- a/net/core/sock_diag.c
+++ b/net/core/sock_diag.c
@@ -238,7 +238,8 @@  static int __sock_diag_cmd(struct sk_buff *skb, struct nlmsghdr *nlh)
 	return err;
 }
 
-static int sock_diag_rcv_msg(struct sk_buff *skb, struct nlmsghdr *nlh)
+static int sock_diag_rcv_msg(struct sk_buff *skb, struct nlmsghdr *nlh,
+			     struct netlink_ext_ack *extack)
 {
 	int ret;
 
diff --git a/net/hsr/hsr_netlink.c b/net/hsr/hsr_netlink.c
index 1ab30e7d3f99..81dac16933fc 100644
--- a/net/hsr/hsr_netlink.c
+++ b/net/hsr/hsr_netlink.c
@@ -350,7 +350,7 @@  static int hsr_get_node_status(struct sk_buff *skb_in, struct genl_info *info)
 	return 0;
 
 invalid:
-	netlink_ack(skb_in, nlmsg_hdr(skb_in), -EINVAL);
+	netlink_ack(skb_in, nlmsg_hdr(skb_in), -EINVAL, NULL);
 	return 0;
 
 nla_put_failure:
@@ -432,7 +432,7 @@  static int hsr_get_node_list(struct sk_buff *skb_in, struct genl_info *info)
 	return 0;
 
 invalid:
-	netlink_ack(skb_in, nlmsg_hdr(skb_in), -EINVAL);
+	netlink_ack(skb_in, nlmsg_hdr(skb_in), -EINVAL, NULL);
 	return 0;
 
 nla_put_failure:
diff --git a/net/netfilter/ipset/ip_set_core.c b/net/netfilter/ipset/ip_set_core.c
index c296f9b606d4..26356bf8cebf 100644
--- a/net/netfilter/ipset/ip_set_core.c
+++ b/net/netfilter/ipset/ip_set_core.c
@@ -1305,7 +1305,7 @@  ip_set_dump_start(struct sk_buff *skb, struct netlink_callback *cb)
 			 * manually :-(
 			 */
 			if (nlh->nlmsg_flags & NLM_F_ACK)
-				netlink_ack(cb->skb, nlh, ret);
+				netlink_ack(cb->skb, nlh, ret, NULL);
 			return ret;
 		}
 	}
diff --git a/net/netfilter/nfnetlink.c b/net/netfilter/nfnetlink.c
index 68eda920160e..181d3bb800e6 100644
--- a/net/netfilter/nfnetlink.c
+++ b/net/netfilter/nfnetlink.c
@@ -148,7 +148,8 @@  int nfnetlink_unicast(struct sk_buff *skb, struct net *net, u32 portid,
 EXPORT_SYMBOL_GPL(nfnetlink_unicast);
 
 /* Process one complete nfnetlink message. */
-static int nfnetlink_rcv_msg(struct sk_buff *skb, struct nlmsghdr *nlh)
+static int nfnetlink_rcv_msg(struct sk_buff *skb, struct nlmsghdr *nlh,
+			     struct netlink_ext_ack *extack)
 {
 	struct net *net = sock_net(skb->sk);
 	const struct nfnl_callback *nc;
@@ -261,7 +262,7 @@  static void nfnl_err_deliver(struct list_head *err_list, struct sk_buff *skb)
 	struct nfnl_err *nfnl_err, *next;
 
 	list_for_each_entry_safe(nfnl_err, next, err_list, head) {
-		netlink_ack(skb, nfnl_err->nlh, nfnl_err->err);
+		netlink_ack(skb, nfnl_err->nlh, nfnl_err->err, NULL);
 		nfnl_err_del(nfnl_err);
 	}
 }
@@ -284,13 +285,13 @@  static void nfnetlink_rcv_batch(struct sk_buff *skb, struct nlmsghdr *nlh,
 	int err;
 
 	if (subsys_id >= NFNL_SUBSYS_COUNT)
-		return netlink_ack(skb, nlh, -EINVAL);
+		return netlink_ack(skb, nlh, -EINVAL, NULL);
 replay:
 	status = 0;
 
 	skb = netlink_skb_clone(oskb, GFP_KERNEL);
 	if (!skb)
-		return netlink_ack(oskb, nlh, -ENOMEM);
+		return netlink_ack(oskb, nlh, -ENOMEM, NULL);
 
 	nfnl_lock(subsys_id);
 	ss = nfnl_dereference_protected(subsys_id);
@@ -304,20 +305,20 @@  static void nfnetlink_rcv_batch(struct sk_buff *skb, struct nlmsghdr *nlh,
 #endif
 		{
 			nfnl_unlock(subsys_id);
-			netlink_ack(oskb, nlh, -EOPNOTSUPP);
+			netlink_ack(oskb, nlh, -EOPNOTSUPP, NULL);
 			return kfree_skb(skb);
 		}
 	}
 
 	if (!ss->commit || !ss->abort) {
 		nfnl_unlock(subsys_id);
-		netlink_ack(oskb, nlh, -EOPNOTSUPP);
+		netlink_ack(oskb, nlh, -EOPNOTSUPP, NULL);
 		return kfree_skb(skb);
 	}
 
 	if (genid && ss->valid_genid && !ss->valid_genid(net, genid)) {
 		nfnl_unlock(subsys_id);
-		netlink_ack(oskb, nlh, -ERESTART);
+		netlink_ack(oskb, nlh, -ERESTART, NULL);
 		return kfree_skb(skb);
 	}
 
@@ -407,7 +408,8 @@  static void nfnetlink_rcv_batch(struct sk_buff *skb, struct nlmsghdr *nlh,
 				 * pointing to the batch header.
 				 */
 				nfnl_err_reset(&err_list);
-				netlink_ack(oskb, nlmsg_hdr(oskb), -ENOMEM);
+				netlink_ack(oskb, nlmsg_hdr(oskb), -ENOMEM,
+					    NULL);
 				status |= NFNL_BATCH_FAILURE;
 				goto done;
 			}
@@ -467,7 +469,7 @@  static void nfnetlink_rcv_skb_batch(struct sk_buff *skb, struct nlmsghdr *nlh)
 
 	err = nla_parse(cda, NFNL_BATCH_MAX, attr, attrlen, nfnl_batch_policy);
 	if (err < 0) {
-		netlink_ack(skb, nlh, err);
+		netlink_ack(skb, nlh, err, NULL);
 		return;
 	}
 	if (cda[NFNL_BATCH_GENID])
@@ -493,7 +495,7 @@  static void nfnetlink_rcv(struct sk_buff *skb)
 		return;
 
 	if (!netlink_net_capable(skb, CAP_NET_ADMIN)) {
-		netlink_ack(skb, nlh, -EPERM);
+		netlink_ack(skb, nlh, -EPERM, NULL);
 		return;
 	}
 
diff --git a/net/netlink/af_netlink.c b/net/netlink/af_netlink.c
index fc232441cf23..02cffb0a3904 100644
--- a/net/netlink/af_netlink.c
+++ b/net/netlink/af_netlink.c
@@ -1652,6 +1652,13 @@  static int netlink_setsockopt(struct socket *sock, int level, int optname,
 			nlk->flags &= ~NETLINK_F_CAP_ACK;
 		err = 0;
 		break;
+	case NETLINK_EXT_ACK:
+		if (val)
+			nlk->flags |= NETLINK_F_EXT_ACK;
+		else
+			nlk->flags &= ~NETLINK_F_EXT_ACK;
+		err = 0;
+		break;
 	default:
 		err = -ENOPROTOOPT;
 	}
@@ -1736,6 +1743,16 @@  static int netlink_getsockopt(struct socket *sock, int level, int optname,
 			return -EFAULT;
 		err = 0;
 		break;
+	case NETLINK_EXT_ACK:
+		if (len < sizeof(int))
+			return -EINVAL;
+		len = sizeof(int);
+		val = nlk->flags & NETLINK_F_EXT_ACK ? 1 : 0;
+		if (put_user(len, optlen) ||
+		    put_user(val, optval))
+			return -EFAULT;
+		err = 0;
+		break;
 	default:
 		err = -ENOPROTOOPT;
 	}
@@ -2267,21 +2284,37 @@  int __netlink_dump_start(struct sock *ssk, struct sk_buff *skb,
 }
 EXPORT_SYMBOL(__netlink_dump_start);
 
-void netlink_ack(struct sk_buff *in_skb, struct nlmsghdr *nlh, int err)
+void netlink_ack(struct sk_buff *in_skb, struct nlmsghdr *nlh, int err,
+		 const struct netlink_ext_ack *extack)
 {
 	struct sk_buff *skb;
 	struct nlmsghdr *rep;
 	struct nlmsgerr *errmsg;
 	size_t payload = sizeof(*errmsg);
+	size_t acksize = sizeof(*errmsg);
 	struct netlink_sock *nlk = nlk_sk(NETLINK_CB(in_skb).sk);
 
 	/* Error messages get the original request appened, unless the user
-	 * requests to cap the error message.
+	 * requests to cap the error message, and get extra error data if
+	 * requested.
 	 */
-	if (!(nlk->flags & NETLINK_F_CAP_ACK) && err)
-		payload += nlmsg_len(nlh);
+	if (err) {
+		if (!(nlk->flags & NETLINK_F_CAP_ACK))
+			payload += nlmsg_len(nlh);
+		acksize = payload;
+		if (nlk->flags & NETLINK_F_EXT_ACK) {
+			if (extack && extack->_msg)
+				acksize +=
+					nla_total_size(strlen(extack->_msg) + 1);
+			if (extack && extack->bad_attr)
+				acksize += nla_total_size(sizeof(u32));
+			if (extack &&
+			    (extack->missing_attr || extack->bad_attr))
+				acksize += nla_total_size(sizeof(u16));
+		}
+	}
 
-	skb = nlmsg_new(payload, GFP_KERNEL);
+	skb = nlmsg_new(acksize, GFP_KERNEL);
 	if (!skb) {
 		struct sock *sk;
 
@@ -2300,14 +2333,35 @@  void netlink_ack(struct sk_buff *in_skb, struct nlmsghdr *nlh, int err)
 			  NLMSG_ERROR, payload, 0);
 	errmsg = nlmsg_data(rep);
 	errmsg->error = err;
-	memcpy(&errmsg->msg, nlh, payload > sizeof(*errmsg) ? nlh->nlmsg_len : sizeof(*nlh));
+	memcpy(&errmsg->msg, nlh,
+	       !(nlk->flags & NETLINK_F_CAP_ACK) ? nlh->nlmsg_len
+						 : sizeof(*nlh));
+
+	if (err && nlk->flags & NETLINK_F_EXT_ACK) {
+		if (extack && extack->_msg)
+			WARN_ON(nla_put_string(skb, NLMSGERR_ATTR_MSG,
+					       extack->_msg));
+		if (extack && extack->bad_attr &&
+		    !WARN_ON((u8 *)extack->bad_attr < in_skb->data ||
+			     (u8 *)extack->bad_attr >= in_skb->data +
+						       in_skb->len))
+			WARN_ON(nla_put_u32(skb, NLMSGERR_ATTR_OFFS,
+					    (u8 *)extack->bad_attr -
+					    in_skb->data));
+		if (extack && extack->missing_attr)
+			WARN_ON(nla_put_u16(skb, NLMSGERR_ATTR_ATTR,
+					    extack->missing_attr));
+	}
+
 	netlink_unicast(in_skb->sk, skb, NETLINK_CB(in_skb).portid, MSG_DONTWAIT);
 }
 EXPORT_SYMBOL(netlink_ack);
 
 int netlink_rcv_skb(struct sk_buff *skb, int (*cb)(struct sk_buff *,
-						     struct nlmsghdr *))
+						   struct nlmsghdr *,
+						   struct netlink_ext_ack *))
 {
+	struct netlink_ext_ack extack = {};
 	struct nlmsghdr *nlh;
 	int err;
 
@@ -2328,13 +2382,13 @@  int netlink_rcv_skb(struct sk_buff *skb, int (*cb)(struct sk_buff *,
 		if (nlh->nlmsg_type < NLMSG_MIN_TYPE)
 			goto ack;
 
-		err = cb(skb, nlh);
+		err = cb(skb, nlh, &extack);
 		if (err == -EINTR)
 			goto skip;
 
 ack:
 		if (nlh->nlmsg_flags & NLM_F_ACK || err)
-			netlink_ack(skb, nlh, err);
+			netlink_ack(skb, nlh, err, &extack);
 
 skip:
 		msglen = NLMSG_ALIGN(nlh->nlmsg_len);
diff --git a/net/netlink/af_netlink.h b/net/netlink/af_netlink.h
index f792f8d7f982..3490f2430532 100644
--- a/net/netlink/af_netlink.h
+++ b/net/netlink/af_netlink.h
@@ -13,6 +13,7 @@ 
 #define NETLINK_F_RECV_NO_ENOBUFS	0x8
 #define NETLINK_F_LISTEN_ALL_NSID	0x10
 #define NETLINK_F_CAP_ACK		0x20
+#define NETLINK_F_EXT_ACK		0x40
 
 #define NLGRPSZ(x)	(ALIGN(x, sizeof(unsigned long) * 8) / 8)
 #define NLGRPLONGS(x)	(NLGRPSZ(x)/sizeof(unsigned long))
diff --git a/net/netlink/genetlink.c b/net/netlink/genetlink.c
index 92e0981f7404..57b2e3648bc0 100644
--- a/net/netlink/genetlink.c
+++ b/net/netlink/genetlink.c
@@ -605,7 +605,8 @@  static int genl_family_rcv_msg(const struct genl_family *family,
 	return err;
 }
 
-static int genl_rcv_msg(struct sk_buff *skb, struct nlmsghdr *nlh)
+static int genl_rcv_msg(struct sk_buff *skb, struct nlmsghdr *nlh,
+			struct netlink_ext_ack *extack)
 {
 	const struct genl_family *family;
 	int err;
diff --git a/net/xfrm/xfrm_user.c b/net/xfrm/xfrm_user.c
index 40a8aa39220d..1ba8c115a993 100644
--- a/net/xfrm/xfrm_user.c
+++ b/net/xfrm/xfrm_user.c
@@ -2448,7 +2448,8 @@  static const struct xfrm_link {
 	[XFRM_MSG_GETSPDINFO  - XFRM_MSG_BASE] = { .doit = xfrm_get_spdinfo   },
 };
 
-static int xfrm_user_rcv_msg(struct sk_buff *skb, struct nlmsghdr *nlh)
+static int xfrm_user_rcv_msg(struct sk_buff *skb, struct nlmsghdr *nlh,
+			     struct netlink_ext_ack *extack)
 {
 	struct net *net = sock_net(skb->sk);
 	struct nlattr *attrs[XFRMA_MAX+1];