diff mbox series

NETWORKING: avoid use IPCB in cipso_v4_error

Message ID 6691891549984203@myt5-a323eb993ef7.qloud-c.yandex.net (mailing list archive)
State New, archived
Headers show
Series NETWORKING: avoid use IPCB in cipso_v4_error | expand

Commit Message

Sergey Nazarov Feb. 12, 2019, 3:10 p.m. UTC
Since cipso_v4_error might be called from different network stack layers, we can't safely use icmp_send there.
icmp_send copies IP options with ip_option_echo, which uses IPCB to take access to IP header compiled data.
But after commit 971f10ec ("tcp: better TCP_SKB_CB layout to reduce cache line misses"), IPCB can't be used
above IP layer.
This patch fixes the problem by creating in cipso_v4_error a local copy of compiled IP options and using it with
introduced __icmp_send function. This looks some overloaded, but in quite rare error conditions only.

The original discussion is here:
https://lore.kernel.org/linux-security-module/16659801547571984@sas1-890ba5c2334a.qloud-c.yandex.net/

Signed-off-by: Sergey Nazarov <s-nazarov@yandex.ru>
---
 include/net/icmp.h    |    9 ++++++++-
 net/ipv4/cipso_ipv4.c |   18 ++++++++++++++++--
 net/ipv4/icmp.c       |    7 ++++---
 3 files changed, 28 insertions(+), 6 deletions(-)

--

Comments

Paul Moore Feb. 13, 2019, 9:41 p.m. UTC | #1
On Tue, Feb 12, 2019 at 10:10 AM Nazarov Sergey <s-nazarov@yandex.ru> wrote:
> Since cipso_v4_error might be called from different network stack layers, we can't safely use icmp_send there.
> icmp_send copies IP options with ip_option_echo, which uses IPCB to take access to IP header compiled data.
> But after commit 971f10ec ("tcp: better TCP_SKB_CB layout to reduce cache line misses"), IPCB can't be used
> above IP layer.
> This patch fixes the problem by creating in cipso_v4_error a local copy of compiled IP options and using it with
> introduced __icmp_send function. This looks some overloaded, but in quite rare error conditions only.
>
> The original discussion is here:
> https://lore.kernel.org/linux-security-module/16659801547571984@sas1-890ba5c2334a.qloud-c.yandex.net/
>
> Signed-off-by: Sergey Nazarov <s-nazarov@yandex.ru>
> ---
>  include/net/icmp.h    |    9 ++++++++-
>  net/ipv4/cipso_ipv4.c |   18 ++++++++++++++++--
>  net/ipv4/icmp.c       |    7 ++++---
>  3 files changed, 28 insertions(+), 6 deletions(-)

Hi Sergey,

Thanks for your work on finding this and putting a fix together.  As
we discussed previously, I think this looks good, but can you describe
the testing you did to verify that this works correctly?

> diff --git a/include/net/icmp.h b/include/net/icmp.h
> index 6ac3a5b..e0f709d 100644
> --- a/include/net/icmp.h
> +++ b/include/net/icmp.h
> @@ -22,6 +22,7 @@
>
>  #include <net/inet_sock.h>
>  #include <net/snmp.h>
> +#include <net/ip.h>
>
>  struct icmp_err {
>    int          errno;
> @@ -39,7 +40,13 @@ struct icmp_err {
>  struct sk_buff;
>  struct net;
>
> -void icmp_send(struct sk_buff *skb_in, int type, int code, __be32 info);
> +void __icmp_send(struct sk_buff *skb_in, int type, int code, __be32 info,
> +                const struct ip_options *opt);
> +static inline void icmp_send(struct sk_buff *skb_in, int type, int code, __be32 info)
> +{
> +       __icmp_send(skb_in, type, code, info, &IPCB(skb_in)->opt);
> +}
> +
>  int icmp_rcv(struct sk_buff *skb);
>  int icmp_err(struct sk_buff *skb, u32 info);
>  int icmp_init(void);
> diff --git a/net/ipv4/cipso_ipv4.c b/net/ipv4/cipso_ipv4.c
> index 777fa3b..234d12e 100644
> --- a/net/ipv4/cipso_ipv4.c
> +++ b/net/ipv4/cipso_ipv4.c
> @@ -1735,13 +1735,27 @@ int cipso_v4_validate(const struct sk_buff *skb, unsigned char **option)
>   */
>  void cipso_v4_error(struct sk_buff *skb, int error, u32 gateway)
>  {
> +       unsigned char optbuf[sizeof(struct ip_options) + 40];
> +       struct ip_options *opt = (struct ip_options *)optbuf;
> +
>         if (ip_hdr(skb)->protocol == IPPROTO_ICMP || error != -EACCES)
>                 return;
>
> +       /*
> +        * We might be called above the IP layer,
> +        * so we can not use icmp_send and IPCB here.
> +        */
> +
> +       memset(opt, 0, sizeof(struct ip_options));
> +       opt->optlen = ip_hdr(skb)->ihl*4 - sizeof(struct iphdr);
> +       memcpy(opt->__data, (unsigned char *)&(ip_hdr(skb)[1]), opt->optlen);
> +       if (ip_options_compile(dev_net(skb->dev), opt, NULL))
> +               return;
> +
>         if (gateway)
> -               icmp_send(skb, ICMP_DEST_UNREACH, ICMP_NET_ANO, 0);
> +               __icmp_send(skb, ICMP_DEST_UNREACH, ICMP_NET_ANO, 0, opt);
>         else
> -               icmp_send(skb, ICMP_DEST_UNREACH, ICMP_HOST_ANO, 0);
> +               __icmp_send(skb, ICMP_DEST_UNREACH, ICMP_HOST_ANO, 0, opt);
>  }
>
>  /**
> diff --git a/net/ipv4/icmp.c b/net/ipv4/icmp.c
> index 065997f..3f24414 100644
> --- a/net/ipv4/icmp.c
> +++ b/net/ipv4/icmp.c
> @@ -570,7 +570,8 @@ static void icmp_reply(struct icmp_bxm *icmp_param, struct sk_buff *skb)
>   *                     MUST reply to only the first fragment.
>   */
>
> -void icmp_send(struct sk_buff *skb_in, int type, int code, __be32 info)
> +void __icmp_send(struct sk_buff *skb_in, int type, int code, __be32 info,
> +                const struct ip_options *opt)
>  {
>         struct iphdr *iph;
>         int room;
> @@ -691,7 +692,7 @@ void icmp_send(struct sk_buff *skb_in, int type, int code, __be32 info)
>                                           iph->tos;
>         mark = IP4_REPLY_MARK(net, skb_in->mark);
>
> -       if (ip_options_echo(net, &icmp_param.replyopts.opt.opt, skb_in))
> +       if (__ip_options_echo(net, &icmp_param.replyopts.opt.opt, skb_in, opt))
>                 goto out_unlock;
>
>
> @@ -742,7 +743,7 @@ void icmp_send(struct sk_buff *skb_in, int type, int code, __be32 info)
>         local_bh_enable();
>  out:;
>  }
> -EXPORT_SYMBOL(icmp_send);
> +EXPORT_SYMBOL(__icmp_send);
>
>
>  static void icmp_socket_deliver(struct sk_buff *skb, u32 info)
> --
>
David Miller Feb. 14, 2019, 4:43 p.m. UTC | #2
From: Nazarov Sergey <s-nazarov@yandex.ru>
Date: Tue, 12 Feb 2019 18:10:03 +0300

> Since cipso_v4_error might be called from different network stack layers, we can't safely use icmp_send there.
> icmp_send copies IP options with ip_option_echo, which uses IPCB to take access to IP header compiled data.
> But after commit 971f10ec ("tcp: better TCP_SKB_CB layout to reduce cache line misses"), IPCB can't be used
> above IP layer.
> This patch fixes the problem by creating in cipso_v4_error a local copy of compiled IP options and using it with
> introduced __icmp_send function. This looks some overloaded, but in quite rare error conditions only.
> 
> The original discussion is here:
> https://lore.kernel.org/linux-security-module/16659801547571984@sas1-890ba5c2334a.qloud-c.yandex.net/
> 
> Signed-off-by: Sergey Nazarov <s-nazarov@yandex.ru>

This problem is not unique to Cipso, net/atm/clip.c's error handler
has the same exact issue.

I didn't scan more of the tree, there are probably a couple more
locations as well.
Sergey Nazarov Feb. 14, 2019, 6 p.m. UTC | #3
Hi, Paul!
I've found the problem and testing it with some very specific custom lsm module. The test case was simple:
standard TCP/IP client-server application, where server opens CIPSO labeled TCP socket, and client connecting
to this socket with forbidden labels. After several connections kernel crashing with general memory protection or
kernel cache inconsistent error.
I think, the similar behaviour should be with selinux or smack in the same conditions. But I don't know them
so good to reproduce situation.
After applying patch, I haven't kernel crashes.
But now I've made additional checks and found no response icmp packets. The ip_options_compile requires
CAP_NET_RAW capability when CIPSO option compiling, if skb is NULL. I have no other ideas than returning to
the early patch version with ip_options_compile modified. What do you think about that?

14.02.2019, 00:42, "Paul Moore" <paul@paul-moore.com>:
> On Tue, Feb 12, 2019 at 10:10 AM Nazarov Sergey <s-nazarov@yandex.ru> wrote:
>>  Since cipso_v4_error might be called from different network stack layers, we can't safely use icmp_send there.
>>  icmp_send copies IP options with ip_option_echo, which uses IPCB to take access to IP header compiled data.
>>  But after commit 971f10ec ("tcp: better TCP_SKB_CB layout to reduce cache line misses"), IPCB can't be used
>>  above IP layer.
>>  This patch fixes the problem by creating in cipso_v4_error a local copy of compiled IP options and using it with
>>  introduced __icmp_send function. This looks some overloaded, but in quite rare error conditions only.
>>
>>  The original discussion is here:
>>  https://lore.kernel.org/linux-security-module/16659801547571984@sas1-890ba5c2334a.qloud-c.yandex.net/
>>
>>  Signed-off-by: Sergey Nazarov <s-nazarov@yandex.ru>
>>  ---
>>   include/net/icmp.h | 9 ++++++++-
>>   net/ipv4/cipso_ipv4.c | 18 ++++++++++++++++--
>>   net/ipv4/icmp.c | 7 ++++---
>>   3 files changed, 28 insertions(+), 6 deletions(-)
>
> Hi Sergey,
>
> Thanks for your work on finding this and putting a fix together. As
> we discussed previously, I think this looks good, but can you describe
> the testing you did to verify that this works correctly?
>
>>  diff --git a/include/net/icmp.h b/include/net/icmp.h
>>  index 6ac3a5b..e0f709d 100644
>>  --- a/include/net/icmp.h
>>  +++ b/include/net/icmp.h
>>  @@ -22,6 +22,7 @@
>>
>>   #include <net/inet_sock.h>
>>   #include <net/snmp.h>
>>  +#include <net/ip.h>
>>
>>   struct icmp_err {
>>     int errno;
>>  @@ -39,7 +40,13 @@ struct icmp_err {
>>   struct sk_buff;
>>   struct net;
>>
>>  -void icmp_send(struct sk_buff *skb_in, int type, int code, __be32 info);
>>  +void __icmp_send(struct sk_buff *skb_in, int type, int code, __be32 info,
>>  + const struct ip_options *opt);
>>  +static inline void icmp_send(struct sk_buff *skb_in, int type, int code, __be32 info)
>>  +{
>>  + __icmp_send(skb_in, type, code, info, &IPCB(skb_in)->opt);
>>  +}
>>  +
>>   int icmp_rcv(struct sk_buff *skb);
>>   int icmp_err(struct sk_buff *skb, u32 info);
>>   int icmp_init(void);
>>  diff --git a/net/ipv4/cipso_ipv4.c b/net/ipv4/cipso_ipv4.c
>>  index 777fa3b..234d12e 100644
>>  --- a/net/ipv4/cipso_ipv4.c
>>  +++ b/net/ipv4/cipso_ipv4.c
>>  @@ -1735,13 +1735,27 @@ int cipso_v4_validate(const struct sk_buff *skb, unsigned char **option)
>>    */
>>   void cipso_v4_error(struct sk_buff *skb, int error, u32 gateway)
>>   {
>>  + unsigned char optbuf[sizeof(struct ip_options) + 40];
>>  + struct ip_options *opt = (struct ip_options *)optbuf;
>>  +
>>          if (ip_hdr(skb)->protocol == IPPROTO_ICMP || error != -EACCES)
>>                  return;
>>
>>  + /*
>>  + * We might be called above the IP layer,
>>  + * so we can not use icmp_send and IPCB here.
>>  + */
>>  +
>>  + memset(opt, 0, sizeof(struct ip_options));
>>  + opt->optlen = ip_hdr(skb)->ihl*4 - sizeof(struct iphdr);
>>  + memcpy(opt->__data, (unsigned char *)&(ip_hdr(skb)[1]), opt->optlen);
>>  + if (ip_options_compile(dev_net(skb->dev), opt, NULL))
>>  + return;
>>  +
>>          if (gateway)
>>  - icmp_send(skb, ICMP_DEST_UNREACH, ICMP_NET_ANO, 0);
>>  + __icmp_send(skb, ICMP_DEST_UNREACH, ICMP_NET_ANO, 0, opt);
>>          else
>>  - icmp_send(skb, ICMP_DEST_UNREACH, ICMP_HOST_ANO, 0);
>>  + __icmp_send(skb, ICMP_DEST_UNREACH, ICMP_HOST_ANO, 0, opt);
>>   }
>>
>>   /**
>>  diff --git a/net/ipv4/icmp.c b/net/ipv4/icmp.c
>>  index 065997f..3f24414 100644
>>  --- a/net/ipv4/icmp.c
>>  +++ b/net/ipv4/icmp.c
>>  @@ -570,7 +570,8 @@ static void icmp_reply(struct icmp_bxm *icmp_param, struct sk_buff *skb)
>>    * MUST reply to only the first fragment.
>>    */
>>
>>  -void icmp_send(struct sk_buff *skb_in, int type, int code, __be32 info)
>>  +void __icmp_send(struct sk_buff *skb_in, int type, int code, __be32 info,
>>  + const struct ip_options *opt)
>>   {
>>          struct iphdr *iph;
>>          int room;
>>  @@ -691,7 +692,7 @@ void icmp_send(struct sk_buff *skb_in, int type, int code, __be32 info)
>>                                            iph->tos;
>>          mark = IP4_REPLY_MARK(net, skb_in->mark);
>>
>>  - if (ip_options_echo(net, &icmp_param.replyopts.opt.opt, skb_in))
>>  + if (__ip_options_echo(net, &icmp_param.replyopts.opt.opt, skb_in, opt))
>>                  goto out_unlock;
>>
>>  @@ -742,7 +743,7 @@ void icmp_send(struct sk_buff *skb_in, int type, int code, __be32 info)
>>          local_bh_enable();
>>   out:;
>>   }
>>  -EXPORT_SYMBOL(icmp_send);
>>  +EXPORT_SYMBOL(__icmp_send);
>>
>>   static void icmp_socket_deliver(struct sk_buff *skb, u32 info)
>>  --
>
> --
> paul moore
> www.paul-moore.com
Sergey Nazarov Feb. 14, 2019, 6:14 p.m. UTC | #4
Now the problem comes from TCP layer only. Is the IP over ATM operates over IP layer?

14.02.2019, 19:43, "David Miller" <davem@davemloft.net>:
> From: Nazarov Sergey <s-nazarov@yandex.ru>
> Date: Tue, 12 Feb 2019 18:10:03 +0300
>
>>  Since cipso_v4_error might be called from different network stack layers, we can't safely use icmp_send there.
>>  icmp_send copies IP options with ip_option_echo, which uses IPCB to take access to IP header compiled data.
>>  But after commit 971f10ec ("tcp: better TCP_SKB_CB layout to reduce cache line misses"), IPCB can't be used
>>  above IP layer.
>>  This patch fixes the problem by creating in cipso_v4_error a local copy of compiled IP options and using it with
>>  introduced __icmp_send function. This looks some overloaded, but in quite rare error conditions only.
>>
>>  The original discussion is here:
>>  https://lore.kernel.org/linux-security-module/16659801547571984@sas1-890ba5c2334a.qloud-c.yandex.net/
>>
>>  Signed-off-by: Sergey Nazarov <s-nazarov@yandex.ru>
>
> This problem is not unique to Cipso, net/atm/clip.c's error handler
> has the same exact issue.
>
> I didn't scan more of the tree, there are probably a couple more
> locations as well.
Stephen Smalley Feb. 14, 2019, 6:59 p.m. UTC | #5
On 2/14/19 1:00 PM, Nazarov Sergey wrote:
> Hi, Paul!
> I've found the problem and testing it with some very specific custom lsm module. The test case was simple:
> standard TCP/IP client-server application, where server opens CIPSO labeled TCP socket, and client connecting
> to this socket with forbidden labels. After several connections kernel crashing with general memory protection or
> kernel cache inconsistent error.
> I think, the similar behaviour should be with selinux or smack in the same conditions. But I don't know them
> so good to reproduce situation.

For SELinux, you can use
https://github.com/SELinuxProject/selinux-testsuite

That includes testing of CIPSO, both connecting from a client with an 
authorized level and from a client with an unauthorized level.

Not sure about Smack; there were some tests in LTP but I don't know if 
they would exercise it.

> After applying patch, I haven't kernel crashes.
> But now I've made additional checks and found no response icmp packets. The ip_options_compile requires
> CAP_NET_RAW capability when CIPSO option compiling, if skb is NULL. I have no other ideas than returning to
> the early patch version with ip_options_compile modified. What do you think about that?


> 
> 14.02.2019, 00:42, "Paul Moore" <paul@paul-moore.com>:
>> On Tue, Feb 12, 2019 at 10:10 AM Nazarov Sergey <s-nazarov@yandex.ru> wrote:
>>>   Since cipso_v4_error might be called from different network stack layers, we can't safely use icmp_send there.
>>>   icmp_send copies IP options with ip_option_echo, which uses IPCB to take access to IP header compiled data.
>>>   But after commit 971f10ec ("tcp: better TCP_SKB_CB layout to reduce cache line misses"), IPCB can't be used
>>>   above IP layer.
>>>   This patch fixes the problem by creating in cipso_v4_error a local copy of compiled IP options and using it with
>>>   introduced __icmp_send function. This looks some overloaded, but in quite rare error conditions only.
>>>
>>>   The original discussion is here:
>>>   https://lore.kernel.org/linux-security-module/16659801547571984@sas1-890ba5c2334a.qloud-c.yandex.net/
>>>
>>>   Signed-off-by: Sergey Nazarov <s-nazarov@yandex.ru>
>>>   ---
>>>    include/net/icmp.h | 9 ++++++++-
>>>    net/ipv4/cipso_ipv4.c | 18 ++++++++++++++++--
>>>    net/ipv4/icmp.c | 7 ++++---
>>>    3 files changed, 28 insertions(+), 6 deletions(-)
>>
>> Hi Sergey,
>>
>> Thanks for your work on finding this and putting a fix together. As
>> we discussed previously, I think this looks good, but can you describe
>> the testing you did to verify that this works correctly?
>>
>>>   diff --git a/include/net/icmp.h b/include/net/icmp.h
>>>   index 6ac3a5b..e0f709d 100644
>>>   --- a/include/net/icmp.h
>>>   +++ b/include/net/icmp.h
>>>   @@ -22,6 +22,7 @@
>>>
>>>    #include <net/inet_sock.h>
>>>    #include <net/snmp.h>
>>>   +#include <net/ip.h>
>>>
>>>    struct icmp_err {
>>>      int errno;
>>>   @@ -39,7 +40,13 @@ struct icmp_err {
>>>    struct sk_buff;
>>>    struct net;
>>>
>>>   -void icmp_send(struct sk_buff *skb_in, int type, int code, __be32 info);
>>>   +void __icmp_send(struct sk_buff *skb_in, int type, int code, __be32 info,
>>>   + const struct ip_options *opt);
>>>   +static inline void icmp_send(struct sk_buff *skb_in, int type, int code, __be32 info)
>>>   +{
>>>   + __icmp_send(skb_in, type, code, info, &IPCB(skb_in)->opt);
>>>   +}
>>>   +
>>>    int icmp_rcv(struct sk_buff *skb);
>>>    int icmp_err(struct sk_buff *skb, u32 info);
>>>    int icmp_init(void);
>>>   diff --git a/net/ipv4/cipso_ipv4.c b/net/ipv4/cipso_ipv4.c
>>>   index 777fa3b..234d12e 100644
>>>   --- a/net/ipv4/cipso_ipv4.c
>>>   +++ b/net/ipv4/cipso_ipv4.c
>>>   @@ -1735,13 +1735,27 @@ int cipso_v4_validate(const struct sk_buff *skb, unsigned char **option)
>>>     */
>>>    void cipso_v4_error(struct sk_buff *skb, int error, u32 gateway)
>>>    {
>>>   + unsigned char optbuf[sizeof(struct ip_options) + 40];
>>>   + struct ip_options *opt = (struct ip_options *)optbuf;
>>>   +
>>>           if (ip_hdr(skb)->protocol == IPPROTO_ICMP || error != -EACCES)
>>>                   return;
>>>
>>>   + /*
>>>   + * We might be called above the IP layer,
>>>   + * so we can not use icmp_send and IPCB here.
>>>   + */
>>>   +
>>>   + memset(opt, 0, sizeof(struct ip_options));
>>>   + opt->optlen = ip_hdr(skb)->ihl*4 - sizeof(struct iphdr);
>>>   + memcpy(opt->__data, (unsigned char *)&(ip_hdr(skb)[1]), opt->optlen);
>>>   + if (ip_options_compile(dev_net(skb->dev), opt, NULL))
>>>   + return;
>>>   +
>>>           if (gateway)
>>>   - icmp_send(skb, ICMP_DEST_UNREACH, ICMP_NET_ANO, 0);
>>>   + __icmp_send(skb, ICMP_DEST_UNREACH, ICMP_NET_ANO, 0, opt);
>>>           else
>>>   - icmp_send(skb, ICMP_DEST_UNREACH, ICMP_HOST_ANO, 0);
>>>   + __icmp_send(skb, ICMP_DEST_UNREACH, ICMP_HOST_ANO, 0, opt);
>>>    }
>>>
>>>    /**
>>>   diff --git a/net/ipv4/icmp.c b/net/ipv4/icmp.c
>>>   index 065997f..3f24414 100644
>>>   --- a/net/ipv4/icmp.c
>>>   +++ b/net/ipv4/icmp.c
>>>   @@ -570,7 +570,8 @@ static void icmp_reply(struct icmp_bxm *icmp_param, struct sk_buff *skb)
>>>     * MUST reply to only the first fragment.
>>>     */
>>>
>>>   -void icmp_send(struct sk_buff *skb_in, int type, int code, __be32 info)
>>>   +void __icmp_send(struct sk_buff *skb_in, int type, int code, __be32 info,
>>>   + const struct ip_options *opt)
>>>    {
>>>           struct iphdr *iph;
>>>           int room;
>>>   @@ -691,7 +692,7 @@ void icmp_send(struct sk_buff *skb_in, int type, int code, __be32 info)
>>>                                             iph->tos;
>>>           mark = IP4_REPLY_MARK(net, skb_in->mark);
>>>
>>>   - if (ip_options_echo(net, &icmp_param.replyopts.opt.opt, skb_in))
>>>   + if (__ip_options_echo(net, &icmp_param.replyopts.opt.opt, skb_in, opt))
>>>                   goto out_unlock;
>>>
>>>   @@ -742,7 +743,7 @@ void icmp_send(struct sk_buff *skb_in, int type, int code, __be32 info)
>>>           local_bh_enable();
>>>    out:;
>>>    }
>>>   -EXPORT_SYMBOL(icmp_send);
>>>   +EXPORT_SYMBOL(__icmp_send);
>>>
>>>    static void icmp_socket_deliver(struct sk_buff *skb, u32 info)
>>>   --
>>
>> --
>> paul moore
>> www.paul-moore.com
Paul Moore Feb. 15, 2019, 7:02 p.m. UTC | #6
On Thu, Feb 14, 2019 at 11:43 AM David Miller <davem@davemloft.net> wrote:
> From: Nazarov Sergey <s-nazarov@yandex.ru>
> Date: Tue, 12 Feb 2019 18:10:03 +0300
>
> > Since cipso_v4_error might be called from different network stack layers, we can't safely use icmp_send there.
> > icmp_send copies IP options with ip_option_echo, which uses IPCB to take access to IP header compiled data.
> > But after commit 971f10ec ("tcp: better TCP_SKB_CB layout to reduce cache line misses"), IPCB can't be used
> > above IP layer.
> > This patch fixes the problem by creating in cipso_v4_error a local copy of compiled IP options and using it with
> > introduced __icmp_send function. This looks some overloaded, but in quite rare error conditions only.
> >
> > The original discussion is here:
> > https://lore.kernel.org/linux-security-module/16659801547571984@sas1-890ba5c2334a.qloud-c.yandex.net/
> >
> > Signed-off-by: Sergey Nazarov <s-nazarov@yandex.ru>
>
> This problem is not unique to Cipso, net/atm/clip.c's error handler
> has the same exact issue.
>
> I didn't scan more of the tree, there are probably a couple more
> locations as well.

David, are you happy with Sergey's solution as a fix for this?

If so, would you prefer a respin of this patch to apply the to the
other broken callers (e.g. net/atm/clip.c), or would you rather merge
this patch and deal with the other callers in separate patches?
David Miller Feb. 15, 2019, 8 p.m. UTC | #7
From: Paul Moore <paul@paul-moore.com>
Date: Fri, 15 Feb 2019 14:02:31 -0500

> On Thu, Feb 14, 2019 at 11:43 AM David Miller <davem@davemloft.net> wrote:
>> From: Nazarov Sergey <s-nazarov@yandex.ru>
>> Date: Tue, 12 Feb 2019 18:10:03 +0300
>>
>> > Since cipso_v4_error might be called from different network stack layers, we can't safely use icmp_send there.
>> > icmp_send copies IP options with ip_option_echo, which uses IPCB to take access to IP header compiled data.
>> > But after commit 971f10ec ("tcp: better TCP_SKB_CB layout to reduce cache line misses"), IPCB can't be used
>> > above IP layer.
>> > This patch fixes the problem by creating in cipso_v4_error a local copy of compiled IP options and using it with
>> > introduced __icmp_send function. This looks some overloaded, but in quite rare error conditions only.
>> >
>> > The original discussion is here:
>> > https://lore.kernel.org/linux-security-module/16659801547571984@sas1-890ba5c2334a.qloud-c.yandex.net/
>> >
>> > Signed-off-by: Sergey Nazarov <s-nazarov@yandex.ru>
>>
>> This problem is not unique to Cipso, net/atm/clip.c's error handler
>> has the same exact issue.
>>
>> I didn't scan more of the tree, there are probably a couple more
>> locations as well.
> 
> David, are you happy with Sergey's solution as a fix for this?
> 
> If so, would you prefer a respin of this patch to apply the to the
> other broken callers (e.g. net/atm/clip.c), or would you rather merge
> this patch and deal with the other callers in separate patches?

I'd like the other broken callers to be handled.
Paul Moore Feb. 15, 2019, 8:04 p.m. UTC | #8
On Fri, Feb 15, 2019 at 3:00 PM David Miller <davem@davemloft.net> wrote:
> From: Paul Moore <paul@paul-moore.com>
> Date: Fri, 15 Feb 2019 14:02:31 -0500
>
> > On Thu, Feb 14, 2019 at 11:43 AM David Miller <davem@davemloft.net> wrote:
> >> From: Nazarov Sergey <s-nazarov@yandex.ru>
> >> Date: Tue, 12 Feb 2019 18:10:03 +0300
> >>
> >> > Since cipso_v4_error might be called from different network stack layers, we can't safely use icmp_send there.
> >> > icmp_send copies IP options with ip_option_echo, which uses IPCB to take access to IP header compiled data.
> >> > But after commit 971f10ec ("tcp: better TCP_SKB_CB layout to reduce cache line misses"), IPCB can't be used
> >> > above IP layer.
> >> > This patch fixes the problem by creating in cipso_v4_error a local copy of compiled IP options and using it with
> >> > introduced __icmp_send function. This looks some overloaded, but in quite rare error conditions only.
> >> >
> >> > The original discussion is here:
> >> > https://lore.kernel.org/linux-security-module/16659801547571984@sas1-890ba5c2334a.qloud-c.yandex.net/
> >> >
> >> > Signed-off-by: Sergey Nazarov <s-nazarov@yandex.ru>
> >>
> >> This problem is not unique to Cipso, net/atm/clip.c's error handler
> >> has the same exact issue.
> >>
> >> I didn't scan more of the tree, there are probably a couple more
> >> locations as well.
> >
> > David, are you happy with Sergey's solution as a fix for this?
> >
> > If so, would you prefer a respin of this patch to apply the to the
> > other broken callers (e.g. net/atm/clip.c), or would you rather merge
> > this patch and deal with the other callers in separate patches?
>
> I'd like the other broken callers to be handled.

Sergey, do you think you could fix the other callers too, or do you
want some help with that?
Sergey Nazarov Feb. 18, 2019, 1:39 p.m. UTC | #9
I think, it would not be a good solution, if I will analyze all subsystems using icmp_send, because I do not have enough knowledge for this.
I propose to add a new function, for example, ismp_send_safe, something like that:

void icmp_send_safe(struct sk_buff *skb_in, int type, int code, __be32 info)
{
	unsigned char optbuf[sizeof(struct ip_options) + 40];
	struct ip_options *opt = (struct ip_options *)optbuf;

	memset(opt, 0, sizeof(struct ip_options));
	opt->optlen = ip_hdr(skb_in)->ihl*4 - sizeof(struct iphdr);
	memcpy(opt->__data, (unsigned char *)&(ip_hdr(skb_in)[1]), opt->optlen);
	if (ip_options_compile(dev_net(skb_in->dev), opt, NULL))
		return;
	__icmp_send(skb_in, type, code, info, opt);
}

which could be used when the code works at different network stack layers.
And the subsystems developers, knowing their code well, will decide which one to use.

15.02.2019, 23:04, "Paul Moore" <paul@paul-moore.com>:
> On Fri, Feb 15, 2019 at 3:00 PM David Miller <davem@davemloft.net> wrote:
>>  From: Paul Moore <paul@paul-moore.com>
>>  Date: Fri, 15 Feb 2019 14:02:31 -0500
>>
>>  > On Thu, Feb 14, 2019 at 11:43 AM David Miller <davem@davemloft.net> wrote:
>>  >> From: Nazarov Sergey <s-nazarov@yandex.ru>
>>  >> Date: Tue, 12 Feb 2019 18:10:03 +0300
>>  >>
>>  >> > Since cipso_v4_error might be called from different network stack layers, we can't safely use icmp_send there.
>>  >> > icmp_send copies IP options with ip_option_echo, which uses IPCB to take access to IP header compiled data.
>>  >> > But after commit 971f10ec ("tcp: better TCP_SKB_CB layout to reduce cache line misses"), IPCB can't be used
>>  >> > above IP layer.
>>  >> > This patch fixes the problem by creating in cipso_v4_error a local copy of compiled IP options and using it with
>>  >> > introduced __icmp_send function. This looks some overloaded, but in quite rare error conditions only.
>>  >> >
>>  >> > The original discussion is here:
>>  >> > https://lore.kernel.org/linux-security-module/16659801547571984@sas1-890ba5c2334a.qloud-c.yandex.net/
>>  >> >
>>  >> > Signed-off-by: Sergey Nazarov <s-nazarov@yandex.ru>
>>  >>
>>  >> This problem is not unique to Cipso, net/atm/clip.c's error handler
>>  >> has the same exact issue.
>>  >>
>>  >> I didn't scan more of the tree, there are probably a couple more
>>  >> locations as well.
>>  >
>>  > David, are you happy with Sergey's solution as a fix for this?
>>  >
>>  > If so, would you prefer a respin of this patch to apply the to the
>>  > other broken callers (e.g. net/atm/clip.c), or would you rather merge
>>  > this patch and deal with the other callers in separate patches?
>>
>>  I'd like the other broken callers to be handled.
>
> Sergey, do you think you could fix the other callers too, or do you
> want some help with that?
>
> --
> paul moore
> www.paul-moore.com
David Miller Feb. 19, 2019, 1:25 a.m. UTC | #10
From: Nazarov Sergey <s-nazarov@yandex.ru>
Date: Mon, 18 Feb 2019 16:39:11 +0300

> I think, it would not be a good solution, if I will analyze all
> subsystems using icmp_send, because I do not have enough knowledge
> for this.  I propose to add a new function, for example,
> ismp_send_safe, something like that:

Please don't do this.

Solve the problem properly by auditing each case, there aren't a lot and
it is not too difficult to see the upcall sites.
Sergey Nazarov Feb. 22, 2019, 4:35 p.m. UTC | #11
I tried to analyze the cases of using icmp_send in kernel. It indirectly used by many protocols:
ARP, IP, UDP, Netfilter, IPVS, IPIP, GRE over IP, CLIP, XFRM, CIPSOv4.
Different IP tunnels and XFRM operating directly over IP layer and if using own skb->cb data,
having IP header data in front of it. CLIP uses icmp_send for packets from arp queue only.
So, If I right, only TCP layer moves IP header data and only CIPSOv4 operates on both IP and
TCP layers now. 

19.02.2019, 04:25, "David Miller" <davem@davemloft.net>:
> From: Nazarov Sergey <s-nazarov@yandex.ru>
> Date: Mon, 18 Feb 2019 16:39:11 +0300
>
>>  I think, it would not be a good solution, if I will analyze all
>>  subsystems using icmp_send, because I do not have enough knowledge
>>  for this. I propose to add a new function, for example,
>>  ismp_send_safe, something like that:
>
> Please don't do this.
>
> Solve the problem properly by auditing each case, there aren't a lot and
> it is not too difficult to see the upcall sites.
Sergey Nazarov Feb. 22, 2019, 5:43 p.m. UTC | #12
Add __icmp_send function having ip_options struct parameter
---
 include/net/icmp.h    |    9 ++++++++-
 net/ipv4/icmp.c       |    7 ++++---
 2 files changed, 12 insertions(+), 4 deletions(-)

diff --git a/include/net/icmp.h b/include/net/icmp.h
index 6ac3a5b..e0f709d 100644
--- a/include/net/icmp.h
+++ b/include/net/icmp.h
@@ -22,6 +22,7 @@
 
 #include <net/inet_sock.h>
 #include <net/snmp.h>
+#include <net/ip.h>
 
 struct icmp_err {
   int		errno;
@@ -39,7 +40,13 @@ struct icmp_err {
 struct sk_buff;
 struct net;
 
-void icmp_send(struct sk_buff *skb_in, int type, int code, __be32 info);
+void __icmp_send(struct sk_buff *skb_in, int type, int code, __be32 info,
+		 const struct ip_options *opt);
+static inline void icmp_send(struct sk_buff *skb_in, int type, int code, __be32 info)
+{
+	__icmp_send(skb_in, type, code, info, &IPCB(skb_in)->opt);
+}
+
 int icmp_rcv(struct sk_buff *skb);
 int icmp_err(struct sk_buff *skb, u32 info);
 int icmp_init(void);
diff --git a/net/ipv4/icmp.c b/net/ipv4/icmp.c
index 065997f..3f24414 100644
--- a/net/ipv4/icmp.c
+++ b/net/ipv4/icmp.c
@@ -570,7 +570,8 @@ static void icmp_reply(struct icmp_bxm *icmp_param, struct sk_buff *skb)
  *			MUST reply to only the first fragment.
  */
 
-void icmp_send(struct sk_buff *skb_in, int type, int code, __be32 info)
+void __icmp_send(struct sk_buff *skb_in, int type, int code, __be32 info,
+		 const struct ip_options *opt)
 {
 	struct iphdr *iph;
 	int room;
@@ -691,7 +692,7 @@ void icmp_send(struct sk_buff *skb_in, int type, int code, __be32 info)
 					  iph->tos;
 	mark = IP4_REPLY_MARK(net, skb_in->mark);
 
-	if (ip_options_echo(net, &icmp_param.replyopts.opt.opt, skb_in))
+	if (__ip_options_echo(net, &icmp_param.replyopts.opt.opt, skb_in, opt))
 		goto out_unlock;
 
 
@@ -742,7 +743,7 @@ void icmp_send(struct sk_buff *skb_in, int type, int code, __be32 info)
 	local_bh_enable();
 out:;
 }
-EXPORT_SYMBOL(icmp_send);
+EXPORT_SYMBOL(__icmp_send);
 
 
 static void icmp_socket_deliver(struct sk_buff *skb, u32 info)
---
Sergey Nazarov Feb. 22, 2019, 5:50 p.m. UTC | #13
Extract IP options in cipso_v4_error and use __icmp_send.
---
 include/net/ip.h      |    2 ++
 net/ipv4/cipso_ipv4.c |   17 +++++++++++++++--
 net/ipv4/ip_options.c |   22 +++++++++++++++++-----
 3 files changed, 34 insertions(+), 7 deletions(-)

diff --git a/include/net/ip.h b/include/net/ip.h
index 8866bfc..f0e8d06 100644
--- a/include/net/ip.h
+++ b/include/net/ip.h
@@ -667,6 +667,8 @@ static inline int ip_options_echo(struct net *net, struct ip_options *dopt,
 }
 
 void ip_options_fragment(struct sk_buff *skb);
+int __ip_options_compile(struct net *net, struct ip_options *opt,
+			 struct sk_buff *skb, __be32 *info);
 int ip_options_compile(struct net *net, struct ip_options *opt,
 		       struct sk_buff *skb);
 int ip_options_get(struct net *net, struct ip_options_rcu **optp,
diff --git a/net/ipv4/cipso_ipv4.c b/net/ipv4/cipso_ipv4.c
index 777fa3b..eff86a7 100644
--- a/net/ipv4/cipso_ipv4.c
+++ b/net/ipv4/cipso_ipv4.c
@@ -1735,13 +1735,26 @@ int cipso_v4_validate(const struct sk_buff *skb, unsigned char **option)
  */
 void cipso_v4_error(struct sk_buff *skb, int error, u32 gateway)
 {
+	unsigned char optbuf[sizeof(struct ip_options) + 40];
+	struct ip_options *opt = (struct ip_options *)optbuf;
+
 	if (ip_hdr(skb)->protocol == IPPROTO_ICMP || error != -EACCES)
 		return;
 
+	/*
+	 * We might be called above the IP layer,
+	 * so we can not use icmp_send and IPCB here.
+	 */
+
+	memset(opt, 0, sizeof(struct ip_options));
+	opt->optlen = ip_hdr(skb)->ihl*4 - sizeof(struct iphdr);
+	if (__ip_options_compile(dev_net(skb->dev), opt, skb, NULL))
+		return;
+
 	if (gateway)
-		icmp_send(skb, ICMP_DEST_UNREACH, ICMP_NET_ANO, 0);
+		__icmp_send(skb, ICMP_DEST_UNREACH, ICMP_NET_ANO, 0, opt);
 	else
-		icmp_send(skb, ICMP_DEST_UNREACH, ICMP_HOST_ANO, 0);
+		__icmp_send(skb, ICMP_DEST_UNREACH, ICMP_HOST_ANO, 0, opt);
 }
 
 /**
diff --git a/net/ipv4/ip_options.c b/net/ipv4/ip_options.c
index ed194d4..32a3504 100644
--- a/net/ipv4/ip_options.c
+++ b/net/ipv4/ip_options.c
@@ -251,8 +251,9 @@ static void spec_dst_fill(__be32 *spec_dst, struct sk_buff *skb)
  * If opt == NULL, then skb->data should point to IP header.
  */
 
-int ip_options_compile(struct net *net,
-		       struct ip_options *opt, struct sk_buff *skb)
+int __ip_options_compile(struct net *net,
+			 struct ip_options *opt, struct sk_buff *skb,
+			 __be32 *info)
 {
 	__be32 spec_dst = htonl(INADDR_ANY);
 	unsigned char *pp_ptr = NULL;
@@ -468,11 +469,22 @@ int ip_options_compile(struct net *net,
 		return 0;
 
 error:
-	if (skb) {
-		icmp_send(skb, ICMP_PARAMETERPROB, 0, htonl((pp_ptr-iph)<<24));
-	}
+	if (info)
+		*info = htonl((pp_ptr-iph)<<24);
 	return -EINVAL;
 }
+
+int ip_options_compile(struct net *net,
+		       struct ip_options *opt, struct sk_buff *skb)
+{
+	int ret;
+	__be32 info;
+
+	ret = __ip_options_compile(net, opt, skb, &info);
+	if (ret != 0 && skb)
+		icmp_send(skb, ICMP_PARAMETERPROB, 0, info);
+	return ret;
+}
 EXPORT_SYMBOL(ip_options_compile);
 
 /*
---
David Miller Feb. 25, 2019, 1:33 a.m. UTC | #14
From: Nazarov Sergey <s-nazarov@yandex.ru>
Date: Fri, 22 Feb 2019 19:35:29 +0300

> I tried to analyze the cases of using icmp_send in kernel. It
> indirectly used by many protocols: ARP, IP, UDP, Netfilter, IPVS,
> IPIP, GRE over IP, CLIP, XFRM, CIPSOv4.  Different IP tunnels and
> XFRM operating directly over IP layer and if using own skb->cb data,
> having IP header data in front of it. CLIP uses icmp_send for
> packets from arp queue only.  So, If I right, only TCP layer moves
> IP header data and only CIPSOv4 operates on both IP and TCP layers
> now.

Ok.
diff mbox series

Patch

diff --git a/include/net/icmp.h b/include/net/icmp.h
index 6ac3a5b..e0f709d 100644
--- a/include/net/icmp.h
+++ b/include/net/icmp.h
@@ -22,6 +22,7 @@ 
 
 #include <net/inet_sock.h>
 #include <net/snmp.h>
+#include <net/ip.h>
 
 struct icmp_err {
   int		errno;
@@ -39,7 +40,13 @@  struct icmp_err {
 struct sk_buff;
 struct net;
 
-void icmp_send(struct sk_buff *skb_in, int type, int code, __be32 info);
+void __icmp_send(struct sk_buff *skb_in, int type, int code, __be32 info,
+		 const struct ip_options *opt);
+static inline void icmp_send(struct sk_buff *skb_in, int type, int code, __be32 info)
+{
+	__icmp_send(skb_in, type, code, info, &IPCB(skb_in)->opt);
+}
+
 int icmp_rcv(struct sk_buff *skb);
 int icmp_err(struct sk_buff *skb, u32 info);
 int icmp_init(void);
diff --git a/net/ipv4/cipso_ipv4.c b/net/ipv4/cipso_ipv4.c
index 777fa3b..234d12e 100644
--- a/net/ipv4/cipso_ipv4.c
+++ b/net/ipv4/cipso_ipv4.c
@@ -1735,13 +1735,27 @@  int cipso_v4_validate(const struct sk_buff *skb, unsigned char **option)
  */
 void cipso_v4_error(struct sk_buff *skb, int error, u32 gateway)
 {
+	unsigned char optbuf[sizeof(struct ip_options) + 40];
+	struct ip_options *opt = (struct ip_options *)optbuf;
+
 	if (ip_hdr(skb)->protocol == IPPROTO_ICMP || error != -EACCES)
 		return;
 
+	/*
+	 * We might be called above the IP layer,
+	 * so we can not use icmp_send and IPCB here.
+	 */
+
+	memset(opt, 0, sizeof(struct ip_options));
+	opt->optlen = ip_hdr(skb)->ihl*4 - sizeof(struct iphdr);
+	memcpy(opt->__data, (unsigned char *)&(ip_hdr(skb)[1]), opt->optlen);
+	if (ip_options_compile(dev_net(skb->dev), opt, NULL))
+		return;
+
 	if (gateway)
-		icmp_send(skb, ICMP_DEST_UNREACH, ICMP_NET_ANO, 0);
+		__icmp_send(skb, ICMP_DEST_UNREACH, ICMP_NET_ANO, 0, opt);
 	else
-		icmp_send(skb, ICMP_DEST_UNREACH, ICMP_HOST_ANO, 0);
+		__icmp_send(skb, ICMP_DEST_UNREACH, ICMP_HOST_ANO, 0, opt);
 }
 
 /**
diff --git a/net/ipv4/icmp.c b/net/ipv4/icmp.c
index 065997f..3f24414 100644
--- a/net/ipv4/icmp.c
+++ b/net/ipv4/icmp.c
@@ -570,7 +570,8 @@  static void icmp_reply(struct icmp_bxm *icmp_param, struct sk_buff *skb)
  *			MUST reply to only the first fragment.
  */
 
-void icmp_send(struct sk_buff *skb_in, int type, int code, __be32 info)
+void __icmp_send(struct sk_buff *skb_in, int type, int code, __be32 info,
+		 const struct ip_options *opt)
 {
 	struct iphdr *iph;
 	int room;
@@ -691,7 +692,7 @@  void icmp_send(struct sk_buff *skb_in, int type, int code, __be32 info)
 					  iph->tos;
 	mark = IP4_REPLY_MARK(net, skb_in->mark);
 
-	if (ip_options_echo(net, &icmp_param.replyopts.opt.opt, skb_in))
+	if (__ip_options_echo(net, &icmp_param.replyopts.opt.opt, skb_in, opt))
 		goto out_unlock;
 
 
@@ -742,7 +743,7 @@  void icmp_send(struct sk_buff *skb_in, int type, int code, __be32 info)
 	local_bh_enable();
 out:;
 }
-EXPORT_SYMBOL(icmp_send);
+EXPORT_SYMBOL(__icmp_send);
 
 
 static void icmp_socket_deliver(struct sk_buff *skb, u32 info)