NETWORKING: avoid use IPCB in cipso_v4_error
diff mbox series

Message ID 6691891549984203@myt5-a323eb993ef7.qloud-c.yandex.net
State New
Headers show
Series
  • NETWORKING: avoid use IPCB in cipso_v4_error
Related show

Commit Message

Nazarov Sergey Feb. 12, 2019, 3:10 p.m. UTC
Since cipso_v4_error might be called from different network stack layers, we can't safely use icmp_send there.
icmp_send copies IP options with ip_option_echo, which uses IPCB to take access to IP header compiled data.
But after commit 971f10ec ("tcp: better TCP_SKB_CB layout to reduce cache line misses"), IPCB can't be used
above IP layer.
This patch fixes the problem by creating in cipso_v4_error a local copy of compiled IP options and using it with
introduced __icmp_send function. This looks some overloaded, but in quite rare error conditions only.

The original discussion is here:
https://lore.kernel.org/linux-security-module/16659801547571984@sas1-890ba5c2334a.qloud-c.yandex.net/

Signed-off-by: Sergey Nazarov <s-nazarov@yandex.ru>
---
 include/net/icmp.h    |    9 ++++++++-
 net/ipv4/cipso_ipv4.c |   18 ++++++++++++++++--
 net/ipv4/icmp.c       |    7 ++++---
 3 files changed, 28 insertions(+), 6 deletions(-)

--

Comments

Paul Moore Feb. 13, 2019, 9:41 p.m. UTC | #1
On Tue, Feb 12, 2019 at 10:10 AM Nazarov Sergey <s-nazarov@yandex.ru> wrote:
> Since cipso_v4_error might be called from different network stack layers, we can't safely use icmp_send there.
> icmp_send copies IP options with ip_option_echo, which uses IPCB to take access to IP header compiled data.
> But after commit 971f10ec ("tcp: better TCP_SKB_CB layout to reduce cache line misses"), IPCB can't be used
> above IP layer.
> This patch fixes the problem by creating in cipso_v4_error a local copy of compiled IP options and using it with
> introduced __icmp_send function. This looks some overloaded, but in quite rare error conditions only.
>
> The original discussion is here:
> https://lore.kernel.org/linux-security-module/16659801547571984@sas1-890ba5c2334a.qloud-c.yandex.net/
>
> Signed-off-by: Sergey Nazarov <s-nazarov@yandex.ru>
> ---
>  include/net/icmp.h    |    9 ++++++++-
>  net/ipv4/cipso_ipv4.c |   18 ++++++++++++++++--
>  net/ipv4/icmp.c       |    7 ++++---
>  3 files changed, 28 insertions(+), 6 deletions(-)

Hi Sergey,

Thanks for your work on finding this and putting a fix together.  As
we discussed previously, I think this looks good, but can you describe
the testing you did to verify that this works correctly?

> diff --git a/include/net/icmp.h b/include/net/icmp.h
> index 6ac3a5b..e0f709d 100644
> --- a/include/net/icmp.h
> +++ b/include/net/icmp.h
> @@ -22,6 +22,7 @@
>
>  #include <net/inet_sock.h>
>  #include <net/snmp.h>
> +#include <net/ip.h>
>
>  struct icmp_err {
>    int          errno;
> @@ -39,7 +40,13 @@ struct icmp_err {
>  struct sk_buff;
>  struct net;
>
> -void icmp_send(struct sk_buff *skb_in, int type, int code, __be32 info);
> +void __icmp_send(struct sk_buff *skb_in, int type, int code, __be32 info,
> +                const struct ip_options *opt);
> +static inline void icmp_send(struct sk_buff *skb_in, int type, int code, __be32 info)
> +{
> +       __icmp_send(skb_in, type, code, info, &IPCB(skb_in)->opt);
> +}
> +
>  int icmp_rcv(struct sk_buff *skb);
>  int icmp_err(struct sk_buff *skb, u32 info);
>  int icmp_init(void);
> diff --git a/net/ipv4/cipso_ipv4.c b/net/ipv4/cipso_ipv4.c
> index 777fa3b..234d12e 100644
> --- a/net/ipv4/cipso_ipv4.c
> +++ b/net/ipv4/cipso_ipv4.c
> @@ -1735,13 +1735,27 @@ int cipso_v4_validate(const struct sk_buff *skb, unsigned char **option)
>   */
>  void cipso_v4_error(struct sk_buff *skb, int error, u32 gateway)
>  {
> +       unsigned char optbuf[sizeof(struct ip_options) + 40];
> +       struct ip_options *opt = (struct ip_options *)optbuf;
> +
>         if (ip_hdr(skb)->protocol == IPPROTO_ICMP || error != -EACCES)
>                 return;
>
> +       /*
> +        * We might be called above the IP layer,
> +        * so we can not use icmp_send and IPCB here.
> +        */
> +
> +       memset(opt, 0, sizeof(struct ip_options));
> +       opt->optlen = ip_hdr(skb)->ihl*4 - sizeof(struct iphdr);
> +       memcpy(opt->__data, (unsigned char *)&(ip_hdr(skb)[1]), opt->optlen);
> +       if (ip_options_compile(dev_net(skb->dev), opt, NULL))
> +               return;
> +
>         if (gateway)
> -               icmp_send(skb, ICMP_DEST_UNREACH, ICMP_NET_ANO, 0);
> +               __icmp_send(skb, ICMP_DEST_UNREACH, ICMP_NET_ANO, 0, opt);
>         else
> -               icmp_send(skb, ICMP_DEST_UNREACH, ICMP_HOST_ANO, 0);
> +               __icmp_send(skb, ICMP_DEST_UNREACH, ICMP_HOST_ANO, 0, opt);
>  }
>
>  /**
> diff --git a/net/ipv4/icmp.c b/net/ipv4/icmp.c
> index 065997f..3f24414 100644
> --- a/net/ipv4/icmp.c
> +++ b/net/ipv4/icmp.c
> @@ -570,7 +570,8 @@ static void icmp_reply(struct icmp_bxm *icmp_param, struct sk_buff *skb)
>   *                     MUST reply to only the first fragment.
>   */
>
> -void icmp_send(struct sk_buff *skb_in, int type, int code, __be32 info)
> +void __icmp_send(struct sk_buff *skb_in, int type, int code, __be32 info,
> +                const struct ip_options *opt)
>  {
>         struct iphdr *iph;
>         int room;
> @@ -691,7 +692,7 @@ void icmp_send(struct sk_buff *skb_in, int type, int code, __be32 info)
>                                           iph->tos;
>         mark = IP4_REPLY_MARK(net, skb_in->mark);
>
> -       if (ip_options_echo(net, &icmp_param.replyopts.opt.opt, skb_in))
> +       if (__ip_options_echo(net, &icmp_param.replyopts.opt.opt, skb_in, opt))
>                 goto out_unlock;
>
>
> @@ -742,7 +743,7 @@ void icmp_send(struct sk_buff *skb_in, int type, int code, __be32 info)
>         local_bh_enable();
>  out:;
>  }
> -EXPORT_SYMBOL(icmp_send);
> +EXPORT_SYMBOL(__icmp_send);
>
>
>  static void icmp_socket_deliver(struct sk_buff *skb, u32 info)
> --
>
David Miller Feb. 14, 2019, 4:43 p.m. UTC | #2
From: Nazarov Sergey <s-nazarov@yandex.ru>
Date: Tue, 12 Feb 2019 18:10:03 +0300

> Since cipso_v4_error might be called from different network stack layers, we can't safely use icmp_send there.
> icmp_send copies IP options with ip_option_echo, which uses IPCB to take access to IP header compiled data.
> But after commit 971f10ec ("tcp: better TCP_SKB_CB layout to reduce cache line misses"), IPCB can't be used
> above IP layer.
> This patch fixes the problem by creating in cipso_v4_error a local copy of compiled IP options and using it with
> introduced __icmp_send function. This looks some overloaded, but in quite rare error conditions only.
> 
> The original discussion is here:
> https://lore.kernel.org/linux-security-module/16659801547571984@sas1-890ba5c2334a.qloud-c.yandex.net/
> 
> Signed-off-by: Sergey Nazarov <s-nazarov@yandex.ru>

This problem is not unique to Cipso, net/atm/clip.c's error handler
has the same exact issue.

I didn't scan more of the tree, there are probably a couple more
locations as well.
Nazarov Sergey Feb. 14, 2019, 6 p.m. UTC | #3
Hi, Paul!
I've found the problem and testing it with some very specific custom lsm module. The test case was simple:
standard TCP/IP client-server application, where server opens CIPSO labeled TCP socket, and client connecting
to this socket with forbidden labels. After several connections kernel crashing with general memory protection or
kernel cache inconsistent error.
I think, the similar behaviour should be with selinux or smack in the same conditions. But I don't know them
so good to reproduce situation.
After applying patch, I haven't kernel crashes.
But now I've made additional checks and found no response icmp packets. The ip_options_compile requires
CAP_NET_RAW capability when CIPSO option compiling, if skb is NULL. I have no other ideas than returning to
the early patch version with ip_options_compile modified. What do you think about that?

14.02.2019, 00:42, "Paul Moore" <paul@paul-moore.com>:
> On Tue, Feb 12, 2019 at 10:10 AM Nazarov Sergey <s-nazarov@yandex.ru> wrote:
>>  Since cipso_v4_error might be called from different network stack layers, we can't safely use icmp_send there.
>>  icmp_send copies IP options with ip_option_echo, which uses IPCB to take access to IP header compiled data.
>>  But after commit 971f10ec ("tcp: better TCP_SKB_CB layout to reduce cache line misses"), IPCB can't be used
>>  above IP layer.
>>  This patch fixes the problem by creating in cipso_v4_error a local copy of compiled IP options and using it with
>>  introduced __icmp_send function. This looks some overloaded, but in quite rare error conditions only.
>>
>>  The original discussion is here:
>>  https://lore.kernel.org/linux-security-module/16659801547571984@sas1-890ba5c2334a.qloud-c.yandex.net/
>>
>>  Signed-off-by: Sergey Nazarov <s-nazarov@yandex.ru>
>>  ---
>>   include/net/icmp.h | 9 ++++++++-
>>   net/ipv4/cipso_ipv4.c | 18 ++++++++++++++++--
>>   net/ipv4/icmp.c | 7 ++++---
>>   3 files changed, 28 insertions(+), 6 deletions(-)
>
> Hi Sergey,
>
> Thanks for your work on finding this and putting a fix together. As
> we discussed previously, I think this looks good, but can you describe
> the testing you did to verify that this works correctly?
>
>>  diff --git a/include/net/icmp.h b/include/net/icmp.h
>>  index 6ac3a5b..e0f709d 100644
>>  --- a/include/net/icmp.h
>>  +++ b/include/net/icmp.h
>>  @@ -22,6 +22,7 @@
>>
>>   #include <net/inet_sock.h>
>>   #include <net/snmp.h>
>>  +#include <net/ip.h>
>>
>>   struct icmp_err {
>>     int errno;
>>  @@ -39,7 +40,13 @@ struct icmp_err {
>>   struct sk_buff;
>>   struct net;
>>
>>  -void icmp_send(struct sk_buff *skb_in, int type, int code, __be32 info);
>>  +void __icmp_send(struct sk_buff *skb_in, int type, int code, __be32 info,
>>  + const struct ip_options *opt);
>>  +static inline void icmp_send(struct sk_buff *skb_in, int type, int code, __be32 info)
>>  +{
>>  + __icmp_send(skb_in, type, code, info, &IPCB(skb_in)->opt);
>>  +}
>>  +
>>   int icmp_rcv(struct sk_buff *skb);
>>   int icmp_err(struct sk_buff *skb, u32 info);
>>   int icmp_init(void);
>>  diff --git a/net/ipv4/cipso_ipv4.c b/net/ipv4/cipso_ipv4.c
>>  index 777fa3b..234d12e 100644
>>  --- a/net/ipv4/cipso_ipv4.c
>>  +++ b/net/ipv4/cipso_ipv4.c
>>  @@ -1735,13 +1735,27 @@ int cipso_v4_validate(const struct sk_buff *skb, unsigned char **option)
>>    */
>>   void cipso_v4_error(struct sk_buff *skb, int error, u32 gateway)
>>   {
>>  + unsigned char optbuf[sizeof(struct ip_options) + 40];
>>  + struct ip_options *opt = (struct ip_options *)optbuf;
>>  +
>>          if (ip_hdr(skb)->protocol == IPPROTO_ICMP || error != -EACCES)
>>                  return;
>>
>>  + /*
>>  + * We might be called above the IP layer,
>>  + * so we can not use icmp_send and IPCB here.
>>  + */
>>  +
>>  + memset(opt, 0, sizeof(struct ip_options));
>>  + opt->optlen = ip_hdr(skb)->ihl*4 - sizeof(struct iphdr);
>>  + memcpy(opt->__data, (unsigned char *)&(ip_hdr(skb)[1]), opt->optlen);
>>  + if (ip_options_compile(dev_net(skb->dev), opt, NULL))
>>  + return;
>>  +
>>          if (gateway)
>>  - icmp_send(skb, ICMP_DEST_UNREACH, ICMP_NET_ANO, 0);
>>  + __icmp_send(skb, ICMP_DEST_UNREACH, ICMP_NET_ANO, 0, opt);
>>          else
>>  - icmp_send(skb, ICMP_DEST_UNREACH, ICMP_HOST_ANO, 0);
>>  + __icmp_send(skb, ICMP_DEST_UNREACH, ICMP_HOST_ANO, 0, opt);
>>   }
>>
>>   /**
>>  diff --git a/net/ipv4/icmp.c b/net/ipv4/icmp.c
>>  index 065997f..3f24414 100644
>>  --- a/net/ipv4/icmp.c
>>  +++ b/net/ipv4/icmp.c
>>  @@ -570,7 +570,8 @@ static void icmp_reply(struct icmp_bxm *icmp_param, struct sk_buff *skb)
>>    * MUST reply to only the first fragment.
>>    */
>>
>>  -void icmp_send(struct sk_buff *skb_in, int type, int code, __be32 info)
>>  +void __icmp_send(struct sk_buff *skb_in, int type, int code, __be32 info,
>>  + const struct ip_options *opt)
>>   {
>>          struct iphdr *iph;
>>          int room;
>>  @@ -691,7 +692,7 @@ void icmp_send(struct sk_buff *skb_in, int type, int code, __be32 info)
>>                                            iph->tos;
>>          mark = IP4_REPLY_MARK(net, skb_in->mark);
>>
>>  - if (ip_options_echo(net, &icmp_param.replyopts.opt.opt, skb_in))
>>  + if (__ip_options_echo(net, &icmp_param.replyopts.opt.opt, skb_in, opt))
>>                  goto out_unlock;
>>
>>  @@ -742,7 +743,7 @@ void icmp_send(struct sk_buff *skb_in, int type, int code, __be32 info)
>>          local_bh_enable();
>>   out:;
>>   }
>>  -EXPORT_SYMBOL(icmp_send);
>>  +EXPORT_SYMBOL(__icmp_send);
>>
>>   static void icmp_socket_deliver(struct sk_buff *skb, u32 info)
>>  --
>
> --
> paul moore
> www.paul-moore.com
Nazarov Sergey Feb. 14, 2019, 6:14 p.m. UTC | #4
Now the problem comes from TCP layer only. Is the IP over ATM operates over IP layer?

14.02.2019, 19:43, "David Miller" <davem@davemloft.net>:
> From: Nazarov Sergey <s-nazarov@yandex.ru>
> Date: Tue, 12 Feb 2019 18:10:03 +0300
>
>>  Since cipso_v4_error might be called from different network stack layers, we can't safely use icmp_send there.
>>  icmp_send copies IP options with ip_option_echo, which uses IPCB to take access to IP header compiled data.
>>  But after commit 971f10ec ("tcp: better TCP_SKB_CB layout to reduce cache line misses"), IPCB can't be used
>>  above IP layer.
>>  This patch fixes the problem by creating in cipso_v4_error a local copy of compiled IP options and using it with
>>  introduced __icmp_send function. This looks some overloaded, but in quite rare error conditions only.
>>
>>  The original discussion is here:
>>  https://lore.kernel.org/linux-security-module/16659801547571984@sas1-890ba5c2334a.qloud-c.yandex.net/
>>
>>  Signed-off-by: Sergey Nazarov <s-nazarov@yandex.ru>
>
> This problem is not unique to Cipso, net/atm/clip.c's error handler
> has the same exact issue.
>
> I didn't scan more of the tree, there are probably a couple more
> locations as well.
Stephen Smalley Feb. 14, 2019, 6:59 p.m. UTC | #5
On 2/14/19 1:00 PM, Nazarov Sergey wrote:
> Hi, Paul!
> I've found the problem and testing it with some very specific custom lsm module. The test case was simple:
> standard TCP/IP client-server application, where server opens CIPSO labeled TCP socket, and client connecting
> to this socket with forbidden labels. After several connections kernel crashing with general memory protection or
> kernel cache inconsistent error.
> I think, the similar behaviour should be with selinux or smack in the same conditions. But I don't know them
> so good to reproduce situation.

For SELinux, you can use
https://github.com/SELinuxProject/selinux-testsuite

That includes testing of CIPSO, both connecting from a client with an 
authorized level and from a client with an unauthorized level.

Not sure about Smack; there were some tests in LTP but I don't know if 
they would exercise it.

> After applying patch, I haven't kernel crashes.
> But now I've made additional checks and found no response icmp packets. The ip_options_compile requires
> CAP_NET_RAW capability when CIPSO option compiling, if skb is NULL. I have no other ideas than returning to
> the early patch version with ip_options_compile modified. What do you think about that?


> 
> 14.02.2019, 00:42, "Paul Moore" <paul@paul-moore.com>:
>> On Tue, Feb 12, 2019 at 10:10 AM Nazarov Sergey <s-nazarov@yandex.ru> wrote:
>>>   Since cipso_v4_error might be called from different network stack layers, we can't safely use icmp_send there.
>>>   icmp_send copies IP options with ip_option_echo, which uses IPCB to take access to IP header compiled data.
>>>   But after commit 971f10ec ("tcp: better TCP_SKB_CB layout to reduce cache line misses"), IPCB can't be used
>>>   above IP layer.
>>>   This patch fixes the problem by creating in cipso_v4_error a local copy of compiled IP options and using it with
>>>   introduced __icmp_send function. This looks some overloaded, but in quite rare error conditions only.
>>>
>>>   The original discussion is here:
>>>   https://lore.kernel.org/linux-security-module/16659801547571984@sas1-890ba5c2334a.qloud-c.yandex.net/
>>>
>>>   Signed-off-by: Sergey Nazarov <s-nazarov@yandex.ru>
>>>   ---
>>>    include/net/icmp.h | 9 ++++++++-
>>>    net/ipv4/cipso_ipv4.c | 18 ++++++++++++++++--
>>>    net/ipv4/icmp.c | 7 ++++---
>>>    3 files changed, 28 insertions(+), 6 deletions(-)
>>
>> Hi Sergey,
>>
>> Thanks for your work on finding this and putting a fix together. As
>> we discussed previously, I think this looks good, but can you describe
>> the testing you did to verify that this works correctly?
>>
>>>   diff --git a/include/net/icmp.h b/include/net/icmp.h
>>>   index 6ac3a5b..e0f709d 100644
>>>   --- a/include/net/icmp.h
>>>   +++ b/include/net/icmp.h
>>>   @@ -22,6 +22,7 @@
>>>
>>>    #include <net/inet_sock.h>
>>>    #include <net/snmp.h>
>>>   +#include <net/ip.h>
>>>
>>>    struct icmp_err {
>>>      int errno;
>>>   @@ -39,7 +40,13 @@ struct icmp_err {
>>>    struct sk_buff;
>>>    struct net;
>>>
>>>   -void icmp_send(struct sk_buff *skb_in, int type, int code, __be32 info);
>>>   +void __icmp_send(struct sk_buff *skb_in, int type, int code, __be32 info,
>>>   + const struct ip_options *opt);
>>>   +static inline void icmp_send(struct sk_buff *skb_in, int type, int code, __be32 info)
>>>   +{
>>>   + __icmp_send(skb_in, type, code, info, &IPCB(skb_in)->opt);
>>>   +}
>>>   +
>>>    int icmp_rcv(struct sk_buff *skb);
>>>    int icmp_err(struct sk_buff *skb, u32 info);
>>>    int icmp_init(void);
>>>   diff --git a/net/ipv4/cipso_ipv4.c b/net/ipv4/cipso_ipv4.c
>>>   index 777fa3b..234d12e 100644
>>>   --- a/net/ipv4/cipso_ipv4.c
>>>   +++ b/net/ipv4/cipso_ipv4.c
>>>   @@ -1735,13 +1735,27 @@ int cipso_v4_validate(const struct sk_buff *skb, unsigned char **option)
>>>     */
>>>    void cipso_v4_error(struct sk_buff *skb, int error, u32 gateway)
>>>    {
>>>   + unsigned char optbuf[sizeof(struct ip_options) + 40];
>>>   + struct ip_options *opt = (struct ip_options *)optbuf;
>>>   +
>>>           if (ip_hdr(skb)->protocol == IPPROTO_ICMP || error != -EACCES)
>>>                   return;
>>>
>>>   + /*
>>>   + * We might be called above the IP layer,
>>>   + * so we can not use icmp_send and IPCB here.
>>>   + */
>>>   +
>>>   + memset(opt, 0, sizeof(struct ip_options));
>>>   + opt->optlen = ip_hdr(skb)->ihl*4 - sizeof(struct iphdr);
>>>   + memcpy(opt->__data, (unsigned char *)&(ip_hdr(skb)[1]), opt->optlen);
>>>   + if (ip_options_compile(dev_net(skb->dev), opt, NULL))
>>>   + return;
>>>   +
>>>           if (gateway)
>>>   - icmp_send(skb, ICMP_DEST_UNREACH, ICMP_NET_ANO, 0);
>>>   + __icmp_send(skb, ICMP_DEST_UNREACH, ICMP_NET_ANO, 0, opt);
>>>           else
>>>   - icmp_send(skb, ICMP_DEST_UNREACH, ICMP_HOST_ANO, 0);
>>>   + __icmp_send(skb, ICMP_DEST_UNREACH, ICMP_HOST_ANO, 0, opt);
>>>    }
>>>
>>>    /**
>>>   diff --git a/net/ipv4/icmp.c b/net/ipv4/icmp.c
>>>   index 065997f..3f24414 100644
>>>   --- a/net/ipv4/icmp.c
>>>   +++ b/net/ipv4/icmp.c
>>>   @@ -570,7 +570,8 @@ static void icmp_reply(struct icmp_bxm *icmp_param, struct sk_buff *skb)
>>>     * MUST reply to only the first fragment.
>>>     */
>>>
>>>   -void icmp_send(struct sk_buff *skb_in, int type, int code, __be32 info)
>>>   +void __icmp_send(struct sk_buff *skb_in, int type, int code, __be32 info,
>>>   + const struct ip_options *opt)
>>>    {
>>>           struct iphdr *iph;
>>>           int room;
>>>   @@ -691,7 +692,7 @@ void icmp_send(struct sk_buff *skb_in, int type, int code, __be32 info)
>>>                                             iph->tos;
>>>           mark = IP4_REPLY_MARK(net, skb_in->mark);
>>>
>>>   - if (ip_options_echo(net, &icmp_param.replyopts.opt.opt, skb_in))
>>>   + if (__ip_options_echo(net, &icmp_param.replyopts.opt.opt, skb_in, opt))
>>>                   goto out_unlock;
>>>
>>>   @@ -742,7 +743,7 @@ void icmp_send(struct sk_buff *skb_in, int type, int code, __be32 info)
>>>           local_bh_enable();
>>>    out:;
>>>    }
>>>   -EXPORT_SYMBOL(icmp_send);
>>>   +EXPORT_SYMBOL(__icmp_send);
>>>
>>>    static void icmp_socket_deliver(struct sk_buff *skb, u32 info)
>>>   --
>>
>> --
>> paul moore
>> www.paul-moore.com
Paul Moore Feb. 15, 2019, 7:02 p.m. UTC | #6
On Thu, Feb 14, 2019 at 11:43 AM David Miller <davem@davemloft.net> wrote:
> From: Nazarov Sergey <s-nazarov@yandex.ru>
> Date: Tue, 12 Feb 2019 18:10:03 +0300
>
> > Since cipso_v4_error might be called from different network stack layers, we can't safely use icmp_send there.
> > icmp_send copies IP options with ip_option_echo, which uses IPCB to take access to IP header compiled data.
> > But after commit 971f10ec ("tcp: better TCP_SKB_CB layout to reduce cache line misses"), IPCB can't be used
> > above IP layer.
> > This patch fixes the problem by creating in cipso_v4_error a local copy of compiled IP options and using it with
> > introduced __icmp_send function. This looks some overloaded, but in quite rare error conditions only.
> >
> > The original discussion is here:
> > https://lore.kernel.org/linux-security-module/16659801547571984@sas1-890ba5c2334a.qloud-c.yandex.net/
> >
> > Signed-off-by: Sergey Nazarov <s-nazarov@yandex.ru>
>
> This problem is not unique to Cipso, net/atm/clip.c's error handler
> has the same exact issue.
>
> I didn't scan more of the tree, there are probably a couple more
> locations as well.

David, are you happy with Sergey's solution as a fix for this?

If so, would you prefer a respin of this patch to apply the to the
other broken callers (e.g. net/atm/clip.c), or would you rather merge
this patch and deal with the other callers in separate patches?
David Miller Feb. 15, 2019, 8 p.m. UTC | #7
From: Paul Moore <paul@paul-moore.com>
Date: Fri, 15 Feb 2019 14:02:31 -0500

> On Thu, Feb 14, 2019 at 11:43 AM David Miller <davem@davemloft.net> wrote:
>> From: Nazarov Sergey <s-nazarov@yandex.ru>
>> Date: Tue, 12 Feb 2019 18:10:03 +0300
>>
>> > Since cipso_v4_error might be called from different network stack layers, we can't safely use icmp_send there.
>> > icmp_send copies IP options with ip_option_echo, which uses IPCB to take access to IP header compiled data.
>> > But after commit 971f10ec ("tcp: better TCP_SKB_CB layout to reduce cache line misses"), IPCB can't be used
>> > above IP layer.
>> > This patch fixes the problem by creating in cipso_v4_error a local copy of compiled IP options and using it with
>> > introduced __icmp_send function. This looks some overloaded, but in quite rare error conditions only.
>> >
>> > The original discussion is here:
>> > https://lore.kernel.org/linux-security-module/16659801547571984@sas1-890ba5c2334a.qloud-c.yandex.net/
>> >
>> > Signed-off-by: Sergey Nazarov <s-nazarov@yandex.ru>
>>
>> This problem is not unique to Cipso, net/atm/clip.c's error handler
>> has the same exact issue.
>>
>> I didn't scan more of the tree, there are probably a couple more
>> locations as well.
> 
> David, are you happy with Sergey's solution as a fix for this?
> 
> If so, would you prefer a respin of this patch to apply the to the
> other broken callers (e.g. net/atm/clip.c), or would you rather merge
> this patch and deal with the other callers in separate patches?

I'd like the other broken callers to be handled.
Paul Moore Feb. 15, 2019, 8:04 p.m. UTC | #8
On Fri, Feb 15, 2019 at 3:00 PM David Miller <davem@davemloft.net> wrote:
> From: Paul Moore <paul@paul-moore.com>
> Date: Fri, 15 Feb 2019 14:02:31 -0500
>
> > On Thu, Feb 14, 2019 at 11:43 AM David Miller <davem@davemloft.net> wrote:
> >> From: Nazarov Sergey <s-nazarov@yandex.ru>
> >> Date: Tue, 12 Feb 2019 18:10:03 +0300
> >>
> >> > Since cipso_v4_error might be called from different network stack layers, we can't safely use icmp_send there.
> >> > icmp_send copies IP options with ip_option_echo, which uses IPCB to take access to IP header compiled data.
> >> > But after commit 971f10ec ("tcp: better TCP_SKB_CB layout to reduce cache line misses"), IPCB can't be used
> >> > above IP layer.
> >> > This patch fixes the problem by creating in cipso_v4_error a local copy of compiled IP options and using it with
> >> > introduced __icmp_send function. This looks some overloaded, but in quite rare error conditions only.
> >> >
> >> > The original discussion is here:
> >> > https://lore.kernel.org/linux-security-module/16659801547571984@sas1-890ba5c2334a.qloud-c.yandex.net/
> >> >
> >> > Signed-off-by: Sergey Nazarov <s-nazarov@yandex.ru>
> >>
> >> This problem is not unique to Cipso, net/atm/clip.c's error handler
> >> has the same exact issue.
> >>
> >> I didn't scan more of the tree, there are probably a couple more
> >> locations as well.
> >
> > David, are you happy with Sergey's solution as a fix for this?
> >
> > If so, would you prefer a respin of this patch to apply the to the
> > other broken callers (e.g. net/atm/clip.c), or would you rather merge
> > this patch and deal with the other callers in separate patches?
>
> I'd like the other broken callers to be handled.

Sergey, do you think you could fix the other callers too, or do you
want some help with that?
Nazarov Sergey Feb. 18, 2019, 1:39 p.m. UTC | #9
I think, it would not be a good solution, if I will analyze all subsystems using icmp_send, because I do not have enough knowledge for this.
I propose to add a new function, for example, ismp_send_safe, something like that:

void icmp_send_safe(struct sk_buff *skb_in, int type, int code, __be32 info)
{
	unsigned char optbuf[sizeof(struct ip_options) + 40];
	struct ip_options *opt = (struct ip_options *)optbuf;

	memset(opt, 0, sizeof(struct ip_options));
	opt->optlen = ip_hdr(skb_in)->ihl*4 - sizeof(struct iphdr);
	memcpy(opt->__data, (unsigned char *)&(ip_hdr(skb_in)[1]), opt->optlen);
	if (ip_options_compile(dev_net(skb_in->dev), opt, NULL))
		return;
	__icmp_send(skb_in, type, code, info, opt);
}

which could be used when the code works at different network stack layers.
And the subsystems developers, knowing their code well, will decide which one to use.

15.02.2019, 23:04, "Paul Moore" <paul@paul-moore.com>:
> On Fri, Feb 15, 2019 at 3:00 PM David Miller <davem@davemloft.net> wrote:
>>  From: Paul Moore <paul@paul-moore.com>
>>  Date: Fri, 15 Feb 2019 14:02:31 -0500
>>
>>  > On Thu, Feb 14, 2019 at 11:43 AM David Miller <davem@davemloft.net> wrote:
>>  >> From: Nazarov Sergey <s-nazarov@yandex.ru>
>>  >> Date: Tue, 12 Feb 2019 18:10:03 +0300
>>  >>
>>  >> > Since cipso_v4_error might be called from different network stack layers, we can't safely use icmp_send there.
>>  >> > icmp_send copies IP options with ip_option_echo, which uses IPCB to take access to IP header compiled data.
>>  >> > But after commit 971f10ec ("tcp: better TCP_SKB_CB layout to reduce cache line misses"), IPCB can't be used
>>  >> > above IP layer.
>>  >> > This patch fixes the problem by creating in cipso_v4_error a local copy of compiled IP options and using it with
>>  >> > introduced __icmp_send function. This looks some overloaded, but in quite rare error conditions only.
>>  >> >
>>  >> > The original discussion is here:
>>  >> > https://lore.kernel.org/linux-security-module/16659801547571984@sas1-890ba5c2334a.qloud-c.yandex.net/
>>  >> >
>>  >> > Signed-off-by: Sergey Nazarov <s-nazarov@yandex.ru>
>>  >>
>>  >> This problem is not unique to Cipso, net/atm/clip.c's error handler
>>  >> has the same exact issue.
>>  >>
>>  >> I didn't scan more of the tree, there are probably a couple more
>>  >> locations as well.
>>  >
>>  > David, are you happy with Sergey's solution as a fix for this?
>>  >
>>  > If so, would you prefer a respin of this patch to apply the to the
>>  > other broken callers (e.g. net/atm/clip.c), or would you rather merge
>>  > this patch and deal with the other callers in separate patches?
>>
>>  I'd like the other broken callers to be handled.
>
> Sergey, do you think you could fix the other callers too, or do you
> want some help with that?
>
> --
> paul moore
> www.paul-moore.com
David Miller Feb. 19, 2019, 1:25 a.m. UTC | #10
From: Nazarov Sergey <s-nazarov@yandex.ru>
Date: Mon, 18 Feb 2019 16:39:11 +0300

> I think, it would not be a good solution, if I will analyze all
> subsystems using icmp_send, because I do not have enough knowledge
> for this.  I propose to add a new function, for example,
> ismp_send_safe, something like that:

Please don't do this.

Solve the problem properly by auditing each case, there aren't a lot and
it is not too difficult to see the upcall sites.

Patch
diff mbox series

diff --git a/include/net/icmp.h b/include/net/icmp.h
index 6ac3a5b..e0f709d 100644
--- a/include/net/icmp.h
+++ b/include/net/icmp.h
@@ -22,6 +22,7 @@ 
 
 #include <net/inet_sock.h>
 #include <net/snmp.h>
+#include <net/ip.h>
 
 struct icmp_err {
   int		errno;
@@ -39,7 +40,13 @@  struct icmp_err {
 struct sk_buff;
 struct net;
 
-void icmp_send(struct sk_buff *skb_in, int type, int code, __be32 info);
+void __icmp_send(struct sk_buff *skb_in, int type, int code, __be32 info,
+		 const struct ip_options *opt);
+static inline void icmp_send(struct sk_buff *skb_in, int type, int code, __be32 info)
+{
+	__icmp_send(skb_in, type, code, info, &IPCB(skb_in)->opt);
+}
+
 int icmp_rcv(struct sk_buff *skb);
 int icmp_err(struct sk_buff *skb, u32 info);
 int icmp_init(void);
diff --git a/net/ipv4/cipso_ipv4.c b/net/ipv4/cipso_ipv4.c
index 777fa3b..234d12e 100644
--- a/net/ipv4/cipso_ipv4.c
+++ b/net/ipv4/cipso_ipv4.c
@@ -1735,13 +1735,27 @@  int cipso_v4_validate(const struct sk_buff *skb, unsigned char **option)
  */
 void cipso_v4_error(struct sk_buff *skb, int error, u32 gateway)
 {
+	unsigned char optbuf[sizeof(struct ip_options) + 40];
+	struct ip_options *opt = (struct ip_options *)optbuf;
+
 	if (ip_hdr(skb)->protocol == IPPROTO_ICMP || error != -EACCES)
 		return;
 
+	/*
+	 * We might be called above the IP layer,
+	 * so we can not use icmp_send and IPCB here.
+	 */
+
+	memset(opt, 0, sizeof(struct ip_options));
+	opt->optlen = ip_hdr(skb)->ihl*4 - sizeof(struct iphdr);
+	memcpy(opt->__data, (unsigned char *)&(ip_hdr(skb)[1]), opt->optlen);
+	if (ip_options_compile(dev_net(skb->dev), opt, NULL))
+		return;
+
 	if (gateway)
-		icmp_send(skb, ICMP_DEST_UNREACH, ICMP_NET_ANO, 0);
+		__icmp_send(skb, ICMP_DEST_UNREACH, ICMP_NET_ANO, 0, opt);
 	else
-		icmp_send(skb, ICMP_DEST_UNREACH, ICMP_HOST_ANO, 0);
+		__icmp_send(skb, ICMP_DEST_UNREACH, ICMP_HOST_ANO, 0, opt);
 }
 
 /**
diff --git a/net/ipv4/icmp.c b/net/ipv4/icmp.c
index 065997f..3f24414 100644
--- a/net/ipv4/icmp.c
+++ b/net/ipv4/icmp.c
@@ -570,7 +570,8 @@  static void icmp_reply(struct icmp_bxm *icmp_param, struct sk_buff *skb)
  *			MUST reply to only the first fragment.
  */
 
-void icmp_send(struct sk_buff *skb_in, int type, int code, __be32 info)
+void __icmp_send(struct sk_buff *skb_in, int type, int code, __be32 info,
+		 const struct ip_options *opt)
 {
 	struct iphdr *iph;
 	int room;
@@ -691,7 +692,7 @@  void icmp_send(struct sk_buff *skb_in, int type, int code, __be32 info)
 					  iph->tos;
 	mark = IP4_REPLY_MARK(net, skb_in->mark);
 
-	if (ip_options_echo(net, &icmp_param.replyopts.opt.opt, skb_in))
+	if (__ip_options_echo(net, &icmp_param.replyopts.opt.opt, skb_in, opt))
 		goto out_unlock;
 
 
@@ -742,7 +743,7 @@  void icmp_send(struct sk_buff *skb_in, int type, int code, __be32 info)
 	local_bh_enable();
 out:;
 }
-EXPORT_SYMBOL(icmp_send);
+EXPORT_SYMBOL(__icmp_send);
 
 
 static void icmp_socket_deliver(struct sk_buff *skb, u32 info)