From patchwork Fri Jun 12 16:49:33 2020 Content-Type: text/plain; charset="utf-8" MIME-Version: 1.0 Content-Transfer-Encoding: 7bit X-Patchwork-Submitter: Andrea Mayer X-Patchwork-Id: 11602043 Return-Path: Received: from mail.kernel.org (pdx-korg-mail-1.web.codeaurora.org [172.30.200.123]) by pdx-korg-patchwork-2.web.codeaurora.org (Postfix) with ESMTP id 437A8913 for ; Fri, 12 Jun 2020 17:03:45 +0000 (UTC) Received: from vger.kernel.org (vger.kernel.org [23.128.96.18]) by mail.kernel.org (Postfix) with ESMTP id 33BE42084D for ; Fri, 12 Jun 2020 17:03:45 +0000 (UTC) Received: (majordomo@vger.kernel.org) by vger.kernel.org via listexpand id S1726275AbgFLRDK (ORCPT ); Fri, 12 Jun 2020 13:03:10 -0400 Received: from smtp.uniroma2.it ([160.80.6.16]:43544 "EHLO smtp.uniroma2.it" rhost-flags-OK-OK-OK-OK) by vger.kernel.org with ESMTP id S1726290AbgFLRDJ (ORCPT ); Fri, 12 Jun 2020 13:03:09 -0400 X-Greylist: delayed 670 seconds by postgrey-1.27 at vger.kernel.org; Fri, 12 Jun 2020 13:02:58 EDT Received: from localhost.localdomain ([160.80.103.126]) by smtp-2015.uniroma2.it (8.14.4/8.14.4/Debian-8) with ESMTP id 05CGpXZq019363 (version=TLSv1/SSLv3 cipher=ECDHE-RSA-AES128-GCM-SHA256 bits=128 verify=NOT); Fri, 12 Jun 2020 18:51:34 +0200 From: Andrea Mayer To: David Ahern , "David S. Miller" , Shrijeet Mukherjee , Jakub Kicinski , Shuah Khan , netdev@vger.kernel.org, linux-kernel@vger.kernel.org, linux-kselftest@vger.kernel.org Cc: Donald Sharp , Roopa Prabhu , Dinesh Dutt , Stefano Salsano , Paolo Lungaroni , Ahmed Abdelsalam , Andrea Mayer Subject: [RFC,net-next, 1/5] l3mdev: add infrastructure for table to VRF mapping Date: Fri, 12 Jun 2020 18:49:33 +0200 Message-Id: <20200612164937.5468-2-andrea.mayer@uniroma2.it> X-Mailer: git-send-email 2.20.1 In-Reply-To: <20200612164937.5468-1-andrea.mayer@uniroma2.it> References: <20200612164937.5468-1-andrea.mayer@uniroma2.it> MIME-Version: 1.0 X-Virus-Scanned: clamav-milter 0.100.0 at smtp-2015 X-Virus-Status: Clean Sender: linux-kselftest-owner@vger.kernel.org Precedence: bulk List-ID: X-Mailing-List: linux-kselftest@vger.kernel.org Add infrastructure to l3mdev (the core code for Layer 3 master devices) in order to find out the corresponding VRF device for a given table id. Therefore, the l3mdev implementations: - can register a callback that returns the device index of the l3mdev associated with a given table id; - can offer the lookup function (table to VRF device). Signed-off-by: Andrea Mayer --- include/net/l3mdev.h | 37 +++++++++++++++++ net/l3mdev/l3mdev.c | 95 ++++++++++++++++++++++++++++++++++++++++++++ 2 files changed, 132 insertions(+) diff --git a/include/net/l3mdev.h b/include/net/l3mdev.h index e942372b077b..37aed117455f 100644 --- a/include/net/l3mdev.h +++ b/include/net/l3mdev.h @@ -10,6 +10,14 @@ #include #include +enum l3mdev_type { + L3MDEV_TYPE_UNSPEC, + L3MDEV_TYPE_VRF, + __L3MDEV_TYPE_MAX +}; + +#define L3MDEV_TYPE_MAX (__L3MDEV_TYPE_MAX - 1) + /** * struct l3mdev_ops - l3mdev operations * @@ -37,6 +45,15 @@ struct l3mdev_ops { #ifdef CONFIG_NET_L3_MASTER_DEV +int l3mdev_table_lookup_register(enum l3mdev_type l3type, + int (*fn)(struct net *net, u32 table_id)); + +void l3mdev_table_lookup_unregister(enum l3mdev_type l3type, + int (*fn)(struct net *net, u32 table_id)); + +int l3mdev_ifindex_lookup_by_table_id(enum l3mdev_type l3type, struct net *net, + u32 table_id); + int l3mdev_fib_rule_match(struct net *net, struct flowi *fl, struct fib_lookup_arg *arg); @@ -280,6 +297,26 @@ struct sk_buff *l3mdev_ip6_out(struct sock *sk, struct sk_buff *skb) return skb; } +static inline +int l3mdev_table_lookup_register(enum l3mdev_type l3type, + int (*fn)(struct net *net, u32 table_id)) +{ + return -EOPNOTSUPP; +} + +static inline +void l3mdev_table_lookup_unregister(enum l3mdev_type l3type, + int (*fn)(struct net *net, u32 table_id)) +{ +} + +static inline +int l3mdev_ifindex_lookup_by_table_id(enum l3mdev_type l3type, struct net *net, + u32 table_id) +{ + return -ENODEV; +} + static inline int l3mdev_fib_rule_match(struct net *net, struct flowi *fl, struct fib_lookup_arg *arg) diff --git a/net/l3mdev/l3mdev.c b/net/l3mdev/l3mdev.c index f35899d45a9a..6cc1fe7eb039 100644 --- a/net/l3mdev/l3mdev.c +++ b/net/l3mdev/l3mdev.c @@ -9,6 +9,101 @@ #include #include +DEFINE_SPINLOCK(l3mdev_lock); + +typedef int (*lookup_by_table_id_t)(struct net *net, u32 table_d); + +struct l3mdev_handler { + lookup_by_table_id_t dev_lookup; +}; + +static struct l3mdev_handler l3mdev_handlers[L3MDEV_TYPE_MAX + 1]; + +static int l3mdev_check_type(enum l3mdev_type l3type) +{ + if (l3type <= L3MDEV_TYPE_UNSPEC || l3type > L3MDEV_TYPE_MAX) + return -EINVAL; + + return 0; +} + +int l3mdev_table_lookup_register(enum l3mdev_type l3type, + lookup_by_table_id_t fn) +{ + struct l3mdev_handler *hdlr; + int res; + + res = l3mdev_check_type(l3type); + if (res) + return res; + + hdlr = &l3mdev_handlers[l3type]; + + spin_lock(&l3mdev_lock); + + if (hdlr->dev_lookup) { + res = -EBUSY; + goto unlock; + } + + hdlr->dev_lookup = fn; + res = 0; + +unlock: + spin_unlock(&l3mdev_lock); + + return res; +} +EXPORT_SYMBOL_GPL(l3mdev_table_lookup_register); + +void l3mdev_table_lookup_unregister(enum l3mdev_type l3type, + lookup_by_table_id_t fn) +{ + struct l3mdev_handler *hdlr; + + if (l3mdev_check_type(l3type)) + return; + + hdlr = &l3mdev_handlers[l3type]; + + spin_lock(&l3mdev_lock); + + if (hdlr->dev_lookup == fn) + hdlr->dev_lookup = NULL; + + spin_unlock(&l3mdev_lock); +} +EXPORT_SYMBOL_GPL(l3mdev_table_lookup_unregister); + +int l3mdev_ifindex_lookup_by_table_id(enum l3mdev_type l3type, + struct net *net, u32 table_id) +{ + lookup_by_table_id_t lookup; + struct l3mdev_handler *hdlr; + int ifindex = -EINVAL; + int res; + + res = l3mdev_check_type(l3type); + if (res) + return res; + + hdlr = &l3mdev_handlers[l3type]; + + spin_lock(&l3mdev_lock); + + lookup = hdlr->dev_lookup; + if (!lookup) + goto unlock; + + ifindex = lookup(net, table_id); + +unlock: + spin_unlock(&l3mdev_lock); + + return ifindex; +} +EXPORT_SYMBOL_GPL(l3mdev_ifindex_lookup_by_table_id); + /** * l3mdev_master_ifindex - get index of L3 master device * @dev: targeted interface From patchwork Fri Jun 12 16:49:34 2020 Content-Type: text/plain; charset="utf-8" MIME-Version: 1.0 Content-Transfer-Encoding: 7bit X-Patchwork-Submitter: Andrea Mayer X-Patchwork-Id: 11602041 Return-Path: Received: from mail.kernel.org (pdx-korg-mail-1.web.codeaurora.org [172.30.200.123]) by pdx-korg-patchwork-2.web.codeaurora.org (Postfix) with ESMTP id E4FB9913 for ; Fri, 12 Jun 2020 17:03:39 +0000 (UTC) Received: from vger.kernel.org (vger.kernel.org [23.128.96.18]) by mail.kernel.org (Postfix) with ESMTP id CC7B320801 for ; Fri, 12 Jun 2020 17:03:39 +0000 (UTC) Received: (majordomo@vger.kernel.org) by vger.kernel.org via listexpand id S1726496AbgFLRDb (ORCPT ); Fri, 12 Jun 2020 13:03:31 -0400 Received: from smtp.uniroma2.it ([160.80.6.16]:43544 "EHLO smtp.uniroma2.it" rhost-flags-OK-OK-OK-OK) by vger.kernel.org with ESMTP id S1726463AbgFLRDV (ORCPT ); Fri, 12 Jun 2020 13:03:21 -0400 X-Greylist: delayed 670 seconds by postgrey-1.27 at vger.kernel.org; Fri, 12 Jun 2020 13:02:58 EDT Received: from localhost.localdomain ([160.80.103.126]) by smtp-2015.uniroma2.it (8.14.4/8.14.4/Debian-8) with ESMTP id 05CGpXZr019363 (version=TLSv1/SSLv3 cipher=ECDHE-RSA-AES128-GCM-SHA256 bits=128 verify=NOT); Fri, 12 Jun 2020 18:51:34 +0200 From: Andrea Mayer To: David Ahern , "David S. Miller" , Shrijeet Mukherjee , Jakub Kicinski , Shuah Khan , netdev@vger.kernel.org, linux-kernel@vger.kernel.org, linux-kselftest@vger.kernel.org Cc: Donald Sharp , Roopa Prabhu , Dinesh Dutt , Stefano Salsano , Paolo Lungaroni , Ahmed Abdelsalam , Andrea Mayer Subject: [RFC,net-next, 2/5] vrf: track associations between VRF devices and tables Date: Fri, 12 Jun 2020 18:49:34 +0200 Message-Id: <20200612164937.5468-3-andrea.mayer@uniroma2.it> X-Mailer: git-send-email 2.20.1 In-Reply-To: <20200612164937.5468-1-andrea.mayer@uniroma2.it> References: <20200612164937.5468-1-andrea.mayer@uniroma2.it> MIME-Version: 1.0 X-Virus-Scanned: clamav-milter 0.100.0 at smtp-2015 X-Virus-Status: Clean Sender: linux-kselftest-owner@vger.kernel.org Precedence: bulk List-ID: X-Mailing-List: linux-kselftest@vger.kernel.org Add the data structures and the processing logic to keep track of the associations between VRF devices and routing tables. When a VRF is instantiated, it needs to refer to a given routing table. For each table, we explicitly keep track of the existing VRFs that refer to the table. Signed-off-by: Andrea Mayer --- drivers/net/vrf.c | 272 +++++++++++++++++++++++++++++++++++++++++++++- 1 file changed, 267 insertions(+), 5 deletions(-) diff --git a/drivers/net/vrf.c b/drivers/net/vrf.c index 43928a1c2f2a..f772aac6a04c 100644 --- a/drivers/net/vrf.c +++ b/drivers/net/vrf.c @@ -21,6 +21,7 @@ #include #include #include +#include #include #include @@ -35,12 +36,74 @@ #include #define DRV_NAME "vrf" -#define DRV_VERSION "1.0" +#define DRV_VERSION "1.1" #define FIB_RULE_PREF 1000 /* default preference for FIB rules */ +#define HT_MAP_BITS 4 +#define HASH_INITVAL ((u32)0xcafef00d) + +struct vrf_map { + DECLARE_HASHTABLE(ht, HT_MAP_BITS); + spinlock_t vmap_lock; + + /* shared_tables: + * count how many distinct tables does not comply with the + * strict mode requirement. + * shared_table value must be 0 in order to switch to strict mode. + * + * example of evolution of shared_table: + * | time + * add vrf0 --> table 100 shared_tables = 0 | t0 + * add vrf1 --> table 101 shared_tables = 0 | t1 + * add vrf2 --> table 100 shared_tables = 1 | t2 + * add vrf3 --> table 100 shared_tables = 1 | t3 + * add vrf4 --> table 101 shared_tables = 2 v t4 + * + * shared_tables is a "step function" (or "staircase function") + * and is increased by one when the second vrf is associated to a table + * + * at t2, vrf0 and vrf2 are bound to table 100: shared_table = 1. + * + * at t3, another dev (vrf3) is bound to the same table 100 but the + * shared_table counters is still 1. + * This means that no matter how many new vrfs will register on the + * table 100, the shared_table will not increase (considering only + * table 100). + * + * at t4, vrf4 is bound to table 101, and shared_table = 2. + * + * Looking at the value of shared_tables we can immediately know if + * the strict_mode can or cannot be enforced. Indeed, strict_mode + * can be enforced iff shared_table = 0. + * + * Conversely, shared_table is decreased when a vrf is de-associated + * from a table with exactly two associated vrfs. + */ + int shared_tables; + + bool strict_mode; +}; + +struct vrf_map_elem { + struct hlist_node hnode; + struct list_head vrf_list; /* VRFs registered to this table */ + + u32 table_id; + int users; + int ifindex; +}; + static unsigned int vrf_net_id; +/* per netns vrf data */ +struct netns_vrf { + /* protected by rtnl lock */ + bool add_fib_rules; + + struct vrf_map vmap; +}; + struct net_vrf { struct rtable __rcu *rth; struct rt6_info __rcu *rt6; @@ -48,6 +111,9 @@ struct net_vrf { struct fib6_table *fib6_table; #endif u32 tb_id; + + struct list_head me_list; /* entry in vrf_map_elem */ + int ifindex; }; struct pcpu_dstats { @@ -103,6 +169,173 @@ static void vrf_get_stats64(struct net_device *dev, } } +static struct vrf_map *netns_vrf_map(struct net *net) +{ + struct netns_vrf *nn_vrf = net_generic(net, vrf_net_id); + + return &nn_vrf->vmap; +} + +static struct vrf_map *netns_vrf_map_by_dev(struct net_device *dev) +{ + return netns_vrf_map(dev_net(dev)); +} + +static struct vrf_map_elem *vrf_map_elem_alloc(gfp_t flags) +{ + struct vrf_map_elem *me; + + me = kmalloc(sizeof(*me), flags); + if (!me) + return NULL; + + return me; +} + +static void vrf_map_elem_free(struct vrf_map_elem *me) +{ + kfree(me); +} + +static void vrf_map_elem_init(struct vrf_map_elem *me, int table_id, + int ifindex, int users) +{ + me->table_id = table_id; + me->ifindex = ifindex; + me->users = users; + INIT_LIST_HEAD(&me->vrf_list); +} + +static struct vrf_map_elem *vrf_map_lookup_elem(struct vrf_map *vmap, + u32 table_id) +{ + struct vrf_map_elem *me; + u32 key; + + key = jhash_1word(table_id, HASH_INITVAL); + hash_for_each_possible(vmap->ht, me, hnode, key) { + if (me->table_id == table_id) + return me; + } + + return NULL; +} + +static void vrf_map_add_elem(struct vrf_map *vmap, struct vrf_map_elem *me) +{ + u32 table_id = me->table_id; + u32 key; + + key = jhash_1word(table_id, HASH_INITVAL); + hash_add(vmap->ht, &me->hnode, key); +} + +static void vrf_map_del_elem(struct vrf_map_elem *me) +{ + hash_del(&me->hnode); +} + +static void vrf_map_lock(struct vrf_map *vmap) __acquires(&vmap->vmap_lock) +{ + spin_lock(&vmap->vmap_lock); +} + +static void vrf_map_unlock(struct vrf_map *vmap) __releases(&vmap->vmap_lock) +{ + spin_unlock(&vmap->vmap_lock); +} + +/* called with rtnl lock held */ +static int +vrf_map_register_dev(struct net_device *dev, struct netlink_ext_ack *extack) +{ + struct vrf_map *vmap = netns_vrf_map_by_dev(dev); + struct net_vrf *vrf = netdev_priv(dev); + struct vrf_map_elem *new_me, *me; + u32 table_id = vrf->tb_id; + bool free_new_me = false; + int users; + int res; + + /* we pre-allocate elements used in the spin-locked section (so that we + * keep the spinlock as short as possibile). + */ + new_me = vrf_map_elem_alloc(GFP_KERNEL); + if (!new_me) + return -ENOMEM; + + vrf_map_elem_init(new_me, table_id, dev->ifindex, 0); + + vrf_map_lock(vmap); + + me = vrf_map_lookup_elem(vmap, table_id); + if (!me) { + me = new_me; + vrf_map_add_elem(vmap, me); + goto link_vrf; + } + + /* we already have an entry in the vrf_map, so it means there is (at + * least) a vrf registered on the specific table. + */ + free_new_me = true; + if (vmap->strict_mode) { + /* vrfs cannot share the same table */ + NL_SET_ERR_MSG(extack, "Table is used by another VRF"); + res = -EBUSY; + goto unlock; + } + +link_vrf: + users = ++me->users; + if (users == 2) + ++vmap->shared_tables; + + list_add(&vrf->me_list, &me->vrf_list); + + res = 0; + +unlock: + vrf_map_unlock(vmap); + + /* clean-up, if needed */ + if (free_new_me) + vrf_map_elem_free(new_me); + + return res; +} + +/* called with rtnl lock held */ +static void vrf_map_unregister_dev(struct net_device *dev) +{ + struct vrf_map *vmap = netns_vrf_map_by_dev(dev); + struct net_vrf *vrf = netdev_priv(dev); + u32 table_id = vrf->tb_id; + struct vrf_map_elem *me; + int users; + + vrf_map_lock(vmap); + + me = vrf_map_lookup_elem(vmap, table_id); + if (!me) + goto unlock; + + list_del(&vrf->me_list); + + users = --me->users; + if (users == 1) { + --vmap->shared_tables; + } else if (users == 0) { + vrf_map_del_elem(me); + + /* no one will refer to this element anymore */ + vrf_map_elem_free(me); + } + +unlock: + vrf_map_unlock(vmap); +} + /* by default VRF devices do not have a qdisc and are expected * to be created with only a single queue. */ @@ -1319,6 +1552,8 @@ static void vrf_dellink(struct net_device *dev, struct list_head *head) netdev_for_each_lower_dev(dev, port_dev, iter) vrf_del_slave(dev, port_dev); + vrf_map_unregister_dev(dev); + unregister_netdevice_queue(dev, head); } @@ -1327,6 +1562,7 @@ static int vrf_newlink(struct net *src_net, struct net_device *dev, struct netlink_ext_ack *extack) { struct net_vrf *vrf = netdev_priv(dev); + struct netns_vrf *nn_vrf; bool *add_fib_rules; struct net *net; int err; @@ -1349,11 +1585,26 @@ static int vrf_newlink(struct net *src_net, struct net_device *dev, if (err) goto out; + /* mapping between table_id and vrf; + * note: such binding could not be done in the dev init function + * because dev->ifindex id is not available yet. + */ + vrf->ifindex = dev->ifindex; + + err = vrf_map_register_dev(dev, extack); + if (err) { + unregister_netdevice(dev); + goto out; + } + net = dev_net(dev); - add_fib_rules = net_generic(net, vrf_net_id); + nn_vrf = net_generic(net, vrf_net_id); + + add_fib_rules = &nn_vrf->add_fib_rules; if (*add_fib_rules) { err = vrf_add_fib_rules(dev); if (err) { + vrf_map_unregister_dev(dev); unregister_netdevice(dev); goto out; } @@ -1440,12 +1691,23 @@ static struct notifier_block vrf_notifier_block __read_mostly = { .notifier_call = vrf_device_event, }; +static int vrf_map_init(struct vrf_map *vmap) +{ + spin_lock_init(&vmap->vmap_lock); + hash_init(vmap->ht); + + vmap->strict_mode = false; + + return 0; +} + /* Initialize per network namespace state */ static int __net_init vrf_netns_init(struct net *net) { - bool *add_fib_rules = net_generic(net, vrf_net_id); + struct netns_vrf *nn_vrf = net_generic(net, vrf_net_id); - *add_fib_rules = true; + nn_vrf->add_fib_rules = true; + vrf_map_init(&nn_vrf->vmap); return 0; } @@ -1453,7 +1715,7 @@ static int __net_init vrf_netns_init(struct net *net) static struct pernet_operations vrf_net_ops __net_initdata = { .init = vrf_netns_init, .id = &vrf_net_id, - .size = sizeof(bool), + .size = sizeof(struct netns_vrf), }; static int __init vrf_init_module(void) From patchwork Fri Jun 12 16:49:35 2020 Content-Type: text/plain; charset="utf-8" MIME-Version: 1.0 Content-Transfer-Encoding: 7bit X-Patchwork-Submitter: Andrea Mayer X-Patchwork-Id: 11602045 Return-Path: Received: from mail.kernel.org (pdx-korg-mail-1.web.codeaurora.org [172.30.200.123]) by pdx-korg-patchwork-2.web.codeaurora.org (Postfix) with ESMTP id DF45E618 for ; Fri, 12 Jun 2020 17:03:49 +0000 (UTC) Received: from vger.kernel.org (vger.kernel.org [23.128.96.18]) by mail.kernel.org (Postfix) with ESMTP id CF48120801 for ; Fri, 12 Jun 2020 17:03:49 +0000 (UTC) Received: (majordomo@vger.kernel.org) by vger.kernel.org via listexpand id S1726340AbgFLRDG (ORCPT ); Fri, 12 Jun 2020 13:03:06 -0400 Received: from smtp.uniroma2.it ([160.80.6.16]:43544 "EHLO smtp.uniroma2.it" rhost-flags-OK-OK-OK-OK) by vger.kernel.org with ESMTP id S1726089AbgFLRDD (ORCPT ); Fri, 12 Jun 2020 13:03:03 -0400 X-Greylist: delayed 670 seconds by postgrey-1.27 at vger.kernel.org; Fri, 12 Jun 2020 13:02:58 EDT Received: from localhost.localdomain ([160.80.103.126]) by smtp-2015.uniroma2.it (8.14.4/8.14.4/Debian-8) with ESMTP id 05CGpXZs019363 (version=TLSv1/SSLv3 cipher=ECDHE-RSA-AES128-GCM-SHA256 bits=128 verify=NOT); Fri, 12 Jun 2020 18:51:34 +0200 From: Andrea Mayer To: David Ahern , "David S. Miller" , Shrijeet Mukherjee , Jakub Kicinski , Shuah Khan , netdev@vger.kernel.org, linux-kernel@vger.kernel.org, linux-kselftest@vger.kernel.org Cc: Donald Sharp , Roopa Prabhu , Dinesh Dutt , Stefano Salsano , Paolo Lungaroni , Ahmed Abdelsalam , Andrea Mayer Subject: [RFC,net-next, 3/5] vrf: add sysctl parameter for strict mode Date: Fri, 12 Jun 2020 18:49:35 +0200 Message-Id: <20200612164937.5468-4-andrea.mayer@uniroma2.it> X-Mailer: git-send-email 2.20.1 In-Reply-To: <20200612164937.5468-1-andrea.mayer@uniroma2.it> References: <20200612164937.5468-1-andrea.mayer@uniroma2.it> MIME-Version: 1.0 X-Virus-Scanned: clamav-milter 0.100.0 at smtp-2015 X-Virus-Status: Clean Sender: linux-kselftest-owner@vger.kernel.org Precedence: bulk List-ID: X-Mailing-List: linux-kselftest@vger.kernel.org Add net.vrf.strict_mode sysctl parameter. When net.vrf.strict_mode=0 (default) it is possible to associate multiple VRF devices to the same table. Conversely, when net.vrf.strict_mode=1 a table can be associated to a single VRF device. When switching from net.vrf.strict_mode=0 to net.vrf.strict_mode=1, a check is performed to verify that all tables have at most one VRF associated, otherwise the switch is not allowed. The net.vrf.strict_mode parameter is per network namespace. Signed-off-by: Andrea Mayer --- drivers/net/vrf.c | 119 ++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 119 insertions(+) diff --git a/drivers/net/vrf.c b/drivers/net/vrf.c index f772aac6a04c..bac118b615bc 100644 --- a/drivers/net/vrf.c +++ b/drivers/net/vrf.c @@ -102,6 +102,7 @@ struct netns_vrf { bool add_fib_rules; struct vrf_map vmap; + struct ctl_table_header *ctl_hdr; }; struct net_vrf { @@ -245,6 +246,52 @@ static void vrf_map_unlock(struct vrf_map *vmap) __releases(&vmap->vmap_lock) spin_unlock(&vmap->vmap_lock); } +static bool vrf_strict_mode(struct vrf_map *vmap) +{ + bool strict_mode; + + vrf_map_lock(vmap); + strict_mode = vmap->strict_mode; + vrf_map_unlock(vmap); + + return strict_mode; +} + +static int vrf_strict_mode_change(struct vrf_map *vmap, bool new_mode) +{ + bool *cur_mode; + int res = 0; + + vrf_map_lock(vmap); + + cur_mode = &vmap->strict_mode; + if (*cur_mode == new_mode) + goto unlock; + + if (*cur_mode) { + /* disable strict mode */ + *cur_mode = false; + } else { + if (vmap->shared_tables) { + /* we cannot allow strict_mode because there are some + * vrfs that share one or more tables. + */ + res = -EBUSY; + goto unlock; + } + + /* no tables are shared among vrfs, so we can go back + * to 1:1 association between a vrf with its table. + */ + *cur_mode = true; + } + +unlock: + vrf_map_unlock(vmap); + + return res; +} + /* called with rtnl lock held */ static int vrf_map_register_dev(struct net_device *dev, struct netlink_ext_ack *extack) @@ -1701,19 +1748,91 @@ static int vrf_map_init(struct vrf_map *vmap) return 0; } +static int vrf_shared_table_handler(struct ctl_table *table, int write, + void __user *buffer, size_t *lenp, + loff_t *ppos) +{ + struct net *net = (struct net *)table->extra1; + struct vrf_map *vmap = netns_vrf_map(net); + int proc_strict_mode = 0; + struct ctl_table tmp = { + .procname = table->procname, + .data = &proc_strict_mode, + .maxlen = sizeof(int), + .mode = table->mode, + .extra1 = SYSCTL_ZERO, + .extra2 = SYSCTL_ONE, + }; + int ret; + + if (!write) + proc_strict_mode = vrf_strict_mode(vmap); + + ret = proc_dointvec_minmax(&tmp, write, buffer, lenp, ppos); + + if (write && ret == 0) + ret = vrf_strict_mode_change(vmap, (bool)proc_strict_mode); + + return ret; +} + +static const struct ctl_table vrf_table[] = { + { + .procname = "strict_mode", + .data = NULL, + .maxlen = sizeof(int), + .mode = 0644, + .proc_handler = vrf_shared_table_handler, + /* set by the vrf_netns_init */ + .extra1 = NULL, + }, + { }, +}; + /* Initialize per network namespace state */ static int __net_init vrf_netns_init(struct net *net) { struct netns_vrf *nn_vrf = net_generic(net, vrf_net_id); + struct ctl_table *table; + int res; nn_vrf->add_fib_rules = true; vrf_map_init(&nn_vrf->vmap); + table = kmemdup(vrf_table, sizeof(vrf_table), GFP_KERNEL); + if (!table) + return -ENOMEM; + + /* init the extra1 parameter with the reference to current netns */ + table[0].extra1 = net; + + nn_vrf->ctl_hdr = register_net_sysctl(net, "net/vrf", table); + if (!nn_vrf->ctl_hdr) { + res = -ENOMEM; + goto free_table; + } + return 0; + +free_table: + kfree(table); + + return res; +} + +static void __net_exit vrf_netns_exit(struct net *net) +{ + struct netns_vrf *nn_vrf = net_generic(net, vrf_net_id); + struct ctl_table *table; + + table = nn_vrf->ctl_hdr->ctl_table_arg; + unregister_net_sysctl_table(nn_vrf->ctl_hdr); + kfree(table); } static struct pernet_operations vrf_net_ops __net_initdata = { .init = vrf_netns_init, + .exit = vrf_netns_exit, .id = &vrf_net_id, .size = sizeof(struct netns_vrf), }; From patchwork Fri Jun 12 16:49:36 2020 Content-Type: text/plain; charset="utf-8" MIME-Version: 1.0 Content-Transfer-Encoding: 7bit X-Patchwork-Submitter: Andrea Mayer X-Patchwork-Id: 11602047 Return-Path: Received: from mail.kernel.org (pdx-korg-mail-1.web.codeaurora.org [172.30.200.123]) by pdx-korg-patchwork-2.web.codeaurora.org (Postfix) with ESMTP id 11B93138C for ; Fri, 12 Jun 2020 17:03:50 +0000 (UTC) Received: from vger.kernel.org (vger.kernel.org [23.128.96.18]) by mail.kernel.org (Postfix) with ESMTP id 0318520801 for ; Fri, 12 Jun 2020 17:03:50 +0000 (UTC) Received: (majordomo@vger.kernel.org) by vger.kernel.org via listexpand id S1726219AbgFLRDA (ORCPT ); Fri, 12 Jun 2020 13:03:00 -0400 Received: from smtp.uniroma2.it ([160.80.6.16]:43544 "EHLO smtp.uniroma2.it" rhost-flags-OK-OK-OK-OK) by vger.kernel.org with ESMTP id S1726089AbgFLRDA (ORCPT ); Fri, 12 Jun 2020 13:03:00 -0400 X-Greylist: delayed 670 seconds by postgrey-1.27 at vger.kernel.org; Fri, 12 Jun 2020 13:02:58 EDT Received: from localhost.localdomain ([160.80.103.126]) by smtp-2015.uniroma2.it (8.14.4/8.14.4/Debian-8) with ESMTP id 05CGpXZt019363 (version=TLSv1/SSLv3 cipher=ECDHE-RSA-AES128-GCM-SHA256 bits=128 verify=NOT); Fri, 12 Jun 2020 18:51:34 +0200 From: Andrea Mayer To: David Ahern , "David S. Miller" , Shrijeet Mukherjee , Jakub Kicinski , Shuah Khan , netdev@vger.kernel.org, linux-kernel@vger.kernel.org, linux-kselftest@vger.kernel.org Cc: Donald Sharp , Roopa Prabhu , Dinesh Dutt , Stefano Salsano , Paolo Lungaroni , Ahmed Abdelsalam , Andrea Mayer Subject: [RFC,net-next, 4/5] vrf: add l3mdev registration for table to VRF device lookup Date: Fri, 12 Jun 2020 18:49:36 +0200 Message-Id: <20200612164937.5468-5-andrea.mayer@uniroma2.it> X-Mailer: git-send-email 2.20.1 In-Reply-To: <20200612164937.5468-1-andrea.mayer@uniroma2.it> References: <20200612164937.5468-1-andrea.mayer@uniroma2.it> MIME-Version: 1.0 X-Virus-Scanned: clamav-milter 0.100.0 at smtp-2015 X-Virus-Status: Clean Sender: linux-kselftest-owner@vger.kernel.org Precedence: bulk List-ID: X-Mailing-List: linux-kselftest@vger.kernel.org During the initialization phase of the VRF module, the callback for table to VRF device lookup is registered in l3mdev. Signed-off-by: Andrea Mayer --- drivers/net/vrf.c | 59 +++++++++++++++++++++++++++++++++++++++++++---- 1 file changed, 55 insertions(+), 4 deletions(-) diff --git a/drivers/net/vrf.c b/drivers/net/vrf.c index bac118b615bc..65d5f5ff4c67 100644 --- a/drivers/net/vrf.c +++ b/drivers/net/vrf.c @@ -182,6 +182,19 @@ static struct vrf_map *netns_vrf_map_by_dev(struct net_device *dev) return netns_vrf_map(dev_net(dev)); } +static int vrf_map_elem_get_vrf_ifindex(struct vrf_map_elem *me) +{ + struct list_head *me_head = &me->vrf_list; + struct net_vrf *vrf; + + if (list_empty(me_head)) + return -ENODEV; + + vrf = list_first_entry(me_head, struct net_vrf, me_list); + + return vrf->ifindex; +} + static struct vrf_map_elem *vrf_map_elem_alloc(gfp_t flags) { struct vrf_map_elem *me; @@ -383,6 +396,34 @@ static void vrf_map_unregister_dev(struct net_device *dev) vrf_map_unlock(vmap); } +/* returns the vrf device index associated with the table_id */ +static int vrf_ifindex_lookup_by_table_id(struct net *net, u32 table_id) +{ + struct vrf_map *vmap = netns_vrf_map(net); + struct vrf_map_elem *me; + int ifindex; + + vrf_map_lock(vmap); + + if (!vmap->strict_mode) { + ifindex = -EPERM; + goto unlock; + } + + me = vrf_map_lookup_elem(vmap, table_id); + if (!me) { + ifindex = -ENODEV; + goto unlock; + } + + ifindex = vrf_map_elem_get_vrf_ifindex(me); + +unlock: + vrf_map_unlock(vmap); + + return ifindex; +} + /* by default VRF devices do not have a qdisc and are expected * to be created with only a single queue. */ @@ -1847,14 +1888,24 @@ static int __init vrf_init_module(void) if (rc < 0) goto error; + rc = l3mdev_table_lookup_register(L3MDEV_TYPE_VRF, + vrf_ifindex_lookup_by_table_id); + if (rc < 0) + goto unreg_pernet; + rc = rtnl_link_register(&vrf_link_ops); - if (rc < 0) { - unregister_pernet_subsys(&vrf_net_ops); - goto error; - } + if (rc < 0) + goto table_lookup_unreg; return 0; +table_lookup_unreg: + l3mdev_table_lookup_unregister(L3MDEV_TYPE_VRF, + vrf_ifindex_lookup_by_table_id); + +unreg_pernet: + unregister_pernet_subsys(&vrf_net_ops); + error: unregister_netdevice_notifier(&vrf_notifier_block); return rc; From patchwork Fri Jun 12 16:49:37 2020 Content-Type: text/plain; charset="utf-8" MIME-Version: 1.0 Content-Transfer-Encoding: 7bit X-Patchwork-Submitter: Andrea Mayer X-Patchwork-Id: 11602039 Return-Path: Received: from mail.kernel.org (pdx-korg-mail-1.web.codeaurora.org [172.30.200.123]) by pdx-korg-patchwork-2.web.codeaurora.org (Postfix) with ESMTP id 381D6618 for ; Fri, 12 Jun 2020 17:03:25 +0000 (UTC) Received: from vger.kernel.org (vger.kernel.org [23.128.96.18]) by mail.kernel.org (Postfix) with ESMTP id 1A5BB20882 for ; Fri, 12 Jun 2020 17:03:25 +0000 (UTC) Received: (majordomo@vger.kernel.org) by vger.kernel.org via listexpand id S1726474AbgFLRDY (ORCPT ); Fri, 12 Jun 2020 13:03:24 -0400 Received: from smtp.uniroma2.it ([160.80.6.16]:43544 "EHLO smtp.uniroma2.it" rhost-flags-OK-OK-OK-OK) by vger.kernel.org with ESMTP id S1726432AbgFLRDR (ORCPT ); Fri, 12 Jun 2020 13:03:17 -0400 X-Greylist: delayed 670 seconds by postgrey-1.27 at vger.kernel.org; Fri, 12 Jun 2020 13:02:58 EDT Received: from localhost.localdomain ([160.80.103.126]) by smtp-2015.uniroma2.it (8.14.4/8.14.4/Debian-8) with ESMTP id 05CGpXZu019363 (version=TLSv1/SSLv3 cipher=ECDHE-RSA-AES128-GCM-SHA256 bits=128 verify=NOT); Fri, 12 Jun 2020 18:51:34 +0200 From: Andrea Mayer To: David Ahern , "David S. Miller" , Shrijeet Mukherjee , Jakub Kicinski , Shuah Khan , netdev@vger.kernel.org, linux-kernel@vger.kernel.org, linux-kselftest@vger.kernel.org Cc: Donald Sharp , Roopa Prabhu , Dinesh Dutt , Stefano Salsano , Paolo Lungaroni , Ahmed Abdelsalam , Andrea Mayer Subject: [RFC,net-next, 5/5] selftests: add selftest for the VRF strict mode Date: Fri, 12 Jun 2020 18:49:37 +0200 Message-Id: <20200612164937.5468-6-andrea.mayer@uniroma2.it> X-Mailer: git-send-email 2.20.1 In-Reply-To: <20200612164937.5468-1-andrea.mayer@uniroma2.it> References: <20200612164937.5468-1-andrea.mayer@uniroma2.it> MIME-Version: 1.0 X-Virus-Scanned: clamav-milter 0.100.0 at smtp-2015 X-Virus-Status: Clean Sender: linux-kselftest-owner@vger.kernel.org Precedence: bulk List-ID: X-Mailing-List: linux-kselftest@vger.kernel.org The new strict mode functionality is tested in different configurations and on different network namespaces. Signed-off-by: Andrea Mayer --- .../selftests/net/vrf_strict_mode_test.sh | 390 ++++++++++++++++++ 1 file changed, 390 insertions(+) create mode 100755 tools/testing/selftests/net/vrf_strict_mode_test.sh diff --git a/tools/testing/selftests/net/vrf_strict_mode_test.sh b/tools/testing/selftests/net/vrf_strict_mode_test.sh new file mode 100755 index 000000000000..5274f4a1fba1 --- /dev/null +++ b/tools/testing/selftests/net/vrf_strict_mode_test.sh @@ -0,0 +1,390 @@ +#!/bin/bash +# SPDX-License-Identifier: GPL-2.0 + +# This test is designed for testing the new VRF strict_mode functionality. + +ret=0 + +# identifies the "init" network namespace which is often called root network +# namespace. +INIT_NETNS_NAME="init" + +PAUSE_ON_FAIL=${PAUSE_ON_FAIL:=no} + +log_test() +{ + local rc=$1 + local expected=$2 + local msg="$3" + + if [ ${rc} -eq ${expected} ]; then + nsuccess=$((nsuccess+1)) + printf "\n TEST: %-60s [ OK ]\n" "${msg}" + else + ret=1 + nfail=$((nfail+1)) + printf "\n TEST: %-60s [FAIL]\n" "${msg}" + if [ "${PAUSE_ON_FAIL}" = "yes" ]; then + echo + echo "hit enter to continue, 'q' to quit" + read a + [ "$a" = "q" ] && exit 1 + fi + fi +} + +print_log_test_results() +{ + if [ "$TESTS" != "none" ]; then + printf "\nTests passed: %3d\n" ${nsuccess} + printf "Tests failed: %3d\n" ${nfail} + fi +} + +log_section() +{ + echo + echo "################################################################################" + echo "TEST SECTION: $*" + echo "################################################################################" +} + +ip_expand_args() +{ + local nsname=$1 + local nsarg="" + + if [ "${nsname}" != "${INIT_NETNS_NAME}" ]; then + nsarg="-netns ${nsname}" + fi + + echo "${nsarg}" +} + +vrf_count() +{ + local nsname=$1 + local nsarg="$(ip_expand_args ${nsname})" + + ip ${nsarg} -o link show type vrf | wc -l +} + +count_vrf_by_table_id() +{ + local nsname=$1 + local tableid=$2 + local nsarg="$(ip_expand_args ${nsname})" + + ip ${nsarg} -d -o link show type vrf | grep "table ${tableid}" | wc -l +} + +add_vrf() +{ + local nsname=$1 + local vrfname=$2 + local vrftable=$3 + local nsarg="$(ip_expand_args ${nsname})" + + ip ${nsarg} link add ${vrfname} type vrf table ${vrftable} &>/dev/null +} + +add_vrf_and_check() +{ + local nsname=$1 + local vrfname=$2 + local vrftable=$3 + local cnt + local rc + + add_vrf ${nsname} ${vrfname} ${vrftable}; rc=$? + + cnt=$(count_vrf_by_table_id ${nsname} ${vrftable}) + + log_test ${rc} 0 "${nsname}: add vrf ${vrfname}, ${cnt} vrfs for table ${vrftable}" +} + +add_vrf_and_check_fail() +{ + local nsname=$1 + local vrfname=$2 + local vrftable=$3 + local cnt + local rc + + add_vrf ${nsname} ${vrfname} ${vrftable}; rc=$? + + cnt=$(count_vrf_by_table_id ${nsname} ${vrftable}) + + log_test ${rc} 2 "${nsname}: CANNOT add vrf ${vrfname}, ${cnt} vrfs for table ${vrftable}" +} + +del_vrf_and_check() +{ + local nsname=$1 + local vrfname=$2 + local nsarg="$(ip_expand_args ${nsname})" + + ip ${nsarg} link del ${vrfname} + log_test $? 0 "${nsname}: remove vrf ${vrfname}" +} + +config_vrf_and_check() +{ + local nsname=$1 + local addr=$2 + local vrfname=$3 + local nsarg="$(ip_expand_args ${nsname})" + + ip ${nsarg} link set dev ${vrfname} up && \ + ip ${nsarg} addr add ${addr} dev ${vrfname} + log_test $? 0 "${nsname}: vrf ${vrfname} up, addr ${addr}" +} + +read_strict_mode() +{ + local nsname=$1 + local rval + local rc=0 + local nsexec="" + + if [ "${nsname}" != "${INIT_NETNS_NAME}" ]; then + # a custom network namespace is provided + nsexec="ip netns exec ${nsname}" + fi + + rval="$(${nsexec} bash -c "cat /proc/sys/net/vrf/strict_mode" | \ + grep -E "^[0-1]$")" &> /dev/null + if [ $? -ne 0 ]; then + # set errors + rval=255 + rc=1 + fi + + # on success, rval can be only 0 or 1; on error, rval is equal to 255 + echo ${rval} + return ${rc} +} + +read_strict_mode_compare_and_check() +{ + local nsname=$1 + local expected=$2 + local res + + res="$(read_strict_mode ${nsname})" + log_test ${res} ${expected} "${nsname}: check strict_mode=${res}" +} + +set_strict_mode() +{ + local nsname=$1 + local val=$2 + local nsexec="" + + if [ "${nsname}" != "${INIT_NETNS_NAME}" ]; then + # a custom network namespace is provided + nsexec="ip netns exec ${nsname}" + fi + + ${nsexec} bash -c "echo ${val} >/proc/sys/net/vrf/strict_mode" &>/dev/null +} + +enable_strict_mode() +{ + local nsname=$1 + + set_strict_mode ${nsname} 1 +} + +disable_strict_mode() +{ + local nsname=$1 + + set_strict_mode ${nsname} 0 +} + +disable_strict_mode_and_check() +{ + local nsname=$1 + + disable_strict_mode ${nsname} + log_test $? 0 "${nsname}: disable strict_mode (=0)" +} + +enable_strict_mode_and_check() +{ + local nsname=$1 + + enable_strict_mode ${nsname} + log_test $? 0 "${nsname}: enable strict_mode (=1)" +} + +enable_strict_mode_and_check_fail() +{ + local nsname=$1 + + enable_strict_mode ${nsname} + log_test $? 1 "${nsname}: CANNOT enable strict_mode" +} + +strict_mode_check_default() +{ + local nsname=$1 + local strictmode + local vrfcnt + + vrfcnt=$(vrf_count ${nsname}) + strictmode=$(read_strict_mode ${nsname}) + log_test ${strictmode} 0 "${nsname}: strict_mode=0 by default, ${vrfcnt} vrfs" +} + +setup() +{ + modprobe vrf + + ip netns add testns + ip netns exec testns ip link set lo up +} + +cleanup() +{ + ip netns del testns 2>/dev/null + + ip link del vrf100 2>/dev/null + ip link del vrf101 2>/dev/null + ip link del vrf102 2>/dev/null + + echo 0 >/proc/sys/net/vrf/strict_mode 2>/dev/null +} + +vrf_strict_mode_tests_init() +{ + vrf_strict_mode_check_support init + + strict_mode_check_default init + + add_vrf_and_check init vrf100 100 + config_vrf_and_check init 172.16.100.1/24 vrf100 + + enable_strict_mode_and_check init + + add_vrf_and_check_fail init vrf101 100 + + disable_strict_mode_and_check init + + add_vrf_and_check init vrf101 100 + config_vrf_and_check init 172.16.101.1/24 vrf101 + + enable_strict_mode_and_check_fail init + + del_vrf_and_check init vrf101 + + enable_strict_mode_and_check init + + add_vrf_and_check init vrf102 102 + config_vrf_and_check init 172.16.102.1/24 vrf102 + + # the strict_modle is enabled in the init +} + +vrf_strict_mode_tests_testns() +{ + vrf_strict_mode_check_support testns + + strict_mode_check_default testns + + enable_strict_mode_and_check testns + + add_vrf_and_check testns vrf100 100 + config_vrf_and_check testns 10.0.100.1/24 vrf100 + + add_vrf_and_check_fail testns vrf101 100 + + add_vrf_and_check_fail testns vrf102 100 + + add_vrf_and_check testns vrf200 200 + + disable_strict_mode_and_check testns + + add_vrf_and_check testns vrf101 100 + + add_vrf_and_check testns vrf102 100 + + #the strict_mode is disabled in the testns +} + +vrf_strict_mode_tests_mix() +{ + read_strict_mode_compare_and_check init 1 + + read_strict_mode_compare_and_check testns 0 + + del_vrf_and_check testns vrf101 + + del_vrf_and_check testns vrf102 + + disable_strict_mode_and_check init + + enable_strict_mode_and_check testns + + enable_strict_mode_and_check init + enable_strict_mode_and_check init + + disable_strict_mode_and_check testns + disable_strict_mode_and_check testns + + read_strict_mode_compare_and_check init 1 + + read_strict_mode_compare_and_check testns 0 +} + +vrf_strict_mode_tests() +{ + log_section "VRF strict_mode test on init network namespace" + vrf_strict_mode_tests_init + + log_section "VRF strict_mode test on testns network namespace" + vrf_strict_mode_tests_testns + + log_section "VRF strict_mode test mixing init and testns network namespaces" + vrf_strict_mode_tests_mix +} + +vrf_strict_mode_check_support() +{ + local nsname=$1 + local output + local rc + + output="$(lsmod | grep '^vrf' | awk '{print $1}')" + if [ -z "${output}" ]; then + modinfo vrf || return $? + fi + + # we do not care about the value of the strict_mode; we only check if + # the strict_mode parameter is available or not. + read_strict_mode ${nsname} &>/dev/null; rc=$? + log_test ${rc} 0 "${nsname}: net.vrf.strict_mode is available" + + return ${rc} +} + +if [ "$(id -u)" -ne 0 ];then + echo "SKIP: Need root privileges" + exit 0 +fi + +if [ ! -x "$(command -v ip)" ]; then + echo "SKIP: Could not run test without ip tool" + exit 0 +fi + +cleanup &> /dev/null + +setup +vrf_strict_mode_tests +cleanup + +print_log_test_results + +exit $ret