From patchwork Tue Mar 28 02:57:56 2023 Content-Type: text/plain; charset="utf-8" MIME-Version: 1.0 Content-Transfer-Encoding: 7bit X-Patchwork-Submitter: Herbert Xu X-Patchwork-Id: 13190368 X-Patchwork-Delegate: kuba@kernel.org Return-Path: X-Spam-Checker-Version: SpamAssassin 3.4.0 (2014-02-07) on aws-us-west-2-korg-lkml-1.web.codeaurora.org Received: from vger.kernel.org (vger.kernel.org [23.128.96.18]) by smtp.lore.kernel.org (Postfix) with ESMTP id 7E599C6FD1D for ; Tue, 28 Mar 2023 02:58:05 +0000 (UTC) Received: (majordomo@vger.kernel.org) by vger.kernel.org via listexpand id S229946AbjC1C6E (ORCPT ); Mon, 27 Mar 2023 22:58:04 -0400 Received: from lindbergh.monkeyblade.net ([23.128.96.19]:50672 "EHLO lindbergh.monkeyblade.net" rhost-flags-OK-OK-OK-OK) by vger.kernel.org with ESMTP id S229436AbjC1C6D (ORCPT ); Mon, 27 Mar 2023 22:58:03 -0400 Received: from 167-179-156-38.a7b39c.syd.nbn.aussiebb.net (167-179-156-38.a7b39c.syd.nbn.aussiebb.net [167.179.156.38]) by lindbergh.monkeyblade.net (Postfix) with ESMTPS id D36F91BFF for ; Mon, 27 Mar 2023 19:58:01 -0700 (PDT) Received: from loth.rohan.me.apana.org.au ([192.168.167.2]) by formenos.hmeau.com with smtp (Exim 4.94.2 #2 (Debian)) id 1pgzX2-009NnY-U5; Tue, 28 Mar 2023 10:57:57 +0800 Received: by loth.rohan.me.apana.org.au (sSMTP sendmail emulation); Tue, 28 Mar 2023 10:57:56 +0800 From: "Herbert Xu" Date: Tue, 28 Mar 2023 10:57:56 +0800 Subject: [PATCH 1/2] macvlan: Skip broadcast queue if multicast with single receiver References: To: netdev@vger.kernel.org Message-Id: Precedence: bulk List-ID: X-Mailing-List: netdev@vger.kernel.org X-Patchwork-Delegate: kuba@kernel.org As it stands all broadcast and multicast packets are queued and processed in a work queue. This is so that we don't overwhelm the receive softirq path by generating thousands of packets or more (see commit 412ca1550cbe "macvlan: Move broadcasts into a work queue"). 
As such all multicast packets will be delayed, even if they will be received by a single macvlan device. As using a workqueue is not free in terms of latency, we should avoid this where possible. This patch adds a new filter to determine which addresses should be delayed and which ones won't. This is done using a crude counter of how many times an address has been added to the macvlan port (ha->synced). For now if an address has been added more than once, then it will be considered to be broadcast. This could be tuned further by making this threshold configurable. Signed-off-by: Herbert Xu --- drivers/net/macvlan.c | 74 +++++++++++++++++++++++++++++++------------------- 1 file changed, 46 insertions(+), 28 deletions(-) diff --git a/drivers/net/macvlan.c b/drivers/net/macvlan.c index 99a971929c8e..62b4748d3836 100644 --- a/drivers/net/macvlan.c +++ b/drivers/net/macvlan.c @@ -50,6 +50,7 @@ struct macvlan_port { u32 flags; int count; struct hlist_head vlan_source_hash[MACVLAN_HASH_SIZE]; + DECLARE_BITMAP(bc_filter, MACVLAN_MC_FILTER_SZ); DECLARE_BITMAP(mc_filter, MACVLAN_MC_FILTER_SZ); unsigned char perm_addr[ETH_ALEN]; }; @@ -291,6 +292,31 @@ static void macvlan_broadcast(struct sk_buff *skb, } } +static void macvlan_multicast_rx(const struct macvlan_port *port, + const struct macvlan_dev *src, + struct sk_buff *skb) +{ + if (!src) + /* frame comes from an external address */ + macvlan_broadcast(skb, port, NULL, + MACVLAN_MODE_PRIVATE | + MACVLAN_MODE_VEPA | + MACVLAN_MODE_PASSTHRU| + MACVLAN_MODE_BRIDGE); + else if (src->mode == MACVLAN_MODE_VEPA) + /* flood to everyone except source */ + macvlan_broadcast(skb, port, src->dev, + MACVLAN_MODE_VEPA | + MACVLAN_MODE_BRIDGE); + else + /* + * flood only to VEPA ports, bridge ports + * already saw the frame on the way out. 
+ */ + macvlan_broadcast(skb, port, src->dev, + MACVLAN_MODE_VEPA); +} + static void macvlan_process_broadcast(struct work_struct *w) { struct macvlan_port *port = container_of(w, struct macvlan_port, @@ -308,27 +334,7 @@ static void macvlan_process_broadcast(struct work_struct *w) const struct macvlan_dev *src = MACVLAN_SKB_CB(skb)->src; rcu_read_lock(); - - if (!src) - /* frame comes from an external address */ - macvlan_broadcast(skb, port, NULL, - MACVLAN_MODE_PRIVATE | - MACVLAN_MODE_VEPA | - MACVLAN_MODE_PASSTHRU| - MACVLAN_MODE_BRIDGE); - else if (src->mode == MACVLAN_MODE_VEPA) - /* flood to everyone except source */ - macvlan_broadcast(skb, port, src->dev, - MACVLAN_MODE_VEPA | - MACVLAN_MODE_BRIDGE); - else - /* - * flood only to VEPA ports, bridge ports - * already saw the frame on the way out. - */ - macvlan_broadcast(skb, port, src->dev, - MACVLAN_MODE_VEPA); - + macvlan_multicast_rx(port, src, skb); rcu_read_unlock(); if (src) @@ -476,8 +482,10 @@ static rx_handler_result_t macvlan_handle_frame(struct sk_buff **pskb) } hash = mc_hash(NULL, eth->h_dest); - if (test_bit(hash, port->mc_filter)) + if (test_bit(hash, port->bc_filter)) macvlan_broadcast_enqueue(port, src, skb); + else if (test_bit(hash, port->mc_filter)) + macvlan_multicast_rx(port, src, skb); return RX_HANDLER_PASS; } @@ -780,20 +788,27 @@ static void macvlan_change_rx_flags(struct net_device *dev, int change) static void macvlan_compute_filter(unsigned long *mc_filter, struct net_device *dev, - struct macvlan_dev *vlan) + struct macvlan_dev *vlan, int cutoff) { if (dev->flags & (IFF_PROMISC | IFF_ALLMULTI)) { - bitmap_fill(mc_filter, MACVLAN_MC_FILTER_SZ); + if (cutoff >= 0) + bitmap_fill(mc_filter, MACVLAN_MC_FILTER_SZ); + else + bitmap_zero(mc_filter, MACVLAN_MC_FILTER_SZ); } else { - struct netdev_hw_addr *ha; DECLARE_BITMAP(filter, MACVLAN_MC_FILTER_SZ); + struct netdev_hw_addr *ha; bitmap_zero(filter, MACVLAN_MC_FILTER_SZ); netdev_for_each_mc_addr(ha, dev) { + if (cutoff >= 0 && 
ha->synced <= cutoff) + continue; + __set_bit(mc_hash(vlan, ha->addr), filter); } - __set_bit(mc_hash(vlan, dev->broadcast), filter); + if (cutoff >= 0) + __set_bit(mc_hash(vlan, dev->broadcast), filter); bitmap_copy(mc_filter, filter, MACVLAN_MC_FILTER_SZ); } @@ -803,7 +818,7 @@ static void macvlan_set_mac_lists(struct net_device *dev) { struct macvlan_dev *vlan = netdev_priv(dev); - macvlan_compute_filter(vlan->mc_filter, dev, vlan); + macvlan_compute_filter(vlan->mc_filter, dev, vlan, 0); dev_uc_sync(vlan->lowerdev, dev); dev_mc_sync(vlan->lowerdev, dev); @@ -821,7 +836,10 @@ static void macvlan_set_mac_lists(struct net_device *dev) * The solution is to maintain a list of broadcast addresses like * we do for uc/mc, if you care. */ - macvlan_compute_filter(vlan->port->mc_filter, vlan->lowerdev, NULL); + macvlan_compute_filter(vlan->port->mc_filter, vlan->lowerdev, NULL, + 0); + macvlan_compute_filter(vlan->port->bc_filter, vlan->lowerdev, NULL, + 1); } static int macvlan_change_mtu(struct net_device *dev, int new_mtu) From patchwork Tue Mar 28 02:57:59 2023 Content-Type: text/plain; charset="utf-8" MIME-Version: 1.0 Content-Transfer-Encoding: 7bit X-Patchwork-Submitter: Herbert Xu X-Patchwork-Id: 13190369 X-Patchwork-Delegate: kuba@kernel.org Return-Path: X-Spam-Checker-Version: SpamAssassin 3.4.0 (2014-02-07) on aws-us-west-2-korg-lkml-1.web.codeaurora.org Received: from vger.kernel.org (vger.kernel.org [23.128.96.18]) by smtp.lore.kernel.org (Postfix) with ESMTP id 73AE5C6FD1D for ; Tue, 28 Mar 2023 02:58:10 +0000 (UTC) Received: (majordomo@vger.kernel.org) by vger.kernel.org via listexpand id S230473AbjC1C6J (ORCPT ); Mon, 27 Mar 2023 22:58:09 -0400 Received: from lindbergh.monkeyblade.net ([23.128.96.19]:50680 "EHLO lindbergh.monkeyblade.net" rhost-flags-OK-OK-OK-OK) by vger.kernel.org with ESMTP id S229436AbjC1C6F (ORCPT ); Mon, 27 Mar 2023 22:58:05 -0400 Received: from 167-179-156-38.a7b39c.syd.nbn.aussiebb.net (167-179-156-38.a7b39c.syd.nbn.aussiebb.net 
[167.179.156.38]) by lindbergh.monkeyblade.net (Postfix) with ESMTPS id ED2CB1BFF for ; Mon, 27 Mar 2023 19:58:03 -0700 (PDT) Received: from loth.rohan.me.apana.org.au ([192.168.167.2]) by formenos.hmeau.com with smtp (Exim 4.94.2 #2 (Debian)) id 1pgzX5-009Nng-3N; Tue, 28 Mar 2023 10:58:00 +0800 Received: by loth.rohan.me.apana.org.au (sSMTP sendmail emulation); Tue, 28 Mar 2023 10:57:59 +0800 From: "Herbert Xu" Date: Tue, 28 Mar 2023 10:57:59 +0800 Subject: [PATCH 2/2] macvlan: Add netlink attribute for broadcast cutoff References: To: netdev@vger.kernel.org Message-Id: Precedence: bulk List-ID: X-Mailing-List: netdev@vger.kernel.org X-Patchwork-Delegate: kuba@kernel.org Make the broadcast cutoff configurable through netlink. Note that macvlan is weird because there is no central device for us to configure (the lowerdev could be anything). So all the options are duplicated over what could be thousands of child devices. IFLA_MACVLAN_BC_QUEUE_LEN took the approach of taking the maximum of all child device settings. This is unnecessary, as we can simply store the option in the port device and use the value from whichever child device was updated last. 
Signed-off-by: Herbert Xu --- drivers/net/macvlan.c | 31 +++++++++++++++++++++++++++++-- include/uapi/linux/if_link.h | 1 + tools/include/uapi/linux/if_link.h | 1 + 3 files changed, 31 insertions(+), 2 deletions(-) diff --git a/drivers/net/macvlan.c b/drivers/net/macvlan.c index 62b4748d3836..4215106adc40 100644 --- a/drivers/net/macvlan.c +++ b/drivers/net/macvlan.c @@ -47,6 +47,7 @@ struct macvlan_port { struct sk_buff_head bc_queue; struct work_struct bc_work; u32 bc_queue_len_used; + int bc_cutoff; u32 flags; int count; struct hlist_head vlan_source_hash[MACVLAN_HASH_SIZE]; @@ -814,6 +815,12 @@ static void macvlan_compute_filter(unsigned long *mc_filter, } } +static void macvlan_recompute_bc_filter(struct macvlan_dev *vlan) +{ + macvlan_compute_filter(vlan->port->bc_filter, vlan->lowerdev, NULL, + vlan->port->bc_cutoff); +} + static void macvlan_set_mac_lists(struct net_device *dev) { struct macvlan_dev *vlan = netdev_priv(dev); @@ -838,8 +845,16 @@ static void macvlan_set_mac_lists(struct net_device *dev) */ macvlan_compute_filter(vlan->port->mc_filter, vlan->lowerdev, NULL, 0); - macvlan_compute_filter(vlan->port->bc_filter, vlan->lowerdev, NULL, - 1); + macvlan_recompute_bc_filter(vlan); +} + +static void update_port_bc_cutoff(struct macvlan_dev *vlan, int cutoff) +{ + if (vlan->port->bc_cutoff == cutoff) + return; + + vlan->port->bc_cutoff = cutoff; + macvlan_recompute_bc_filter(vlan); } static int macvlan_change_mtu(struct net_device *dev, int new_mtu) @@ -1254,6 +1269,7 @@ static int macvlan_port_create(struct net_device *dev) INIT_HLIST_HEAD(&port->vlan_source_hash[i]); port->bc_queue_len_used = 0; + port->bc_cutoff = 1; skb_queue_head_init(&port->bc_queue); INIT_WORK(&port->bc_work, macvlan_process_broadcast); @@ -1527,6 +1543,10 @@ int macvlan_common_newlink(struct net *src_net, struct net_device *dev, if (data && data[IFLA_MACVLAN_BC_QUEUE_LEN]) vlan->bc_queue_len_req = nla_get_u32(data[IFLA_MACVLAN_BC_QUEUE_LEN]); + if (data && 
data[IFLA_MACVLAN_BC_CUTOFF]) + update_port_bc_cutoff( + vlan, nla_get_s32(data[IFLA_MACVLAN_BC_CUTOFF])); + err = register_netdevice(dev); if (err < 0) goto destroy_macvlan_port; @@ -1623,6 +1643,10 @@ static int macvlan_changelink(struct net_device *dev, update_port_bc_queue_len(vlan->port); } + if (data && data[IFLA_MACVLAN_BC_CUTOFF]) + update_port_bc_cutoff( + vlan, nla_get_s32(data[IFLA_MACVLAN_BC_CUTOFF])); + if (set_mode) vlan->mode = mode; if (data && data[IFLA_MACVLAN_MACADDR_MODE]) { @@ -1703,6 +1727,9 @@ static int macvlan_fill_info(struct sk_buff *skb, goto nla_put_failure; if (nla_put_u32(skb, IFLA_MACVLAN_BC_QUEUE_LEN_USED, port->bc_queue_len_used)) goto nla_put_failure; + if (port->bc_cutoff != 1 && + nla_put_s32(skb, IFLA_MACVLAN_BC_CUTOFF, port->bc_cutoff)) + goto nla_put_failure; return 0; nla_put_failure: diff --git a/include/uapi/linux/if_link.h b/include/uapi/linux/if_link.h index 57ceb788250f..8d679688efe0 100644 --- a/include/uapi/linux/if_link.h +++ b/include/uapi/linux/if_link.h @@ -635,6 +635,7 @@ enum { IFLA_MACVLAN_MACADDR_COUNT, IFLA_MACVLAN_BC_QUEUE_LEN, IFLA_MACVLAN_BC_QUEUE_LEN_USED, + IFLA_MACVLAN_BC_CUTOFF, __IFLA_MACVLAN_MAX, }; diff --git a/tools/include/uapi/linux/if_link.h b/tools/include/uapi/linux/if_link.h index 901d98b865a1..39e659c83cfd 100644 --- a/tools/include/uapi/linux/if_link.h +++ b/tools/include/uapi/linux/if_link.h @@ -605,6 +605,7 @@ enum { IFLA_MACVLAN_MACADDR_COUNT, IFLA_MACVLAN_BC_QUEUE_LEN, IFLA_MACVLAN_BC_QUEUE_LEN_USED, + IFLA_MACVLAN_BC_CUTOFF, __IFLA_MACVLAN_MAX, };