From patchwork Thu Apr 30 19:21:31 2020 Content-Type: text/plain; charset="utf-8" MIME-Version: 1.0 Content-Transfer-Encoding: 7bit X-Patchwork-Submitter: Maor Gottlieb X-Patchwork-Id: 11521295 X-Patchwork-Delegate: jgg@ziepe.ca Return-Path: Received: from mail.kernel.org (pdx-korg-mail-1.web.codeaurora.org [172.30.200.123]) by pdx-korg-patchwork-2.web.codeaurora.org (Postfix) with ESMTP id 2439381 for ; Thu, 30 Apr 2020 19:21:58 +0000 (UTC) Received: from vger.kernel.org (vger.kernel.org [23.128.96.18]) by mail.kernel.org (Postfix) with ESMTP id 15A7D2072A for ; Thu, 30 Apr 2020 19:21:58 +0000 (UTC) Received: (majordomo@vger.kernel.org) by vger.kernel.org via listexpand id S1726746AbgD3TV4 (ORCPT ); Thu, 30 Apr 2020 15:21:56 -0400 Received: from mail-il-dmz.mellanox.com ([193.47.165.129]:44306 "EHLO mellanox.co.il" rhost-flags-OK-OK-OK-FAIL) by vger.kernel.org with ESMTP id S1726660AbgD3TVz (ORCPT ); Thu, 30 Apr 2020 15:21:55 -0400 Received: from Internal Mail-Server by MTLPINE1 (envelope-from maorg@mellanox.com) with ESMTPS (AES256-SHA encrypted); 30 Apr 2020 22:21:53 +0300 Received: from dev-l-vrt-201.mtl.labs.mlnx (dev-l-vrt-201.mtl.labs.mlnx [10.134.201.1]) by labmailer.mlnx (8.13.8/8.13.8) with ESMTP id 03UJLqAJ004128; Thu, 30 Apr 2020 22:21:52 +0300 From: Maor Gottlieb To: davem@davemloft.net, jgg@mellanox.com, dledford@redhat.com, j.vosburgh@gmail.com, vfalico@gmail.com, andy@greyhouse.net, kuba@kernel.org, jiri@mellanox.com, dsahern@kernel.org Cc: leonro@mellanox.com, saeedm@mellanox.com, linux-rdma@vger.kernel.org, netdev@vger.kernel.org, alexr@mellanox.com, Maor Gottlieb Subject: [PATCH V8 mlx5-next 01/16] net/core: Introduce netdev_get_xmit_slave Date: Thu, 30 Apr 2020 22:21:31 +0300 Message-Id: <20200430192146.12863-2-maorg@mellanox.com> X-Mailer: git-send-email 2.17.2 In-Reply-To: <20200430192146.12863-1-maorg@mellanox.com> References: <20200430192146.12863-1-maorg@mellanox.com> Sender: linux-rdma-owner@vger.kernel.org Precedence: bulk List-ID: 
X-Mailing-List: linux-rdma@vger.kernel.org Add a new ndo to get the xmit slave of a master device. The reference counters are not incremented, so the caller must be careful with locks. The caller can ask for the xmit slave under the assumption that all the slaves can transmit by setting the all_slaves argument to true. Signed-off-by: Maor Gottlieb Reviewed-by: Jiri Pirko Reviewed-by: David Ahern --- include/linux/netdevice.h | 12 ++++++++++++ net/core/dev.c | 22 ++++++++++++++++++++++ 2 files changed, 34 insertions(+) diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h index 130a668049ab..26bc0f11b7ad 100644 --- a/include/linux/netdevice.h +++ b/include/linux/netdevice.h @@ -1146,6 +1146,12 @@ struct netdev_net_notifier { * int (*ndo_del_slave)(struct net_device *dev, struct net_device *slave_dev); * Called to release previously enslaved netdev. * + * struct net_device *(*ndo_get_xmit_slave)(struct net_device *dev, + * struct sk_buff *skb, + * bool all_slaves); + * Get the xmit slave of master device. If all_slaves is true, function + * assume all the slaves can transmit. + * * Feature/offload setting functions. 
* netdev_features_t (*ndo_fix_features)(struct net_device *dev, * netdev_features_t features); @@ -1389,6 +1395,9 @@ struct net_device_ops { struct netlink_ext_ack *extack); int (*ndo_del_slave)(struct net_device *dev, struct net_device *slave_dev); + struct net_device* (*ndo_get_xmit_slave)(struct net_device *dev, + struct sk_buff *skb, + bool all_slaves); netdev_features_t (*ndo_fix_features)(struct net_device *dev, netdev_features_t features); int (*ndo_set_features)(struct net_device *dev, @@ -2731,6 +2740,9 @@ void netdev_freemem(struct net_device *dev); void synchronize_net(void); int init_dummy_netdev(struct net_device *dev); +struct net_device *netdev_get_xmit_slave(struct net_device *dev, + struct sk_buff *skb, + bool all_slaves); struct net_device *dev_get_by_index(struct net *net, int ifindex); struct net_device *__dev_get_by_index(struct net *net, int ifindex); struct net_device *dev_get_by_index_rcu(struct net *net, int ifindex); diff --git a/net/core/dev.c b/net/core/dev.c index 522288177bbd..294e59e20080 100644 --- a/net/core/dev.c +++ b/net/core/dev.c @@ -7786,6 +7786,28 @@ void netdev_bonding_info_change(struct net_device *dev, } EXPORT_SYMBOL(netdev_bonding_info_change); +/** + * netdev_get_xmit_slave - Get the xmit slave of master device + * @skb: The packet + * @all_slaves: assume all the slaves are active + * + * The reference counters are not incremented so the caller must be + * careful with locks. The caller must hold RCU lock. + * %NULL is returned if no slave is found. 
+ */ + +struct net_device *netdev_get_xmit_slave(struct net_device *dev, + struct sk_buff *skb, + bool all_slaves) +{ + const struct net_device_ops *ops = dev->netdev_ops; + + if (!ops->ndo_get_xmit_slave) + return NULL; + return ops->ndo_get_xmit_slave(dev, skb, all_slaves); +} +EXPORT_SYMBOL(netdev_get_xmit_slave); + static void netdev_adjacent_add_links(struct net_device *dev) { struct netdev_adjacent *iter; From patchwork Thu Apr 30 19:21:32 2020 Content-Type: text/plain; charset="utf-8" MIME-Version: 1.0 Content-Transfer-Encoding: 7bit X-Patchwork-Submitter: Maor Gottlieb X-Patchwork-Id: 11521327 X-Patchwork-Delegate: jgg@ziepe.ca Return-Path: Received: from mail.kernel.org (pdx-korg-mail-1.web.codeaurora.org [172.30.200.123]) by pdx-korg-patchwork-2.web.codeaurora.org (Postfix) with ESMTP id 044E61575 for ; Thu, 30 Apr 2020 19:22:32 +0000 (UTC) Received: from vger.kernel.org (vger.kernel.org [23.128.96.18]) by mail.kernel.org (Postfix) with ESMTP id EA31D2072A for ; Thu, 30 Apr 2020 19:22:31 +0000 (UTC) Received: (majordomo@vger.kernel.org) by vger.kernel.org via listexpand id S1727804AbgD3TWZ (ORCPT ); Thu, 30 Apr 2020 15:22:25 -0400 Received: from mail-il-dmz.mellanox.com ([193.47.165.129]:56869 "EHLO mellanox.co.il" rhost-flags-OK-OK-OK-FAIL) by vger.kernel.org with ESMTP id S1726366AbgD3TV7 (ORCPT ); Thu, 30 Apr 2020 15:21:59 -0400 Received: from Internal Mail-Server by MTLPINE2 (envelope-from maorg@mellanox.com) with ESMTPS (AES256-SHA encrypted); 30 Apr 2020 22:21:53 +0300 Received: from dev-l-vrt-201.mtl.labs.mlnx (dev-l-vrt-201.mtl.labs.mlnx [10.134.201.1]) by labmailer.mlnx (8.13.8/8.13.8) with ESMTP id 03UJLqAK004128; Thu, 30 Apr 2020 22:21:52 +0300 From: Maor Gottlieb To: davem@davemloft.net, jgg@mellanox.com, dledford@redhat.com, j.vosburgh@gmail.com, vfalico@gmail.com, andy@greyhouse.net, kuba@kernel.org, jiri@mellanox.com, dsahern@kernel.org Cc: leonro@mellanox.com, saeedm@mellanox.com, linux-rdma@vger.kernel.org, netdev@vger.kernel.org, 
alexr@mellanox.com, Maor Gottlieb Subject: [PATCH V8 mlx5-next 02/16] bonding: Export skip slave logic to function Date: Thu, 30 Apr 2020 22:21:32 +0300 Message-Id: <20200430192146.12863-3-maorg@mellanox.com> X-Mailer: git-send-email 2.17.2 In-Reply-To: <20200430192146.12863-1-maorg@mellanox.com> References: <20200430192146.12863-1-maorg@mellanox.com> Sender: linux-rdma-owner@vger.kernel.org Precedence: bulk List-ID: X-Mailing-List: linux-rdma@vger.kernel.org As preparation for the following change, which adds an array of all slaves, extract the code that skips a slave into a function. Signed-off-by: Maor Gottlieb Reviewed-by: Jiri Pirko Reviewed-by: Jay Vosburgh --- drivers/net/bonding/bond_main.c | 47 ++++++++++++++++++--------------- 1 file changed, 26 insertions(+), 21 deletions(-) diff --git a/drivers/net/bonding/bond_main.c b/drivers/net/bonding/bond_main.c index 2e70e43c5df5..f7aded014f08 100644 --- a/drivers/net/bonding/bond_main.c +++ b/drivers/net/bonding/bond_main.c @@ -4087,6 +4087,29 @@ static void bond_slave_arr_handler(struct work_struct *work) bond_slave_arr_work_rearm(bond, 1); } +static void bond_skip_slave(struct bond_up_slave *slaves, + struct slave *skipslave) +{ + int idx; + + /* Rare situation where caller has asked to skip a specific + * slave but allocation failed (most likely!). BTW this is + * only possible when the call is initiated from + * __bond_release_one(). In this situation; overwrite the + * skipslave entry in the array with the last entry from the + * array to avoid a situation where the xmit path may choose + * this to-be-skipped slave to send a packet out. 
+ */ + for (idx = 0; slaves && idx < slaves->count; idx++) { + if (skipslave == slaves->arr[idx]) { + slaves->arr[idx] = + slaves->arr[slaves->count - 1]; + slaves->count--; + break; + } + } +} + /* Build the usable slaves array in control path for modes that use xmit-hash * to determine the slave interface - * (a) BOND_MODE_8023AD @@ -4156,27 +4179,9 @@ int bond_update_slave_arr(struct bonding *bond, struct slave *skipslave) if (old_arr) kfree_rcu(old_arr, rcu); out: - if (ret != 0 && skipslave) { - int idx; - - /* Rare situation where caller has asked to skip a specific - * slave but allocation failed (most likely!). BTW this is - * only possible when the call is initiated from - * __bond_release_one(). In this situation; overwrite the - * skipslave entry in the array with the last entry from the - * array to avoid a situation where the xmit path may choose - * this to-be-skipped slave to send a packet out. - */ - old_arr = rtnl_dereference(bond->slave_arr); - for (idx = 0; old_arr != NULL && idx < old_arr->count; idx++) { - if (skipslave == old_arr->arr[idx]) { - old_arr->arr[idx] = - old_arr->arr[old_arr->count-1]; - old_arr->count--; - break; - } - } - } + if (ret != 0 && skipslave) + bond_skip_slave(rtnl_dereference(bond->slave_arr), skipslave); + return ret; } From patchwork Thu Apr 30 19:21:33 2020 Content-Type: text/plain; charset="utf-8" MIME-Version: 1.0 Content-Transfer-Encoding: 7bit X-Patchwork-Submitter: Maor Gottlieb X-Patchwork-Id: 11521297 X-Patchwork-Delegate: jgg@ziepe.ca Return-Path: Received: from mail.kernel.org (pdx-korg-mail-1.web.codeaurora.org [172.30.200.123]) by pdx-korg-patchwork-2.web.codeaurora.org (Postfix) with ESMTP id BB5FE1575 for ; Thu, 30 Apr 2020 19:21:58 +0000 (UTC) Received: from vger.kernel.org (vger.kernel.org [23.128.96.18]) by mail.kernel.org (Postfix) with ESMTP id ACD402073E for ; Thu, 30 Apr 2020 19:21:58 +0000 (UTC) Received: (majordomo@vger.kernel.org) by vger.kernel.org via listexpand id S1726961AbgD3TV4 (ORCPT ); 
Thu, 30 Apr 2020 15:21:56 -0400 Received: from mail-il-dmz.mellanox.com ([193.47.165.129]:44311 "EHLO mellanox.co.il" rhost-flags-OK-OK-OK-FAIL) by vger.kernel.org with ESMTP id S1726746AbgD3TVz (ORCPT ); Thu, 30 Apr 2020 15:21:55 -0400 Received: from Internal Mail-Server by MTLPINE1 (envelope-from maorg@mellanox.com) with ESMTPS (AES256-SHA encrypted); 30 Apr 2020 22:21:53 +0300 Received: from dev-l-vrt-201.mtl.labs.mlnx (dev-l-vrt-201.mtl.labs.mlnx [10.134.201.1]) by labmailer.mlnx (8.13.8/8.13.8) with ESMTP id 03UJLqAL004128; Thu, 30 Apr 2020 22:21:53 +0300 From: Maor Gottlieb To: davem@davemloft.net, jgg@mellanox.com, dledford@redhat.com, j.vosburgh@gmail.com, vfalico@gmail.com, andy@greyhouse.net, kuba@kernel.org, jiri@mellanox.com, dsahern@kernel.org Cc: leonro@mellanox.com, saeedm@mellanox.com, linux-rdma@vger.kernel.org, netdev@vger.kernel.org, alexr@mellanox.com, Maor Gottlieb Subject: [PATCH V8 mlx5-next 03/16] bonding: Rename slave_arr to usable_slaves Date: Thu, 30 Apr 2020 22:21:33 +0300 Message-Id: <20200430192146.12863-4-maorg@mellanox.com> X-Mailer: git-send-email 2.17.2 In-Reply-To: <20200430192146.12863-1-maorg@mellanox.com> References: <20200430192146.12863-1-maorg@mellanox.com> Sender: linux-rdma-owner@vger.kernel.org Precedence: bulk List-ID: X-Mailing-List: linux-rdma@vger.kernel.org Rename slave_arr to usable_slaves, since we will have two arrays: one for the usable slaves and the other for all slaves. 
Signed-off-by: Maor Gottlieb Reviewed-by: Jiri Pirko Reviewed-by: Jay Vosburgh --- drivers/net/bonding/bond_alb.c | 4 ++-- drivers/net/bonding/bond_main.c | 40 ++++++++++++++++----------------- include/net/bonding.h | 2 +- 3 files changed, 23 insertions(+), 23 deletions(-) diff --git a/drivers/net/bonding/bond_alb.c b/drivers/net/bonding/bond_alb.c index c81698550e5a..7bb49b049dcc 100644 --- a/drivers/net/bonding/bond_alb.c +++ b/drivers/net/bonding/bond_alb.c @@ -1360,7 +1360,7 @@ netdev_tx_t bond_tlb_xmit(struct sk_buff *skb, struct net_device *bond_dev) struct bond_up_slave *slaves; unsigned int count; - slaves = rcu_dereference(bond->slave_arr); + slaves = rcu_dereference(bond->usable_slaves); count = slaves ? READ_ONCE(slaves->count) : 0; if (likely(count)) tx_slave = slaves->arr[hash_index % @@ -1494,7 +1494,7 @@ netdev_tx_t bond_alb_xmit(struct sk_buff *skb, struct net_device *bond_dev) struct bond_up_slave *slaves; unsigned int count; - slaves = rcu_dereference(bond->slave_arr); + slaves = rcu_dereference(bond->usable_slaves); count = slaves ? 
READ_ONCE(slaves->count) : 0; if (likely(count)) tx_slave = slaves->arr[bond_xmit_hash(bond, skb) % diff --git a/drivers/net/bonding/bond_main.c b/drivers/net/bonding/bond_main.c index f7aded014f08..2cb41d480ae2 100644 --- a/drivers/net/bonding/bond_main.c +++ b/drivers/net/bonding/bond_main.c @@ -4120,9 +4120,9 @@ static void bond_skip_slave(struct bond_up_slave *slaves, */ int bond_update_slave_arr(struct bonding *bond, struct slave *skipslave) { + struct bond_up_slave *usable_slaves, *old_usable_slaves; struct slave *slave; struct list_head *iter; - struct bond_up_slave *new_arr, *old_arr; int agg_id = 0; int ret = 0; @@ -4130,11 +4130,10 @@ int bond_update_slave_arr(struct bonding *bond, struct slave *skipslave) WARN_ON(lockdep_is_held(&bond->mode_lock)); #endif - new_arr = kzalloc(offsetof(struct bond_up_slave, arr[bond->slave_cnt]), - GFP_KERNEL); - if (!new_arr) { + usable_slaves = kzalloc(struct_size(usable_slaves, arr, + bond->slave_cnt), GFP_KERNEL); + if (!usable_slaves) { ret = -ENOMEM; - pr_err("Failed to build slave-array.\n"); goto out; } if (BOND_MODE(bond) == BOND_MODE_8023AD) { @@ -4142,14 +4141,14 @@ int bond_update_slave_arr(struct bonding *bond, struct slave *skipslave) if (bond_3ad_get_active_agg_info(bond, &ad_info)) { pr_debug("bond_3ad_get_active_agg_info failed\n"); - kfree_rcu(new_arr, rcu); + kfree_rcu(usable_slaves, rcu); /* No active aggragator means it's not safe to use * the previous array. 
*/ - old_arr = rtnl_dereference(bond->slave_arr); - if (old_arr) { - RCU_INIT_POINTER(bond->slave_arr, NULL); - kfree_rcu(old_arr, rcu); + old_usable_slaves = rtnl_dereference(bond->usable_slaves); + if (old_usable_slaves) { + RCU_INIT_POINTER(bond->usable_slaves, NULL); + kfree_rcu(old_usable_slaves, rcu); } goto out; } @@ -4169,18 +4168,19 @@ int bond_update_slave_arr(struct bonding *bond, struct slave *skipslave) continue; slave_dbg(bond->dev, slave->dev, "Adding slave to tx hash array[%d]\n", - new_arr->count); + usable_slaves->count); - new_arr->arr[new_arr->count++] = slave; + usable_slaves->arr[usable_slaves->count++] = slave; } - old_arr = rtnl_dereference(bond->slave_arr); - rcu_assign_pointer(bond->slave_arr, new_arr); - if (old_arr) - kfree_rcu(old_arr, rcu); + old_usable_slaves = rtnl_dereference(bond->usable_slaves); + rcu_assign_pointer(bond->usable_slaves, usable_slaves); + if (old_usable_slaves) + kfree_rcu(old_usable_slaves, rcu); out: if (ret != 0 && skipslave) - bond_skip_slave(rtnl_dereference(bond->slave_arr), skipslave); + bond_skip_slave(rtnl_dereference(bond->usable_slaves), + skipslave); return ret; } @@ -4197,7 +4197,7 @@ static netdev_tx_t bond_3ad_xor_xmit(struct sk_buff *skb, struct bond_up_slave *slaves; unsigned int count; - slaves = rcu_dereference(bond->slave_arr); + slaves = rcu_dereference(bond->usable_slaves); count = slaves ? 
READ_ONCE(slaves->count) : 0; if (likely(count)) { slave = slaves->arr[bond_xmit_hash(bond, skb) % count]; @@ -4488,9 +4488,9 @@ static void bond_uninit(struct net_device *bond_dev) __bond_release_one(bond_dev, slave->dev, true, true); netdev_info(bond_dev, "Released all slaves\n"); - arr = rtnl_dereference(bond->slave_arr); + arr = rtnl_dereference(bond->usable_slaves); if (arr) { - RCU_INIT_POINTER(bond->slave_arr, NULL); + RCU_INIT_POINTER(bond->usable_slaves, NULL); kfree_rcu(arr, rcu); } diff --git a/include/net/bonding.h b/include/net/bonding.h index dc2ce31a1f52..33bdb6d5182d 100644 --- a/include/net/bonding.h +++ b/include/net/bonding.h @@ -200,7 +200,7 @@ struct bonding { struct slave __rcu *curr_active_slave; struct slave __rcu *current_arp_slave; struct slave __rcu *primary_slave; - struct bond_up_slave __rcu *slave_arr; /* Array of usable slaves */ + struct bond_up_slave __rcu *usable_slaves; /* Array of usable slaves */ bool force_primary; s32 slave_cnt; /* never change this value outside the attach/detach wrappers */ int (*recv_probe)(const struct sk_buff *, struct bonding *, From patchwork Thu Apr 30 19:21:34 2020 Content-Type: text/plain; charset="utf-8" MIME-Version: 1.0 Content-Transfer-Encoding: 7bit X-Patchwork-Submitter: Maor Gottlieb X-Patchwork-Id: 11521325 X-Patchwork-Delegate: jgg@ziepe.ca Return-Path: Received: from mail.kernel.org (pdx-korg-mail-1.web.codeaurora.org [172.30.200.123]) by pdx-korg-patchwork-2.web.codeaurora.org (Postfix) with ESMTP id 3D8F91575 for ; Thu, 30 Apr 2020 19:22:31 +0000 (UTC) Received: from vger.kernel.org (vger.kernel.org [23.128.96.18]) by mail.kernel.org (Postfix) with ESMTP id 303182072A for ; Thu, 30 Apr 2020 19:22:31 +0000 (UTC) Received: (majordomo@vger.kernel.org) by vger.kernel.org via listexpand id S1726366AbgD3TW0 (ORCPT ); Thu, 30 Apr 2020 15:22:26 -0400 Received: from mail-il-dmz.mellanox.com ([193.47.165.129]:56873 "EHLO mellanox.co.il" rhost-flags-OK-OK-OK-FAIL) by vger.kernel.org with ESMTP id 
S1726660AbgD3TV7 (ORCPT ); Thu, 30 Apr 2020 15:21:59 -0400 Received: from Internal Mail-Server by MTLPINE2 (envelope-from maorg@mellanox.com) with ESMTPS (AES256-SHA encrypted); 30 Apr 2020 22:21:53 +0300 Received: from dev-l-vrt-201.mtl.labs.mlnx (dev-l-vrt-201.mtl.labs.mlnx [10.134.201.1]) by labmailer.mlnx (8.13.8/8.13.8) with ESMTP id 03UJLqAM004128; Thu, 30 Apr 2020 22:21:53 +0300 From: Maor Gottlieb To: davem@davemloft.net, jgg@mellanox.com, dledford@redhat.com, j.vosburgh@gmail.com, vfalico@gmail.com, andy@greyhouse.net, kuba@kernel.org, jiri@mellanox.com, dsahern@kernel.org Cc: leonro@mellanox.com, saeedm@mellanox.com, linux-rdma@vger.kernel.org, netdev@vger.kernel.org, alexr@mellanox.com, Maor Gottlieb Subject: [PATCH V8 mlx5-next 04/16] bonding/alb: Add helper functions to get the xmit slave Date: Thu, 30 Apr 2020 22:21:34 +0300 Message-Id: <20200430192146.12863-5-maorg@mellanox.com> X-Mailer: git-send-email 2.17.2 In-Reply-To: <20200430192146.12863-1-maorg@mellanox.com> References: <20200430192146.12863-1-maorg@mellanox.com> Sender: linux-rdma-owner@vger.kernel.org Precedence: bulk List-ID: X-Mailing-List: linux-rdma@vger.kernel.org Add two helper functions to get the xmit slave of a bond in alb or tlb mode. Extract the logic that finds the xmit slave from the xmit flow into a function. The xmit flow transmits through this slave, and in the following patches the new .ndo will call the helper function to return the xmit slave. 
Signed-off-by: Maor Gottlieb Reviewed-by: Jiri Pirko Reviewed-by: Jay Vosburgh --- drivers/net/bonding/bond_alb.c | 35 +++++++++++++++++++++++++--------- include/net/bond_alb.h | 4 ++++ 2 files changed, 30 insertions(+), 9 deletions(-) diff --git a/drivers/net/bonding/bond_alb.c b/drivers/net/bonding/bond_alb.c index 7bb49b049dcc..e863c694c309 100644 --- a/drivers/net/bonding/bond_alb.c +++ b/drivers/net/bonding/bond_alb.c @@ -1334,11 +1334,11 @@ static netdev_tx_t bond_do_alb_xmit(struct sk_buff *skb, struct bonding *bond, return NETDEV_TX_OK; } -netdev_tx_t bond_tlb_xmit(struct sk_buff *skb, struct net_device *bond_dev) +struct slave *bond_xmit_tlb_slave_get(struct bonding *bond, + struct sk_buff *skb) { - struct bonding *bond = netdev_priv(bond_dev); - struct ethhdr *eth_data; struct slave *tx_slave = NULL; + struct ethhdr *eth_data; u32 hash_index; skb_reset_mac_header(skb); @@ -1369,20 +1369,29 @@ netdev_tx_t bond_tlb_xmit(struct sk_buff *skb, struct net_device *bond_dev) break; } } - return bond_do_alb_xmit(skb, bond, tx_slave); + return tx_slave; } -netdev_tx_t bond_alb_xmit(struct sk_buff *skb, struct net_device *bond_dev) +netdev_tx_t bond_tlb_xmit(struct sk_buff *skb, struct net_device *bond_dev) { struct bonding *bond = netdev_priv(bond_dev); - struct ethhdr *eth_data; + struct slave *tx_slave; + + tx_slave = bond_xmit_tlb_slave_get(bond, skb); + return bond_do_alb_xmit(skb, bond, tx_slave); +} + +struct slave *bond_xmit_alb_slave_get(struct bonding *bond, + struct sk_buff *skb) +{ struct alb_bond_info *bond_info = &(BOND_ALB_INFO(bond)); - struct slave *tx_slave = NULL; static const __be32 ip_bcast = htonl(0xffffffff); - int hash_size = 0; + struct slave *tx_slave = NULL; + const u8 *hash_start = NULL; bool do_tx_balance = true; + struct ethhdr *eth_data; u32 hash_index = 0; - const u8 *hash_start = NULL; + int hash_size = 0; skb_reset_mac_header(skb); eth_data = eth_hdr(skb); @@ -1501,7 +1510,15 @@ netdev_tx_t bond_alb_xmit(struct sk_buff *skb, struct 
net_device *bond_dev) count]; } } + return tx_slave; +} + +netdev_tx_t bond_alb_xmit(struct sk_buff *skb, struct net_device *bond_dev) +{ + struct bonding *bond = netdev_priv(bond_dev); + struct slave *tx_slave = NULL; + tx_slave = bond_xmit_alb_slave_get(bond, skb); return bond_do_alb_xmit(skb, bond, tx_slave); } diff --git a/include/net/bond_alb.h b/include/net/bond_alb.h index b3504fcd773d..f6af76c87a6c 100644 --- a/include/net/bond_alb.h +++ b/include/net/bond_alb.h @@ -158,6 +158,10 @@ void bond_alb_handle_link_change(struct bonding *bond, struct slave *slave, char void bond_alb_handle_active_change(struct bonding *bond, struct slave *new_slave); int bond_alb_xmit(struct sk_buff *skb, struct net_device *bond_dev); int bond_tlb_xmit(struct sk_buff *skb, struct net_device *bond_dev); +struct slave *bond_xmit_alb_slave_get(struct bonding *bond, + struct sk_buff *skb); +struct slave *bond_xmit_tlb_slave_get(struct bonding *bond, + struct sk_buff *skb); void bond_alb_monitor(struct work_struct *); int bond_alb_set_mac_address(struct net_device *bond_dev, void *addr); void bond_alb_clear_vlan(struct bonding *bond, unsigned short vlan_id); From patchwork Thu Apr 30 19:21:35 2020 Content-Type: text/plain; charset="utf-8" MIME-Version: 1.0 Content-Transfer-Encoding: 7bit X-Patchwork-Submitter: Maor Gottlieb X-Patchwork-Id: 11521305 X-Patchwork-Delegate: jgg@ziepe.ca Return-Path: Received: from mail.kernel.org (pdx-korg-mail-1.web.codeaurora.org [172.30.200.123]) by pdx-korg-patchwork-2.web.codeaurora.org (Postfix) with ESMTP id BE18F15E6 for ; Thu, 30 Apr 2020 19:22:02 +0000 (UTC) Received: from vger.kernel.org (vger.kernel.org [23.128.96.18]) by mail.kernel.org (Postfix) with ESMTP id A746420870 for ; Thu, 30 Apr 2020 19:22:02 +0000 (UTC) Received: (majordomo@vger.kernel.org) by vger.kernel.org via listexpand id S1727053AbgD3TWB (ORCPT ); Thu, 30 Apr 2020 15:22:01 -0400 Received: from mail-il-dmz.mellanox.com ([193.47.165.129]:44351 "EHLO mellanox.co.il" 
rhost-flags-OK-OK-OK-FAIL) by vger.kernel.org with ESMTP id S1727052AbgD3TWA (ORCPT ); Thu, 30 Apr 2020 15:22:00 -0400 Received: from Internal Mail-Server by MTLPINE1 (envelope-from maorg@mellanox.com) with ESMTPS (AES256-SHA encrypted); 30 Apr 2020 22:21:53 +0300 Received: from dev-l-vrt-201.mtl.labs.mlnx (dev-l-vrt-201.mtl.labs.mlnx [10.134.201.1]) by labmailer.mlnx (8.13.8/8.13.8) with ESMTP id 03UJLqAN004128; Thu, 30 Apr 2020 22:21:53 +0300 From: Maor Gottlieb To: davem@davemloft.net, jgg@mellanox.com, dledford@redhat.com, j.vosburgh@gmail.com, vfalico@gmail.com, andy@greyhouse.net, kuba@kernel.org, jiri@mellanox.com, dsahern@kernel.org Cc: leonro@mellanox.com, saeedm@mellanox.com, linux-rdma@vger.kernel.org, netdev@vger.kernel.org, alexr@mellanox.com, Maor Gottlieb Subject: [PATCH V8 mlx5-next 05/16] bonding: Add helper function to get the xmit slave based on hash Date: Thu, 30 Apr 2020 22:21:35 +0300 Message-Id: <20200430192146.12863-6-maorg@mellanox.com> X-Mailer: git-send-email 2.17.2 In-Reply-To: <20200430192146.12863-1-maorg@mellanox.com> References: <20200430192146.12863-1-maorg@mellanox.com> Sender: linux-rdma-owner@vger.kernel.org Precedence: bulk List-ID: X-Mailing-List: linux-rdma@vger.kernel.org Both xor and 802.3ad modes use bond_xmit_hash to get the xmit slave. Extract the logic into a helper function so that it can be used in the following patches by the .ndo to get the xmit slave. 
Signed-off-by: Maor Gottlieb Reviewed-by: Jiri Pirko Reviewed-by: Jay Vosburgh --- drivers/net/bonding/bond_main.c | 28 +++++++++++++++++++++------- 1 file changed, 21 insertions(+), 7 deletions(-) diff --git a/drivers/net/bonding/bond_main.c b/drivers/net/bonding/bond_main.c index 2cb41d480ae2..8e6305955c75 100644 --- a/drivers/net/bonding/bond_main.c +++ b/drivers/net/bonding/bond_main.c @@ -4185,6 +4185,23 @@ int bond_update_slave_arr(struct bonding *bond, struct slave *skipslave) return ret; } +static struct slave *bond_xmit_3ad_xor_slave_get(struct bonding *bond, + struct sk_buff *skb, + struct bond_up_slave *slaves) +{ + struct slave *slave; + unsigned int count; + u32 hash; + + hash = bond_xmit_hash(bond, skb); + count = slaves ? READ_ONCE(slaves->count) : 0; + if (unlikely(!count)) + return NULL; + + slave = slaves->arr[hash % count]; + return slave; +} + /* Use this Xmit function for 3AD as well as XOR modes. The current * usable slave array is formed in the control path. The xmit function * just calculates hash and sends the packet out. @@ -4193,18 +4210,15 @@ static netdev_tx_t bond_3ad_xor_xmit(struct sk_buff *skb, struct net_device *dev) { struct bonding *bond = netdev_priv(dev); - struct slave *slave; struct bond_up_slave *slaves; - unsigned int count; + struct slave *slave; slaves = rcu_dereference(bond->usable_slaves); - count = slaves ? 
READ_ONCE(slaves->count) : 0; - if (likely(count)) { - slave = slaves->arr[bond_xmit_hash(bond, skb) % count]; + slave = bond_xmit_3ad_xor_slave_get(bond, skb, slaves); + if (likely(slave)) bond_dev_queue_xmit(bond, skb, slave->dev); - } else { + else bond_tx_drop(dev, skb); - } return NETDEV_TX_OK; } From patchwork Thu Apr 30 19:21:36 2020 Content-Type: text/plain; charset="utf-8" MIME-Version: 1.0 Content-Transfer-Encoding: 7bit X-Patchwork-Submitter: Maor Gottlieb X-Patchwork-Id: 11521323 X-Patchwork-Delegate: jgg@ziepe.ca Return-Path: Received: from mail.kernel.org (pdx-korg-mail-1.web.codeaurora.org [172.30.200.123]) by pdx-korg-patchwork-2.web.codeaurora.org (Postfix) with ESMTP id ECF7281 for ; Thu, 30 Apr 2020 19:22:28 +0000 (UTC) Received: from vger.kernel.org (vger.kernel.org [23.128.96.18]) by mail.kernel.org (Postfix) with ESMTP id DFFA92072A for ; Thu, 30 Apr 2020 19:22:28 +0000 (UTC) Received: (majordomo@vger.kernel.org) by vger.kernel.org via listexpand id S1726660AbgD3TW0 (ORCPT ); Thu, 30 Apr 2020 15:22:26 -0400 Received: from mail-il-dmz.mellanox.com ([193.47.165.129]:56897 "EHLO mellanox.co.il" rhost-flags-OK-OK-OK-FAIL) by vger.kernel.org with ESMTP id S1727023AbgD3TV7 (ORCPT ); Thu, 30 Apr 2020 15:21:59 -0400 Received: from Internal Mail-Server by MTLPINE2 (envelope-from maorg@mellanox.com) with ESMTPS (AES256-SHA encrypted); 30 Apr 2020 22:21:53 +0300 Received: from dev-l-vrt-201.mtl.labs.mlnx (dev-l-vrt-201.mtl.labs.mlnx [10.134.201.1]) by labmailer.mlnx (8.13.8/8.13.8) with ESMTP id 03UJLqAO004128; Thu, 30 Apr 2020 22:21:53 +0300 From: Maor Gottlieb To: davem@davemloft.net, jgg@mellanox.com, dledford@redhat.com, j.vosburgh@gmail.com, vfalico@gmail.com, andy@greyhouse.net, kuba@kernel.org, jiri@mellanox.com, dsahern@kernel.org Cc: leonro@mellanox.com, saeedm@mellanox.com, linux-rdma@vger.kernel.org, netdev@vger.kernel.org, alexr@mellanox.com, Maor Gottlieb Subject: [PATCH V8 mlx5-next 06/16] bonding: Add helper function to get the xmit slave 
in rr mode Date: Thu, 30 Apr 2020 22:21:36 +0300 Message-Id: <20200430192146.12863-7-maorg@mellanox.com> X-Mailer: git-send-email 2.17.2 In-Reply-To: <20200430192146.12863-1-maorg@mellanox.com> References: <20200430192146.12863-1-maorg@mellanox.com> Sender: linux-rdma-owner@vger.kernel.org Precedence: bulk List-ID: X-Mailing-List: linux-rdma@vger.kernel.org Add a helper function to get the xmit slave when the bond is in round-robin mode. Change bond_xmit_slave_id to bond_get_slave_by_id so that the logic for finding the next slave for transmission can be used both by the xmit flow and by the .ndo to get the xmit slave. Signed-off-by: Maor Gottlieb Reviewed-by: Jiri Pirko Reviewed-by: Jay Vosburgh --- drivers/net/bonding/bond_main.c | 56 ++++++++++++++++++--------------- 1 file changed, 30 insertions(+), 26 deletions(-) diff --git a/drivers/net/bonding/bond_main.c b/drivers/net/bonding/bond_main.c index 8e6305955c75..09c8485e965d 100644 --- a/drivers/net/bonding/bond_main.c +++ b/drivers/net/bonding/bond_main.c @@ -3923,16 +3923,15 @@ static int bond_set_mac_address(struct net_device *bond_dev, void *addr) } /** - * bond_xmit_slave_id - transmit skb through slave with slave_id + * bond_get_slave_by_id - get xmit slave with slave_id * @bond: bonding device that is transmitting - * @skb: buffer to transmit * @slave_id: slave id up to slave_cnt-1 through which to transmit * - * This function tries to transmit through slave with slave_id but in case + * This function tries to get slave with slave_id but in case * it fails, it tries to find the first available slave for transmission. - * The skb is consumed in all cases, thus the function is void. 
*/ -static void bond_xmit_slave_id(struct bonding *bond, struct sk_buff *skb, int slave_id) +static struct slave *bond_get_slave_by_id(struct bonding *bond, + int slave_id) { struct list_head *iter; struct slave *slave; @@ -3941,10 +3940,8 @@ static void bond_xmit_slave_id(struct bonding *bond, struct sk_buff *skb, int sl /* Here we start from the slave with slave_id */ bond_for_each_slave_rcu(bond, slave, iter) { if (--i < 0) { - if (bond_slave_can_tx(slave)) { - bond_dev_queue_xmit(bond, skb, slave->dev); - return; - } + if (bond_slave_can_tx(slave)) + return slave; } } @@ -3953,13 +3950,11 @@ static void bond_xmit_slave_id(struct bonding *bond, struct sk_buff *skb, int sl bond_for_each_slave_rcu(bond, slave, iter) { if (--i < 0) break; - if (bond_slave_can_tx(slave)) { - bond_dev_queue_xmit(bond, skb, slave->dev); - return; - } + if (bond_slave_can_tx(slave)) + return slave; } - /* no slave that can tx has been found */ - bond_tx_drop(bond->dev, skb); + + return NULL; } /** @@ -3995,10 +3990,9 @@ static u32 bond_rr_gen_slave_id(struct bonding *bond) return slave_id; } -static netdev_tx_t bond_xmit_roundrobin(struct sk_buff *skb, - struct net_device *bond_dev) +static struct slave *bond_xmit_roundrobin_slave_get(struct bonding *bond, + struct sk_buff *skb) { - struct bonding *bond = netdev_priv(bond_dev); struct slave *slave; int slave_cnt; u32 slave_id; @@ -4020,21 +4014,31 @@ static netdev_tx_t bond_xmit_roundrobin(struct sk_buff *skb, if (iph->protocol == IPPROTO_IGMP) { slave = rcu_dereference(bond->curr_active_slave); if (slave) - bond_dev_queue_xmit(bond, skb, slave->dev); - else - bond_xmit_slave_id(bond, skb, 0); - return NETDEV_TX_OK; + return slave; + return bond_get_slave_by_id(bond, 0); } } non_igmp: slave_cnt = READ_ONCE(bond->slave_cnt); if (likely(slave_cnt)) { - slave_id = bond_rr_gen_slave_id(bond); - bond_xmit_slave_id(bond, skb, slave_id % slave_cnt); - } else { - bond_tx_drop(bond_dev, skb); + slave_id = bond_rr_gen_slave_id(bond) % slave_cnt; 
+ return bond_get_slave_by_id(bond, slave_id); } + return NULL; +} + +static netdev_tx_t bond_xmit_roundrobin(struct sk_buff *skb, + struct net_device *bond_dev) +{ + struct bonding *bond = netdev_priv(bond_dev); + struct slave *slave; + + slave = bond_xmit_roundrobin_slave_get(bond, skb); + if (slave) + bond_dev_queue_xmit(bond, skb, slave->dev); + else + bond_tx_drop(bond_dev, skb); return NETDEV_TX_OK; } From patchwork Thu Apr 30 19:21:37 2020 Content-Type: text/plain; charset="utf-8" MIME-Version: 1.0 Content-Transfer-Encoding: 7bit X-Patchwork-Submitter: Maor Gottlieb X-Patchwork-Id: 11521303 X-Patchwork-Delegate: jgg@ziepe.ca Return-Path: Received: from mail.kernel.org (pdx-korg-mail-1.web.codeaurora.org [172.30.200.123]) by pdx-korg-patchwork-2.web.codeaurora.org (Postfix) with ESMTP id 27A8281 for ; Thu, 30 Apr 2020 19:22:02 +0000 (UTC) Received: from vger.kernel.org (vger.kernel.org [23.128.96.18]) by mail.kernel.org (Postfix) with ESMTP id 0EAC020836 for ; Thu, 30 Apr 2020 19:22:02 +0000 (UTC) Received: (majordomo@vger.kernel.org) by vger.kernel.org via listexpand id S1727065AbgD3TWB (ORCPT ); Thu, 30 Apr 2020 15:22:01 -0400 Received: from mail-il-dmz.mellanox.com ([193.47.165.129]:44356 "EHLO mellanox.co.il" rhost-flags-OK-OK-OK-FAIL) by vger.kernel.org with ESMTP id S1727053AbgD3TWA (ORCPT ); Thu, 30 Apr 2020 15:22:00 -0400 Received: from Internal Mail-Server by MTLPINE1 (envelope-from maorg@mellanox.com) with ESMTPS (AES256-SHA encrypted); 30 Apr 2020 22:21:53 +0300 Received: from dev-l-vrt-201.mtl.labs.mlnx (dev-l-vrt-201.mtl.labs.mlnx [10.134.201.1]) by labmailer.mlnx (8.13.8/8.13.8) with ESMTP id 03UJLqAP004128; Thu, 30 Apr 2020 22:21:53 +0300 From: Maor Gottlieb To: davem@davemloft.net, jgg@mellanox.com, dledford@redhat.com, j.vosburgh@gmail.com, vfalico@gmail.com, andy@greyhouse.net, kuba@kernel.org, jiri@mellanox.com, dsahern@kernel.org Cc: leonro@mellanox.com, saeedm@mellanox.com, linux-rdma@vger.kernel.org, netdev@vger.kernel.org, 
alexr@mellanox.com, Maor Gottlieb Subject: [PATCH V8 mlx5-next 07/16] bonding: Add function to get the xmit slave in active-backup mode Date: Thu, 30 Apr 2020 22:21:37 +0300 Message-Id: <20200430192146.12863-8-maorg@mellanox.com> X-Mailer: git-send-email 2.17.2 In-Reply-To: <20200430192146.12863-1-maorg@mellanox.com> References: <20200430192146.12863-1-maorg@mellanox.com> Sender: linux-rdma-owner@vger.kernel.org Precedence: bulk List-ID: X-Mailing-List: linux-rdma@vger.kernel.org Add helper function to get the xmit slave in active-backup mode. It's only a one-line function that returns the curr_active_slave, but it will be used both in the xmit flow and by the new .ndo to get the xmit slave. Signed-off-by: Maor Gottlieb Reviewed-by: Jiri Pirko Reviewed-by: Jay Vosburgh --- drivers/net/bonding/bond_main.c | 8 +++++++- 1 file changed, 7 insertions(+), 1 deletion(-) diff --git a/drivers/net/bonding/bond_main.c b/drivers/net/bonding/bond_main.c index 09c8485e965d..1b0ae750d732 100644 --- a/drivers/net/bonding/bond_main.c +++ b/drivers/net/bonding/bond_main.c @@ -4042,6 +4042,12 @@ static netdev_tx_t bond_xmit_roundrobin(struct sk_buff *skb, return NETDEV_TX_OK; } +static struct slave *bond_xmit_activebackup_slave_get(struct bonding *bond, + struct sk_buff *skb) +{ + return rcu_dereference(bond->curr_active_slave); +} + /* In active-backup mode, we know that bond->curr_active_slave is always valid if * the bond has a usable interface. 
*/ @@ -4051,7 +4057,7 @@ static netdev_tx_t bond_xmit_activebackup(struct sk_buff *skb, struct bonding *bond = netdev_priv(bond_dev); struct slave *slave; - slave = rcu_dereference(bond->curr_active_slave); + slave = bond_xmit_activebackup_slave_get(bond, skb); if (slave) bond_dev_queue_xmit(bond, skb, slave->dev); else From patchwork Thu Apr 30 19:21:38 2020 Content-Type: text/plain; charset="utf-8" MIME-Version: 1.0 Content-Transfer-Encoding: 7bit X-Patchwork-Submitter: Maor Gottlieb X-Patchwork-Id: 11521317 X-Patchwork-Delegate: jgg@ziepe.ca Return-Path: Received: from mail.kernel.org (pdx-korg-mail-1.web.codeaurora.org [172.30.200.123]) by pdx-korg-patchwork-2.web.codeaurora.org (Postfix) with ESMTP id DAD8A1575 for ; Thu, 30 Apr 2020 19:22:17 +0000 (UTC) Received: from vger.kernel.org (vger.kernel.org [23.128.96.18]) by mail.kernel.org (Postfix) with ESMTP id CE3482073E for ; Thu, 30 Apr 2020 19:22:17 +0000 (UTC) Received: (majordomo@vger.kernel.org) by vger.kernel.org via listexpand id S1727074AbgD3TWM (ORCPT ); Thu, 30 Apr 2020 15:22:12 -0400 Received: from mail-il-dmz.mellanox.com ([193.47.165.129]:56903 "EHLO mellanox.co.il" rhost-flags-OK-OK-OK-FAIL) by vger.kernel.org with ESMTP id S1727027AbgD3TWA (ORCPT ); Thu, 30 Apr 2020 15:22:00 -0400 Received: from Internal Mail-Server by MTLPINE2 (envelope-from maorg@mellanox.com) with ESMTPS (AES256-SHA encrypted); 30 Apr 2020 22:21:53 +0300 Received: from dev-l-vrt-201.mtl.labs.mlnx (dev-l-vrt-201.mtl.labs.mlnx [10.134.201.1]) by labmailer.mlnx (8.13.8/8.13.8) with ESMTP id 03UJLqAQ004128; Thu, 30 Apr 2020 22:21:53 +0300 From: Maor Gottlieb To: davem@davemloft.net, jgg@mellanox.com, dledford@redhat.com, j.vosburgh@gmail.com, vfalico@gmail.com, andy@greyhouse.net, kuba@kernel.org, jiri@mellanox.com, dsahern@kernel.org Cc: leonro@mellanox.com, saeedm@mellanox.com, linux-rdma@vger.kernel.org, netdev@vger.kernel.org, alexr@mellanox.com, Maor Gottlieb Subject: [PATCH V8 mlx5-next 08/16] bonding: Add array of all 
slaves Date: Thu, 30 Apr 2020 22:21:38 +0300 Message-Id: <20200430192146.12863-9-maorg@mellanox.com> X-Mailer: git-send-email 2.17.2 In-Reply-To: <20200430192146.12863-1-maorg@mellanox.com> References: <20200430192146.12863-1-maorg@mellanox.com> Sender: linux-rdma-owner@vger.kernel.org Precedence: bulk List-ID: X-Mailing-List: linux-rdma@vger.kernel.org Keep all slaves in array so it could be used to get the xmit slave assume all the slaves are active. The logic to add slave to the array is like the usable slaves, except that we also add slaves that currently can't transmit - not up or active. Signed-off-by: Maor Gottlieb Reviewed-by: Jiri Pirko Reviewed-by: Jay Vosburgh --- drivers/net/bonding/bond_main.c | 78 +++++++++++++++++++++++++-------- include/net/bonding.h | 3 +- 2 files changed, 61 insertions(+), 20 deletions(-) diff --git a/drivers/net/bonding/bond_main.c b/drivers/net/bonding/bond_main.c index 1b0ae750d732..2de693f0262e 100644 --- a/drivers/net/bonding/bond_main.c +++ b/drivers/net/bonding/bond_main.c @@ -4120,6 +4120,38 @@ static void bond_skip_slave(struct bond_up_slave *slaves, } } +static void bond_set_slave_arr(struct bonding *bond, + struct bond_up_slave *usable_slaves, + struct bond_up_slave *all_slaves) +{ + struct bond_up_slave *usable, *all; + + usable = rtnl_dereference(bond->usable_slaves); + rcu_assign_pointer(bond->usable_slaves, usable_slaves); + kfree_rcu(usable, rcu); + + all = rtnl_dereference(bond->all_slaves); + rcu_assign_pointer(bond->all_slaves, all_slaves); + kfree_rcu(all, rcu); +} + +static void bond_reset_slave_arr(struct bonding *bond) +{ + struct bond_up_slave *usable, *all; + + usable = rtnl_dereference(bond->usable_slaves); + if (usable) { + RCU_INIT_POINTER(bond->usable_slaves, NULL); + kfree_rcu(usable, rcu); + } + + all = rtnl_dereference(bond->all_slaves); + if (all) { + RCU_INIT_POINTER(bond->all_slaves, NULL); + kfree_rcu(all, rcu); + } +} + /* Build the usable slaves array in control path for modes that use 
xmit-hash * to determine the slave interface - * (a) BOND_MODE_8023AD @@ -4130,7 +4162,7 @@ static void bond_skip_slave(struct bond_up_slave *slaves, */ int bond_update_slave_arr(struct bonding *bond, struct slave *skipslave) { - struct bond_up_slave *usable_slaves, *old_usable_slaves; + struct bond_up_slave *usable_slaves = NULL, *all_slaves = NULL; struct slave *slave; struct list_head *iter; int agg_id = 0; @@ -4142,7 +4174,9 @@ int bond_update_slave_arr(struct bonding *bond, struct slave *skipslave) usable_slaves = kzalloc(struct_size(usable_slaves, arr, bond->slave_cnt), GFP_KERNEL); - if (!usable_slaves) { + all_slaves = kzalloc(struct_size(all_slaves, arr, + bond->slave_cnt), GFP_KERNEL); + if (!usable_slaves || !all_slaves) { ret = -ENOMEM; goto out; } @@ -4151,20 +4185,19 @@ int bond_update_slave_arr(struct bonding *bond, struct slave *skipslave) if (bond_3ad_get_active_agg_info(bond, &ad_info)) { pr_debug("bond_3ad_get_active_agg_info failed\n"); - kfree_rcu(usable_slaves, rcu); /* No active aggragator means it's not safe to use * the previous array. 
*/ - old_usable_slaves = rtnl_dereference(bond->usable_slaves); - if (old_usable_slaves) { - RCU_INIT_POINTER(bond->usable_slaves, NULL); - kfree_rcu(old_usable_slaves, rcu); - } + bond_reset_slave_arr(bond); goto out; } agg_id = ad_info.aggregator_id; } bond_for_each_slave(bond, slave, iter) { + if (skipslave == slave) + continue; + + all_slaves->arr[all_slaves->count++] = slave; if (BOND_MODE(bond) == BOND_MODE_8023AD) { struct aggregator *agg; @@ -4174,8 +4207,6 @@ int bond_update_slave_arr(struct bonding *bond, struct slave *skipslave) } if (!bond_slave_can_tx(slave)) continue; - if (skipslave == slave) - continue; slave_dbg(bond->dev, slave->dev, "Adding slave to tx hash array[%d]\n", usable_slaves->count); @@ -4183,14 +4214,17 @@ int bond_update_slave_arr(struct bonding *bond, struct slave *skipslave) usable_slaves->arr[usable_slaves->count++] = slave; } - old_usable_slaves = rtnl_dereference(bond->usable_slaves); - rcu_assign_pointer(bond->usable_slaves, usable_slaves); - if (old_usable_slaves) - kfree_rcu(old_usable_slaves, rcu); + bond_set_slave_arr(bond, usable_slaves, all_slaves); + return ret; out: - if (ret != 0 && skipslave) + if (ret != 0 && skipslave) { + bond_skip_slave(rtnl_dereference(bond->all_slaves), + skipslave); bond_skip_slave(rtnl_dereference(bond->usable_slaves), skipslave); + } + kfree_rcu(all_slaves, rcu); + kfree_rcu(usable_slaves, rcu); return ret; } @@ -4501,9 +4535,9 @@ void bond_setup(struct net_device *bond_dev) static void bond_uninit(struct net_device *bond_dev) { struct bonding *bond = netdev_priv(bond_dev); + struct bond_up_slave *usable, *all; struct list_head *iter; struct slave *slave; - struct bond_up_slave *arr; bond_netpoll_cleanup(bond_dev); @@ -4512,10 +4546,16 @@ static void bond_uninit(struct net_device *bond_dev) __bond_release_one(bond_dev, slave->dev, true, true); netdev_info(bond_dev, "Released all slaves\n"); - arr = rtnl_dereference(bond->usable_slaves); - if (arr) { + usable = 
rtnl_dereference(bond->usable_slaves); + if (usable) { RCU_INIT_POINTER(bond->usable_slaves, NULL); - kfree_rcu(arr, rcu); + kfree_rcu(usable, rcu); + } + + all = rtnl_dereference(bond->all_slaves); + if (all) { + RCU_INIT_POINTER(bond->all_slaves, NULL); + kfree_rcu(all, rcu); } list_del(&bond->bond_list); diff --git a/include/net/bonding.h b/include/net/bonding.h index 33bdb6d5182d..b5e49bedbc9f 100644 --- a/include/net/bonding.h +++ b/include/net/bonding.h @@ -200,7 +200,8 @@ struct bonding { struct slave __rcu *curr_active_slave; struct slave __rcu *current_arp_slave; struct slave __rcu *primary_slave; - struct bond_up_slave __rcu *usable_slaves; /* Array of usable slaves */ + struct bond_up_slave __rcu *usable_slaves; + struct bond_up_slave __rcu *all_slaves; bool force_primary; s32 slave_cnt; /* never change this value outside the attach/detach wrappers */ int (*recv_probe)(const struct sk_buff *, struct bonding *, From patchwork Thu Apr 30 19:21:39 2020 Content-Type: text/plain; charset="utf-8" MIME-Version: 1.0 Content-Transfer-Encoding: 7bit X-Patchwork-Submitter: Maor Gottlieb X-Patchwork-Id: 11521311 X-Patchwork-Delegate: jgg@ziepe.ca Return-Path: Received: from mail.kernel.org (pdx-korg-mail-1.web.codeaurora.org [172.30.200.123]) by pdx-korg-patchwork-2.web.codeaurora.org (Postfix) with ESMTP id 7D68B81 for ; Thu, 30 Apr 2020 19:22:10 +0000 (UTC) Received: from vger.kernel.org (vger.kernel.org [23.128.96.18]) by mail.kernel.org (Postfix) with ESMTP id 655D22072A for ; Thu, 30 Apr 2020 19:22:10 +0000 (UTC) Received: (majordomo@vger.kernel.org) by vger.kernel.org via listexpand id S1727089AbgD3TWA (ORCPT ); Thu, 30 Apr 2020 15:22:00 -0400 Received: from mail-il-dmz.mellanox.com ([193.47.165.129]:44385 "EHLO mellanox.co.il" rhost-flags-OK-OK-OK-FAIL) by vger.kernel.org with ESMTP id S1727065AbgD3TWA (ORCPT ); Thu, 30 Apr 2020 15:22:00 -0400 Received: from Internal Mail-Server by MTLPINE1 (envelope-from maorg@mellanox.com) with ESMTPS (AES256-SHA 
encrypted); 30 Apr 2020 22:21:53 +0300 Received: from dev-l-vrt-201.mtl.labs.mlnx (dev-l-vrt-201.mtl.labs.mlnx [10.134.201.1]) by labmailer.mlnx (8.13.8/8.13.8) with ESMTP id 03UJLqAR004128; Thu, 30 Apr 2020 22:21:53 +0300 From: Maor Gottlieb To: davem@davemloft.net, jgg@mellanox.com, dledford@redhat.com, j.vosburgh@gmail.com, vfalico@gmail.com, andy@greyhouse.net, kuba@kernel.org, jiri@mellanox.com, dsahern@kernel.org Cc: leonro@mellanox.com, saeedm@mellanox.com, linux-rdma@vger.kernel.org, netdev@vger.kernel.org, alexr@mellanox.com, Maor Gottlieb Subject: [PATCH V8 mlx5-next 09/16] bonding: Implement ndo_get_xmit_slave Date: Thu, 30 Apr 2020 22:21:39 +0300 Message-Id: <20200430192146.12863-10-maorg@mellanox.com> X-Mailer: git-send-email 2.17.2 In-Reply-To: <20200430192146.12863-1-maorg@mellanox.com> References: <20200430192146.12863-1-maorg@mellanox.com> Sender: linux-rdma-owner@vger.kernel.org Precedence: bulk List-ID: X-Mailing-List: linux-rdma@vger.kernel.org Add implementation of ndo_get_xmit_slave. Find the slave by using the helper function according to the bond mode. If the caller set all_slaves to true, then it assumes that all slaves are available to transmit. 
Signed-off-by: Maor Gottlieb Reviewed-by: Jay Vosburgh Reviewed-by: Jiri Pirko --- drivers/net/bonding/bond_main.c | 43 +++++++++++++++++++++++++++++++++ 1 file changed, 43 insertions(+) diff --git a/drivers/net/bonding/bond_main.c b/drivers/net/bonding/bond_main.c index 2de693f0262e..39b1ad7edbb4 100644 --- a/drivers/net/bonding/bond_main.c +++ b/drivers/net/bonding/bond_main.c @@ -4347,6 +4347,48 @@ static u16 bond_select_queue(struct net_device *dev, struct sk_buff *skb, return txq; } +static struct net_device *bond_xmit_get_slave(struct net_device *master_dev, + struct sk_buff *skb, + bool all_slaves) +{ + struct bonding *bond = netdev_priv(master_dev); + struct bond_up_slave *slaves; + struct slave *slave = NULL; + + switch (BOND_MODE(bond)) { + case BOND_MODE_ROUNDROBIN: + slave = bond_xmit_roundrobin_slave_get(bond, skb); + break; + case BOND_MODE_ACTIVEBACKUP: + slave = bond_xmit_activebackup_slave_get(bond, skb); + break; + case BOND_MODE_8023AD: + case BOND_MODE_XOR: + if (all_slaves) + slaves = rcu_dereference(bond->all_slaves); + else + slaves = rcu_dereference(bond->usable_slaves); + slave = bond_xmit_3ad_xor_slave_get(bond, skb, slaves); + break; + case BOND_MODE_BROADCAST: + break; + case BOND_MODE_ALB: + slave = bond_xmit_alb_slave_get(bond, skb); + break; + case BOND_MODE_TLB: + slave = bond_xmit_tlb_slave_get(bond, skb); + break; + default: + /* Should never happen, mode already checked */ + WARN_ONCE(true, "Unknown bonding mode"); + break; + } + + if (slave) + return slave->dev; + return NULL; +} + static netdev_tx_t __bond_start_xmit(struct sk_buff *skb, struct net_device *dev) { struct bonding *bond = netdev_priv(dev); @@ -4468,6 +4510,7 @@ static const struct net_device_ops bond_netdev_ops = { .ndo_del_slave = bond_release, .ndo_fix_features = bond_fix_features, .ndo_features_check = passthru_features_check, + .ndo_get_xmit_slave = bond_xmit_get_slave, }; static const struct device_type bond_type = { From patchwork Thu Apr 30 19:21:40 2020 
Content-Type: text/plain; charset="utf-8" MIME-Version: 1.0 Content-Transfer-Encoding: 7bit X-Patchwork-Submitter: Maor Gottlieb X-Patchwork-Id: 11521309 X-Patchwork-Delegate: jgg@ziepe.ca Return-Path: Received: from mail.kernel.org (pdx-korg-mail-1.web.codeaurora.org [172.30.200.123]) by pdx-korg-patchwork-2.web.codeaurora.org (Postfix) with ESMTP id 43E8081 for ; Thu, 30 Apr 2020 19:22:09 +0000 (UTC) Received: from vger.kernel.org (vger.kernel.org [23.128.96.18]) by mail.kernel.org (Postfix) with ESMTP id 367342072A for ; Thu, 30 Apr 2020 19:22:09 +0000 (UTC) Received: (majordomo@vger.kernel.org) by vger.kernel.org via listexpand id S1727049AbgD3TWF (ORCPT ); Thu, 30 Apr 2020 15:22:05 -0400 Received: from mail-il-dmz.mellanox.com ([193.47.165.129]:56928 "EHLO mellanox.co.il" rhost-flags-OK-OK-OK-FAIL) by vger.kernel.org with ESMTP id S1727047AbgD3TWE (ORCPT ); Thu, 30 Apr 2020 15:22:04 -0400 Received: from Internal Mail-Server by MTLPINE2 (envelope-from maorg@mellanox.com) with ESMTPS (AES256-SHA encrypted); 30 Apr 2020 22:21:53 +0300 Received: from dev-l-vrt-201.mtl.labs.mlnx (dev-l-vrt-201.mtl.labs.mlnx [10.134.201.1]) by labmailer.mlnx (8.13.8/8.13.8) with ESMTP id 03UJLqAS004128; Thu, 30 Apr 2020 22:21:53 +0300 From: Maor Gottlieb To: davem@davemloft.net, jgg@mellanox.com, dledford@redhat.com, j.vosburgh@gmail.com, vfalico@gmail.com, andy@greyhouse.net, kuba@kernel.org, jiri@mellanox.com, dsahern@kernel.org Cc: leonro@mellanox.com, saeedm@mellanox.com, linux-rdma@vger.kernel.org, netdev@vger.kernel.org, alexr@mellanox.com, Maor Gottlieb Subject: [PATCH V8 mlx5-next 10/16] net/mlx5: Change lag mutex lock to spin lock Date: Thu, 30 Apr 2020 22:21:40 +0300 Message-Id: <20200430192146.12863-11-maorg@mellanox.com> X-Mailer: git-send-email 2.17.2 In-Reply-To: <20200430192146.12863-1-maorg@mellanox.com> References: <20200430192146.12863-1-maorg@mellanox.com> Sender: linux-rdma-owner@vger.kernel.org Precedence: bulk List-ID: X-Mailing-List: linux-rdma@vger.kernel.org 
The lag lock could be a spin lock, the critical section is short and there is no need that the thread will sleep. Change the lock that protects the LAG structure from mutex to spin lock. It is required for next patch that need to access this structure from context that we can't sleep. In addition there is no need to hold this lock when query the congestion counters. Signed-off-by: Maor Gottlieb Reviewed-by: Leon Romanovsky --- drivers/net/ethernet/mellanox/mlx5/core/lag.c | 42 +++++++++---------- 1 file changed, 21 insertions(+), 21 deletions(-) diff --git a/drivers/net/ethernet/mellanox/mlx5/core/lag.c b/drivers/net/ethernet/mellanox/mlx5/core/lag.c index 93052b07c76c..496a3408d771 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/lag.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/lag.c @@ -42,7 +42,7 @@ * Beware of lock dependencies (preferably, no locks should be acquired * under it). */ -static DEFINE_MUTEX(lag_mutex); +static DEFINE_SPINLOCK(lag_lock); static int mlx5_cmd_create_lag(struct mlx5_core_dev *dev, u8 remap_port1, u8 remap_port2) @@ -297,9 +297,9 @@ static void mlx5_do_bond(struct mlx5_lag *ldev) if (!dev0 || !dev1) return; - mutex_lock(&lag_mutex); + spin_lock(&lag_lock); tracker = ldev->tracker; - mutex_unlock(&lag_mutex); + spin_unlock(&lag_lock); do_bond = tracker.is_bonded && mlx5_lag_check_prereq(ldev); @@ -481,9 +481,9 @@ static int mlx5_lag_netdev_event(struct notifier_block *this, break; } - mutex_lock(&lag_mutex); + spin_lock(&lag_lock); ldev->tracker = tracker; - mutex_unlock(&lag_mutex); + spin_unlock(&lag_lock); if (changed) mlx5_queue_bond_work(ldev, 0); @@ -525,7 +525,7 @@ static void mlx5_lag_dev_add_pf(struct mlx5_lag *ldev, if (fn >= MLX5_MAX_PORTS) return; - mutex_lock(&lag_mutex); + spin_lock(&lag_lock); ldev->pf[fn].dev = dev; ldev->pf[fn].netdev = netdev; ldev->tracker.netdev_state[fn].link_up = 0; @@ -533,7 +533,7 @@ static void mlx5_lag_dev_add_pf(struct mlx5_lag *ldev, dev->priv.lag = ldev; - mutex_unlock(&lag_mutex); + 
spin_unlock(&lag_lock); } static void mlx5_lag_dev_remove_pf(struct mlx5_lag *ldev, @@ -548,11 +548,11 @@ static void mlx5_lag_dev_remove_pf(struct mlx5_lag *ldev, if (i == MLX5_MAX_PORTS) return; - mutex_lock(&lag_mutex); + spin_lock(&lag_lock); memset(&ldev->pf[i], 0, sizeof(*ldev->pf)); dev->priv.lag = NULL; - mutex_unlock(&lag_mutex); + spin_unlock(&lag_lock); } /* Must be called with intf_mutex held */ @@ -630,10 +630,10 @@ bool mlx5_lag_is_roce(struct mlx5_core_dev *dev) struct mlx5_lag *ldev; bool res; - mutex_lock(&lag_mutex); + spin_lock(&lag_lock); ldev = mlx5_lag_dev_get(dev); res = ldev && __mlx5_lag_is_roce(ldev); - mutex_unlock(&lag_mutex); + spin_unlock(&lag_lock); return res; } @@ -644,10 +644,10 @@ bool mlx5_lag_is_active(struct mlx5_core_dev *dev) struct mlx5_lag *ldev; bool res; - mutex_lock(&lag_mutex); + spin_lock(&lag_lock); ldev = mlx5_lag_dev_get(dev); res = ldev && __mlx5_lag_is_active(ldev); - mutex_unlock(&lag_mutex); + spin_unlock(&lag_lock); return res; } @@ -658,10 +658,10 @@ bool mlx5_lag_is_sriov(struct mlx5_core_dev *dev) struct mlx5_lag *ldev; bool res; - mutex_lock(&lag_mutex); + spin_lock(&lag_lock); ldev = mlx5_lag_dev_get(dev); res = ldev && __mlx5_lag_is_sriov(ldev); - mutex_unlock(&lag_mutex); + spin_unlock(&lag_lock); return res; } @@ -687,7 +687,7 @@ struct net_device *mlx5_lag_get_roce_netdev(struct mlx5_core_dev *dev) struct net_device *ndev = NULL; struct mlx5_lag *ldev; - mutex_lock(&lag_mutex); + spin_lock(&lag_lock); ldev = mlx5_lag_dev_get(dev); if (!(ldev && __mlx5_lag_is_roce(ldev))) @@ -704,7 +704,7 @@ struct net_device *mlx5_lag_get_roce_netdev(struct mlx5_core_dev *dev) dev_hold(ndev); unlock: - mutex_unlock(&lag_mutex); + spin_unlock(&lag_lock); return ndev; } @@ -746,7 +746,7 @@ int mlx5_lag_query_cong_counters(struct mlx5_core_dev *dev, memset(values, 0, sizeof(*values) * num_counters); - mutex_lock(&lag_mutex); + spin_lock(&lag_lock); ldev = mlx5_lag_dev_get(dev); if (ldev && __mlx5_lag_is_roce(ldev)) { 
num_ports = MLX5_MAX_PORTS; @@ -756,18 +756,18 @@ int mlx5_lag_query_cong_counters(struct mlx5_core_dev *dev, num_ports = 1; mdev[MLX5_LAG_P1] = dev; } + spin_unlock(&lag_lock); for (i = 0; i < num_ports; ++i) { ret = mlx5_cmd_query_cong_counter(mdev[i], false, out, outlen); if (ret) - goto unlock; + goto free; for (j = 0; j < num_counters; ++j) values[j] += be64_to_cpup((__be64 *)(out + offsets[j])); } -unlock: - mutex_unlock(&lag_mutex); +free: kvfree(out); return ret; } From patchwork Thu Apr 30 19:21:41 2020 Content-Type: text/plain; charset="utf-8" MIME-Version: 1.0 Content-Transfer-Encoding: 7bit X-Patchwork-Submitter: Maor Gottlieb X-Patchwork-Id: 11521301 X-Patchwork-Delegate: jgg@ziepe.ca Return-Path: Received: from mail.kernel.org (pdx-korg-mail-1.web.codeaurora.org [172.30.200.123]) by pdx-korg-patchwork-2.web.codeaurora.org (Postfix) with ESMTP id 6F9D91575 for ; Thu, 30 Apr 2020 19:22:01 +0000 (UTC) Received: from vger.kernel.org (vger.kernel.org [23.128.96.18]) by mail.kernel.org (Postfix) with ESMTP id 57FD42072A for ; Thu, 30 Apr 2020 19:22:01 +0000 (UTC) Received: (majordomo@vger.kernel.org) by vger.kernel.org via listexpand id S1727087AbgD3TWA (ORCPT ); Thu, 30 Apr 2020 15:22:00 -0400 Received: from mail-il-dmz.mellanox.com ([193.47.165.129]:44387 "EHLO mellanox.co.il" rhost-flags-OK-OK-OK-FAIL) by vger.kernel.org with ESMTP id S1727073AbgD3TWA (ORCPT ); Thu, 30 Apr 2020 15:22:00 -0400 Received: from Internal Mail-Server by MTLPINE1 (envelope-from maorg@mellanox.com) with ESMTPS (AES256-SHA encrypted); 30 Apr 2020 22:21:53 +0300 Received: from dev-l-vrt-201.mtl.labs.mlnx (dev-l-vrt-201.mtl.labs.mlnx [10.134.201.1]) by labmailer.mlnx (8.13.8/8.13.8) with ESMTP id 03UJLqAT004128; Thu, 30 Apr 2020 22:21:53 +0300 From: Maor Gottlieb To: davem@davemloft.net, jgg@mellanox.com, dledford@redhat.com, j.vosburgh@gmail.com, vfalico@gmail.com, andy@greyhouse.net, kuba@kernel.org, jiri@mellanox.com, dsahern@kernel.org Cc: leonro@mellanox.com, 
saeedm@mellanox.com, linux-rdma@vger.kernel.org, netdev@vger.kernel.org, alexr@mellanox.com, Maor Gottlieb Subject: [PATCH V8 mlx5-next 11/16] net/mlx5: Add support to get lag physical port Date: Thu, 30 Apr 2020 22:21:41 +0300 Message-Id: <20200430192146.12863-12-maorg@mellanox.com> X-Mailer: git-send-email 2.17.2 In-Reply-To: <20200430192146.12863-1-maorg@mellanox.com> References: <20200430192146.12863-1-maorg@mellanox.com> Sender: linux-rdma-owner@vger.kernel.org Precedence: bulk List-ID: X-Mailing-List: linux-rdma@vger.kernel.org Add function to get the device physical port of the lag slave. Signed-off-by: Maor Gottlieb Reviewed-by: Leon Romanovsky --- drivers/net/ethernet/mellanox/mlx5/core/lag.c | 24 +++++++++++++++++++ include/linux/mlx5/driver.h | 2 ++ 2 files changed, 26 insertions(+) diff --git a/drivers/net/ethernet/mellanox/mlx5/core/lag.c b/drivers/net/ethernet/mellanox/mlx5/core/lag.c index 496a3408d771..5461fbe47c0d 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/lag.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/lag.c @@ -710,6 +710,30 @@ struct net_device *mlx5_lag_get_roce_netdev(struct mlx5_core_dev *dev) } EXPORT_SYMBOL(mlx5_lag_get_roce_netdev); +u8 mlx5_lag_get_slave_port(struct mlx5_core_dev *dev, + struct net_device *slave) +{ + struct mlx5_lag *ldev; + u8 port = 0; + + spin_lock(&lag_lock); + ldev = mlx5_lag_dev_get(dev); + if (!(ldev && __mlx5_lag_is_roce(ldev))) + goto unlock; + + if (ldev->pf[MLX5_LAG_P1].netdev == slave) + port = MLX5_LAG_P1; + else + port = MLX5_LAG_P2; + + port = ldev->v2p_map[port]; + +unlock: + spin_unlock(&lag_lock); + return port; +} +EXPORT_SYMBOL(mlx5_lag_get_slave_port); + bool mlx5_lag_intf_add(struct mlx5_interface *intf, struct mlx5_priv *priv) { struct mlx5_core_dev *dev = container_of(priv, struct mlx5_core_dev, diff --git a/include/linux/mlx5/driver.h b/include/linux/mlx5/driver.h index 6f8f79ef829b..7b81b512d116 100644 --- a/include/linux/mlx5/driver.h +++ b/include/linux/mlx5/driver.h @@ -1062,6 
+1062,8 @@ bool mlx5_lag_is_sriov(struct mlx5_core_dev *dev); bool mlx5_lag_is_multipath(struct mlx5_core_dev *dev); bool mlx5_lag_is_active(struct mlx5_core_dev *dev); struct net_device *mlx5_lag_get_roce_netdev(struct mlx5_core_dev *dev); +u8 mlx5_lag_get_slave_port(struct mlx5_core_dev *dev, + struct net_device *slave); int mlx5_lag_query_cong_counters(struct mlx5_core_dev *dev, u64 *values, int num_counters, From patchwork Thu Apr 30 19:21:42 2020 Content-Type: text/plain; charset="utf-8" MIME-Version: 1.0 Content-Transfer-Encoding: 7bit X-Patchwork-Submitter: Maor Gottlieb X-Patchwork-Id: 11521347 X-Patchwork-Delegate: jgg@ziepe.ca Return-Path: Received: from mail.kernel.org (pdx-korg-mail-1.web.codeaurora.org [172.30.200.123]) by pdx-korg-patchwork-2.web.codeaurora.org (Postfix) with ESMTP id 98A6E1575 for ; Thu, 30 Apr 2020 19:28:46 +0000 (UTC) Received: from vger.kernel.org (vger.kernel.org [23.128.96.18]) by mail.kernel.org (Postfix) with ESMTP id 7C1572072A for ; Thu, 30 Apr 2020 19:28:46 +0000 (UTC) Received: (majordomo@vger.kernel.org) by vger.kernel.org via listexpand id S1726338AbgD3T2p (ORCPT ); Thu, 30 Apr 2020 15:28:45 -0400 Received: from mail-il-dmz.mellanox.com ([193.47.165.129]:57497 "EHLO mellanox.co.il" rhost-flags-OK-OK-OK-FAIL) by vger.kernel.org with ESMTP id S1726272AbgD3T2p (ORCPT ); Thu, 30 Apr 2020 15:28:45 -0400 Received: from Internal Mail-Server by MTLPINE2 (envelope-from maorg@mellanox.com) with ESMTPS (AES256-SHA encrypted); 30 Apr 2020 22:21:53 +0300 Received: from dev-l-vrt-201.mtl.labs.mlnx (dev-l-vrt-201.mtl.labs.mlnx [10.134.201.1]) by labmailer.mlnx (8.13.8/8.13.8) with ESMTP id 03UJLqAU004128; Thu, 30 Apr 2020 22:21:53 +0300 From: Maor Gottlieb To: davem@davemloft.net, jgg@mellanox.com, dledford@redhat.com, j.vosburgh@gmail.com, vfalico@gmail.com, andy@greyhouse.net, kuba@kernel.org, jiri@mellanox.com, dsahern@kernel.org Cc: leonro@mellanox.com, saeedm@mellanox.com, linux-rdma@vger.kernel.org, netdev@vger.kernel.org, 
alexr@mellanox.com, Maor Gottlieb Subject: [PATCH V8 rdma-next 12/16] RDMA: Group create AH arguments in struct Date: Thu, 30 Apr 2020 22:21:42 +0300 Message-Id: <20200430192146.12863-13-maorg@mellanox.com> X-Mailer: git-send-email 2.17.2 In-Reply-To: <20200430192146.12863-1-maorg@mellanox.com> References: <20200430192146.12863-1-maorg@mellanox.com> Sender: linux-rdma-owner@vger.kernel.org Precedence: bulk List-ID: X-Mailing-List: linux-rdma@vger.kernel.org The following patch adds an additional argument to the create AH function, so it makes sense to group the ah_attr and flags arguments in a struct. Signed-off-by: Maor Gottlieb Acked-by: Devesh Sharma Acked-by: Gal Pressman Acked-by: Weihang Li Reviewed-by: Jason Gunthorpe --- drivers/infiniband/core/verbs.c | 5 ++++- drivers/infiniband/hw/bnxt_re/ib_verbs.c | 8 +++++--- drivers/infiniband/hw/bnxt_re/ib_verbs.h | 2 +- drivers/infiniband/hw/efa/efa.h | 3 +-- drivers/infiniband/hw/efa/efa_verbs.c | 6 +++--- drivers/infiniband/hw/hns/hns_roce_ah.c | 5 +++-- drivers/infiniband/hw/hns/hns_roce_device.h | 4 ++-- drivers/infiniband/hw/mlx4/ah.c | 11 +++++++---- drivers/infiniband/hw/mlx4/mlx4_ib.h | 2 +- drivers/infiniband/hw/mlx5/ah.c | 5 +++-- drivers/infiniband/hw/mlx5/mlx5_ib.h | 2 +- drivers/infiniband/hw/mthca/mthca_provider.c | 9 +++++---- drivers/infiniband/hw/ocrdma/ocrdma_ah.c | 3 ++- drivers/infiniband/hw/ocrdma/ocrdma_ah.h | 2 +- drivers/infiniband/hw/qedr/verbs.c | 4 ++-- drivers/infiniband/hw/qedr/verbs.h | 2 +- drivers/infiniband/hw/vmw_pvrdma/pvrdma_verbs.c | 5 +++-- drivers/infiniband/hw/vmw_pvrdma/pvrdma_verbs.h | 2 +- drivers/infiniband/sw/rdmavt/ah.c | 11 ++++++----- drivers/infiniband/sw/rdmavt/ah.h | 4 ++-- drivers/infiniband/sw/rxe/rxe_verbs.c | 9 +++++---- include/rdma/ib_verbs.h | 9 +++++++-- 22 files changed, 66 insertions(+), 47 deletions(-) diff --git a/drivers/infiniband/core/verbs.c b/drivers/infiniband/core/verbs.c index 3bfadd8effcc..86be8a54a2d6 100644 --- a/drivers/infiniband/core/verbs.c +++ 
b/drivers/infiniband/core/verbs.c @@ -502,6 +502,7 @@ static struct ib_ah *_rdma_create_ah(struct ib_pd *pd, u32 flags, struct ib_udata *udata) { + struct rdma_ah_init_attr init_attr = {}; struct ib_device *device = pd->device; struct ib_ah *ah; int ret; @@ -521,8 +522,10 @@ static struct ib_ah *_rdma_create_ah(struct ib_pd *pd, ah->pd = pd; ah->type = ah_attr->type; ah->sgid_attr = rdma_update_sgid_attr(ah_attr, NULL); + init_attr.ah_attr = ah_attr; + init_attr.flags = flags; - ret = device->ops.create_ah(ah, ah_attr, flags, udata); + ret = device->ops.create_ah(ah, &init_attr, udata); if (ret) { kfree(ah); return ERR_PTR(ret); diff --git a/drivers/infiniband/hw/bnxt_re/ib_verbs.c b/drivers/infiniband/hw/bnxt_re/ib_verbs.c index d98348e82422..5a7c090204c5 100644 --- a/drivers/infiniband/hw/bnxt_re/ib_verbs.c +++ b/drivers/infiniband/hw/bnxt_re/ib_verbs.c @@ -631,11 +631,12 @@ static u8 bnxt_re_stack_to_dev_nw_type(enum rdma_network_type ntype) return nw_type; } -int bnxt_re_create_ah(struct ib_ah *ib_ah, struct rdma_ah_attr *ah_attr, - u32 flags, struct ib_udata *udata) +int bnxt_re_create_ah(struct ib_ah *ib_ah, struct rdma_ah_init_attr *init_attr, + struct ib_udata *udata) { struct ib_pd *ib_pd = ib_ah->pd; struct bnxt_re_pd *pd = container_of(ib_pd, struct bnxt_re_pd, ib_pd); + struct rdma_ah_attr *ah_attr = init_attr->ah_attr; const struct ib_global_route *grh = rdma_ah_read_grh(ah_attr); struct bnxt_re_dev *rdev = pd->rdev; const struct ib_gid_attr *sgid_attr; @@ -673,7 +674,8 @@ int bnxt_re_create_ah(struct ib_ah *ib_ah, struct rdma_ah_attr *ah_attr, memcpy(ah->qplib_ah.dmac, ah_attr->roce.dmac, ETH_ALEN); rc = bnxt_qplib_create_ah(&rdev->qplib_res, &ah->qplib_ah, - !(flags & RDMA_CREATE_AH_SLEEPABLE)); + !(init_attr->flags & + RDMA_CREATE_AH_SLEEPABLE)); if (rc) { ibdev_err(&rdev->ibdev, "Failed to allocate HW AH"); return rc; diff --git a/drivers/infiniband/hw/bnxt_re/ib_verbs.h b/drivers/infiniband/hw/bnxt_re/ib_verbs.h index 18dd46f46cf4..204c0849ba28 
100644 --- a/drivers/infiniband/hw/bnxt_re/ib_verbs.h +++ b/drivers/infiniband/hw/bnxt_re/ib_verbs.h @@ -170,7 +170,7 @@ enum rdma_link_layer bnxt_re_get_link_layer(struct ib_device *ibdev, u8 port_num); int bnxt_re_alloc_pd(struct ib_pd *pd, struct ib_udata *udata); void bnxt_re_dealloc_pd(struct ib_pd *pd, struct ib_udata *udata); -int bnxt_re_create_ah(struct ib_ah *ah, struct rdma_ah_attr *ah_attr, u32 flags, +int bnxt_re_create_ah(struct ib_ah *ah, struct rdma_ah_init_attr *init_attr, struct ib_udata *udata); int bnxt_re_modify_ah(struct ib_ah *ah, struct rdma_ah_attr *ah_attr); int bnxt_re_query_ah(struct ib_ah *ah, struct rdma_ah_attr *ah_attr); diff --git a/drivers/infiniband/hw/efa/efa.h b/drivers/infiniband/hw/efa/efa.h index aa7396a1588a..45d519edb4c3 100644 --- a/drivers/infiniband/hw/efa/efa.h +++ b/drivers/infiniband/hw/efa/efa.h @@ -153,8 +153,7 @@ int efa_mmap(struct ib_ucontext *ibucontext, struct vm_area_struct *vma); void efa_mmap_free(struct rdma_user_mmap_entry *rdma_entry); int efa_create_ah(struct ib_ah *ibah, - struct rdma_ah_attr *ah_attr, - u32 flags, + struct rdma_ah_init_attr *init_attr, struct ib_udata *udata); void efa_destroy_ah(struct ib_ah *ibah, u32 flags); int efa_modify_qp(struct ib_qp *ibqp, struct ib_qp_attr *qp_attr, diff --git a/drivers/infiniband/hw/efa/efa_verbs.c b/drivers/infiniband/hw/efa/efa_verbs.c index 5c57098a4aee..454b01b21e6a 100644 --- a/drivers/infiniband/hw/efa/efa_verbs.c +++ b/drivers/infiniband/hw/efa/efa_verbs.c @@ -1639,10 +1639,10 @@ static int efa_ah_destroy(struct efa_dev *dev, struct efa_ah *ah) } int efa_create_ah(struct ib_ah *ibah, - struct rdma_ah_attr *ah_attr, - u32 flags, + struct rdma_ah_init_attr *init_attr, struct ib_udata *udata) { + struct rdma_ah_attr *ah_attr = init_attr->ah_attr; struct efa_dev *dev = to_edev(ibah->device); struct efa_com_create_ah_params params = {}; struct efa_ibv_create_ah_resp resp = {}; @@ -1650,7 +1650,7 @@ int efa_create_ah(struct ib_ah *ibah, struct efa_ah *ah = 
to_eah(ibah); int err; - if (!(flags & RDMA_CREATE_AH_SLEEPABLE)) { + if (!(init_attr->flags & RDMA_CREATE_AH_SLEEPABLE)) { ibdev_dbg(&dev->ibdev, "Create address handle is not supported in atomic context\n"); err = -EOPNOTSUPP; diff --git a/drivers/infiniband/hw/hns/hns_roce_ah.c b/drivers/infiniband/hw/hns/hns_roce_ah.c index 8a522e14ef62..5b2f9314edd3 100644 --- a/drivers/infiniband/hw/hns/hns_roce_ah.c +++ b/drivers/infiniband/hw/hns/hns_roce_ah.c @@ -39,13 +39,14 @@ #define HNS_ROCE_VLAN_SL_BIT_MASK 7 #define HNS_ROCE_VLAN_SL_SHIFT 13 -int hns_roce_create_ah(struct ib_ah *ibah, struct rdma_ah_attr *ah_attr, - u32 flags, struct ib_udata *udata) +int hns_roce_create_ah(struct ib_ah *ibah, struct rdma_ah_init_attr *init_attr, + struct ib_udata *udata) { struct hns_roce_dev *hr_dev = to_hr_dev(ibah->device); const struct ib_gid_attr *gid_attr; struct device *dev = hr_dev->dev; struct hns_roce_ah *ah = to_hr_ah(ibah); + struct rdma_ah_attr *ah_attr = init_attr->ah_attr; const struct ib_global_route *grh = rdma_ah_read_grh(ah_attr); u16 vlan_id = 0xffff; bool vlan_en = false; diff --git a/drivers/infiniband/hw/hns/hns_roce_device.h b/drivers/infiniband/hw/hns/hns_roce_device.h index ecbfeb6dbdd4..e1032cec2b12 100644 --- a/drivers/infiniband/hw/hns/hns_roce_device.h +++ b/drivers/infiniband/hw/hns/hns_roce_device.h @@ -1208,8 +1208,8 @@ void hns_roce_bitmap_free_range(struct hns_roce_bitmap *bitmap, unsigned long obj, int cnt, int rr); -int hns_roce_create_ah(struct ib_ah *ah, struct rdma_ah_attr *ah_attr, - u32 flags, struct ib_udata *udata); +int hns_roce_create_ah(struct ib_ah *ah, struct rdma_ah_init_attr *init_attr, + struct ib_udata *udata); int hns_roce_query_ah(struct ib_ah *ibah, struct rdma_ah_attr *ah_attr); void hns_roce_destroy_ah(struct ib_ah *ah, u32 flags); diff --git a/drivers/infiniband/hw/mlx4/ah.c b/drivers/infiniband/hw/mlx4/ah.c index 02a169f8027b..5f8f8d5c0ce0 100644 --- a/drivers/infiniband/hw/mlx4/ah.c +++ b/drivers/infiniband/hw/mlx4/ah.c @@ 
-141,10 +141,11 @@ static int create_iboe_ah(struct ib_ah *ib_ah, struct rdma_ah_attr *ah_attr) return 0; } -int mlx4_ib_create_ah(struct ib_ah *ib_ah, struct rdma_ah_attr *ah_attr, - u32 flags, struct ib_udata *udata) - +int mlx4_ib_create_ah(struct ib_ah *ib_ah, struct rdma_ah_init_attr *init_attr, + struct ib_udata *udata) { + struct rdma_ah_attr *ah_attr = init_attr->ah_attr; + if (ah_attr->type == RDMA_AH_ATTR_TYPE_ROCE) { if (!(rdma_ah_get_ah_flags(ah_attr) & IB_AH_GRH)) return -EINVAL; @@ -167,12 +168,14 @@ int mlx4_ib_create_ah_slave(struct ib_ah *ah, struct rdma_ah_attr *ah_attr, int slave_sgid_index, u8 *s_mac, u16 vlan_tag) { struct rdma_ah_attr slave_attr = *ah_attr; + struct rdma_ah_init_attr init_attr = {}; struct mlx4_ib_ah *mah = to_mah(ah); int ret; slave_attr.grh.sgid_attr = NULL; slave_attr.grh.sgid_index = slave_sgid_index; - ret = mlx4_ib_create_ah(ah, &slave_attr, 0, NULL); + init_attr.ah_attr = &slave_attr; + ret = mlx4_ib_create_ah(ah, &init_attr, NULL); if (ret) return ret; diff --git a/drivers/infiniband/hw/mlx4/mlx4_ib.h b/drivers/infiniband/hw/mlx4/mlx4_ib.h index d188573187fa..182a237b87f7 100644 --- a/drivers/infiniband/hw/mlx4/mlx4_ib.h +++ b/drivers/infiniband/hw/mlx4/mlx4_ib.h @@ -752,7 +752,7 @@ int mlx4_ib_arm_cq(struct ib_cq *cq, enum ib_cq_notify_flags flags); void __mlx4_ib_cq_clean(struct mlx4_ib_cq *cq, u32 qpn, struct mlx4_ib_srq *srq); void mlx4_ib_cq_clean(struct mlx4_ib_cq *cq, u32 qpn, struct mlx4_ib_srq *srq); -int mlx4_ib_create_ah(struct ib_ah *ah, struct rdma_ah_attr *ah_attr, u32 flags, +int mlx4_ib_create_ah(struct ib_ah *ah, struct rdma_ah_init_attr *init_attr, struct ib_udata *udata); int mlx4_ib_create_ah_slave(struct ib_ah *ah, struct rdma_ah_attr *ah_attr, int slave_sgid_index, u8 *s_mac, u16 vlan_tag); diff --git a/drivers/infiniband/hw/mlx5/ah.c b/drivers/infiniband/hw/mlx5/ah.c index 80642dd359bc..9b59348d51b5 100644 --- a/drivers/infiniband/hw/mlx5/ah.c +++ b/drivers/infiniband/hw/mlx5/ah.c @@ -68,10 
+68,11 @@ static void create_ib_ah(struct mlx5_ib_dev *dev, struct mlx5_ib_ah *ah, } } -int mlx5_ib_create_ah(struct ib_ah *ibah, struct rdma_ah_attr *ah_attr, - u32 flags, struct ib_udata *udata) +int mlx5_ib_create_ah(struct ib_ah *ibah, struct rdma_ah_init_attr *init_attr, + struct ib_udata *udata) { + struct rdma_ah_attr *ah_attr = init_attr->ah_attr; struct mlx5_ib_ah *ah = to_mah(ibah); struct mlx5_ib_dev *dev = to_mdev(ibah->device); enum rdma_ah_attr_type ah_type = ah_attr->type; diff --git a/drivers/infiniband/hw/mlx5/mlx5_ib.h b/drivers/infiniband/hw/mlx5/mlx5_ib.h index 9b2baf119823..3a1977ab899b 100644 --- a/drivers/infiniband/hw/mlx5/mlx5_ib.h +++ b/drivers/infiniband/hw/mlx5/mlx5_ib.h @@ -1163,7 +1163,7 @@ void mlx5_ib_db_unmap_user(struct mlx5_ib_ucontext *context, struct mlx5_db *db) void __mlx5_ib_cq_clean(struct mlx5_ib_cq *cq, u32 qpn, struct mlx5_ib_srq *srq); void mlx5_ib_cq_clean(struct mlx5_ib_cq *cq, u32 qpn, struct mlx5_ib_srq *srq); void mlx5_ib_free_srq_wqe(struct mlx5_ib_srq *srq, int wqe_index); -int mlx5_ib_create_ah(struct ib_ah *ah, struct rdma_ah_attr *ah_attr, u32 flags, +int mlx5_ib_create_ah(struct ib_ah *ah, struct rdma_ah_init_attr *init_attr, struct ib_udata *udata); int mlx5_ib_query_ah(struct ib_ah *ibah, struct rdma_ah_attr *ah_attr); void mlx5_ib_destroy_ah(struct ib_ah *ah, u32 flags); diff --git a/drivers/infiniband/hw/mthca/mthca_provider.c b/drivers/infiniband/hw/mthca/mthca_provider.c index 69a3e4f62fb1..bc3e3d741ca3 100644 --- a/drivers/infiniband/hw/mthca/mthca_provider.c +++ b/drivers/infiniband/hw/mthca/mthca_provider.c @@ -388,14 +388,15 @@ static void mthca_dealloc_pd(struct ib_pd *pd, struct ib_udata *udata) mthca_pd_free(to_mdev(pd->device), to_mpd(pd)); } -static int mthca_ah_create(struct ib_ah *ibah, struct rdma_ah_attr *ah_attr, - u32 flags, struct ib_udata *udata) +static int mthca_ah_create(struct ib_ah *ibah, + struct rdma_ah_init_attr *init_attr, + struct ib_udata *udata) { struct mthca_ah *ah = 
to_mah(ibah); - return mthca_create_ah(to_mdev(ibah->device), to_mpd(ibah->pd), ah_attr, - ah); + return mthca_create_ah(to_mdev(ibah->device), to_mpd(ibah->pd), + init_attr->ah_attr, ah); } static void mthca_ah_destroy(struct ib_ah *ah, u32 flags) diff --git a/drivers/infiniband/hw/ocrdma/ocrdma_ah.c b/drivers/infiniband/hw/ocrdma/ocrdma_ah.c index 2b7f00ac41b0..6eea02b18968 100644 --- a/drivers/infiniband/hw/ocrdma/ocrdma_ah.c +++ b/drivers/infiniband/hw/ocrdma/ocrdma_ah.c @@ -155,7 +155,7 @@ static inline int set_av_attr(struct ocrdma_dev *dev, struct ocrdma_ah *ah, return status; } -int ocrdma_create_ah(struct ib_ah *ibah, struct rdma_ah_attr *attr, u32 flags, +int ocrdma_create_ah(struct ib_ah *ibah, struct rdma_ah_init_attr *init_attr, struct ib_udata *udata) { u32 *ahid_addr; @@ -165,6 +165,7 @@ int ocrdma_create_ah(struct ib_ah *ibah, struct rdma_ah_attr *attr, u32 flags, u16 vlan_tag = 0xffff; const struct ib_gid_attr *sgid_attr; struct ocrdma_pd *pd = get_ocrdma_pd(ibah->pd); + struct rdma_ah_attr *attr = init_attr->ah_attr; struct ocrdma_dev *dev = get_ocrdma_dev(ibah->device); if ((attr->type != RDMA_AH_ATTR_TYPE_ROCE) || diff --git a/drivers/infiniband/hw/ocrdma/ocrdma_ah.h b/drivers/infiniband/hw/ocrdma/ocrdma_ah.h index 9780afcde780..8b73b3489f3a 100644 --- a/drivers/infiniband/hw/ocrdma/ocrdma_ah.h +++ b/drivers/infiniband/hw/ocrdma/ocrdma_ah.h @@ -51,7 +51,7 @@ enum { OCRDMA_AH_L3_TYPE_SHIFT = 0x1D /* 29 bits */ }; -int ocrdma_create_ah(struct ib_ah *ah, struct rdma_ah_attr *ah_attr, u32 flags, +int ocrdma_create_ah(struct ib_ah *ah, struct rdma_ah_init_attr *init_attr, struct ib_udata *udata); void ocrdma_destroy_ah(struct ib_ah *ah, u32 flags); int ocrdma_query_ah(struct ib_ah *ah, struct rdma_ah_attr *ah_attr); diff --git a/drivers/infiniband/hw/qedr/verbs.c b/drivers/infiniband/hw/qedr/verbs.c index a5bd3adaf90a..d6b94a713573 100644 --- a/drivers/infiniband/hw/qedr/verbs.c +++ b/drivers/infiniband/hw/qedr/verbs.c @@ -2750,12 +2750,12 @@ int 
qedr_destroy_qp(struct ib_qp *ibqp, struct ib_udata *udata) return 0; } -int qedr_create_ah(struct ib_ah *ibah, struct rdma_ah_attr *attr, u32 flags, +int qedr_create_ah(struct ib_ah *ibah, struct rdma_ah_init_attr *init_attr, struct ib_udata *udata) { struct qedr_ah *ah = get_qedr_ah(ibah); - rdma_copy_ah_attr(&ah->attr, attr); + rdma_copy_ah_attr(&ah->attr, init_attr->ah_attr); return 0; } diff --git a/drivers/infiniband/hw/qedr/verbs.h b/drivers/infiniband/hw/qedr/verbs.h index 18027844eb87..5e02387e068d 100644 --- a/drivers/infiniband/hw/qedr/verbs.h +++ b/drivers/infiniband/hw/qedr/verbs.h @@ -70,7 +70,7 @@ int qedr_query_srq(struct ib_srq *ibsrq, struct ib_srq_attr *attr); void qedr_destroy_srq(struct ib_srq *ibsrq, struct ib_udata *udata); int qedr_post_srq_recv(struct ib_srq *ibsrq, const struct ib_recv_wr *wr, const struct ib_recv_wr **bad_recv_wr); -int qedr_create_ah(struct ib_ah *ibah, struct rdma_ah_attr *attr, u32 flags, +int qedr_create_ah(struct ib_ah *ibah, struct rdma_ah_init_attr *init_attr, struct ib_udata *udata); void qedr_destroy_ah(struct ib_ah *ibah, u32 flags); diff --git a/drivers/infiniband/hw/vmw_pvrdma/pvrdma_verbs.c b/drivers/infiniband/hw/vmw_pvrdma/pvrdma_verbs.c index faf7ecd7b3fa..ccbded2d26ce 100644 --- a/drivers/infiniband/hw/vmw_pvrdma/pvrdma_verbs.c +++ b/drivers/infiniband/hw/vmw_pvrdma/pvrdma_verbs.c @@ -509,9 +509,10 @@ void pvrdma_dealloc_pd(struct ib_pd *pd, struct ib_udata *udata) * * @return: 0 on success, otherwise errno. 
*/ -int pvrdma_create_ah(struct ib_ah *ibah, struct rdma_ah_attr *ah_attr, - u32 flags, struct ib_udata *udata) +int pvrdma_create_ah(struct ib_ah *ibah, struct rdma_ah_init_attr *init_attr, + struct ib_udata *udata) { + struct rdma_ah_attr *ah_attr = init_attr->ah_attr; struct pvrdma_dev *dev = to_vdev(ibah->device); struct pvrdma_ah *ah = to_vah(ibah); const struct ib_global_route *grh; diff --git a/drivers/infiniband/hw/vmw_pvrdma/pvrdma_verbs.h b/drivers/infiniband/hw/vmw_pvrdma/pvrdma_verbs.h index e4a48f5c0c85..267702226f10 100644 --- a/drivers/infiniband/hw/vmw_pvrdma/pvrdma_verbs.h +++ b/drivers/infiniband/hw/vmw_pvrdma/pvrdma_verbs.h @@ -414,7 +414,7 @@ int pvrdma_create_cq(struct ib_cq *ibcq, const struct ib_cq_init_attr *attr, void pvrdma_destroy_cq(struct ib_cq *cq, struct ib_udata *udata); int pvrdma_poll_cq(struct ib_cq *ibcq, int num_entries, struct ib_wc *wc); int pvrdma_req_notify_cq(struct ib_cq *cq, enum ib_cq_notify_flags flags); -int pvrdma_create_ah(struct ib_ah *ah, struct rdma_ah_attr *ah_attr, u32 flags, +int pvrdma_create_ah(struct ib_ah *ah, struct rdma_ah_init_attr *init_attr, struct ib_udata *udata); void pvrdma_destroy_ah(struct ib_ah *ah, u32 flags); diff --git a/drivers/infiniband/sw/rdmavt/ah.c b/drivers/infiniband/sw/rdmavt/ah.c index ee02c6176007..40480add7dd3 100644 --- a/drivers/infiniband/sw/rdmavt/ah.c +++ b/drivers/infiniband/sw/rdmavt/ah.c @@ -98,14 +98,14 @@ EXPORT_SYMBOL(rvt_check_ah); * * Return: 0 on success */ -int rvt_create_ah(struct ib_ah *ibah, struct rdma_ah_attr *ah_attr, - u32 create_flags, struct ib_udata *udata) +int rvt_create_ah(struct ib_ah *ibah, struct rdma_ah_init_attr *init_attr, + struct ib_udata *udata) { struct rvt_ah *ah = ibah_to_rvtah(ibah); struct rvt_dev_info *dev = ib_to_rvt(ibah->device); unsigned long flags; - if (rvt_check_ah(ibah->device, ah_attr)) + if (rvt_check_ah(ibah->device, init_attr->ah_attr)) return -EINVAL; spin_lock_irqsave(&dev->n_ahs_lock, flags); @@ -117,10 +117,11 @@ int 
rvt_create_ah(struct ib_ah *ibah, struct rdma_ah_attr *ah_attr, dev->n_ahs_allocated++; spin_unlock_irqrestore(&dev->n_ahs_lock, flags); - rdma_copy_ah_attr(&ah->attr, ah_attr); + rdma_copy_ah_attr(&ah->attr, init_attr->ah_attr); if (dev->driver_f.notify_new_ah) - dev->driver_f.notify_new_ah(ibah->device, ah_attr, ah); + dev->driver_f.notify_new_ah(ibah->device, + init_attr->ah_attr, ah); return 0; } diff --git a/drivers/infiniband/sw/rdmavt/ah.h b/drivers/infiniband/sw/rdmavt/ah.h index bbb4d3bdec4e..40b7123fec76 100644 --- a/drivers/infiniband/sw/rdmavt/ah.h +++ b/drivers/infiniband/sw/rdmavt/ah.h @@ -50,8 +50,8 @@ #include -int rvt_create_ah(struct ib_ah *ah, struct rdma_ah_attr *ah_attr, - u32 create_flags, struct ib_udata *udata); +int rvt_create_ah(struct ib_ah *ah, struct rdma_ah_init_attr *init_attr, + struct ib_udata *udata); void rvt_destroy_ah(struct ib_ah *ibah, u32 destroy_flags); int rvt_modify_ah(struct ib_ah *ibah, struct rdma_ah_attr *ah_attr); int rvt_query_ah(struct ib_ah *ibah, struct rdma_ah_attr *ah_attr); diff --git a/drivers/infiniband/sw/rxe/rxe_verbs.c b/drivers/infiniband/sw/rxe/rxe_verbs.c index 9dd4bd7aea92..b8a22af724e8 100644 --- a/drivers/infiniband/sw/rxe/rxe_verbs.c +++ b/drivers/infiniband/sw/rxe/rxe_verbs.c @@ -195,15 +195,16 @@ static void rxe_dealloc_pd(struct ib_pd *ibpd, struct ib_udata *udata) rxe_drop_ref(pd); } -static int rxe_create_ah(struct ib_ah *ibah, struct rdma_ah_attr *attr, - u32 flags, struct ib_udata *udata) +static int rxe_create_ah(struct ib_ah *ibah, + struct rdma_ah_init_attr *init_attr, + struct ib_udata *udata) { int err; struct rxe_dev *rxe = to_rdev(ibah->device); struct rxe_ah *ah = to_rah(ibah); - err = rxe_av_chk_attr(rxe, attr); + err = rxe_av_chk_attr(rxe, init_attr->ah_attr); if (err) return err; @@ -211,7 +212,7 @@ static int rxe_create_ah(struct ib_ah *ibah, struct rdma_ah_attr *attr, if (err) return err; - rxe_init_av(attr, &ah->av); + rxe_init_av(init_attr->ah_attr, &ah->av); return 0; } diff 
--git a/include/rdma/ib_verbs.h b/include/rdma/ib_verbs.h index bbc5cfb57cd2..20ea26810349 100644 --- a/include/rdma/ib_verbs.h +++ b/include/rdma/ib_verbs.h @@ -880,6 +880,11 @@ struct ib_mr_status { */ __attribute_const__ enum ib_rate mult_to_ib_rate(int mult); +struct rdma_ah_init_attr { + struct rdma_ah_attr *ah_attr; + u32 flags; +}; + enum rdma_ah_attr_type { RDMA_AH_ATTR_TYPE_UNDEFINED, RDMA_AH_ATTR_TYPE_IB, @@ -2403,8 +2408,8 @@ struct ib_device_ops { void (*disassociate_ucontext)(struct ib_ucontext *ibcontext); int (*alloc_pd)(struct ib_pd *pd, struct ib_udata *udata); void (*dealloc_pd)(struct ib_pd *pd, struct ib_udata *udata); - int (*create_ah)(struct ib_ah *ah, struct rdma_ah_attr *ah_attr, - u32 flags, struct ib_udata *udata); + int (*create_ah)(struct ib_ah *ah, struct rdma_ah_init_attr *attr, + struct ib_udata *udata); int (*modify_ah)(struct ib_ah *ah, struct rdma_ah_attr *ah_attr); int (*query_ah)(struct ib_ah *ah, struct rdma_ah_attr *ah_attr); void (*destroy_ah)(struct ib_ah *ah, u32 flags); From patchwork Thu Apr 30 19:21:43 2020 Content-Type: text/plain; charset="utf-8" MIME-Version: 1.0 Content-Transfer-Encoding: 7bit X-Patchwork-Submitter: Maor Gottlieb X-Patchwork-Id: 11521315 X-Patchwork-Delegate: jgg@ziepe.ca Return-Path: Received: from mail.kernel.org (pdx-korg-mail-1.web.codeaurora.org [172.30.200.123]) by pdx-korg-patchwork-2.web.codeaurora.org (Postfix) with ESMTP id 401011575 for ; Thu, 30 Apr 2020 19:22:11 +0000 (UTC) Received: from vger.kernel.org (vger.kernel.org [23.128.96.18]) by mail.kernel.org (Postfix) with ESMTP id 327B02073E for ; Thu, 30 Apr 2020 19:22:11 +0000 (UTC) Received: (majordomo@vger.kernel.org) by vger.kernel.org via listexpand id S1727030AbgD3TWJ (ORCPT ); Thu, 30 Apr 2020 15:22:09 -0400 Received: from mail-il-dmz.mellanox.com ([193.47.165.129]:44414 "EHLO mellanox.co.il" rhost-flags-OK-OK-OK-FAIL) by vger.kernel.org with ESMTP id S1727074AbgD3TWB (ORCPT ); Thu, 30 Apr 2020 15:22:01 -0400 Received: from 
Internal Mail-Server by MTLPINE1 (envelope-from maorg@mellanox.com) with ESMTPS (AES256-SHA encrypted); 30 Apr 2020 22:21:53 +0300 Received: from dev-l-vrt-201.mtl.labs.mlnx (dev-l-vrt-201.mtl.labs.mlnx [10.134.201.1]) by labmailer.mlnx (8.13.8/8.13.8) with ESMTP id 03UJLqAV004128; Thu, 30 Apr 2020 22:21:53 +0300 From: Maor Gottlieb To: davem@davemloft.net, jgg@mellanox.com, dledford@redhat.com, j.vosburgh@gmail.com, vfalico@gmail.com, andy@greyhouse.net, kuba@kernel.org, jiri@mellanox.com, dsahern@kernel.org Cc: leonro@mellanox.com, saeedm@mellanox.com, linux-rdma@vger.kernel.org, netdev@vger.kernel.org, alexr@mellanox.com, Maor Gottlieb Subject: [PATCH V8 rdma-next 13/16] RDMA/core: Add LAG functionality Date: Thu, 30 Apr 2020 22:21:43 +0300 Message-Id: <20200430192146.12863-14-maorg@mellanox.com> X-Mailer: git-send-email 2.17.2 In-Reply-To: <20200430192146.12863-1-maorg@mellanox.com> References: <20200430192146.12863-1-maorg@mellanox.com> Sender: linux-rdma-owner@vger.kernel.org Precedence: bulk List-ID: X-Mailing-List: linux-rdma@vger.kernel.org Add support for getting the RoCE LAG xmit slave by building an skb of the RoCE packet and calling netdev_get_xmit_slave. If the driver wants the slave to be chosen assuming that all slaves are available, it needs to set RDMA_LAG_FLAGS_HASH_ALL_SLAVES in the device's lag_flags.
Signed-off-by: Maor Gottlieb Reviewed-by: Leon Romanovsky --- drivers/infiniband/core/Makefile | 2 +- drivers/infiniband/core/lag.c | 136 +++++++++++++++++++++++++++++++ include/rdma/ib_verbs.h | 1 + include/rdma/lag.h | 23 ++++++ 4 files changed, 161 insertions(+), 1 deletion(-) create mode 100644 drivers/infiniband/core/lag.c create mode 100644 include/rdma/lag.h diff --git a/drivers/infiniband/core/Makefile b/drivers/infiniband/core/Makefile index d1b14887960e..870f0fcd54d5 100644 --- a/drivers/infiniband/core/Makefile +++ b/drivers/infiniband/core/Makefile @@ -12,7 +12,7 @@ ib_core-y := packer.o ud_header.o verbs.o cq.o rw.o sysfs.o \ roce_gid_mgmt.o mr_pool.o addr.o sa_query.o \ multicast.o mad.o smi.o agent.o mad_rmpp.o \ nldev.o restrack.o counters.o ib_core_uverbs.o \ - trace.o + trace.o lag.o ib_core-$(CONFIG_SECURITY_INFINIBAND) += security.o ib_core-$(CONFIG_CGROUP_RDMA) += cgroup.o diff --git a/drivers/infiniband/core/lag.c b/drivers/infiniband/core/lag.c new file mode 100644 index 000000000000..a29533626a7c --- /dev/null +++ b/drivers/infiniband/core/lag.c @@ -0,0 +1,136 @@ +// SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB +/* + * Copyright (c) 2020 Mellanox Technologies. All rights reserved. + */ + +#include +#include +#include + +static struct sk_buff *rdma_build_skb(struct ib_device *device, + struct net_device *netdev, + struct rdma_ah_attr *ah_attr, + gfp_t flags) +{ + struct ipv6hdr *ip6h; + struct sk_buff *skb; + struct ethhdr *eth; + struct iphdr *iph; + struct udphdr *uh; + u8 smac[ETH_ALEN]; + bool is_ipv4; + int hdr_len; + + is_ipv4 = ipv6_addr_v4mapped((struct in6_addr *)ah_attr->grh.dgid.raw); + hdr_len = ETH_HLEN + sizeof(struct udphdr) + LL_RESERVED_SPACE(netdev); + hdr_len += is_ipv4 ? 
sizeof(struct iphdr) : sizeof(struct ipv6hdr); + + skb = alloc_skb(hdr_len, flags); + if (!skb) + return NULL; + + skb->dev = netdev; + skb_reserve(skb, hdr_len); + skb_push(skb, sizeof(struct udphdr)); + skb_reset_transport_header(skb); + uh = udp_hdr(skb); + uh->source = htons(0xC000); + uh->dest = htons(ROCE_V2_UDP_DPORT); + uh->len = htons(sizeof(struct udphdr)); + + if (is_ipv4) { + skb_push(skb, sizeof(struct iphdr)); + skb_reset_network_header(skb); + iph = ip_hdr(skb); + iph->frag_off = 0; + iph->version = 4; + iph->protocol = IPPROTO_UDP; + iph->ihl = 0x5; + iph->tot_len = htons(sizeof(struct udphdr) + sizeof(struct + iphdr)); + memcpy(&iph->saddr, ah_attr->grh.sgid_attr->gid.raw + 12, + sizeof(struct in_addr)); + memcpy(&iph->daddr, ah_attr->grh.dgid.raw + 12, + sizeof(struct in_addr)); + } else { + skb_push(skb, sizeof(struct ipv6hdr)); + skb_reset_network_header(skb); + ip6h = ipv6_hdr(skb); + ip6h->version = 6; + ip6h->nexthdr = IPPROTO_UDP; + memcpy(&ip6h->flow_lbl, &ah_attr->grh.flow_label, + sizeof(*ip6h->flow_lbl)); + memcpy(&ip6h->saddr, ah_attr->grh.sgid_attr->gid.raw, + sizeof(struct in6_addr)); + memcpy(&ip6h->daddr, ah_attr->grh.dgid.raw, + sizeof(struct in6_addr)); + } + + skb_push(skb, sizeof(struct ethhdr)); + skb_reset_mac_header(skb); + eth = eth_hdr(skb); + skb->protocol = eth->h_proto = htons(is_ipv4 ? 
ETH_P_IP : ETH_P_IPV6); + rdma_read_gid_l2_fields(ah_attr->grh.sgid_attr, NULL, smac); + memcpy(eth->h_source, smac, ETH_ALEN); + memcpy(eth->h_dest, ah_attr->roce.dmac, ETH_ALEN); + + return skb; +} + +static struct net_device *rdma_get_xmit_slave_udp(struct ib_device *device, + struct net_device *master, + struct rdma_ah_attr *ah_attr, + gfp_t flags) +{ + struct net_device *slave; + struct sk_buff *skb; + + skb = rdma_build_skb(device, master, ah_attr, flags); + if (!skb) + return ERR_PTR(-ENOMEM); + + rcu_read_lock(); + slave = netdev_get_xmit_slave(master, skb, + !!(device->lag_flags & + RDMA_LAG_FLAGS_HASH_ALL_SLAVES)); + if (slave) + dev_hold(slave); + rcu_read_unlock(); + kfree_skb(skb); + return slave; +} + +void rdma_lag_put_ah_roce_slave(struct net_device *xmit_slave) +{ + if (xmit_slave) + dev_put(xmit_slave); +} + +struct net_device *rdma_lag_get_ah_roce_slave(struct ib_device *device, + struct rdma_ah_attr *ah_attr, + gfp_t flags) +{ + struct net_device *slave = NULL; + struct net_device *master; + + if (!(ah_attr->type == RDMA_AH_ATTR_TYPE_ROCE && + ah_attr->grh.sgid_attr->gid_type == IB_GID_TYPE_ROCE_UDP_ENCAP)) + return NULL; + + rcu_read_lock(); + master = rdma_read_gid_attr_ndev_rcu(ah_attr->grh.sgid_attr); + if (IS_ERR(master)) { + rcu_read_unlock(); + return master; + } + dev_hold(master); + rcu_read_unlock(); + + if (!netif_is_bond_master(master)) + goto put; + + slave = rdma_get_xmit_slave_udp(device, master, ah_attr, flags); +put: + dev_put(master); + return slave; +} diff --git a/include/rdma/ib_verbs.h b/include/rdma/ib_verbs.h index 20ea26810349..e6c18ec0365a 100644 --- a/include/rdma/ib_verbs.h +++ b/include/rdma/ib_verbs.h @@ -2714,6 +2714,7 @@ struct ib_device { /* Used by iWarp CM */ char iw_ifname[IFNAMSIZ]; u32 iw_driver_flags; + u32 lag_flags; }; struct ib_client_nl_info; diff --git a/include/rdma/lag.h b/include/rdma/lag.h new file mode 100644 index 000000000000..7c06ec9b2eef --- /dev/null +++ b/include/rdma/lag.h @@ -0,0 +1,23 @@ 
+/* SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB */ +/* + * Copyright (c) 2020 Mellanox Technologies. All rights reserved. + */ + +#ifndef _RDMA_LAG_H_ +#define _RDMA_LAG_H_ + +#include + +struct ib_device; +struct rdma_ah_attr; + +enum rdma_lag_flags { + RDMA_LAG_FLAGS_HASH_ALL_SLAVES = 1 << 0 +}; + +void rdma_lag_put_ah_roce_slave(struct net_device *xmit_slave); +struct net_device *rdma_lag_get_ah_roce_slave(struct ib_device *device, + struct rdma_ah_attr *ah_attr, + gfp_t flags); + +#endif /* _RDMA_LAG_H_ */ From patchwork Thu Apr 30 19:21:44 2020 Content-Type: text/plain; charset="utf-8" MIME-Version: 1.0 Content-Transfer-Encoding: 7bit X-Patchwork-Submitter: Maor Gottlieb X-Patchwork-Id: 11521307 X-Patchwork-Delegate: jgg@ziepe.ca Return-Path: Received: from mail.kernel.org (pdx-korg-mail-1.web.codeaurora.org [172.30.200.123]) by pdx-korg-patchwork-2.web.codeaurora.org (Postfix) with ESMTP id E319F81 for ; Thu, 30 Apr 2020 19:22:05 +0000 (UTC) Received: from vger.kernel.org (vger.kernel.org [23.128.96.18]) by mail.kernel.org (Postfix) with ESMTP id D3DC420836 for ; Thu, 30 Apr 2020 19:22:05 +0000 (UTC) Received: (majordomo@vger.kernel.org) by vger.kernel.org via listexpand id S1727100AbgD3TWE (ORCPT ); Thu, 30 Apr 2020 15:22:04 -0400 Received: from mail-il-dmz.mellanox.com ([193.47.165.129]:56952 "EHLO mellanox.co.il" rhost-flags-OK-OK-OK-FAIL) by vger.kernel.org with ESMTP id S1727049AbgD3TWE (ORCPT ); Thu, 30 Apr 2020 15:22:04 -0400 Received: from Internal Mail-Server by MTLPINE2 (envelope-from maorg@mellanox.com) with ESMTPS (AES256-SHA encrypted); 30 Apr 2020 22:21:53 +0300 Received: from dev-l-vrt-201.mtl.labs.mlnx (dev-l-vrt-201.mtl.labs.mlnx [10.134.201.1]) by labmailer.mlnx (8.13.8/8.13.8) with ESMTP id 03UJLqAW004128; Thu, 30 Apr 2020 22:21:53 +0300 From: Maor Gottlieb To: davem@davemloft.net, jgg@mellanox.com, dledford@redhat.com, j.vosburgh@gmail.com, vfalico@gmail.com, andy@greyhouse.net, kuba@kernel.org, jiri@mellanox.com, dsahern@kernel.org 
Cc: leonro@mellanox.com, saeedm@mellanox.com, linux-rdma@vger.kernel.org, netdev@vger.kernel.org, alexr@mellanox.com, Maor Gottlieb Subject: [PATCH V8 rdma-next 14/16] RDMA/core: Get xmit slave for LAG Date: Thu, 30 Apr 2020 22:21:44 +0300 Message-Id: <20200430192146.12863-15-maorg@mellanox.com> X-Mailer: git-send-email 2.17.2 In-Reply-To: <20200430192146.12863-1-maorg@mellanox.com> References: <20200430192146.12863-1-maorg@mellanox.com> Sender: linux-rdma-owner@vger.kernel.org Precedence: bulk List-ID: X-Mailing-List: linux-rdma@vger.kernel.org Add a call to rdma_lag_get_ah_roce_slave when Address handle is created. Lower driver can use it to select the QP's affinity port. Signed-off-by: Maor Gottlieb Reviewed-by: Leon Romanovsky --- drivers/infiniband/core/verbs.c | 61 +++++++++++++++++++++++---------- include/rdma/ib_verbs.h | 2 ++ 2 files changed, 45 insertions(+), 18 deletions(-) diff --git a/drivers/infiniband/core/verbs.c b/drivers/infiniband/core/verbs.c index 86be8a54a2d6..bf0249f76ae9 100644 --- a/drivers/infiniband/core/verbs.c +++ b/drivers/infiniband/core/verbs.c @@ -50,6 +50,7 @@ #include #include #include +#include #include "core_priv.h" #include @@ -500,7 +501,8 @@ rdma_update_sgid_attr(struct rdma_ah_attr *ah_attr, static struct ib_ah *_rdma_create_ah(struct ib_pd *pd, struct rdma_ah_attr *ah_attr, u32 flags, - struct ib_udata *udata) + struct ib_udata *udata, + struct net_device *xmit_slave) { struct rdma_ah_init_attr init_attr = {}; struct ib_device *device = pd->device; @@ -524,6 +526,7 @@ static struct ib_ah *_rdma_create_ah(struct ib_pd *pd, ah->sgid_attr = rdma_update_sgid_attr(ah_attr, NULL); init_attr.ah_attr = ah_attr; init_attr.flags = flags; + init_attr.xmit_slave = xmit_slave; ret = device->ops.create_ah(ah, &init_attr, udata); if (ret) { @@ -550,15 +553,22 @@ struct ib_ah *rdma_create_ah(struct ib_pd *pd, struct rdma_ah_attr *ah_attr, u32 flags) { const struct ib_gid_attr *old_sgid_attr; + struct net_device *slave; struct ib_ah *ah; 
int ret; ret = rdma_fill_sgid_attr(pd->device, ah_attr, &old_sgid_attr); if (ret) return ERR_PTR(ret); - - ah = _rdma_create_ah(pd, ah_attr, flags, NULL); - + slave = rdma_lag_get_ah_roce_slave(pd->device, ah_attr, + (flags & RDMA_CREATE_AH_SLEEPABLE) ? + GFP_KERNEL : GFP_ATOMIC); + if (IS_ERR(slave)) { + rdma_unfill_sgid_attr(ah_attr, old_sgid_attr); + return (void *)slave; + } + ah = _rdma_create_ah(pd, ah_attr, flags, NULL, slave); + rdma_lag_put_ah_roce_slave(slave); rdma_unfill_sgid_attr(ah_attr, old_sgid_attr); return ah; } @@ -597,7 +607,8 @@ struct ib_ah *rdma_create_user_ah(struct ib_pd *pd, } } - ah = _rdma_create_ah(pd, ah_attr, RDMA_CREATE_AH_SLEEPABLE, udata); + ah = _rdma_create_ah(pd, ah_attr, RDMA_CREATE_AH_SLEEPABLE, + udata, NULL); out: rdma_unfill_sgid_attr(ah_attr, old_sgid_attr); @@ -1636,11 +1647,35 @@ static int _ib_modify_qp(struct ib_qp *qp, struct ib_qp_attr *attr, const struct ib_gid_attr *old_sgid_attr_alt_av; int ret; + attr->xmit_slave = NULL; if (attr_mask & IB_QP_AV) { ret = rdma_fill_sgid_attr(qp->device, &attr->ah_attr, &old_sgid_attr_av); if (ret) return ret; + + if (attr->ah_attr.type == RDMA_AH_ATTR_TYPE_ROCE && + is_qp_type_connected(qp)) { + struct net_device *slave; + + /* + * If the user provided the qp_attr then we have to + * resolve it. Kerne users have to provide already + * resolved rdma_ah_attr's. + */ + if (udata) { + ret = ib_resolve_eth_dmac(qp->device, + &attr->ah_attr); + if (ret) + goto out_av; + } + slave = rdma_lag_get_ah_roce_slave(qp->device, + &attr->ah_attr, + GFP_KERNEL); + if (IS_ERR(slave)) + goto out_av; + attr->xmit_slave = slave; + } } if (attr_mask & IB_QP_ALT_PATH) { /* @@ -1667,18 +1702,6 @@ static int _ib_modify_qp(struct ib_qp *qp, struct ib_qp_attr *attr, } } - /* - * If the user provided the qp_attr then we have to resolve it. 
Kernel - * users have to provide already resolved rdma_ah_attr's - */ - if (udata && (attr_mask & IB_QP_AV) && - attr->ah_attr.type == RDMA_AH_ATTR_TYPE_ROCE && - is_qp_type_connected(qp)) { - ret = ib_resolve_eth_dmac(qp->device, &attr->ah_attr); - if (ret) - goto out; - } - if (rdma_ib_or_roce(qp->device, port)) { if (attr_mask & IB_QP_RQ_PSN && attr->rq_psn & ~0xffffff) { dev_warn(&qp->device->dev, @@ -1720,8 +1743,10 @@ static int _ib_modify_qp(struct ib_qp *qp, struct ib_qp_attr *attr, if (attr_mask & IB_QP_ALT_PATH) rdma_unfill_sgid_attr(&attr->alt_ah_attr, old_sgid_attr_alt_av); out_av: - if (attr_mask & IB_QP_AV) + if (attr_mask & IB_QP_AV) { + rdma_lag_put_ah_roce_slave(attr->xmit_slave); rdma_unfill_sgid_attr(&attr->ah_attr, old_sgid_attr_av); + } return ret; } diff --git a/include/rdma/ib_verbs.h b/include/rdma/ib_verbs.h index e6c18ec0365a..8d29f2f79da8 100644 --- a/include/rdma/ib_verbs.h +++ b/include/rdma/ib_verbs.h @@ -883,6 +883,7 @@ __attribute_const__ enum ib_rate mult_to_ib_rate(int mult); struct rdma_ah_init_attr { struct rdma_ah_attr *ah_attr; u32 flags; + struct net_device *xmit_slave; }; enum rdma_ah_attr_type { @@ -1272,6 +1273,7 @@ struct ib_qp_attr { u8 alt_port_num; u8 alt_timeout; u32 rate_limit; + struct net_device *xmit_slave; }; enum ib_wr_opcode { From patchwork Thu Apr 30 19:21:45 2020 Content-Type: text/plain; charset="utf-8" MIME-Version: 1.0 Content-Transfer-Encoding: 7bit X-Patchwork-Submitter: Maor Gottlieb X-Patchwork-Id: 11521321 X-Patchwork-Delegate: jgg@ziepe.ca Return-Path: Received: from mail.kernel.org (pdx-korg-mail-1.web.codeaurora.org [172.30.200.123]) by pdx-korg-patchwork-2.web.codeaurora.org (Postfix) with ESMTP id 3858681 for ; Thu, 30 Apr 2020 19:22:26 +0000 (UTC) Received: from vger.kernel.org (vger.kernel.org [23.128.96.18]) by mail.kernel.org (Postfix) with ESMTP id 2B4F72073E for ; Thu, 30 Apr 2020 19:22:26 +0000 (UTC) Received: (majordomo@vger.kernel.org) by vger.kernel.org via listexpand id 
S1727768AbgD3TWS (ORCPT ); Thu, 30 Apr 2020 15:22:18 -0400 Received: from mail-il-dmz.mellanox.com ([193.47.165.129]:44416 "EHLO mellanox.co.il" rhost-flags-OK-OK-OK-FAIL) by vger.kernel.org with ESMTP id S1727079AbgD3TWA (ORCPT ); Thu, 30 Apr 2020 15:22:00 -0400 Received: from Internal Mail-Server by MTLPINE1 (envelope-from maorg@mellanox.com) with ESMTPS (AES256-SHA encrypted); 30 Apr 2020 22:21:54 +0300 Received: from dev-l-vrt-201.mtl.labs.mlnx (dev-l-vrt-201.mtl.labs.mlnx [10.134.201.1]) by labmailer.mlnx (8.13.8/8.13.8) with ESMTP id 03UJLqAX004128; Thu, 30 Apr 2020 22:21:53 +0300 From: Maor Gottlieb To: davem@davemloft.net, jgg@mellanox.com, dledford@redhat.com, j.vosburgh@gmail.com, vfalico@gmail.com, andy@greyhouse.net, kuba@kernel.org, jiri@mellanox.com, dsahern@kernel.org Cc: leonro@mellanox.com, saeedm@mellanox.com, linux-rdma@vger.kernel.org, netdev@vger.kernel.org, alexr@mellanox.com, Maor Gottlieb Subject: [PATCH V8 rdma-next 15/16] RDMA/mlx5: Refactor affinity related code Date: Thu, 30 Apr 2020 22:21:45 +0300 Message-Id: <20200430192146.12863-16-maorg@mellanox.com> X-Mailer: git-send-email 2.17.2 In-Reply-To: <20200430192146.12863-1-maorg@mellanox.com> References: <20200430192146.12863-1-maorg@mellanox.com> Sender: linux-rdma-owner@vger.kernel.org Precedence: bulk List-ID: X-Mailing-List: linux-rdma@vger.kernel.org Move the affinity-related code in modify QP into a separate function. This is preparation for the next patch, which extends the affinity calculation to consider the xmit slave.
Signed-off-by: Maor Gottlieb Reviewed-by: Leon Romanovsky --- drivers/infiniband/hw/mlx5/qp.c | 90 +++++++++++++++++++-------------- 1 file changed, 53 insertions(+), 37 deletions(-) diff --git a/drivers/infiniband/hw/mlx5/qp.c b/drivers/infiniband/hw/mlx5/qp.c index 2673678f1899..518abbda33c0 100644 --- a/drivers/infiniband/hw/mlx5/qp.c +++ b/drivers/infiniband/hw/mlx5/qp.c @@ -3409,33 +3409,61 @@ static int modify_raw_packet_qp(struct mlx5_ib_dev *dev, struct mlx5_ib_qp *qp, return 0; } -static unsigned int get_tx_affinity(struct mlx5_ib_dev *dev, - struct mlx5_ib_pd *pd, - struct mlx5_ib_qp_base *qp_base, - u8 port_num, struct ib_udata *udata) +static unsigned int get_tx_affinity_rr(struct mlx5_ib_dev *dev, + struct ib_udata *udata) { struct mlx5_ib_ucontext *ucontext = rdma_udata_to_drv_context( udata, struct mlx5_ib_ucontext, ibucontext); - unsigned int tx_port_affinity; + u8 port_num = mlx5_core_native_port_num(dev->mdev) - 1; + atomic_t *tx_port_affinity; - if (ucontext) { - tx_port_affinity = (unsigned int)atomic_add_return( - 1, &ucontext->tx_port_affinity) % - MLX5_MAX_PORTS + - 1; + if (ucontext) + tx_port_affinity = &ucontext->tx_port_affinity; + else + tx_port_affinity = &dev->port[port_num].roce.tx_port_affinity; + + return (unsigned int)atomic_add_return(1, tx_port_affinity) % + MLX5_MAX_PORTS + 1; +} + +static bool qp_supports_affinity(struct ib_qp *qp) +{ + struct mlx5_ib_qp *mqp = to_mqp(qp); + + if ((qp->qp_type == IB_QPT_RC) || + (qp->qp_type == IB_QPT_UD && + !(mqp->flags & MLX5_IB_QP_CREATE_SQPN_QP1)) || + (qp->qp_type == IB_QPT_UC) || + (qp->qp_type == IB_QPT_RAW_PACKET) || + (qp->qp_type == IB_QPT_XRC_INI) || + (qp->qp_type == IB_QPT_XRC_TGT)) + return true; + return false; +} + +static unsigned int get_tx_affinity(struct ib_qp *qp, u8 init, + struct ib_udata *udata) +{ + struct mlx5_ib_ucontext *ucontext = rdma_udata_to_drv_context( + udata, struct mlx5_ib_ucontext, ibucontext); + struct mlx5_ib_dev *dev = to_mdev(qp->device); + struct 
mlx5_ib_qp *mqp = to_mqp(qp); + struct mlx5_ib_qp_base *qp_base; + unsigned int tx_affinity; + + if (!(dev->lag_active && init && qp_supports_affinity(qp))) + return 0; + + tx_affinity = get_tx_affinity_rr(dev, udata); + + qp_base = &mqp->trans_qp.base; + if (ucontext) mlx5_ib_dbg(dev, "Set tx affinity 0x%x to qpn 0x%x ucontext %p\n", - tx_port_affinity, qp_base->mqp.qpn, ucontext); - } else { - tx_port_affinity = - (unsigned int)atomic_add_return( - 1, &dev->port[port_num].roce.tx_port_affinity) % - MLX5_MAX_PORTS + - 1; + tx_affinity, qp_base->mqp.qpn, ucontext); + else mlx5_ib_dbg(dev, "Set tx affinity 0x%x to qpn 0x%x\n", - tx_port_affinity, qp_base->mqp.qpn); - } - - return tx_port_affinity; + tx_affinity, qp_base->mqp.qpn); + return tx_affinity; } static int __mlx5_ib_qp_set_counter(struct ib_qp *qp, @@ -3546,22 +3574,10 @@ static int __mlx5_ib_modify_qp(struct ib_qp *ibqp, } } - if ((cur_state == IB_QPS_RESET) && (new_state == IB_QPS_INIT)) { - if ((ibqp->qp_type == IB_QPT_RC) || - (ibqp->qp_type == IB_QPT_UD && - !(qp->flags & MLX5_IB_QP_CREATE_SQPN_QP1)) || - (ibqp->qp_type == IB_QPT_UC) || - (ibqp->qp_type == IB_QPT_RAW_PACKET) || - (ibqp->qp_type == IB_QPT_XRC_INI) || - (ibqp->qp_type == IB_QPT_XRC_TGT)) { - if (dev->lag_active) { - u8 p = mlx5_core_native_port_num(dev->mdev) - 1; - tx_affinity = get_tx_affinity(dev, pd, base, p, - udata); - context->flags |= cpu_to_be32(tx_affinity << 24); - } - } - } + tx_affinity = get_tx_affinity(ibqp, + cur_state == IB_QPS_RESET && + new_state == IB_QPS_INIT, udata); + context->flags |= cpu_to_be32(tx_affinity << 24); if (is_sqp(ibqp->qp_type)) { context->mtu_msgmax = (IB_MTU_256 << 5) | 8; From patchwork Thu Apr 30 19:21:46 2020 Content-Type: text/plain; charset="utf-8" MIME-Version: 1.0 Content-Transfer-Encoding: 7bit X-Patchwork-Submitter: Maor Gottlieb X-Patchwork-Id: 11521313 X-Patchwork-Delegate: jgg@ziepe.ca Return-Path: Received: from mail.kernel.org (pdx-korg-mail-1.web.codeaurora.org [172.30.200.123]) by 
pdx-korg-patchwork-2.web.codeaurora.org (Postfix) with ESMTP id 9624D15E6 for ; Thu, 30 Apr 2020 19:22:10 +0000 (UTC) Received: from vger.kernel.org (vger.kernel.org [23.128.96.18]) by mail.kernel.org (Postfix) with ESMTP id 825902072A for ; Thu, 30 Apr 2020 19:22:10 +0000 (UTC) Received: (majordomo@vger.kernel.org) by vger.kernel.org via listexpand id S1726375AbgD3TWJ (ORCPT ); Thu, 30 Apr 2020 15:22:09 -0400 Received: from mail-il-dmz.mellanox.com ([193.47.165.129]:56965 "EHLO mellanox.co.il" rhost-flags-OK-OK-OK-FAIL) by vger.kernel.org with ESMTP id S1727085AbgD3TWF (ORCPT ); Thu, 30 Apr 2020 15:22:05 -0400 Received: from Internal Mail-Server by MTLPINE2 (envelope-from maorg@mellanox.com) with ESMTPS (AES256-SHA encrypted); 30 Apr 2020 22:21:54 +0300 Received: from dev-l-vrt-201.mtl.labs.mlnx (dev-l-vrt-201.mtl.labs.mlnx [10.134.201.1]) by labmailer.mlnx (8.13.8/8.13.8) with ESMTP id 03UJLqAY004128; Thu, 30 Apr 2020 22:21:53 +0300 From: Maor Gottlieb To: davem@davemloft.net, jgg@mellanox.com, dledford@redhat.com, j.vosburgh@gmail.com, vfalico@gmail.com, andy@greyhouse.net, kuba@kernel.org, jiri@mellanox.com, dsahern@kernel.org Cc: leonro@mellanox.com, saeedm@mellanox.com, linux-rdma@vger.kernel.org, netdev@vger.kernel.org, alexr@mellanox.com, Maor Gottlieb Subject: [PATCH V8 rdma-next 16/16] RDMA/mlx5: Set lag tx affinity according to slave Date: Thu, 30 Apr 2020 22:21:46 +0300 Message-Id: <20200430192146.12863-17-maorg@mellanox.com> X-Mailer: git-send-email 2.17.2 In-Reply-To: <20200430192146.12863-1-maorg@mellanox.com> References: <20200430192146.12863-1-maorg@mellanox.com> Sender: linux-rdma-owner@vger.kernel.org Precedence: bulk List-ID: X-Mailing-List: linux-rdma@vger.kernel.org The patch sets the lag tx affinity of the data QPs and the GSI QPs according to the LAG xmit slave. For GSI QPs, in case that the link layer is Ethernet (RoCE) we create two GSI QPs, one for each physical port.
When the driver selects the GSI QP, it will consider the port affinity result. For connected QPs, the driver sets the affinity of the xmit slave. The above ensures that an RC QP and its corresponding GSI QP transmit from the same physical port. Signed-off-by: Maor Gottlieb Reviewed-by: Leon Romanovsky --- drivers/infiniband/hw/mlx5/ah.c | 9 +++-- drivers/infiniband/hw/mlx5/gsi.c | 33 ++++++++++++++---- drivers/infiniband/hw/mlx5/main.c | 2 ++ drivers/infiniband/hw/mlx5/mlx5_ib.h | 1 + drivers/infiniband/hw/mlx5/qp.c | 52 +++++++++++++++++++--------- include/linux/mlx5/mlx5_ifc.h | 4 ++- include/linux/mlx5/qp.h | 2 ++ 7 files changed, 76 insertions(+), 27 deletions(-) diff --git a/drivers/infiniband/hw/mlx5/ah.c b/drivers/infiniband/hw/mlx5/ah.c index 9b59348d51b5..cc858f658567 100644 --- a/drivers/infiniband/hw/mlx5/ah.c +++ b/drivers/infiniband/hw/mlx5/ah.c @@ -33,8 +33,9 @@ #include "mlx5_ib.h" static void create_ib_ah(struct mlx5_ib_dev *dev, struct mlx5_ib_ah *ah, - struct rdma_ah_attr *ah_attr) + struct rdma_ah_init_attr *init_attr) { + struct rdma_ah_attr *ah_attr = init_attr->ah_attr; enum ib_gid_type gid_type; if (rdma_ah_get_ah_flags(ah_attr) & IB_AH_GRH) { @@ -51,6 +52,10 @@ static void create_ib_ah(struct mlx5_ib_dev *dev, struct mlx5_ib_ah *ah, ah->av.stat_rate_sl = (rdma_ah_get_static_rate(ah_attr) << 4); if (ah_attr->type == RDMA_AH_ATTR_TYPE_ROCE) { + if (init_attr->xmit_slave) + ah->xmit_port = + mlx5_lag_get_slave_port(dev->mdev, + init_attr->xmit_slave); gid_type = ah_attr->grh.sgid_attr->gid_type; memcpy(ah->av.rmac, ah_attr->roce.dmac, @@ -98,7 +103,7 @@ int mlx5_ib_create_ah(struct ib_ah *ibah, struct rdma_ah_init_attr *init_attr, return err; } - create_ib_ah(dev, ah, ah_attr); + create_ib_ah(dev, ah, init_attr); return 0; } diff --git a/drivers/infiniband/hw/mlx5/gsi.c b/drivers/infiniband/hw/mlx5/gsi.c index 1afbf03d1a98..40d418153891 100644 --- a/drivers/infiniband/hw/mlx5/gsi.c +++ b/drivers/infiniband/hw/mlx5/gsi.c @@ -119,10 +119,17 @@
struct ib_qp *mlx5_ib_gsi_create_qp(struct ib_pd *pd, struct mlx5_ib_gsi_qp *gsi; struct ib_qp_init_attr hw_init_attr = *init_attr; const u8 port_num = init_attr->port_num; - const int num_pkeys = pd->device->attrs.max_pkeys; - const int num_qps = mlx5_ib_deth_sqpn_cap(dev) ? num_pkeys : 0; + int num_qps = 0; int ret; + if (mlx5_ib_deth_sqpn_cap(dev)) { + if (MLX5_CAP_GEN(dev->mdev, + port_type) == MLX5_CAP_PORT_TYPE_IB) + num_qps = pd->device->attrs.max_pkeys; + else if (dev->lag_active) + num_qps = MLX5_MAX_PORTS; + } + gsi = kzalloc(sizeof(*gsi), GFP_KERNEL); if (!gsi) return ERR_PTR(-ENOMEM); @@ -261,7 +268,7 @@ static struct ib_qp *create_gsi_ud_qp(struct mlx5_ib_gsi_qp *gsi) } static int modify_to_rts(struct mlx5_ib_gsi_qp *gsi, struct ib_qp *qp, - u16 qp_index) + u16 pkey_index) { struct mlx5_ib_dev *dev = to_mdev(qp->device); struct ib_qp_attr attr; @@ -270,7 +277,7 @@ static int modify_to_rts(struct mlx5_ib_gsi_qp *gsi, struct ib_qp *qp, mask = IB_QP_STATE | IB_QP_PKEY_INDEX | IB_QP_QKEY | IB_QP_PORT; attr.qp_state = IB_QPS_INIT; - attr.pkey_index = qp_index; + attr.pkey_index = pkey_index; attr.qkey = IB_QP1_QKEY; attr.port_num = gsi->port_num; ret = ib_modify_qp(qp, &attr, mask); @@ -304,12 +311,17 @@ static void setup_qp(struct mlx5_ib_gsi_qp *gsi, u16 qp_index) { struct ib_device *device = gsi->rx_qp->device; struct mlx5_ib_dev *dev = to_mdev(device); + int pkey_index = qp_index; + struct mlx5_ib_qp *mqp; struct ib_qp *qp; unsigned long flags; u16 pkey; int ret; - ret = ib_query_pkey(device, gsi->port_num, qp_index, &pkey); + if (MLX5_CAP_GEN(dev->mdev, port_type) != MLX5_CAP_PORT_TYPE_IB) + pkey_index = 0; + + ret = ib_query_pkey(device, gsi->port_num, pkey_index, &pkey); if (ret) { mlx5_ib_warn(dev, "unable to read P_Key at port %d, index %d\n", gsi->port_num, qp_index); @@ -338,7 +350,10 @@ static void setup_qp(struct mlx5_ib_gsi_qp *gsi, u16 qp_index) return; } - ret = modify_to_rts(gsi, qp, qp_index); + mqp = to_mqp(qp); + if (dev->lag_active) + 
mqp->gsi_lag_port = qp_index + 1; + ret = modify_to_rts(gsi, qp, pkey_index); if (ret) goto err_destroy_qp; @@ -457,11 +472,15 @@ static int mlx5_ib_gsi_silent_drop(struct mlx5_ib_gsi_qp *gsi, static struct ib_qp *get_tx_qp(struct mlx5_ib_gsi_qp *gsi, struct ib_ud_wr *wr) { struct mlx5_ib_dev *dev = to_mdev(gsi->rx_qp->device); + struct mlx5_ib_ah *ah = to_mah(wr->ah); int qp_index = wr->pkey_index; - if (!mlx5_ib_deth_sqpn_cap(dev)) + if (!gsi->num_qps) return gsi->rx_qp; + if (dev->lag_active && ah->xmit_port) + qp_index = ah->xmit_port - 1; + if (qp_index >= gsi->num_qps) return NULL; diff --git a/drivers/infiniband/hw/mlx5/main.c b/drivers/infiniband/hw/mlx5/main.c index 80ae8f04bfd5..e7fb290c9d8d 100644 --- a/drivers/infiniband/hw/mlx5/main.c +++ b/drivers/infiniband/hw/mlx5/main.c @@ -53,6 +53,7 @@ #include #include #include +#include #include #include #include "mlx5_ib.h" @@ -6567,6 +6568,7 @@ static int mlx5_ib_stage_init_init(struct mlx5_ib_dev *dev) dev->ib_dev.phys_port_cnt = dev->num_ports; dev->ib_dev.num_comp_vectors = mlx5_comp_vectors_count(mdev); dev->ib_dev.dev.parent = mdev->device; + dev->ib_dev.lag_flags = RDMA_LAG_FLAGS_HASH_ALL_SLAVES; mutex_init(&dev->cap_mask_mutex); INIT_LIST_HEAD(&dev->qp_list); diff --git a/drivers/infiniband/hw/mlx5/mlx5_ib.h b/drivers/infiniband/hw/mlx5/mlx5_ib.h index 3a1977ab899b..bbe86c6dd5a3 100644 --- a/drivers/infiniband/hw/mlx5/mlx5_ib.h +++ b/drivers/infiniband/hw/mlx5/mlx5_ib.h @@ -471,6 +471,7 @@ struct mlx5_ib_qp { * but not take effective */ u32 counter_pending; + u16 gsi_lag_port; }; struct mlx5_ib_cq_buf { diff --git a/drivers/infiniband/hw/mlx5/qp.c b/drivers/infiniband/hw/mlx5/qp.c index 518abbda33c0..9314aaabe43d 100644 --- a/drivers/infiniband/hw/mlx5/qp.c +++ b/drivers/infiniband/hw/mlx5/qp.c @@ -3045,10 +3045,12 @@ static enum mlx5_qp_optpar opt_mask[MLX5_QP_NUM_STATE][MLX5_QP_NUM_STATE][MLX5_Q MLX5_QP_OPTPAR_RAE | MLX5_QP_OPTPAR_RWE | MLX5_QP_OPTPAR_PKEY_INDEX | - MLX5_QP_OPTPAR_PRI_PORT, + 
MLX5_QP_OPTPAR_PRI_PORT | + MLX5_QP_OPTPAR_LAG_TX_AFF, [MLX5_QP_ST_UC] = MLX5_QP_OPTPAR_RWE | MLX5_QP_OPTPAR_PKEY_INDEX | - MLX5_QP_OPTPAR_PRI_PORT, + MLX5_QP_OPTPAR_PRI_PORT | + MLX5_QP_OPTPAR_LAG_TX_AFF, [MLX5_QP_ST_UD] = MLX5_QP_OPTPAR_PKEY_INDEX | MLX5_QP_OPTPAR_Q_KEY | MLX5_QP_OPTPAR_PRI_PORT, @@ -3056,17 +3058,20 @@ static enum mlx5_qp_optpar opt_mask[MLX5_QP_NUM_STATE][MLX5_QP_NUM_STATE][MLX5_Q MLX5_QP_OPTPAR_RAE | MLX5_QP_OPTPAR_RWE | MLX5_QP_OPTPAR_PKEY_INDEX | - MLX5_QP_OPTPAR_PRI_PORT, + MLX5_QP_OPTPAR_PRI_PORT | + MLX5_QP_OPTPAR_LAG_TX_AFF, }, [MLX5_QP_STATE_RTR] = { [MLX5_QP_ST_RC] = MLX5_QP_OPTPAR_ALT_ADDR_PATH | MLX5_QP_OPTPAR_RRE | MLX5_QP_OPTPAR_RAE | MLX5_QP_OPTPAR_RWE | - MLX5_QP_OPTPAR_PKEY_INDEX, + MLX5_QP_OPTPAR_PKEY_INDEX | + MLX5_QP_OPTPAR_LAG_TX_AFF, [MLX5_QP_ST_UC] = MLX5_QP_OPTPAR_ALT_ADDR_PATH | MLX5_QP_OPTPAR_RWE | - MLX5_QP_OPTPAR_PKEY_INDEX, + MLX5_QP_OPTPAR_PKEY_INDEX | + MLX5_QP_OPTPAR_LAG_TX_AFF, [MLX5_QP_ST_UD] = MLX5_QP_OPTPAR_PKEY_INDEX | MLX5_QP_OPTPAR_Q_KEY, [MLX5_QP_ST_MLX] = MLX5_QP_OPTPAR_PKEY_INDEX | @@ -3075,7 +3080,8 @@ static enum mlx5_qp_optpar opt_mask[MLX5_QP_NUM_STATE][MLX5_QP_NUM_STATE][MLX5_Q MLX5_QP_OPTPAR_RRE | MLX5_QP_OPTPAR_RAE | MLX5_QP_OPTPAR_RWE | - MLX5_QP_OPTPAR_PKEY_INDEX, + MLX5_QP_OPTPAR_PKEY_INDEX | + MLX5_QP_OPTPAR_LAG_TX_AFF, }, }, [MLX5_QP_STATE_RTR] = { @@ -3428,11 +3434,8 @@ static unsigned int get_tx_affinity_rr(struct mlx5_ib_dev *dev, static bool qp_supports_affinity(struct ib_qp *qp) { - struct mlx5_ib_qp *mqp = to_mqp(qp); - if ((qp->qp_type == IB_QPT_RC) || - (qp->qp_type == IB_QPT_UD && - !(mqp->flags & MLX5_IB_QP_CREATE_SQPN_QP1)) || + (qp->qp_type == IB_QPT_UD) || (qp->qp_type == IB_QPT_UC) || (qp->qp_type == IB_QPT_RAW_PACKET) || (qp->qp_type == IB_QPT_XRC_INI) || @@ -3441,7 +3444,9 @@ static bool qp_supports_affinity(struct ib_qp *qp) return false; } -static unsigned int get_tx_affinity(struct ib_qp *qp, u8 init, +static unsigned int get_tx_affinity(struct ib_qp *qp, + const struct 
ib_qp_attr *attr, + int attr_mask, u8 init, struct ib_udata *udata) { struct mlx5_ib_ucontext *ucontext = rdma_udata_to_drv_context( @@ -3451,10 +3456,18 @@ static unsigned int get_tx_affinity(struct ib_qp *qp, u8 init, struct mlx5_ib_qp_base *qp_base; unsigned int tx_affinity; - if (!(dev->lag_active && init && qp_supports_affinity(qp))) + if (!(dev->lag_active && qp_supports_affinity(qp))) return 0; - tx_affinity = get_tx_affinity_rr(dev, udata); + if (mqp->flags & MLX5_IB_QP_CREATE_SQPN_QP1) + tx_affinity = mqp->gsi_lag_port; + else if (init) + tx_affinity = get_tx_affinity_rr(dev, udata); + else if ((attr_mask & IB_QP_AV) && attr->xmit_slave) + tx_affinity = + mlx5_lag_get_slave_port(dev->mdev, attr->xmit_slave); + else + return 0; qp_base = &mqp->trans_qp.base; if (ucontext) @@ -3539,7 +3552,7 @@ static int __mlx5_ib_modify_qp(struct ib_qp *ibqp, struct mlx5_qp_context *context; struct mlx5_ib_pd *pd; enum mlx5_qp_state mlx5_cur, mlx5_new; - enum mlx5_qp_optpar optpar; + enum mlx5_qp_optpar optpar = 0; u32 set_id = 0; int mlx5_st; int err; @@ -3574,10 +3587,15 @@ static int __mlx5_ib_modify_qp(struct ib_qp *ibqp, } } - tx_affinity = get_tx_affinity(ibqp, + tx_affinity = get_tx_affinity(ibqp, attr, attr_mask, cur_state == IB_QPS_RESET && new_state == IB_QPS_INIT, udata); - context->flags |= cpu_to_be32(tx_affinity << 24); + if (tx_affinity) { + context->flags |= cpu_to_be32(tx_affinity << 24); + if (new_state == IB_QPS_RTR && + MLX5_CAP_GEN(dev->mdev, init2_lag_tx_port_affinity)) + optpar |= MLX5_QP_OPTPAR_LAG_TX_AFF; + } if (is_sqp(ibqp->qp_type)) { context->mtu_msgmax = (IB_MTU_256 << 5) | 8; @@ -3714,7 +3732,7 @@ static int __mlx5_ib_modify_qp(struct ib_qp *ibqp, } op = optab[mlx5_cur][mlx5_new]; - optpar = ib_mask_to_mlx5_opt(attr_mask); + optpar |= ib_mask_to_mlx5_opt(attr_mask); optpar &= opt_mask[mlx5_cur][mlx5_new][mlx5_st]; if (qp->ibqp.qp_type == IB_QPT_RAW_PACKET || diff --git a/include/linux/mlx5/mlx5_ifc.h b/include/linux/mlx5/mlx5_ifc.h index 
fb243848132d..c1ba89198335 100644 --- a/include/linux/mlx5/mlx5_ifc.h +++ b/include/linux/mlx5/mlx5_ifc.h @@ -1321,7 +1321,9 @@ struct mlx5_ifc_cmd_hca_cap_bits { u8 stat_rate_support[0x10]; u8 reserved_at_1f0[0x1]; u8 pci_sync_for_fw_update_event[0x1]; - u8 reserved_at_1f2[0xa]; + u8 reserved_at_1f2[0x6]; + u8 init2_lag_tx_port_affinity[0x1]; + u8 reserved_at_1fa[0x3]; u8 cqe_version[0x4]; u8 compact_address_vector[0x1]; diff --git a/include/linux/mlx5/qp.h b/include/linux/mlx5/qp.h index f23eb18526fe..b9facdb9b9bd 100644 --- a/include/linux/mlx5/qp.h +++ b/include/linux/mlx5/qp.h @@ -66,6 +66,7 @@ enum mlx5_qp_optpar { MLX5_QP_OPTPAR_RETRY_COUNT = 1 << 12, MLX5_QP_OPTPAR_RNR_RETRY = 1 << 13, MLX5_QP_OPTPAR_ACK_TIMEOUT = 1 << 14, + MLX5_QP_OPTPAR_LAG_TX_AFF = 1 << 15, MLX5_QP_OPTPAR_PRI_PORT = 1 << 16, MLX5_QP_OPTPAR_SRQN = 1 << 18, MLX5_QP_OPTPAR_CQN_RCV = 1 << 19, @@ -321,6 +322,7 @@ struct mlx5_av { struct mlx5_ib_ah { struct ib_ah ibah; struct mlx5_av av; + u8 xmit_port; }; static inline struct mlx5_ib_ah *to_mah(struct ib_ah *ibah)