From patchwork Thu Nov 12 19:24:11 2020 Content-Type: text/plain; charset="utf-8" MIME-Version: 1.0 Content-Transfer-Encoding: 7bit X-Patchwork-Submitter: Parav Pandit X-Patchwork-Id: 11901261 Return-Path: Received: from mail.kernel.org (pdx-korg-mail-1.web.codeaurora.org [172.30.200.123]) by pdx-korg-patchwork-2.web.codeaurora.org (Postfix) with ESMTP id 0A191697 for ; Thu, 12 Nov 2020 19:24:52 +0000 (UTC) Received: from vger.kernel.org (vger.kernel.org [23.128.96.18]) by mail.kernel.org (Postfix) with ESMTP id D3DD520B80 for ; Thu, 12 Nov 2020 19:24:51 +0000 (UTC) Authentication-Results: mail.kernel.org; dkim=pass (2048-bit key) header.d=nvidia.com header.i=@nvidia.com header.b="XnnnZp1Z" Received: (majordomo@vger.kernel.org) by vger.kernel.org via listexpand id S1726808AbgKLTYu (ORCPT ); Thu, 12 Nov 2020 14:24:50 -0500 Received: from hqnvemgate24.nvidia.com ([216.228.121.143]:11325 "EHLO hqnvemgate24.nvidia.com" rhost-flags-OK-OK-OK-OK) by vger.kernel.org with ESMTP id S1726807AbgKLTYt (ORCPT ); Thu, 12 Nov 2020 14:24:49 -0500 Received: from hqmail.nvidia.com (Not Verified[216.228.121.13]) by hqnvemgate24.nvidia.com (using TLS: TLSv1.2, AES256-SHA) id ; Thu, 12 Nov 2020 11:24:56 -0800 Received: from sw-mtx-036.mtx.labs.mlnx (172.20.13.39) by HQMAIL107.nvidia.com (172.20.187.13) with Microsoft SMTP Server (TLS) id 15.0.1473.3; Thu, 12 Nov 2020 19:24:47 +0000 From: Parav Pandit To: , , CC: , , , , , , , Parav Pandit , Vu Pham Subject: [PATCH net-next 01/13] devlink: Prepare code to fill multiple port function attributes Date: Thu, 12 Nov 2020 21:24:11 +0200 Message-ID: <20201112192424.2742-2-parav@nvidia.com> X-Mailer: git-send-email 2.26.2 In-Reply-To: <20201112192424.2742-1-parav@nvidia.com> References: <20201112192424.2742-1-parav@nvidia.com> MIME-Version: 1.0 X-Originating-IP: [172.20.13.39] X-ClientProxiedBy: HQMAIL107.nvidia.com (172.20.187.13) To HQMAIL107.nvidia.com (172.20.187.13) DKIM-Signature: v=1; a=rsa-sha256; c=relaxed/relaxed; d=nvidia.com; s=n1; t=1605209096; bh=CGowwf6WxKONC1zCiFLEXwuhX6atWac00OfQ2pv/YUQ=; h=From:To:CC:Subject:Date:Message-ID:X-Mailer:In-Reply-To: References:MIME-Version:Content-Transfer-Encoding:Content-Type: X-Originating-IP:X-ClientProxiedBy; b=XnnnZp1Z+U/N8UBbjLSsWikJ4a+Lg+Q5RvOQOGEIUC+b9hDqOmFNubeyF+cVI2358 C21rmiCEMrAiK8mh0nvSfauII/ZRxbKoGfbivqPuatOiUR3mJEILPjHTkaUrxkPEa7 7f5Kx7xYdz9jkiCKKkmh8YSUep0VK7Z/Tcv7DLeKLtYoRlsYtBe3eYC3NivAdCpQ/s hZG1XfGpu6BG6QRq+Z1pdt7zQnYwHpazAunZC+55CtCTFSIP8QGS7aRFF3uQWyk5zC e0TX84/Royas0a00VTeebaPihLXneGknYm23FLLdhciEyNxa/dnmN4GmCX4llxq7zm g74rVFPzXXVrQ== Precedence: bulk List-ID: X-Mailing-List: linux-rdma@vger.kernel.org Prepare code to fill zero or more port function optional attributes. Subsequent patch makes use of this to fill more port function attributes. Signed-off-by: Parav Pandit Reviewed-by: Vu Pham --- net/core/devlink.c | 63 +++++++++++++++++++++++----------------------- 1 file changed, 32 insertions(+), 31 deletions(-) diff --git a/net/core/devlink.c b/net/core/devlink.c index a578634052a3..75cca9cbb9d9 100644 --- a/net/core/devlink.c +++ b/net/core/devlink.c @@ -695,6 +695,31 @@ static int devlink_nl_port_attrs_put(struct sk_buff *msg, return 0; } +static int +devlink_port_function_hw_addr_fill(struct devlink *devlink, const struct devlink_ops *ops, + struct devlink_port *port, struct sk_buff *msg, + struct netlink_ext_ack *extack, bool *msg_updated) +{ + u8 hw_addr[MAX_ADDR_LEN]; + int hw_addr_len; + int err; + + if (!ops->port_function_hw_addr_get) + return 0; + + err = ops->port_function_hw_addr_get(devlink, port, hw_addr, &hw_addr_len, extack); + if (err) { + if (err == -EOPNOTSUPP) + return 0; + return err; + } + err = nla_put(msg, DEVLINK_PORT_FUNCTION_ATTR_HW_ADDR, hw_addr_len, hw_addr); + if (err) + return err; + *msg_updated = true; + return 0; +} + static int devlink_nl_port_function_attrs_put(struct sk_buff *msg, struct devlink_port *port, struct netlink_ext_ack *extack) @@ -702,36 +727,16 @@ devlink_nl_port_function_attrs_put(struct sk_buff *msg, struct devlink_port *por struct devlink *devlink = port->devlink; const struct devlink_ops *ops; struct nlattr *function_attr; - bool empty_nest = true; - int err = 0; + bool msg_updated = false; + int err; function_attr = nla_nest_start_noflag(msg, DEVLINK_ATTR_PORT_FUNCTION); if (!function_attr) return -EMSGSIZE; ops = devlink->ops; - if (ops->port_function_hw_addr_get) { - int hw_addr_len; - u8 hw_addr[MAX_ADDR_LEN]; - - err = ops->port_function_hw_addr_get(devlink, port, hw_addr, &hw_addr_len, extack); - if (err == -EOPNOTSUPP) { - /* Port function attributes are optional for a port. If port doesn't - * support function attribute, returning -EOPNOTSUPP is not an error. - */ - err = 0; - goto out; - } else if (err) { - goto out; - } - err = nla_put(msg, DEVLINK_PORT_FUNCTION_ATTR_HW_ADDR, hw_addr_len, hw_addr); - if (err) - goto out; - empty_nest = false; - } - -out: - if (err || empty_nest) + err = devlink_port_function_hw_addr_fill(devlink, ops, port, msg, extack, &msg_updated); + if (err || !msg_updated) nla_nest_cancel(msg, function_attr); else nla_nest_end(msg, function_attr); @@ -964,7 +969,6 @@ devlink_port_function_hw_addr_set(struct devlink *devlink, struct devlink_port * const struct devlink_ops *ops; const u8 *hw_addr; int hw_addr_len; - int err; hw_addr = nla_data(attr); hw_addr_len = nla_len(attr); @@ -989,12 +993,7 @@ devlink_port_function_hw_addr_set(struct devlink *devlink, struct devlink_port * return -EOPNOTSUPP; } - err = ops->port_function_hw_addr_set(devlink, port, hw_addr, hw_addr_len, extack); - if (err) - return err; - - devlink_port_notify(port, DEVLINK_CMD_PORT_NEW); - return 0; + return ops->port_function_hw_addr_set(devlink, port, hw_addr, hw_addr_len, extack); } static int @@ -1015,6 +1014,8 @@ devlink_port_function_set(struct devlink *devlink, struct devlink_port *port, if (attr) err = devlink_port_function_hw_addr_set(devlink, port, attr, extack); + if (!err) + devlink_port_notify(port, DEVLINK_CMD_PORT_NEW); return err; } From patchwork Thu Nov 12 19:24:12 2020 Content-Type: text/plain; charset="utf-8" MIME-Version: 1.0 Content-Transfer-Encoding: 7bit X-Patchwork-Submitter: Parav Pandit X-Patchwork-Id: 11901287 Return-Path: Received: from mail.kernel.org (pdx-korg-mail-1.web.codeaurora.org [172.30.200.123]) by pdx-korg-patchwork-2.web.codeaurora.org (Postfix) with ESMTP id E7D07697 for ; Thu, 12 Nov 2020 19:25:26 +0000 (UTC) Received: from vger.kernel.org (vger.kernel.org [23.128.96.18]) by mail.kernel.org (Postfix) with ESMTP id BFB6620B80 for ; Thu, 12 Nov 2020 19:25:26 +0000 (UTC) Authentication-Results: mail.kernel.org; dkim=pass (2048-bit key) header.d=nvidia.com header.i=@nvidia.com header.b="mPbMV2li" Received: (majordomo@vger.kernel.org) by vger.kernel.org via listexpand id S1727041AbgKLTZZ (ORCPT ); Thu, 12 Nov 2020 14:25:25 -0500 Received: from hqnvemgate24.nvidia.com ([216.228.121.143]:11329 "EHLO hqnvemgate24.nvidia.com" rhost-flags-OK-OK-OK-OK) by vger.kernel.org with ESMTP id S1726812AbgKLTYu (ORCPT ); Thu, 12 Nov 2020 14:24:50 -0500 Received: from hqmail.nvidia.com (Not Verified[216.228.121.13]) by hqnvemgate24.nvidia.com (using TLS: TLSv1.2, AES256-SHA) id ; Thu, 12 Nov 2020 11:24:57 -0800 Received: from sw-mtx-036.mtx.labs.mlnx (172.20.13.39) by HQMAIL107.nvidia.com (172.20.187.13) with Microsoft SMTP Server (TLS) id 15.0.1473.3; Thu, 12 Nov 2020 19:24:48 +0000 From: Parav Pandit To: , , CC: , , , , , , , Parav Pandit , Vu Pham Subject: [PATCH net-next 02/13] devlink: Introduce PCI SF port flavour and port attribute Date: Thu, 12 Nov 2020 21:24:12 +0200 Message-ID: <20201112192424.2742-3-parav@nvidia.com> X-Mailer: git-send-email 2.26.2 In-Reply-To: <20201112192424.2742-1-parav@nvidia.com> References: <20201112192424.2742-1-parav@nvidia.com> MIME-Version: 1.0 X-Originating-IP: [172.20.13.39] X-ClientProxiedBy: HQMAIL107.nvidia.com (172.20.187.13) To HQMAIL107.nvidia.com (172.20.187.13) DKIM-Signature: v=1; a=rsa-sha256; c=relaxed/relaxed; d=nvidia.com; s=n1; t=1605209097; bh=qAAyHZbrk+w0mcub+q1sHLPG34iiL4uRk0ZQDzAVc2M=; h=From:To:CC:Subject:Date:Message-ID:X-Mailer:In-Reply-To: References:MIME-Version:Content-Transfer-Encoding:Content-Type: X-Originating-IP:X-ClientProxiedBy; b=mPbMV2liF8U9ZVw4VgFXQfozW3yVZrSijFqz8YKkTZBYNkk67lLdFnE+KX5/hYPtR PvVBTO29vX7rUMVv+zh+JhYFkP842uaM3DAHsafNGgrbtcXScwmRCmQE4sPSW0oXDO BIEc6PQt6VLEPHlcUvyOjFz1qHo3agktKRz5ruYxl/WNj28wlIqoUqdpG8MvBr/umF vcwGiWAfM8b9UrQSm67VdoHywlROiUVr5o48SNLj2YfpvmUEct3Bc/QvRCGWkxwvOF n1gg27vFSAtqN/PAxmtON6Jp14xlANu8RwsHQg81oRE8nMRcmS8jVeVJBJmMupX8Pi +e8fqNCvv4u5A== Precedence: bulk List-ID: X-Mailing-List: linux-rdma@vger.kernel.org A PCI sub-function (SF) represents a portion of the device similar to PCI VF. In an eswitch, PCI SF may have port which is normally represented using a representor netdevice. To have better visibility of eswitch port, its association with SF, and its representor netdevice, introduce a PCI SF port flavour. When devlink port flavour is PCI SF, fill up PCI SF attributes of the port. Extend port name creation using PCI PF and SF number scheme on best effort basis, so that vendor drivers can skip defining their own scheme. This is done as cApfNSfM, where A, N and M are controller, PCI PF and PCI SF number respectively. This is similar to existing naming for PCI PF and PCI VF ports. An example view of a PCI SF port: $ devlink port show pci/0000:06:00.0/32768 pci/0000:06:00.0/32768: type eth netdev eth0 flavour pcisf controller 0 pfnum 0 sfnum 88 external false splittable false function: hw_addr 00:00:00:00:88:88 state active opstate attached $ devlink port show pci/0000:06:00.0/32768 -jp { "port": { "pci/0000:06:00.0/32768": { "type": "eth", "netdev": "eth0", "flavour": "pcisf", "controller": 0, "pfnum": 0, "sfnum": 88, "external": false, "splittable": false, "function": { "hw_addr": "00:00:00:00:88:88", "state": "active", "opstate": "attached" } } } } Signed-off-by: Parav Pandit Reviewed-by: Jiri Pirko Reviewed-by: Vu Pham --- include/net/devlink.h | 17 +++++++++++++ include/uapi/linux/devlink.h | 5 ++++ net/core/devlink.c | 46 ++++++++++++++++++++++++++++++++++++ 3 files changed, 68 insertions(+) diff --git a/include/net/devlink.h b/include/net/devlink.h index b01bb9bca5a2..1b7c9fbc607a 100644 --- a/include/net/devlink.h +++ b/include/net/devlink.h @@ -92,6 +92,20 @@ struct devlink_port_pci_vf_attrs { u8 external:1; }; +/** + * struct devlink_port_pci_sf_attrs - devlink port's PCI SF attributes + * @controller: Associated controller number + * @pf: Associated PCI PF number for this port. + * @sf: Associated PCI SF for of the PCI PF for this port. + * @external: when set, indicates if a port is for an external controller + */ +struct devlink_port_pci_sf_attrs { + u32 controller; + u16 pf; + u32 sf; + u8 external:1; +}; + /** * struct devlink_port_attrs - devlink port object * @flavour: flavour of the port @@ -113,6 +127,7 @@ struct devlink_port_attrs { struct devlink_port_phys_attrs phys; struct devlink_port_pci_pf_attrs pci_pf; struct devlink_port_pci_vf_attrs pci_vf; + struct devlink_port_pci_sf_attrs pci_sf; }; }; @@ -1401,6 +1416,8 @@ void devlink_port_attrs_pci_pf_set(struct devlink_port *devlink_port, u32 contro u16 pf, bool external); void devlink_port_attrs_pci_vf_set(struct devlink_port *devlink_port, u32 controller, u16 pf, u16 vf, bool external); +void devlink_port_attrs_pci_sf_set(struct devlink_port *devlink_port, u32 controller, + u16 pf, u32 sf, bool external); int devlink_sb_register(struct devlink *devlink, unsigned int sb_index, u32 size, u16 ingress_pools_count, u16 egress_pools_count, u16 ingress_tc_count, diff --git a/include/uapi/linux/devlink.h b/include/uapi/linux/devlink.h index 0113bc4db9f5..57065722b9c3 100644 --- a/include/uapi/linux/devlink.h +++ b/include/uapi/linux/devlink.h @@ -200,6 +200,10 @@ enum devlink_port_flavour { DEVLINK_PORT_FLAVOUR_UNUSED, /* Port which exists in the switch, but * is not used in any way. */ + DEVLINK_PORT_FLAVOUR_PCI_SF, /* Represents eswitch port + * for the PCI SF. It is an internal + * port that faces the PCI SF. + */ }; enum devlink_param_cmode { @@ -527,6 +531,7 @@ enum devlink_attr { DEVLINK_ATTR_RELOAD_STATS_VALUE, /* u32 */ DEVLINK_ATTR_REMOTE_RELOAD_STATS, /* nested */ + DEVLINK_ATTR_PORT_PCI_SF_NUMBER, /* u32 */ /* add new attributes above here, update the policy in devlink.c */ __DEVLINK_ATTR_MAX, diff --git a/net/core/devlink.c b/net/core/devlink.c index 75cca9cbb9d9..b1e849b624a6 100644 --- a/net/core/devlink.c +++ b/net/core/devlink.c @@ -673,6 +673,15 @@ static int devlink_nl_port_attrs_put(struct sk_buff *msg, if (nla_put_u8(msg, DEVLINK_ATTR_PORT_EXTERNAL, attrs->pci_vf.external)) return -EMSGSIZE; break; + case DEVLINK_PORT_FLAVOUR_PCI_SF: + if (nla_put_u32(msg, DEVLINK_ATTR_PORT_CONTROLLER_NUMBER, + attrs->pci_sf.controller) || + nla_put_u16(msg, DEVLINK_ATTR_PORT_PCI_PF_NUMBER, attrs->pci_sf.pf) || + nla_put_u32(msg, DEVLINK_ATTR_PORT_PCI_SF_NUMBER, attrs->pci_sf.sf)) + return -EMSGSIZE; + if (nla_put_u8(msg, DEVLINK_ATTR_PORT_EXTERNAL, attrs->pci_sf.external)) + return -EMSGSIZE; + break; case DEVLINK_PORT_FLAVOUR_PHYSICAL: case DEVLINK_PORT_FLAVOUR_CPU: case DEVLINK_PORT_FLAVOUR_DSA: @@ -8330,6 +8339,33 @@ void devlink_port_attrs_pci_vf_set(struct devlink_port *devlink_port, u32 contro } EXPORT_SYMBOL_GPL(devlink_port_attrs_pci_vf_set); +/** + * devlink_port_attrs_pci_sf_set - Set PCI SF port attributes + * + * @devlink_port: devlink port + * @controller: associated controller number for the devlink port instance + * @pf: associated PF for the devlink port instance + * @sf: associated SF of a PF for the devlink port instance + * @external: indicates if the port is for an external controller + */ +void devlink_port_attrs_pci_sf_set(struct devlink_port *devlink_port, u32 controller, + u16 pf, u32 sf, bool external) +{ + struct devlink_port_attrs *attrs = &devlink_port->attrs; + int ret; + + if (WARN_ON(devlink_port->registered)) + return; + ret = __devlink_port_attrs_set(devlink_port, DEVLINK_PORT_FLAVOUR_PCI_SF); + if (ret) + return; + attrs->pci_sf.controller = controller; + attrs->pci_sf.pf = pf; + attrs->pci_sf.sf = sf; + attrs->pci_sf.external = external; +} +EXPORT_SYMBOL_GPL(devlink_port_attrs_pci_sf_set); + static int __devlink_port_phys_port_name_get(struct devlink_port *devlink_port, char *name, size_t len) { @@ -8378,6 +8414,16 @@ static int __devlink_port_phys_port_name_get(struct devlink_port *devlink_port, n = snprintf(name, len, "pf%uvf%u", attrs->pci_vf.pf, attrs->pci_vf.vf); break; + case DEVLINK_PORT_FLAVOUR_PCI_SF: + if (attrs->pci_sf.external) { + n = snprintf(name, len, "c%u", attrs->pci_sf.controller); + if (n >= len) + return -EINVAL; + len -= n; + name += n; + } + n = snprintf(name, len, "pf%usf%u", attrs->pci_sf.pf, attrs->pci_sf.sf); + break; } if (n >= len) From patchwork Thu Nov 12 19:24:13 2020 Content-Type: text/plain; charset="utf-8" MIME-Version: 1.0 Content-Transfer-Encoding: 7bit X-Patchwork-Submitter: Parav Pandit X-Patchwork-Id: 11901285 Return-Path: Received: from mail.kernel.org (pdx-korg-mail-1.web.codeaurora.org [172.30.200.123]) by pdx-korg-patchwork-2.web.codeaurora.org (Postfix) with ESMTP id B9F511391 for ; Thu, 12 Nov 2020 19:25:25 +0000 (UTC) Received: from vger.kernel.org (vger.kernel.org [23.128.96.18]) by mail.kernel.org (Postfix) with ESMTP id 9475921D7F for ; Thu, 12 Nov 2020 19:25:25 +0000 (UTC) Authentication-Results: mail.kernel.org; dkim=pass (2048-bit key) header.d=nvidia.com header.i=@nvidia.com header.b="CxacRKt6" Received: (majordomo@vger.kernel.org) by vger.kernel.org via listexpand id S1726831AbgKLTYw (ORCPT ); Thu, 12 Nov 2020 14:24:52 -0500 Received: from hqnvemgate26.nvidia.com ([216.228.121.65]:14152 "EHLO hqnvemgate26.nvidia.com" rhost-flags-OK-OK-OK-OK) by vger.kernel.org with ESMTP id S1726826AbgKLTYv (ORCPT ); Thu, 12 Nov 2020 14:24:51 -0500 Received: from hqmail.nvidia.com (Not Verified[216.228.121.13]) by hqnvemgate26.nvidia.com (using TLS: TLSv1.2, AES256-SHA) id ; Thu, 12 Nov 2020 11:24:54 -0800 Received: from sw-mtx-036.mtx.labs.mlnx (172.20.13.39) by HQMAIL107.nvidia.com (172.20.187.13) with Microsoft SMTP Server (TLS) id 15.0.1473.3; Thu, 12 Nov 2020 19:24:49 +0000 From: Parav Pandit To: , , CC: , , , , , , , Parav Pandit , Vu Pham Subject: [PATCH net-next 03/13] devlink: Support add and delete devlink port Date: Thu, 12 Nov 2020 21:24:13 +0200 Message-ID: <20201112192424.2742-4-parav@nvidia.com> X-Mailer: git-send-email 2.26.2 In-Reply-To: <20201112192424.2742-1-parav@nvidia.com> References: <20201112192424.2742-1-parav@nvidia.com> MIME-Version: 1.0 X-Originating-IP: [172.20.13.39] X-ClientProxiedBy: HQMAIL107.nvidia.com (172.20.187.13) To HQMAIL107.nvidia.com (172.20.187.13) DKIM-Signature: v=1; a=rsa-sha256; c=relaxed/relaxed; d=nvidia.com; s=n1; t=1605209095; bh=D4JZp5kVleCr5eF3o9evBl+V3rS5np4bBhWU/57b7mE=; h=From:To:CC:Subject:Date:Message-ID:X-Mailer:In-Reply-To: References:MIME-Version:Content-Transfer-Encoding:Content-Type: X-Originating-IP:X-ClientProxiedBy; b=CxacRKt6bmjC54t3dJiI0b32E03UQgrzDHATkPkA2VQ7xLMceUMnIm5pSzqehX0GD N96SLyDteYGRpUO08CjYxjPwQyqYBiJoiZFGxH+77avsOzAWKrknxsjhGnxvvHqa2J Zu36loAk8+nGYFgXR7NY7RDNJrDlup4or4vC8TTF0qypdDl6mFadJxakEP1WQrP+AJ ADQSvnHiYwUg8vy1zh43otVgceT7spQ06czFDudRMv862Gn//uLc0AnrM8whs6Ja+R TADxaPZAJ8LjEFDrMLbTmlmCo/JnAKb1AMcP7TnebTL6DBd0vwj54GuFgULM0qOq6X TfDkMpVH0JA1Q== Precedence: bulk List-ID: X-Mailing-List: linux-rdma@vger.kernel.org Extended devlink interface for the user to add and delete port. Extend devlink to connect user requests to driver to add/delete such port in the device. When driver routines are invoked, devlink instance lock is not held. This enables driver to perform several devlink objects registration, unregistration such as (port, health reporter, resource etc) by using exising devlink APIs. This also helps to uniformly use the code for port unregistration during driver unload and during port deletion initiated by user. Examples of add, show and delete commands: $ devlink dev eswitch set pci/0000:06:00.0 mode switchdev $ devlink port show pci/0000:06:00.0/65535: type eth netdev ens2f0np0 flavour physical port 0 splittable false $ devlink port add pci/0000:06:00.0 flavour pcisf pfnum 0 sfnum 88 $ devlink port show pci/0000:06:00.0/32768 pci/0000:06:00.0/32768: type eth netdev eth0 flavour pcisf controller 0 pfnum 0 sfnum 88 external false splittable false function: hw_addr 00:00:00:00:88:88 state inactive opstate detached $ udevadm test-builtin net_id /sys/class/net/eth0 Load module index Parsed configuration file /usr/lib/systemd/network/99-default.link Created link configuration context. Using default interface naming scheme 'v245'. ID_NET_NAMING_SCHEME=v245 ID_NET_NAME_PATH=enp6s0f0npf0sf88 ID_NET_NAME_SLOT=ens2f0npf0sf88 Unload module index Unloaded link configuration context. $ devlink port del netdevsim/netdevsim10/32768 Signed-off-by: Parav Pandit Reviewed-by: Jiri Pirko Reviewed-by: Vu Pham --- include/net/devlink.h | 38 ++++++++++++++++++++++++ net/core/devlink.c | 67 +++++++++++++++++++++++++++++++++++++++++++ 2 files changed, 105 insertions(+) diff --git a/include/net/devlink.h b/include/net/devlink.h index 1b7c9fbc607a..3991345ef3e2 100644 --- a/include/net/devlink.h +++ b/include/net/devlink.h @@ -152,6 +152,17 @@ struct devlink_port { struct mutex reporters_lock; /* Protects reporter_list */ }; +struct devlink_port_new_attrs { + enum devlink_port_flavour flavour; + unsigned int port_index; + u32 controller; + u32 sfnum; + u16 pfnum; + u8 port_index_valid:1, + controller_valid:1, + sfnum_valid:1; +}; + struct devlink_sb_pool_info { enum devlink_sb_pool_type pool_type; u32 size; @@ -1360,6 +1371,33 @@ struct devlink_ops { int (*port_function_hw_addr_set)(struct devlink *devlink, struct devlink_port *port, const u8 *hw_addr, int hw_addr_len, struct netlink_ext_ack *extack); + /** + * @port_new: Port add function. + * + * Should be used by device driver to let caller add new port of a specified flavour + * with optional attributes. + * Driver should return -EOPNOTSUPP if it doesn't support port addition of a specified + * flavour or specified attributes. Driver should set extack error message in case of fail + * to add the port. + * devlink core does not hold a devlink instance lock when this callback is invoked. + * Driver must ensures synchronization when adding or deleting a port. Driver must + * register a port with devlink core. + */ + int (*port_new)(struct devlink *devlink, const struct devlink_port_new_attrs *attrs, + struct netlink_ext_ack *extack); + /** + * @port_del: Port delete function. + * + * Should be used by device driver to let caller delete port which was previously created + * using port_new() callback. + * Driver should return -EOPNOTSUPP if it doesn't support port deletion. + * Driver should set extack error message in case of fail to delete the port. + * devlink core does not hold a devlink instance lock when this callback is invoked. + * Driver must ensures synchronization when adding or deleting a port. Driver must + * register a port with devlink core. + */ + int (*port_del)(struct devlink *devlink, unsigned int port_index, + struct netlink_ext_ack *extack); }; static inline void *devlink_priv(struct devlink *devlink) diff --git a/net/core/devlink.c b/net/core/devlink.c index b1e849b624a6..dccdf36afba6 100644 --- a/net/core/devlink.c +++ b/net/core/devlink.c @@ -1124,6 +1124,57 @@ static int devlink_nl_cmd_port_unsplit_doit(struct sk_buff *skb, return devlink_port_unsplit(devlink, port_index, info->extack); } +static int devlink_nl_cmd_port_new_doit(struct sk_buff *skb, struct genl_info *info) +{ + struct netlink_ext_ack *extack = info->extack; + struct devlink_port_new_attrs new_attrs = {}; + struct devlink *devlink = info->user_ptr[0]; + + if (!info->attrs[DEVLINK_ATTR_PORT_FLAVOUR] || + !info->attrs[DEVLINK_ATTR_PORT_PCI_PF_NUMBER]) { + NL_SET_ERR_MSG_MOD(extack, "Port flavour or PCI PF are not specified"); + return -EINVAL; + } + new_attrs.flavour = nla_get_u16(info->attrs[DEVLINK_ATTR_PORT_FLAVOUR]); + new_attrs.pfnum = nla_get_u16(info->attrs[DEVLINK_ATTR_PORT_PCI_PF_NUMBER]); + + if (info->attrs[DEVLINK_ATTR_PORT_INDEX]) { + new_attrs.port_index = nla_get_u32(info->attrs[DEVLINK_ATTR_PORT_INDEX]); + new_attrs.port_index_valid = true; + } + if (info->attrs[DEVLINK_ATTR_PORT_CONTROLLER_NUMBER]) { + new_attrs.controller = + nla_get_u16(info->attrs[DEVLINK_ATTR_PORT_CONTROLLER_NUMBER]); + new_attrs.controller_valid = true; + } + if (info->attrs[DEVLINK_ATTR_PORT_PCI_SF_NUMBER]) { + new_attrs.sfnum = nla_get_u32(info->attrs[DEVLINK_ATTR_PORT_PCI_SF_NUMBER]); + new_attrs.sfnum_valid = true; + } + + if (!devlink->ops->port_new) + return -EOPNOTSUPP; + + return devlink->ops->port_new(devlink, &new_attrs, extack); +} + +static int devlink_nl_cmd_port_del_doit(struct sk_buff *skb, struct genl_info *info) +{ + struct netlink_ext_ack *extack = info->extack; + struct devlink *devlink = info->user_ptr[0]; + unsigned int port_index; + + if (!info->attrs[DEVLINK_ATTR_PORT_INDEX]) { + NL_SET_ERR_MSG_MOD(extack, "Port index is not specified"); + return -EINVAL; + } + port_index = nla_get_u32(info->attrs[DEVLINK_ATTR_PORT_INDEX]); + + if (!devlink->ops->port_del) + return -EOPNOTSUPP; + return devlink->ops->port_del(devlink, port_index, extack); +} + static int devlink_nl_sb_fill(struct sk_buff *msg, struct devlink *devlink, struct devlink_sb *devlink_sb, enum devlink_command cmd, u32 portid, @@ -7565,6 +7616,10 @@ static const struct nla_policy devlink_nl_policy[DEVLINK_ATTR_MAX + 1] = { [DEVLINK_ATTR_RELOAD_ACTION] = NLA_POLICY_RANGE(NLA_U8, DEVLINK_RELOAD_ACTION_DRIVER_REINIT, DEVLINK_RELOAD_ACTION_MAX), [DEVLINK_ATTR_RELOAD_LIMITS] = NLA_POLICY_BITFIELD32(DEVLINK_RELOAD_LIMITS_VALID_MASK), + [DEVLINK_ATTR_PORT_FLAVOUR] = { .type = NLA_U16 }, + [DEVLINK_ATTR_PORT_PCI_PF_NUMBER] = { .type = NLA_U16 }, + [DEVLINK_ATTR_PORT_PCI_SF_NUMBER] = { .type = NLA_U32 }, + [DEVLINK_ATTR_PORT_CONTROLLER_NUMBER] = { .type = NLA_U32 }, }; static const struct genl_small_ops devlink_nl_ops[] = { @@ -7604,6 +7659,18 @@ static const struct genl_small_ops devlink_nl_ops[] = { .flags = GENL_ADMIN_PERM, .internal_flags = DEVLINK_NL_FLAG_NO_LOCK, }, + { + .cmd = DEVLINK_CMD_PORT_NEW, + .doit = devlink_nl_cmd_port_new_doit, + .flags = GENL_ADMIN_PERM, + .internal_flags = DEVLINK_NL_FLAG_NO_LOCK, + }, + { + .cmd = DEVLINK_CMD_PORT_DEL, + .doit = devlink_nl_cmd_port_del_doit, + .flags = GENL_ADMIN_PERM, + .internal_flags = DEVLINK_NL_FLAG_NO_LOCK, + }, { .cmd = DEVLINK_CMD_SB_GET, .validate = GENL_DONT_VALIDATE_STRICT | GENL_DONT_VALIDATE_DUMP, From patchwork Thu Nov 12 19:24:14 2020 Content-Type: text/plain; charset="utf-8" MIME-Version: 1.0 Content-Transfer-Encoding: 7bit X-Patchwork-Submitter: Parav Pandit X-Patchwork-Id: 11901263 Return-Path: Received: from mail.kernel.org (pdx-korg-mail-1.web.codeaurora.org [172.30.200.123]) by pdx-korg-patchwork-2.web.codeaurora.org (Postfix) with ESMTP id A587B697 for ; Thu, 12 Nov 2020 19:24:55 +0000 (UTC) Received: from vger.kernel.org (vger.kernel.org [23.128.96.18]) by mail.kernel.org (Postfix) with ESMTP id 7EB5F2223F for ; Thu, 12 Nov 2020 19:24:55 +0000 (UTC) Authentication-Results: mail.kernel.org; dkim=pass (2048-bit key) header.d=nvidia.com header.i=@nvidia.com header.b="LlYO5K1q" Received: (majordomo@vger.kernel.org) by vger.kernel.org via listexpand id S1726853AbgKLTYx (ORCPT ); Thu, 12 Nov 2020 14:24:53 -0500 Received: from hqnvemgate25.nvidia.com ([216.228.121.64]:11879 "EHLO hqnvemgate25.nvidia.com" rhost-flags-OK-OK-OK-OK) by vger.kernel.org with ESMTP id S1726648AbgKLTYw (ORCPT ); Thu, 12 Nov 2020 14:24:52 -0500 Received: from hqmail.nvidia.com (Not Verified[216.228.121.13]) by hqnvemgate25.nvidia.com (using TLS: TLSv1.2, AES256-SHA) id ; Thu, 12 Nov 2020 11:24:46 -0800 Received: from sw-mtx-036.mtx.labs.mlnx (172.20.13.39) by HQMAIL107.nvidia.com (172.20.187.13) with Microsoft SMTP Server (TLS) id 15.0.1473.3; Thu, 12 Nov 2020 19:24:50 +0000 From: Parav Pandit To: , , CC: , , , , , , , Parav Pandit , Vu Pham Subject: [PATCH net-next 04/13] devlink: Support get and set state of port function Date: Thu, 12 Nov 2020 21:24:14 +0200 Message-ID: <20201112192424.2742-5-parav@nvidia.com> X-Mailer: git-send-email 2.26.2 In-Reply-To: <20201112192424.2742-1-parav@nvidia.com> References: <20201112192424.2742-1-parav@nvidia.com> MIME-Version: 1.0 X-Originating-IP: [172.20.13.39] X-ClientProxiedBy: HQMAIL107.nvidia.com (172.20.187.13) To HQMAIL107.nvidia.com (172.20.187.13) DKIM-Signature: v=1; a=rsa-sha256; c=relaxed/relaxed; d=nvidia.com; s=n1; t=1605209086; bh=LGS78c0/HYZgm/YQsSGw/xFezP/poNpCmeTJPZRY3NY=; h=From:To:CC:Subject:Date:Message-ID:X-Mailer:In-Reply-To: References:MIME-Version:Content-Transfer-Encoding:Content-Type: X-Originating-IP:X-ClientProxiedBy; b=LlYO5K1qqX8qJP9HoZMeoWno7JcJoR+ezBkNg/xbBUlItDBJsBMMJBgfg/CbL4IYO B8F4w1+g3nWki6dLiDO0GO6ISv3Vs9VE48LHOmh7kGbQgq93UmOto+90GHnixi/Hhh bYBuo22VbJb94xYJ9xN8/S3VdYkRqqMr7OMGcVjP2ryJq89Vm/EXf6e+SR6k2Ekkb8 UX76KyPXt4s0KgyZCCjee6iUJ+a1KhsGImIHn9jH7hRIJWaIW9mSHe27wkcI2O+dEX oFT5CAKyRkkjn+xCZ2shBrwyFXs7YFcB91xezaWkxYG7VcCi/SnF1eGixskvLa3JIC MfmnbgQNbGSLw== Precedence: bulk List-ID: X-Mailing-List: linux-rdma@vger.kernel.org devlink port function can be in active or inactive state. Allow users to get and set port function's state. When the port function it activated, its operational state may change after a while when the device is created and driver binds to it. Similarly on deactivation flow. To clearly describe the state of the port function and its device's operational state in the host system, define state and opstate attributes. Example of a PCI SF port which supports a port function: Create a device with ID=10 and one physical port. $ devlink dev eswitch set pci/0000:06:00.0 mode switchdev $ devlink port show pci/0000:06:00.0/65535: type eth netdev ens2f0np0 flavour physical port 0 splittable false $ devlink port add pci/0000:06:00.0 flavour pcisf pfnum 0 sfnum 88 $ devlink port show pci/0000:06:00.0/32768 pci/0000:06:00.0/32768: type eth netdev eth0 flavour pcisf controller 0 pfnum 0 sfnum 88 external false splittable false function: hw_addr 00:00:00:00:88:88 state inactive opstate detached $ devlink port function set pci/0000:06:00.0/32768 hw_addr 00:00:00:00:88:88 state active $ devlink port show pci/0000:06:00.0/32768 -jp { "port": { "pci/0000:06:00.0/32768": { "type": "eth", "netdev": "eth0", "flavour": "pcisf", "controller": 0, "pfnum": 0, "sfnum": 88, "external": false, "splittable": false, "function": { "hw_addr": "00:00:00:00:88:88", "state": "active", "opstate": "attached" } } } } Signed-off-by: Parav Pandit Reviewed-by: Jiri Pirko Reviewed-by: Vu Pham --- include/net/devlink.h | 21 +++++++++ include/uapi/linux/devlink.h | 21 +++++++++ net/core/devlink.c | 89 +++++++++++++++++++++++++++++++++++- 3 files changed, 130 insertions(+), 1 deletion(-) diff --git a/include/net/devlink.h b/include/net/devlink.h index 3991345ef3e2..124bac130c22 100644 --- a/include/net/devlink.h +++ b/include/net/devlink.h @@ -1371,6 +1371,27 @@ struct devlink_ops { int (*port_function_hw_addr_set)(struct devlink *devlink, struct devlink_port *port, const u8 *hw_addr, int hw_addr_len, struct netlink_ext_ack *extack); + /** + * @port_function_state_get: Port function's state get function. + * + * Should be used by device drivers to report the state of a function managed + * by the devlink port. Driver should return -EOPNOTSUPP if it doesn't support port + * function handling for a particular port. + */ + int (*port_function_state_get)(struct devlink *devlink, struct devlink_port *port, + enum devlink_port_function_state *state, + enum devlink_port_function_opstate *opstate, + struct netlink_ext_ack *extack); + /** + * @port_function_state_set: Port function's state set function. + * + * Should be used by device drivers to set the state of a function managed + * by the devlink port. Driver should return -EOPNOTSUPP if it doesn't support port + * function handling for a particular port. + */ + int (*port_function_state_set)(struct devlink *devlink, struct devlink_port *port, + enum devlink_port_function_state state, + struct netlink_ext_ack *extack); /** * @port_new: Port add function. * diff --git a/include/uapi/linux/devlink.h b/include/uapi/linux/devlink.h index 57065722b9c3..0c6eb3add736 100644 --- a/include/uapi/linux/devlink.h +++ b/include/uapi/linux/devlink.h @@ -581,9 +581,30 @@ enum devlink_resource_unit { enum devlink_port_function_attr { DEVLINK_PORT_FUNCTION_ATTR_UNSPEC, DEVLINK_PORT_FUNCTION_ATTR_HW_ADDR, /* binary */ + DEVLINK_PORT_FUNCTION_ATTR_STATE, /* u8 */ + DEVLINK_PORT_FUNCTION_ATTR_OPSTATE, /* u8 */ __DEVLINK_PORT_FUNCTION_ATTR_MAX, DEVLINK_PORT_FUNCTION_ATTR_MAX = __DEVLINK_PORT_FUNCTION_ATTR_MAX - 1 }; +enum devlink_port_function_state { + DEVLINK_PORT_FUNCTION_STATE_INACTIVE, + DEVLINK_PORT_FUNCTION_STATE_ACTIVE, +}; + +/** + * enum devlink_port_function_opstate - indicates operational state of port function + * @DEVLINK_PORT_FUNCTION_OPSTATE_ATTACHED: Driver is attached to the function of port, for + * gracefufl tear down of the function, after + * inactivation of the port function, user should wait + * for operational state to turn DETACHED. + * @DEVLINK_PORT_FUNCTION_OPSTATE_DETACHED: Driver is detached from the function of port; it is + * safe to delete the port. + */ +enum devlink_port_function_opstate { + DEVLINK_PORT_FUNCTION_OPSTATE_DETACHED, + DEVLINK_PORT_FUNCTION_OPSTATE_ATTACHED, +}; + #endif /* _UAPI_LINUX_DEVLINK_H_ */ diff --git a/net/core/devlink.c b/net/core/devlink.c index dccdf36afba6..3e59ba73d5c4 100644 --- a/net/core/devlink.c +++ b/net/core/devlink.c @@ -87,6 +87,9 @@ EXPORT_TRACEPOINT_SYMBOL_GPL(devlink_trap_report); static const struct nla_policy devlink_function_nl_policy[DEVLINK_PORT_FUNCTION_ATTR_MAX + 1] = { [DEVLINK_PORT_FUNCTION_ATTR_HW_ADDR] = { .type = NLA_BINARY }, + [DEVLINK_PORT_FUNCTION_ATTR_STATE] = + NLA_POLICY_RANGE(NLA_U8, DEVLINK_PORT_FUNCTION_STATE_INACTIVE, + DEVLINK_PORT_FUNCTION_STATE_ACTIVE), }; static LIST_HEAD(devlink_list); @@ -729,6 +732,52 @@ devlink_port_function_hw_addr_fill(struct devlink *devlink, const struct devlink return 0; } +static bool devlink_port_function_state_valid(enum devlink_port_function_state state) +{ + return state == DEVLINK_PORT_FUNCTION_STATE_INACTIVE || + state == DEVLINK_PORT_FUNCTION_STATE_ACTIVE; +} + +static bool devlink_port_function_opstate_valid(enum devlink_port_function_opstate state) +{ + return state == DEVLINK_PORT_FUNCTION_OPSTATE_DETACHED || + state == DEVLINK_PORT_FUNCTION_OPSTATE_ATTACHED; +} + +static int devlink_port_function_state_fill(struct devlink *devlink, const struct devlink_ops *ops, + struct devlink_port *port, struct sk_buff *msg, + struct netlink_ext_ack *extack, bool *msg_updated) +{ + enum devlink_port_function_opstate opstate; + enum devlink_port_function_state state; + int err; + + if (!ops->port_function_state_get) + return 0; + + err = ops->port_function_state_get(devlink, port, &state, &opstate, extack); + if (err) { + if (err == -EOPNOTSUPP) + return 0; + return err; + } + if (!devlink_port_function_state_valid(state)) { + WARN_ON_ONCE(1); + NL_SET_ERR_MSG_MOD(extack, "Invalid state value read from driver"); + return -EINVAL; + } + if (!devlink_port_function_opstate_valid(opstate)) { + WARN_ON_ONCE(1); + NL_SET_ERR_MSG_MOD(extack, "Invalid operational state value read from driver"); + return -EINVAL; + } + if (nla_put_u8(msg, DEVLINK_PORT_FUNCTION_ATTR_STATE, state) || + nla_put_u8(msg, DEVLINK_PORT_FUNCTION_ATTR_OPSTATE, opstate)) + return -EMSGSIZE; + *msg_updated = true; + return 0; +} + static int devlink_nl_port_function_attrs_put(struct sk_buff *msg, struct devlink_port *port, struct netlink_ext_ack *extack) @@ -745,6 +794,12 @@ devlink_nl_port_function_attrs_put(struct sk_buff *msg, struct devlink_port *por ops = devlink->ops; err = devlink_port_function_hw_addr_fill(devlink, ops, port, msg, extack, &msg_updated); + if (err) + goto out; + err = devlink_port_function_state_fill(devlink, ops, port, msg, extack, &msg_updated); + if (err) + goto out; +out: if (err || !msg_updated) nla_nest_cancel(msg, function_attr); else @@ -1005,6 +1060,28 @@ devlink_port_function_hw_addr_set(struct devlink *devlink, struct devlink_port * return ops->port_function_hw_addr_set(devlink, port, hw_addr, hw_addr_len, extack); } +static int +devlink_port_function_state_set(struct devlink *devlink, struct devlink_port *port, + const struct nlattr *attr, struct netlink_ext_ack *extack) +{ + enum devlink_port_function_state state; + const struct devlink_ops *ops; + int err; + + state = nla_get_u8(attr); + ops = devlink->ops; + if (!ops->port_function_state_set) { + NL_SET_ERR_MSG_MOD(extack, "Port function does not support state setting"); + return -EOPNOTSUPP; + } + err = ops->port_function_state_set(devlink, port, state, extack); + if (err) + return err; + + devlink_port_notify(port, DEVLINK_CMD_PORT_NEW); + return 0; +} + static int devlink_port_function_set(struct devlink *devlink, struct devlink_port *port, const struct nlattr *attr, struct netlink_ext_ack *extack) @@ -1020,8 +1097,18 @@ devlink_port_function_set(struct devlink *devlink, struct devlink_port *port, } attr = tb[DEVLINK_PORT_FUNCTION_ATTR_HW_ADDR]; - if (attr) + if (attr) { err = devlink_port_function_hw_addr_set(devlink, port, attr, extack); + if (err) + return err; + } + /* Keep this as the last function attribute set, so that when + * multiple port function attributes are set along with state, + * Those can be applied first before activating the state. + */ + attr = tb[DEVLINK_PORT_FUNCTION_ATTR_STATE]; + if (attr) + err = devlink_port_function_state_set(devlink, port, attr, extack); if (!err) devlink_port_notify(port, DEVLINK_CMD_PORT_NEW); From patchwork Thu Nov 12 19:24:15 2020 Content-Type: text/plain; charset="utf-8" MIME-Version: 1.0 Content-Transfer-Encoding: 7bit X-Patchwork-Submitter: Parav Pandit X-Patchwork-Id: 11901277 Return-Path: Received: from mail.kernel.org (pdx-korg-mail-1.web.codeaurora.org [172.30.200.123]) by pdx-korg-patchwork-2.web.codeaurora.org (Postfix) with ESMTP id 27FD71391 for ; Thu, 12 Nov 2020 19:25:16 +0000 (UTC) Received: from vger.kernel.org (vger.kernel.org [23.128.96.18]) by mail.kernel.org (Postfix) with ESMTP id F0BFC21D7F for ; Thu, 12 Nov 2020 19:25:15 +0000 (UTC) Authentication-Results: mail.kernel.org; dkim=pass (2048-bit key) header.d=nvidia.com header.i=@nvidia.com header.b="UBX6Yj3d" Received: (majordomo@vger.kernel.org) by vger.kernel.org via listexpand id S1726295AbgKLTYz (ORCPT ); Thu, 12 Nov 2020 14:24:55 -0500 Received: from hqnvemgate26.nvidia.com ([216.228.121.65]:14156 "EHLO hqnvemgate26.nvidia.com" rhost-flags-OK-OK-OK-OK) by vger.kernel.org with ESMTP id S1726849AbgKLTYx (ORCPT ); Thu, 12 Nov 2020 14:24:53 -0500 Received: from hqmail.nvidia.com (Not Verified[216.228.121.13]) by hqnvemgate26.nvidia.com (using TLS: TLSv1.2, AES256-SHA) id ; Thu, 12 Nov 2020 11:24:56 -0800 Received: from sw-mtx-036.mtx.labs.mlnx (172.20.13.39) by HQMAIL107.nvidia.com (172.20.187.13) with Microsoft SMTP Server (TLS) id 15.0.1473.3; Thu, 12 Nov 2020 19:24:51 +0000 From: Parav Pandit To: , , CC: , , , , , , , Parav Pandit Subject: [PATCH net-next 05/13] devlink: Avoid global devlink mutex, use per instance reload lock Date: Thu, 12 Nov 2020 21:24:15 +0200 Message-ID: <20201112192424.2742-6-parav@nvidia.com> X-Mailer: git-send-email 2.26.2 In-Reply-To: <20201112192424.2742-1-parav@nvidia.com> References: <20201112192424.2742-1-parav@nvidia.com> MIME-Version: 1.0 X-Originating-IP: [172.20.13.39] X-ClientProxiedBy: HQMAIL107.nvidia.com (172.20.187.13) To HQMAIL107.nvidia.com (172.20.187.13) DKIM-Signature: v=1; a=rsa-sha256; c=relaxed/relaxed; d=nvidia.com; s=n1; t=1605209097; bh=+/c5UpUQBIoWffcy2omGkhbvAAq7SZJuvOWE5xQXU2A=; h=From:To:CC:Subject:Date:Message-ID:X-Mailer:In-Reply-To: References:MIME-Version:Content-Transfer-Encoding:Content-Type: X-Originating-IP:X-ClientProxiedBy; b=UBX6Yj3dvenJFVTJQRw+nw6+garONpVQvNTZaYb7hpEmJSPxlD7iTlDWPPPk/5L+G LSc/ZoTGOXgrNs4EgcHDi19DlutzKJzhdXYgOZ0lTtg2SXo4cc0A7mFRiIgtDF/A0a SRW6VJVYTKGdGTwft5mBe/ajog/w0Zl+YwUYikeg6gbGix9kKluTzj4JW25rkyMhT4 8ExJlIZ5eQOVC5scZkNJNM8VLRR42SrMAkStnzaArXl4k44ccQSPy4LbvtUA6aIKP/ 4LsxVkZh2bk6+lDRR55iqEPUYp0eG4IhwcJZI4zrQ4MkkirttYEQlyRfIiskh3X48l AtbBK5y+68EgA== Precedence: bulk List-ID: X-Mailing-List: linux-rdma@vger.kernel.org devlink device reload is a special operation which brings down and up the device. Such operation will unregister devlink device of sub function port. During devlink_reload() with devlink_mutex held leads to cyclic dependency. For example, devlink_reload() mutex_lock(&devlink_mutex); <- First lock acquire mlx5_reload_down(PCI PF device) disable_sf_devices(); sf_state_set(inactive); ancillary_dev->remove(); mlx5_adev_remove(adev); devlink_unregister(adev->devlink_instance); mutex_lock(&devlink_mutex); <- Second lock acquire Hence devlink_reload() operation cannot be done under global devlink_mutex mutex. In second such instance reload_down() callback likely to disable reload on child devlink device. This also prevents devlink_reload() to use the overloaded global devlink_mutex. devlink_reload() mutex_lock(&devlink_mutex); <- First lock acquire mlx5_reload_down(PCI PF device) disable_sf_devices(); ancillary_dev->remove(); mlx5_adev_remove(adev); devlink_reload_disable(adev->devlink_instance); mutex_lock(&devlink_mutex); <- Second lock acquire Therefore, introduce a reload_lock per devlink instance which is held when performing devlink device reload. Signed-off-by: Parav Pandit Reviewed-by: Jiri Pirko --- include/net/devlink.h | 1 + net/core/devlink.c | 25 +++++++++++++++---------- 2 files changed, 16 insertions(+), 10 deletions(-) diff --git a/include/net/devlink.h b/include/net/devlink.h index 124bac130c22..ef487b8ed17b 100644 --- a/include/net/devlink.h +++ b/include/net/devlink.h @@ -52,6 +52,7 @@ struct devlink { struct mutex lock; /* Serializes access to devlink instance specific objects such as * port, sb, dpipe, resource, params, region, traps and more. */ + struct mutex reload_lock; /* Protects reload operation */ u8 reload_failed:1, reload_enabled:1, registered:1; diff --git a/net/core/devlink.c b/net/core/devlink.c index 3e59ba73d5c4..c7c6f274d392 100644 --- a/net/core/devlink.c +++ b/net/core/devlink.c @@ -3307,29 +3307,32 @@ static int devlink_reload(struct devlink *devlink, struct net *dest_net, u32 remote_reload_stats[DEVLINK_RELOAD_STATS_ARRAY_SIZE]; int err; - if (!devlink->reload_enabled) - return -EOPNOTSUPP; + mutex_lock(&devlink->reload_lock); + if (!devlink->reload_enabled) { + err = -EOPNOTSUPP; + goto done; + } memcpy(remote_reload_stats, devlink->stats.remote_reload_stats, sizeof(remote_reload_stats)); err = devlink->ops->reload_down(devlink, !!dest_net, action, limit, extack); if (err) - return err; + goto done; if (dest_net && !net_eq(dest_net, devlink_net(devlink))) devlink_reload_netns_change(devlink, dest_net); err = devlink->ops->reload_up(devlink, action, limit, actions_performed, extack); devlink_reload_failed_set(devlink, !!err); - if (err) - return err; WARN_ON(!(*actions_performed & BIT(action))); /* Catch driver on updating the remote action within devlink reload */ WARN_ON(memcmp(remote_reload_stats, devlink->stats.remote_reload_stats, sizeof(remote_reload_stats))); devlink_reload_stats_update(devlink, limit, *actions_performed); - return 0; +done: + mutex_unlock(&devlink->reload_lock); + return err; } static int @@ -8118,6 +8121,7 @@ struct devlink *devlink_alloc(const struct devlink_ops *ops, size_t priv_size) INIT_LIST_HEAD(&devlink->trap_policer_list); mutex_init(&devlink->lock); mutex_init(&devlink->reporters_lock); + mutex_init(&devlink->reload_lock); return devlink; } EXPORT_SYMBOL_GPL(devlink_alloc); @@ -8166,9 +8170,9 @@ EXPORT_SYMBOL_GPL(devlink_unregister); */ void devlink_reload_enable(struct devlink *devlink) { - mutex_lock(&devlink_mutex); + mutex_lock(&devlink->reload_lock); devlink->reload_enabled = true; - mutex_unlock(&devlink_mutex); + mutex_unlock(&devlink->reload_lock); } EXPORT_SYMBOL_GPL(devlink_reload_enable); @@ -8182,12 +8186,12 @@ EXPORT_SYMBOL_GPL(devlink_reload_enable); */ void devlink_reload_disable(struct devlink *devlink) { - mutex_lock(&devlink_mutex); + mutex_lock(&devlink->reload_lock); /* Mutex is taken which ensures that no reload operation is in * progress while setting up forbidded flag. */ devlink->reload_enabled = false; - mutex_unlock(&devlink_mutex); + mutex_unlock(&devlink->reload_lock); } EXPORT_SYMBOL_GPL(devlink_reload_disable); @@ -8198,6 +8202,7 @@ EXPORT_SYMBOL_GPL(devlink_reload_disable); */ void devlink_free(struct devlink *devlink) { + mutex_destroy(&devlink->reload_lock); mutex_destroy(&devlink->reporters_lock); mutex_destroy(&devlink->lock); WARN_ON(!list_empty(&devlink->trap_policer_list)); From patchwork Thu Nov 12 19:24:16 2020 Content-Type: text/plain; charset="utf-8" MIME-Version: 1.0 Content-Transfer-Encoding: 7bit X-Patchwork-Submitter: Parav Pandit X-Patchwork-Id: 11901281 Return-Path: Received: from mail.kernel.org (pdx-korg-mail-1.web.codeaurora.org [172.30.200.123]) by pdx-korg-patchwork-2.web.codeaurora.org (Postfix) with ESMTP id 7D6C2697 for ; Thu, 12 Nov 2020 19:25:17 +0000 (UTC) Received: from vger.kernel.org (vger.kernel.org [23.128.96.18]) by mail.kernel.org (Postfix) with ESMTP id 532DE2222F for ; Thu, 12 Nov 2020 19:25:17 +0000 (UTC) Authentication-Results: mail.kernel.org; dkim=pass (2048-bit key) header.d=nvidia.com header.i=@nvidia.com header.b="F9eSGrW6" Received: (majordomo@vger.kernel.org) by vger.kernel.org via listexpand id S1727002AbgKLTZP (ORCPT ); Thu, 12 Nov 2020 14:25:15 -0500 Received: from hqnvemgate26.nvidia.com ([216.228.121.65]:14161 "EHLO hqnvemgate26.nvidia.com" rhost-flags-OK-OK-OK-OK) by vger.kernel.org with ESMTP id S1726865AbgKLTYz (ORCPT ); Thu, 12 Nov 2020 14:24:55 -0500 Received: from hqmail.nvidia.com (Not Verified[216.228.121.13]) by hqnvemgate26.nvidia.com (using TLS: TLSv1.2, AES256-SHA) id ; Thu, 12 Nov 2020 11:24:57 -0800 Received: from sw-mtx-036.mtx.labs.mlnx (172.20.13.39) by HQMAIL107.nvidia.com (172.20.187.13) with Microsoft SMTP Server (TLS) id 15.0.1473.3; Thu, 12 Nov 2020 19:24:52 +0000 From: Parav Pandit To: , , CC: , , , , , , , Parav Pandit Subject: [PATCH net-next 06/13] devlink: Introduce devlink refcount to reduce scope of global devlink_mutex Date: Thu, 12 Nov 2020 21:24:16 +0200 Message-ID: <20201112192424.2742-7-parav@nvidia.com> X-Mailer: git-send-email 2.26.2 In-Reply-To: <20201112192424.2742-1-parav@nvidia.com> References: <20201112192424.2742-1-parav@nvidia.com> MIME-Version: 1.0 X-Originating-IP: [172.20.13.39] X-ClientProxiedBy: HQMAIL107.nvidia.com (172.20.187.13) To HQMAIL107.nvidia.com (172.20.187.13) DKIM-Signature: v=1; a=rsa-sha256; c=relaxed/relaxed; d=nvidia.com; s=n1; t=1605209098; bh=duM8oXC36aVKHiWOtlUJ0x2Qpxs/rbzwQ5FiUJNUeko=; h=From:To:CC:Subject:Date:Message-ID:X-Mailer:In-Reply-To: References:MIME-Version:Content-Transfer-Encoding:Content-Type: X-Originating-IP:X-ClientProxiedBy; b=F9eSGrW6Z8OaszUNTbldZ87T3/J/ImhHJ0VRlBMNRpT5tg6wQ65GSqQSPxZHim3ow S3m5Hbu/XYjGKRsKPkE+ITjfOf6gMK5kcY+YI7VhXrzFEDN3M/qZ+JyJjrqnW3gFpY IYPtv7pE54lbZIBSk3xZv8VIidvVQG+qOgmhCUZv5N/SrRhNEtXNFD68/jlGPKG/LS Kzec/BzzV32nMZiYUIEA9X3MMN1uaUw7N5ShDWlAmGlFabiwl+SQMTW2Y6Adoo9ljJ nWH5CZJDJM+ukaQUZYyniiOOm+f4zSTk2A53urZHpIcNpj4jdl2xL1ADcC9JrRPha6 qMPukygEERxVg== Precedence: bulk List-ID: X-Mailing-List: linux-rdma@vger.kernel.org Currently global devlink_mutex is held while a doit() operation is progress. This brings a limitation. A Driver cannot perform devlink_register()/unregister() calls during devlink doit() callback functions. This is typically required when a port state change described in RFC [1] callback wants to delete an active SF port or wants to activate a SF port that results into unregistering or registering a devlink instance on different bus such as ancillary bus. An example flow: devlink_predoit() mutex_lock(&devlink_mutex); <- First lock acquire devlink_reload() driver->reload_down(inactive) adev->remove(); mlx5_adev_remove(ancillary_dev); devlink_unregister(ancillary_dev->devlink_instance); mutex_lock(&devlink_mutex); <- Second lock acquire This patch is preparation patch to enable drivers to achieve this. It achieves this by maintaining a per devlink instance refcount to prevent devlink device unregistration while user command are in progress or while devlink device is migration to init_net net namespace. devlink_nl_family continue to remain registered with parallel_ops disabled. So even after removing devlink_mutex during doit commands, it doesn't enable userspace to run multiple devlink commands for one or multiple devlink instance. [1] https://lore.kernel.org/netdev/20200519092258.GF4655@nanopsycho Signed-off-by: Parav Pandit Reviewed-by: Jiri Pirko --- include/net/devlink.h | 5 +++ net/core/devlink.c | 84 +++++++++++++++++++++++++++++++------------ 2 files changed, 67 insertions(+), 22 deletions(-) diff --git a/include/net/devlink.h b/include/net/devlink.h index ef487b8ed17b..c8eab814c234 100644 --- a/include/net/devlink.h +++ b/include/net/devlink.h @@ -53,6 +53,11 @@ struct devlink { * port, sb, dpipe, resource, params, region, traps and more. */ struct mutex reload_lock; /* Protects reload operation */ + struct list_head reload_list; + refcount_t refcount; /* Serializes user doit commands and netns command + * with device unregistration. + */ + struct completion unregister_complete; u8 reload_failed:1, reload_enabled:1, registered:1; diff --git a/net/core/devlink.c b/net/core/devlink.c index c7c6f274d392..84f3ec12b3e8 100644 --- a/net/core/devlink.c +++ b/net/core/devlink.c @@ -96,9 +96,8 @@ static LIST_HEAD(devlink_list); /* devlink_mutex * - * An overall lock guarding every operation coming from userspace. - * It also guards devlink devices list and it is taken when - * driver registers/unregisters it. + * An overall lock guarding devlink devices list during operations coming from + * userspace and when driver registers/unregisters devlink device. */ static DEFINE_MUTEX(devlink_mutex); @@ -121,6 +120,18 @@ void devlink_net_set(struct devlink *devlink, struct net *net) } EXPORT_SYMBOL_GPL(devlink_net_set); +static inline bool +devlink_try_get(struct devlink *devlink) +{ + return refcount_inc_not_zero(&devlink->refcount); +} + +static void devlink_put(struct devlink *devlink) +{ + if (refcount_dec_and_test(&devlink->refcount)) + complete(&devlink->unregister_complete); +} + static struct devlink *devlink_get_from_attrs(struct net *net, struct nlattr **attrs) { @@ -139,7 +150,7 @@ static struct devlink *devlink_get_from_attrs(struct net *net, list_for_each_entry(devlink, &devlink_list, list) { if (strcmp(devlink->dev->bus->name, busname) == 0 && strcmp(dev_name(devlink->dev), devname) == 0 && - net_eq(devlink_net(devlink), net)) + net_eq(devlink_net(devlink), net) && devlink_try_get(devlink)) return devlink; } @@ -411,7 +422,7 @@ devlink_region_snapshot_get_by_id(struct devlink_region *region, u32 id) /* The per devlink instance lock is taken by default in the pre-doit * operation, yet several commands do not require this. The global - * devlink lock is taken and protects from disruption by user-calls. + * devlink lock is taken and protects from disruption by dumpit user-calls. */ #define DEVLINK_NL_FLAG_NO_LOCK BIT(2) @@ -424,10 +435,10 @@ static int devlink_nl_pre_doit(const struct genl_ops *ops, mutex_lock(&devlink_mutex); devlink = devlink_get_from_info(info); - if (IS_ERR(devlink)) { - mutex_unlock(&devlink_mutex); + mutex_unlock(&devlink_mutex); + + if (IS_ERR(devlink)) return PTR_ERR(devlink); - } if (~ops->internal_flags & DEVLINK_NL_FLAG_NO_LOCK) mutex_lock(&devlink->lock); info->user_ptr[0] = devlink; @@ -448,7 +459,7 @@ static int devlink_nl_pre_doit(const struct genl_ops *ops, unlock: if (~ops->internal_flags & DEVLINK_NL_FLAG_NO_LOCK) mutex_unlock(&devlink->lock); - mutex_unlock(&devlink_mutex); + devlink_put(devlink); return err; } @@ -460,7 +471,7 @@ static void devlink_nl_post_doit(const struct genl_ops *ops, devlink = info->user_ptr[0]; if (~ops->internal_flags & DEVLINK_NL_FLAG_NO_LOCK) mutex_unlock(&devlink->lock); - mutex_unlock(&devlink_mutex); + devlink_put(devlink); } static struct genl_family devlink_nl_family; @@ -8122,6 +8133,7 @@ struct devlink *devlink_alloc(const struct devlink_ops *ops, size_t priv_size) mutex_init(&devlink->lock); mutex_init(&devlink->reporters_lock); mutex_init(&devlink->reload_lock); + init_completion(&devlink->unregister_complete); return devlink; } EXPORT_SYMBOL_GPL(devlink_alloc); @@ -8136,6 +8148,7 @@ int devlink_register(struct devlink *devlink, struct device *dev) { devlink->dev = dev; devlink->registered = true; + refcount_set(&devlink->refcount, 1); mutex_lock(&devlink_mutex); list_add_tail(&devlink->list, &devlink_list); devlink_notify(devlink, DEVLINK_CMD_NEW); @@ -8151,12 +8164,23 @@ EXPORT_SYMBOL_GPL(devlink_register); */ void devlink_unregister(struct devlink *devlink) { + /* Remove from the list first, so that no new users can get it */ mutex_lock(&devlink_mutex); - WARN_ON(devlink_reload_supported(devlink->ops) && - devlink->reload_enabled); devlink_notify(devlink, DEVLINK_CMD_DEL); list_del(&devlink->list); mutex_unlock(&devlink_mutex); + + /* Balances with refcount_set in devlink_register(). */ + devlink_put(devlink); + /* Wait for any existing users to stop using the devlink device */ + wait_for_completion(&devlink->unregister_complete); + + /* At this point there are no active users working on the devlink instance; + * also net ns exit operation (if any) is also completed. + * devlink is out of global list, hence no users can acquire reference to this devlink + * instance anymore. Hence, it is safe to proceed with unregistration. + */ + WARN_ON(devlink_reload_supported(devlink->ops) && devlink->reload_enabled); } EXPORT_SYMBOL_GPL(devlink_unregister); @@ -10472,6 +10496,8 @@ static void __net_exit devlink_pernet_pre_exit(struct net *net) { struct devlink *devlink; u32 actions_performed; + LIST_HEAD(local_list); + struct devlink *tmp; int err; /* In case network namespace is getting destroyed, reload @@ -10479,18 +10505,32 @@ static void __net_exit devlink_pernet_pre_exit(struct net *net) */ mutex_lock(&devlink_mutex); list_for_each_entry(devlink, &devlink_list, list) { - if (net_eq(devlink_net(devlink), net)) { - if (WARN_ON(!devlink_reload_supported(devlink->ops))) - continue; - err = devlink_reload(devlink, &init_net, - DEVLINK_RELOAD_ACTION_DRIVER_REINIT, - DEVLINK_RELOAD_LIMIT_UNSPEC, - &actions_performed, NULL); - if (err && err != -EOPNOTSUPP) - pr_warn("Failed to reload devlink instance into init_net\n"); - } + if (!net_eq(devlink_net(devlink), net)) + continue; + + if (WARN_ON(!devlink_reload_supported(devlink->ops))) + continue; + + /* Hold the reference to devlink instance so that it doesn't get unregistered + * once global devlink_mutex is unlocked. + * Store the devlink to a shadow list so that if devlink unregistration is + * started, it can be still found in the shadow list. + */ + if (devlink_try_get(devlink)) + list_add_tail(&devlink->reload_list, &local_list); } mutex_unlock(&devlink_mutex); + + list_for_each_entry_safe(devlink, tmp, &local_list, reload_list) { + list_del_init(&devlink->reload_list); + err = devlink_reload(devlink, &init_net, + DEVLINK_RELOAD_ACTION_DRIVER_REINIT, + DEVLINK_RELOAD_LIMIT_UNSPEC, + &actions_performed, NULL); + if (err && err != -EOPNOTSUPP) + pr_warn("Failed to reload devlink instance into init_net\n"); + devlink_put(devlink); + } } static struct pernet_operations devlink_pernet_ops __net_initdata = { From patchwork Thu Nov 12 19:24:17 2020 Content-Type: text/plain; charset="utf-8" MIME-Version: 1.0 Content-Transfer-Encoding: 7bit X-Patchwork-Submitter: Parav Pandit X-Patchwork-Id: 11901279 Return-Path: Received: from mail.kernel.org (pdx-korg-mail-1.web.codeaurora.org [172.30.200.123]) by pdx-korg-patchwork-2.web.codeaurora.org (Postfix) with ESMTP id BE05C16C1 for ; Thu, 12 Nov 2020 19:25:16 +0000 (UTC) Received: from vger.kernel.org (vger.kernel.org [23.128.96.18]) by mail.kernel.org (Postfix) with ESMTP id 8D0F82222F for ; Thu, 12 Nov 2020 19:25:16 +0000 (UTC) Authentication-Results: mail.kernel.org; dkim=pass (2048-bit key) header.d=nvidia.com header.i=@nvidia.com header.b="Lvx7sg6o" Received: (majordomo@vger.kernel.org) by vger.kernel.org via listexpand id S1726998AbgKLTZP (ORCPT ); Thu, 12 Nov 2020 14:25:15 -0500 Received: from hqnvemgate25.nvidia.com ([216.228.121.64]:11882 "EHLO hqnvemgate25.nvidia.com" rhost-flags-OK-OK-OK-OK) by vger.kernel.org with ESMTP id S1726826AbgKLTYz (ORCPT ); Thu, 12 Nov 2020 14:24:55 -0500 Received: from hqmail.nvidia.com (Not Verified[216.228.121.13]) by hqnvemgate25.nvidia.com (using TLS: TLSv1.2, AES256-SHA) id ; Thu, 12 Nov 2020 11:24:49 -0800 Received: from sw-mtx-036.mtx.labs.mlnx (172.20.13.39) by HQMAIL107.nvidia.com (172.20.187.13) with Microsoft SMTP Server (TLS) id 15.0.1473.3; Thu, 12 Nov 2020 19:24:53 +0000 From: Parav Pandit To: , , CC: , , , , , , , Parav Pandit , Vu Pham Subject: [PATCH net-next 07/13] net/mlx5: SF, Add auxiliary device support Date: Thu, 12 Nov 2020 21:24:17 +0200 Message-ID: <20201112192424.2742-8-parav@nvidia.com> X-Mailer: git-send-email 2.26.2 In-Reply-To: <20201112192424.2742-1-parav@nvidia.com> References: <20201112192424.2742-1-parav@nvidia.com> MIME-Version: 1.0 X-Originating-IP: [172.20.13.39] X-ClientProxiedBy: HQMAIL107.nvidia.com (172.20.187.13) To HQMAIL107.nvidia.com (172.20.187.13) DKIM-Signature: v=1; a=rsa-sha256; c=relaxed/relaxed; d=nvidia.com; s=n1; t=1605209089; bh=XkRd9aq6/G3DKokOFC93KLRgF4gaeOp5YmQLYKMRFu0=; h=From:To:CC:Subject:Date:Message-ID:X-Mailer:In-Reply-To: References:MIME-Version:Content-Transfer-Encoding:Content-Type: X-Originating-IP:X-ClientProxiedBy; b=Lvx7sg6osG76sqP2wNoIt2Kpa2a5aQmfvt6wVxk3yH9l30z5PhAaaND/1VX21GqTN eMuPxnIdi5aObmkrtZsyXNYwa3Fe4PznzHiADdNwcI0ffjsuh6jd+Y1047KWLv9KNJ 8ENQY4zUHjObyipcexWEUSf8iKAxQ9nD96FWT6FZHp9KYrpgSZqek9GDItovINlhrH TQrPgxGru8VJM62cwzEDbHfj2CNHoqae6Xf+lzcwPqBl5xZNL+d8bic3Vcq4ZplbO4 RjM5FhMe5qYrhbHK5RUSecDmmsoQ8ivGa3ZzvlRog9bi7FIFPIyIxND3vP7HcWm7IG UZxyE2UZii0sw== Precedence: bulk List-ID: X-Mailing-List: linux-rdma@vger.kernel.org Introduce API to add and delete an auxiliary device for an SF. Each SF has its own dedicated window in the PCI BAR 2. SF device is similar to PCI PF and VF that supports multiple class of devices such as net, rdma and vdpa. SF device will be added or removed in subsequent patch during SF devlink port function state change command. A subfunction device exposes user supplied subfunction number which will be further used by systemd/udev to have deterministic name for its netdevice and rdma device. An mlx5 subfunction auxiliary device example: $ devlink dev eswitch set pci/0000:06:00.0 mode switchdev $ devlink port show pci/0000:06:00.0/65535: type eth netdev ens2f0np0 flavour physical port 0 splittable false $ devlink port add pci/0000:06:00.0 flavour pcisf pfnum 0 sfnum 88 $ devlink port show ens2f0npf0sf88 pci/0000:06:00.0/32768: type eth netdev ens2f0npf0sf88 flavour pcisf controller 0 pfnum 0 sfnum 88 external false splittable false function: hw_addr 00:00:00:00:88:88 state inactive opstate detached $ devlink port function set ens2f0npf0sf88 hw_addr 00:00:00:00:88:88 state active On activation, $ ls -l /sys/bus/auxiliary/devices/ mlx5_core.sf.0 -> ../../../devices/pci0000:00/0000:00:03.0/0000:06:00.0/mlx5_core.sf.0 $ cat /sys/bus/auxiliary/devices/mlx5_core.sf.0/sfnum 88 Signed-off-by: Parav Pandit Reviewed-by: Vu Pham --- .../net/ethernet/mellanox/mlx5/core/Kconfig | 9 + .../net/ethernet/mellanox/mlx5/core/Makefile | 4 + .../net/ethernet/mellanox/mlx5/core/main.c | 12 + .../ethernet/mellanox/mlx5/core/sf/dev/dev.c | 213 ++++++++++++++++++ .../ethernet/mellanox/mlx5/core/sf/dev/dev.h | 55 +++++ include/linux/mlx5/driver.h | 4 + 6 files changed, 297 insertions(+) create mode 100644 drivers/net/ethernet/mellanox/mlx5/core/sf/dev/dev.c create mode 100644 drivers/net/ethernet/mellanox/mlx5/core/sf/dev/dev.h diff --git a/drivers/net/ethernet/mellanox/mlx5/core/Kconfig b/drivers/net/ethernet/mellanox/mlx5/core/Kconfig index 485478979b1a..10dfaf671c90 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/Kconfig +++ b/drivers/net/ethernet/mellanox/mlx5/core/Kconfig @@ -202,3 +202,12 @@ config MLX5_SW_STEERING default y help Build support for software-managed steering in the NIC. + +config MLX5_SF + bool "Mellanox Technologies subfunction device support using auxiliary device" + depends on MLX5_CORE && MLX5_CORE_EN + default n + help + Build support for subfuction device in the NIC. A Mellanox subfunction + device can support RDMA, netdevice and vdpa device. + It is similar to a SRIOV VF but it doesn't require SRIOV support. diff --git a/drivers/net/ethernet/mellanox/mlx5/core/Makefile b/drivers/net/ethernet/mellanox/mlx5/core/Makefile index 2d477f9a8cb7..ee866da1d9ba 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/Makefile +++ b/drivers/net/ethernet/mellanox/mlx5/core/Makefile @@ -85,3 +85,7 @@ mlx5_core-$(CONFIG_MLX5_SW_STEERING) += steering/dr_domain.o steering/dr_table.o steering/dr_ste.o steering/dr_send.o \ steering/dr_cmd.o steering/dr_fw.o \ steering/dr_action.o steering/fs_dr.o +# +# SF device +# +mlx5_core-$(CONFIG_MLX5_SF) += sf/dev/dev.o diff --git a/drivers/net/ethernet/mellanox/mlx5/core/main.c b/drivers/net/ethernet/mellanox/mlx5/core/main.c index 37fa56904235..a1ba6056952b 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/main.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/main.c @@ -73,6 +73,7 @@ #include "ecpf.h" #include "lib/hv_vhca.h" #include "diag/rsc_dump.h" +#include "sf/dev/dev.h" MODULE_AUTHOR("Eli Cohen "); MODULE_DESCRIPTION("Mellanox 5th generation network adapters (ConnectX series) core driver"); @@ -884,6 +885,12 @@ static int mlx5_init_once(struct mlx5_core_dev *dev) goto err_eswitch_cleanup; } + err = mlx5_sf_dev_table_init(dev); + if (err) { + mlx5_core_err(dev, "Failed to init SF device table %d\n", err); + goto err_sf_dev_cleanup; + } + dev->dm = mlx5_dm_create(dev); if (IS_ERR(dev->dm)) mlx5_core_warn(dev, "Failed to init device memory%d\n", err); @@ -894,6 +901,8 @@ static int mlx5_init_once(struct mlx5_core_dev *dev) return 0; +err_sf_dev_cleanup: + mlx5_fpga_cleanup(dev); err_eswitch_cleanup: mlx5_eswitch_cleanup(dev->priv.eswitch); err_sriov_cleanup: @@ -925,6 +934,7 @@ static void mlx5_cleanup_once(struct mlx5_core_dev *dev) mlx5_hv_vhca_destroy(dev->hv_vhca); mlx5_fw_tracer_destroy(dev->tracer); mlx5_dm_cleanup(dev); + mlx5_sf_dev_table_cleanup(dev); mlx5_fpga_cleanup(dev); mlx5_eswitch_cleanup(dev->priv.eswitch); mlx5_sriov_cleanup(dev); @@ -1141,6 +1151,7 @@ static int mlx5_load(struct mlx5_core_dev *dev) goto err_ec; } + mlx5_sf_dev_table_create(dev); return 0; err_ec: @@ -1171,6 +1182,7 @@ static int mlx5_load(struct mlx5_core_dev *dev) static void mlx5_unload(struct mlx5_core_dev *dev) { + mlx5_sf_dev_table_destroy(dev); mlx5_ec_cleanup(dev); mlx5_sriov_detach(dev); mlx5_cleanup_fs(dev); diff --git a/drivers/net/ethernet/mellanox/mlx5/core/sf/dev/dev.c b/drivers/net/ethernet/mellanox/mlx5/core/sf/dev/dev.c new file mode 100644 index 000000000000..a25f6027b7cd --- /dev/null +++ b/drivers/net/ethernet/mellanox/mlx5/core/sf/dev/dev.c @@ -0,0 +1,213 @@ +// SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB +/* Copyright (c) 2020 Mellanox Technologies Ltd */ + +#include +#include +#include "mlx5_core.h" +#include "dev.h" + +struct mlx5_sf_dev_table { + /* Serializes table access between driver unload context and + * device add/remove user command context. + */ + struct mutex table_lock; + struct xarray devices; + unsigned int max_sfs; + phys_addr_t base_address; + u64 sf_bar_length; +}; + +static bool mlx5_sf_dev_supported(const struct mlx5_core_dev *dev) +{ + return MLX5_CAP_GEN(dev, sf); +} + +static ssize_t sfnum_show(struct device *dev, struct device_attribute *attr, char *buf) +{ + struct auxiliary_device *adev = container_of(dev, struct auxiliary_device, dev); + struct mlx5_sf_dev *sf_dev = container_of(adev, struct mlx5_sf_dev, adev); + + return scnprintf(buf, PAGE_SIZE, "%u\n", sf_dev->sfnum); +} +static DEVICE_ATTR_RO(sfnum); + +static struct attribute *sf_device_attrs[] = { + &dev_attr_sfnum.attr, + NULL, +}; + +static const struct attribute_group sf_attr_group = { + .attrs = sf_device_attrs, +}; + +static const struct attribute_group *sf_attr_groups[2] = { + &sf_attr_group, + NULL +}; + +static void mlx5_sf_dev_release(struct device *device) +{ + struct auxiliary_device *adev = container_of(device, struct auxiliary_device, dev); + struct mlx5_sf_dev *sf_dev = container_of(adev, struct mlx5_sf_dev, adev); + + mlx5_adev_idx_free(sf_dev->adev.id); + kfree(sf_dev); +} + +static void mlx5_sf_dev_remove(struct mlx5_sf_dev *sf_dev) +{ + auxiliary_device_delete(&sf_dev->adev); + auxiliary_device_uninit(&sf_dev->adev); +} + +int mlx5_sf_dev_add(struct mlx5_core_dev *dev, u16 sf_index, u32 sfnum) +{ + struct mlx5_sf_dev_table *table = dev->priv.sf_dev_table; + struct mlx5_sf_dev *sf_dev; + struct pci_dev *pdev; + int id; + int err; + + id = mlx5_adev_idx_alloc(); + if (id < 0) + return id; + + sf_dev = kzalloc(sizeof(*sf_dev), GFP_KERNEL); + if (!sf_dev) { + mlx5_adev_idx_free(id); + return -ENOMEM; + } + pdev = dev->pdev; + sf_dev->adev.id = id; + sf_dev->adev.name = MLX5_SF_DEV_ID_NAME; + sf_dev->adev.dev.release = mlx5_sf_dev_release; + sf_dev->adev.dev.parent = &pdev->dev; + sf_dev->adev.dev.groups = sf_attr_groups; + sf_dev->sfnum = sfnum; + sf_dev->parent_mdev = dev; + + /* Serialize with unloading the driver. */ + mutex_lock(&table->table_lock); + if (!table->max_sfs) { + mlx5_adev_idx_free(id); + kfree(sf_dev); + err = -EOPNOTSUPP; + goto add_err; + } + sf_dev->bar_base_addr = table->base_address + (sf_index * table->sf_bar_length); + + err = auxiliary_device_init(&sf_dev->adev); + if (err) { + mlx5_adev_idx_free(id); + kfree(sf_dev); + goto add_err; + } + + err = auxiliary_device_add(&sf_dev->adev); + if (err) { + put_device(&sf_dev->adev.dev); + goto add_err; + } + + err = xa_insert(&table->devices, sf_index, sf_dev, GFP_KERNEL); + if (err) + goto xa_err; + mutex_unlock(&table->table_lock); + return 0; + +xa_err: + mlx5_sf_dev_remove(sf_dev); +add_err: + mutex_unlock(&table->table_lock); + return err; +} + +void mlx5_sf_dev_del(struct mlx5_core_dev *dev, u16 sf_index) +{ + struct mlx5_sf_dev_table *table = dev->priv.sf_dev_table; + struct mlx5_sf_dev *sf_dev; + + mutex_lock(&table->table_lock); + sf_dev = xa_load(&table->devices, sf_index); + if (!sf_dev) + goto done; + + xa_erase(&table->devices, sf_index); + mlx5_sf_dev_remove(sf_dev); +done: + mutex_unlock(&table->table_lock); +} + +int mlx5_sf_dev_table_init(struct mlx5_core_dev *dev) +{ + struct mlx5_sf_dev_table *table; + + if (!mlx5_sf_dev_supported(dev)) + return 0; + + table = kzalloc(sizeof(*table), GFP_KERNEL); + if (!table) + return -ENOMEM; + + mutex_init(&table->table_lock); + xa_init(&table->devices); + dev->priv.sf_dev_table = table; + return 0; +} + +void mlx5_sf_dev_table_cleanup(struct mlx5_core_dev *dev) +{ + struct mlx5_sf_dev_table *table = dev->priv.sf_dev_table; + + if (!table) + return; + + WARN_ON(!xa_empty(&table->devices)); + mutex_destroy(&table->table_lock); + kfree(table); + dev->priv.sf_dev_table = NULL; +} + +void mlx5_sf_dev_table_create(struct mlx5_core_dev *dev) +{ + struct mlx5_sf_dev_table *table = dev->priv.sf_dev_table; + unsigned int max_sfs; + + if (!table) + return; + + /* Honor the caps changed during reload */ + if (!mlx5_sf_dev_supported(dev)) + return; + + max_sfs = 1 << MLX5_CAP_GEN(dev, log_max_sf); + table->base_address = pci_resource_start(dev->pdev, 2); + table->sf_bar_length = 1 << (MLX5_CAP_GEN(dev, log_min_sf_size) + 12); + mutex_lock(&table->table_lock); + table->max_sfs = max_sfs; + mutex_unlock(&table->table_lock); +} + +static void mlx5_sf_dev_destroy_all(struct mlx5_sf_dev_table *table) +{ + struct mlx5_sf_dev *sf_dev; + unsigned long index; + + xa_for_each(&table->devices, index, sf_dev) { + xa_erase(&table->devices, index); + mlx5_sf_dev_remove(sf_dev); + } +} + +void mlx5_sf_dev_table_destroy(struct mlx5_core_dev *dev) +{ + struct mlx5_sf_dev_table *table = dev->priv.sf_dev_table; + + if (!table) + return; + + mutex_lock(&table->table_lock); + table->max_sfs = 0; + mlx5_sf_dev_destroy_all(table); + mutex_unlock(&table->table_lock); +} diff --git a/drivers/net/ethernet/mellanox/mlx5/core/sf/dev/dev.h b/drivers/net/ethernet/mellanox/mlx5/core/sf/dev/dev.h new file mode 100644 index 000000000000..d81612122a45 --- /dev/null +++ b/drivers/net/ethernet/mellanox/mlx5/core/sf/dev/dev.h @@ -0,0 +1,55 @@ +/* SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB */ +/* Copyright (c) 2020 Mellanox Technologies Ltd */ + +#ifndef __MLX5_SF_DEV_H__ +#define __MLX5_SF_DEV_H__ + +#ifdef CONFIG_MLX5_SF + +#include + +#define MLX5_SF_DEV_ID_NAME "sf" + +struct mlx5_sf_dev { + struct auxiliary_device adev; + struct mlx5_core_dev *parent_mdev; + phys_addr_t bar_base_addr; + u32 sfnum; +}; + +void __exit mlx5_sf_dev_exit(void); +int mlx5_sf_dev_table_init(struct mlx5_core_dev *dev); +void mlx5_sf_dev_table_cleanup(struct mlx5_core_dev *dev); +void mlx5_sf_dev_table_create(struct mlx5_core_dev *dev); +void mlx5_sf_dev_table_destroy(struct mlx5_core_dev *dev); + +int mlx5_sf_dev_add(struct mlx5_core_dev *dev, u16 sf_index, u32 sfnum); +void mlx5_sf_dev_del(struct mlx5_core_dev *dev, u16 sf_index); + +#else + +static inline void __exit mlx5_sf_dev_exit(void) +{ +} + +static inline int mlx5_sf_dev_table_init(struct mlx5_core_dev *dev) +{ + return 0; +} + +static inline void mlx5_sf_dev_table_cleanup(struct mlx5_core_dev *dev) +{ +} + +static inline int mlx5_sf_dev_table_create(struct mlx5_core_dev *dev) +{ + return 0; +} + +static inline void mlx5_sf_dev_table_destroy(struct mlx5_core_dev *dev) +{ +} + +#endif + +#endif diff --git a/include/linux/mlx5/driver.h b/include/linux/mlx5/driver.h index 28e9b2f17eb9..151cacab07db 100644 --- a/include/linux/mlx5/driver.h +++ b/include/linux/mlx5/driver.h @@ -507,6 +507,7 @@ struct mlx5_devcom; struct mlx5_fw_reset; struct mlx5_eq_table; struct mlx5_irq_table; +struct mlx5_sf_dev_table; struct mlx5_rate_limit { u32 rate; @@ -603,6 +604,9 @@ struct mlx5_priv { struct mlx5_bfreg_data bfregs; struct mlx5_uars_page *uar; +#ifdef CONFIG_MLX5_SF + struct mlx5_sf_dev_table *sf_dev_table; +#endif }; enum mlx5_device_state { From patchwork Thu Nov 12 19:24:18 2020 Content-Type: text/plain; charset="utf-8" MIME-Version: 1.0 Content-Transfer-Encoding: 7bit X-Patchwork-Submitter: Parav Pandit X-Patchwork-Id: 11901265 Return-Path: Received: from mail.kernel.org (pdx-korg-mail-1.web.codeaurora.org [172.30.200.123]) by pdx-korg-patchwork-2.web.codeaurora.org (Postfix) with ESMTP id 370CE697 for ; Thu, 12 Nov 2020 19:24:59 +0000 (UTC) Received: from vger.kernel.org (vger.kernel.org [23.128.96.18]) by mail.kernel.org (Postfix) with ESMTP id F287221D7F for ; Thu, 12 Nov 2020 19:24:58 +0000 (UTC) Authentication-Results: mail.kernel.org; dkim=pass (2048-bit key) header.d=nvidia.com header.i=@nvidia.com header.b="lTsBIkX9" Received: (majordomo@vger.kernel.org) by vger.kernel.org via listexpand id S1726901AbgKLTY5 (ORCPT ); Thu, 12 Nov 2020 14:24:57 -0500 Received: from hqnvemgate24.nvidia.com ([216.228.121.143]:11355 "EHLO hqnvemgate24.nvidia.com" rhost-flags-OK-OK-OK-OK) by vger.kernel.org with ESMTP id S1726884AbgKLTY4 (ORCPT ); Thu, 12 Nov 2020 14:24:56 -0500 Received: from hqmail.nvidia.com (Not Verified[216.228.121.13]) by hqnvemgate24.nvidia.com (using TLS: TLSv1.2, AES256-SHA) id ; Thu, 12 Nov 2020 11:25:03 -0800 Received: from sw-mtx-036.mtx.labs.mlnx (172.20.13.39) by HQMAIL107.nvidia.com (172.20.187.13) with Microsoft SMTP Server (TLS) id 15.0.1473.3; Thu, 12 Nov 2020 19:24:54 +0000 From: Parav Pandit To: , , CC: , , , , , , , Parav Pandit , Vu Pham Subject: [PATCH net-next 08/13] net/mlx5: SF, Add auxiliary device driver Date: Thu, 12 Nov 2020 21:24:18 +0200 Message-ID: <20201112192424.2742-9-parav@nvidia.com> X-Mailer: git-send-email 2.26.2 In-Reply-To: <20201112192424.2742-1-parav@nvidia.com> References: <20201112192424.2742-1-parav@nvidia.com> MIME-Version: 1.0 X-Originating-IP: [172.20.13.39] X-ClientProxiedBy: HQMAIL107.nvidia.com (172.20.187.13) To HQMAIL107.nvidia.com (172.20.187.13) DKIM-Signature: v=1; a=rsa-sha256; c=relaxed/relaxed; d=nvidia.com; s=n1; t=1605209103; bh=4apZ+3wx5/uPC2lt0e8tLLWW3EG/y8PJM2AUoOTcS60=; h=From:To:CC:Subject:Date:Message-ID:X-Mailer:In-Reply-To: References:MIME-Version:Content-Transfer-Encoding:Content-Type: X-Originating-IP:X-ClientProxiedBy; b=lTsBIkX9b4UYHK0eFpYitFTaRq3+echhP3CpJgplILl9W8z1BmT0is3ZIaMToYu67 9pPR7GpfPrCmL24eFy5lBEM6xPmtUnLJf/gdQP7OKk+dYwYR9VlWgdnHJW1NIeIjg8 8D6KxPBPFSriaawM5Q4O4w+5kD1VMvTKlhfe3/n0s/lV5t/QZxgCcOaUl+sb70Lxxs eb1SdRdrPW37srgATfpriR68YllQzlicS3xh5xNJnDNDjaWODJl1Wj+Jtdzh6MukMM yuRoBeBNZS7kpV7eCodMW6Mn3bOHllkTqBPp13zWJ0EzSXpJNVH2+5QWdsuQQt0oFh XTSkrM+mHExLw== Precedence: bulk List-ID: X-Mailing-List: linux-rdma@vger.kernel.org Add auxiliary device driver for mlx5 subfunction auxiliary device. A mlx5 subfunction is similar to PCI PF and VF. For a subfunction an auxiliary device is created. As a result, when mlx5 SF auxiliary device binds to the driver, its netdev and rdma device are created, they appear as $ ls -l /sys/bus/auxiliary/devices/ mlx5_core.sf.0 -> ../../../devices/pci0000:00/0000:00:03.0/0000:06:00.0/mlx5_core.sf.0 $ ls -l /sys/class/net/eth1/device /sys/class/net/eth1/device -> ../../../mlx5_core.sf.0 $ cat /sys/bus/auxiliary/devices/mlx5_core.sf.0/sfnum 88 $ devlink dev show pci/0000:06:00.0 auxiliary/mlx5_core.sf.0 $ devlink port show auxiliary/mlx5_core.sf.0/1 auxiliary/mlx5_core.sf.0/1: type eth netdev p0sf88 flavour virtual port 0 splittable false $ rdma link show mlx5_0/1 link mlx5_0/1 state ACTIVE physical_state LINK_UP netdev p0sf88 $ rdma dev show 8: rocep6s0f1: node_type ca fw 16.27.1017 node_guid 248a:0703:00b3:d113 sys_image_guid 248a:0703:00b3:d112 13: mlx5_0: node_type ca fw 16.27.1017 node_guid 0000:00ff:fe00:8888 sys_image_guid 248a:0703:00b3:d112 In future, devlink device instance name will adapt to have sfnum annotation using either an alias or as devlink instance name described in RFC [1]. [1] https://lore.kernel.org/netdev/20200519092258.GF4655@nanopsycho/ Signed-off-by: Parav Pandit Reviewed-by: Vu Pham --- .../net/ethernet/mellanox/mlx5/core/Makefile | 2 +- drivers/net/ethernet/mellanox/mlx5/core/eq.c | 2 +- .../net/ethernet/mellanox/mlx5/core/main.c | 22 ++-- .../ethernet/mellanox/mlx5/core/mlx5_core.h | 10 ++ .../net/ethernet/mellanox/mlx5/core/pci_irq.c | 20 ++++ .../ethernet/mellanox/mlx5/core/sf/dev/dev.h | 13 +++ .../mellanox/mlx5/core/sf/dev/driver.c | 105 ++++++++++++++++++ include/linux/mlx5/driver.h | 4 +- 8 files changed, 168 insertions(+), 10 deletions(-) create mode 100644 drivers/net/ethernet/mellanox/mlx5/core/sf/dev/driver.c diff --git a/drivers/net/ethernet/mellanox/mlx5/core/Makefile b/drivers/net/ethernet/mellanox/mlx5/core/Makefile index ee866da1d9ba..7dd5be49fb9e 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/Makefile +++ b/drivers/net/ethernet/mellanox/mlx5/core/Makefile @@ -88,4 +88,4 @@ mlx5_core-$(CONFIG_MLX5_SW_STEERING) += steering/dr_domain.o steering/dr_table.o # # SF device # -mlx5_core-$(CONFIG_MLX5_SF) += sf/dev/dev.o +mlx5_core-$(CONFIG_MLX5_SF) += sf/dev/dev.o sf/dev/driver.o diff --git a/drivers/net/ethernet/mellanox/mlx5/core/eq.c b/drivers/net/ethernet/mellanox/mlx5/core/eq.c index 8ebfe782f95e..50c235e54c86 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/eq.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/eq.c @@ -465,7 +465,7 @@ int mlx5_eq_table_init(struct mlx5_core_dev *dev) for (i = 0; i < MLX5_EVENT_TYPE_MAX; i++) ATOMIC_INIT_NOTIFIER_HEAD(&eq_table->nh[i]); - eq_table->irq_table = dev->priv.irq_table; + eq_table->irq_table = mlx5_irq_table_get(dev); return 0; } diff --git a/drivers/net/ethernet/mellanox/mlx5/core/main.c b/drivers/net/ethernet/mellanox/mlx5/core/main.c index a1ba6056952b..adfa21de938e 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/main.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/main.c @@ -83,7 +83,6 @@ unsigned int mlx5_core_debug_mask; module_param_named(debug_mask, mlx5_core_debug_mask, uint, 0644); MODULE_PARM_DESC(debug_mask, "debug mask: 1 = dump cmd data, 2 = dump cmd exec time, 3 = both. Default=0"); -#define MLX5_DEFAULT_PROF 2 static unsigned int prof_sel = MLX5_DEFAULT_PROF; module_param_named(prof_sel, prof_sel, uint, 0444); MODULE_PARM_DESC(prof_sel, "profile selector. Valid range 0 - 2"); @@ -1295,7 +1294,7 @@ void mlx5_unload_one(struct mlx5_core_dev *dev, bool cleanup) mutex_unlock(&dev->intf_state_mutex); } -static int mlx5_mdev_init(struct mlx5_core_dev *dev, int profile_idx) +int mlx5_mdev_init(struct mlx5_core_dev *dev, int profile_idx) { struct mlx5_priv *priv = &dev->priv; int err; @@ -1345,7 +1344,7 @@ static int mlx5_mdev_init(struct mlx5_core_dev *dev, int profile_idx) return err; } -static void mlx5_mdev_uninit(struct mlx5_core_dev *dev) +void mlx5_mdev_uninit(struct mlx5_core_dev *dev) { struct mlx5_priv *priv = &dev->priv; @@ -1684,16 +1683,24 @@ static int __init init(void) if (err) goto err_debug; + err = mlx5_sf_driver_register(); + if (err) + goto err_sf; + #ifdef CONFIG_MLX5_CORE_EN err = mlx5e_init(); - if (err) { - pci_unregister_driver(&mlx5_core_driver); - goto err_debug; - } + if (err) + goto err_eth; #endif return 0; +#ifdef CONFIG_MLX5_CORE_EN +err_eth: + mlx5_sf_driver_unregister(); +#endif +err_sf: + pci_unregister_driver(&mlx5_core_driver); err_debug: mlx5_unregister_debugfs(); return err; @@ -1704,6 +1711,7 @@ static void __exit cleanup(void) #ifdef CONFIG_MLX5_CORE_EN mlx5e_cleanup(); #endif + mlx5_sf_driver_unregister(); pci_unregister_driver(&mlx5_core_driver); mlx5_unregister_debugfs(); } diff --git a/drivers/net/ethernet/mellanox/mlx5/core/mlx5_core.h b/drivers/net/ethernet/mellanox/mlx5/core/mlx5_core.h index dd7312621d0d..499aa76bf8d1 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/mlx5_core.h +++ b/drivers/net/ethernet/mellanox/mlx5/core/mlx5_core.h @@ -117,6 +117,8 @@ enum mlx5_semaphore_space_address { MLX5_SEMAPHORE_SW_RESET = 0x20, }; +#define MLX5_DEFAULT_PROF 2 + int mlx5_query_hca_caps(struct mlx5_core_dev *dev); int mlx5_query_board_id(struct mlx5_core_dev *dev); int mlx5_cmd_init_hca(struct mlx5_core_dev *dev, uint32_t *sw_owner_id); @@ -172,6 +174,7 @@ struct cpumask * mlx5_irq_get_affinity_mask(struct mlx5_irq_table *irq_table, int vecidx); struct cpu_rmap *mlx5_irq_get_rmap(struct mlx5_irq_table *table); int mlx5_irq_get_num_comp(struct mlx5_irq_table *table); +struct mlx5_irq_table *mlx5_irq_table_get(struct mlx5_core_dev *dev); int mlx5_events_init(struct mlx5_core_dev *dev); void mlx5_events_cleanup(struct mlx5_core_dev *dev); @@ -253,6 +256,13 @@ enum { u8 mlx5_get_nic_state(struct mlx5_core_dev *dev); void mlx5_set_nic_state(struct mlx5_core_dev *dev, u8 state); +static inline bool mlx5_core_is_sf(const struct mlx5_core_dev *dev) +{ + return dev->coredev_type == MLX5_COREDEV_SF; +} + +int mlx5_mdev_init(struct mlx5_core_dev *dev, int profile_idx); +void mlx5_mdev_uninit(struct mlx5_core_dev *dev); void mlx5_unload_one(struct mlx5_core_dev *dev, bool cleanup); int mlx5_load_one(struct mlx5_core_dev *dev, bool boot); #endif /* __MLX5_CORE_H__ */ diff --git a/drivers/net/ethernet/mellanox/mlx5/core/pci_irq.c b/drivers/net/ethernet/mellanox/mlx5/core/pci_irq.c index 6fd974920394..a61e09aff152 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/pci_irq.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/pci_irq.c @@ -30,6 +30,9 @@ int mlx5_irq_table_init(struct mlx5_core_dev *dev) { struct mlx5_irq_table *irq_table; + if (mlx5_core_is_sf(dev)) + return 0; + irq_table = kvzalloc(sizeof(*irq_table), GFP_KERNEL); if (!irq_table) return -ENOMEM; @@ -40,6 +43,9 @@ int mlx5_irq_table_init(struct mlx5_core_dev *dev) void mlx5_irq_table_cleanup(struct mlx5_core_dev *dev) { + if (mlx5_core_is_sf(dev)) + return; + kvfree(dev->priv.irq_table); } @@ -268,6 +274,9 @@ int mlx5_irq_table_create(struct mlx5_core_dev *dev) int nvec; int err; + if (mlx5_core_is_sf(dev)) + return 0; + nvec = MLX5_CAP_GEN(dev, num_ports) * num_online_cpus() + MLX5_IRQ_VEC_COMP_BASE; nvec = min_t(int, nvec, num_eqs); @@ -319,6 +328,9 @@ void mlx5_irq_table_destroy(struct mlx5_core_dev *dev) struct mlx5_irq_table *table = dev->priv.irq_table; int i; + if (mlx5_core_is_sf(dev)) + return; + /* free_irq requires that affinity and rmap will be cleared * before calling it. This is why there is asymmetry with set_rmap * which should be called after alloc_irq but before request_irq. @@ -332,3 +344,11 @@ void mlx5_irq_table_destroy(struct mlx5_core_dev *dev) kfree(table->irq); } +struct mlx5_irq_table *mlx5_irq_table_get(struct mlx5_core_dev *dev) +{ +#ifdef CONFIG_MLX5_SF + if (mlx5_core_is_sf(dev)) + return dev->priv.parent_mdev->priv.irq_table; +#endif + return dev->priv.irq_table; +} diff --git a/drivers/net/ethernet/mellanox/mlx5/core/sf/dev/dev.h b/drivers/net/ethernet/mellanox/mlx5/core/sf/dev/dev.h index d81612122a45..37634e3dedb5 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/sf/dev/dev.h +++ b/drivers/net/ethernet/mellanox/mlx5/core/sf/dev/dev.h @@ -13,6 +13,7 @@ struct mlx5_sf_dev { struct auxiliary_device adev; struct mlx5_core_dev *parent_mdev; + struct mlx5_core_dev *mdev; phys_addr_t bar_base_addr; u32 sfnum; }; @@ -26,6 +27,9 @@ void mlx5_sf_dev_table_destroy(struct mlx5_core_dev *dev); int mlx5_sf_dev_add(struct mlx5_core_dev *dev, u16 sf_index, u32 sfnum); void mlx5_sf_dev_del(struct mlx5_core_dev *dev, u16 sf_index); +int mlx5_sf_driver_register(void); +void mlx5_sf_driver_unregister(void); + #else static inline void __exit mlx5_sf_dev_exit(void) @@ -50,6 +54,15 @@ static inline void mlx5_sf_dev_table_destroy(struct mlx5_core_dev *dev) { } +static inline int mlx5_sf_driver_register(void) +{ + return 0; +} + +static inline void mlx5_sf_driver_unregister(void) +{ +} + #endif #endif diff --git a/drivers/net/ethernet/mellanox/mlx5/core/sf/dev/driver.c b/drivers/net/ethernet/mellanox/mlx5/core/sf/dev/driver.c new file mode 100644 index 000000000000..10fe41c13a4a --- /dev/null +++ b/drivers/net/ethernet/mellanox/mlx5/core/sf/dev/driver.c @@ -0,0 +1,105 @@ +// SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB +/* Copyright (c) 2020 Mellanox Technologies Ltd */ + +#include +#include +#include "mlx5_core.h" +#include "dev.h" +#include "devlink.h" + +static int mlx5_sf_dev_probe(struct auxiliary_device *adev, const struct auxiliary_device_id *id) +{ + struct mlx5_sf_dev *sf_dev = container_of(adev, struct mlx5_sf_dev, adev); + struct mlx5_core_dev *mdev; + struct devlink *devlink; + int err; + + devlink = mlx5_devlink_alloc(); + if (!devlink) + return -ENOMEM; + + mdev = devlink_priv(devlink); + mdev->device = &adev->dev; + mdev->pdev = sf_dev->parent_mdev->pdev; + mdev->bar_addr = sf_dev->bar_base_addr; + mdev->iseg_base = sf_dev->bar_base_addr; + mdev->coredev_type = MLX5_COREDEV_SF; + mdev->priv.parent_mdev = sf_dev->parent_mdev; + mdev->priv.adev_idx = sf_dev->adev.id; + sf_dev->mdev = mdev; + + err = mlx5_mdev_init(mdev, MLX5_DEFAULT_PROF); + if (err) { + mlx5_core_warn(mdev, "mlx5_mdev_init on err=%d\n", err); + goto mdev_err; + } + + mdev->iseg = ioremap(mdev->iseg_base, sizeof(*mdev->iseg)); + if (!mdev->iseg) { + mlx5_core_warn(mdev, "remap error\n"); + goto remap_err; + } + + err = mlx5_load_one(mdev, true); + if (err) { + mlx5_core_warn(mdev, "mlx5_load_one err=%d\n", err); + goto load_one_err; + } + devlink_reload_enable(devlink); + return 0; + +load_one_err: + iounmap(mdev->iseg); +remap_err: + mlx5_mdev_uninit(mdev); +mdev_err: + mlx5_devlink_free(devlink); + return err; +} + +static int mlx5_sf_dev_remove(struct auxiliary_device *adev) +{ + struct mlx5_sf_dev *sf_dev = container_of(adev, struct mlx5_sf_dev, adev); + struct devlink *devlink; + + devlink = priv_to_devlink(sf_dev->mdev); + devlink_reload_disable(devlink); + mlx5_unload_one(sf_dev->mdev, true); + iounmap(sf_dev->mdev->iseg); + mlx5_mdev_uninit(sf_dev->mdev); + mlx5_devlink_free(devlink); + return 0; +} + +static void mlx5_sf_dev_shutdown(struct auxiliary_device *adev) +{ + struct mlx5_sf_dev *sf_dev = container_of(adev, struct mlx5_sf_dev, adev); + + mlx5_unload_one(sf_dev->mdev, false); +} + +static const struct auxiliary_device_id mlx5_sf_dev_id_table[] = { + { .name = KBUILD_MODNAME "." MLX5_SF_DEV_ID_NAME, }, + { }, +}; + +MODULE_DEVICE_TABLE(auxiliary, mlx5_sf_dev_id_table); + +static struct auxiliary_driver mlx5_sf_driver = { + .name = MLX5_SF_DEV_ID_NAME, + .probe = mlx5_sf_dev_probe, + .remove = mlx5_sf_dev_remove, + .shutdown = mlx5_sf_dev_shutdown, + .id_table = mlx5_sf_dev_id_table, +}; + +int mlx5_sf_driver_register(void) +{ + return auxiliary_driver_register(&mlx5_sf_driver); +} + +void mlx5_sf_driver_unregister(void) +{ + auxiliary_driver_unregister(&mlx5_sf_driver); +} + diff --git a/include/linux/mlx5/driver.h b/include/linux/mlx5/driver.h index 151cacab07db..f3104b50ade5 100644 --- a/include/linux/mlx5/driver.h +++ b/include/linux/mlx5/driver.h @@ -193,7 +193,8 @@ enum port_state_policy { enum mlx5_coredev_type { MLX5_COREDEV_PF, - MLX5_COREDEV_VF + MLX5_COREDEV_VF, + MLX5_COREDEV_SF, }; struct mlx5_field_desc { @@ -606,6 +607,7 @@ struct mlx5_priv { struct mlx5_uars_page *uar; #ifdef CONFIG_MLX5_SF struct mlx5_sf_dev_table *sf_dev_table; + struct mlx5_core_dev *parent_mdev; #endif }; From patchwork Thu Nov 12 19:24:19 2020 Content-Type: text/plain; charset="utf-8" MIME-Version: 1.0 Content-Transfer-Encoding: 7bit X-Patchwork-Submitter: Parav Pandit X-Patchwork-Id: 11901267 Return-Path: Received: from mail.kernel.org (pdx-korg-mail-1.web.codeaurora.org [172.30.200.123]) by pdx-korg-patchwork-2.web.codeaurora.org (Postfix) with ESMTP id EF8BD697 for ; Thu, 12 Nov 2020 19:25:02 +0000 (UTC) Received: from vger.kernel.org (vger.kernel.org [23.128.96.18]) by mail.kernel.org (Postfix) with ESMTP id BDDED20B80 for ; Thu, 12 Nov 2020 19:25:02 +0000 (UTC) Authentication-Results: mail.kernel.org; dkim=pass (2048-bit key) header.d=nvidia.com header.i=@nvidia.com header.b="lKVKPeiQ" Received: (majordomo@vger.kernel.org) by vger.kernel.org via listexpand id S1726925AbgKLTZA (ORCPT ); Thu, 12 Nov 2020 14:25:00 -0500 Received: from hqnvemgate26.nvidia.com ([216.228.121.65]:14174 "EHLO hqnvemgate26.nvidia.com" rhost-flags-OK-OK-OK-OK) by vger.kernel.org with ESMTP id S1726898AbgKLTY5 (ORCPT ); Thu, 12 Nov 2020 14:24:57 -0500 Received: from hqmail.nvidia.com (Not Verified[216.228.121.13]) by hqnvemgate26.nvidia.com (using TLS: TLSv1.2, AES256-SHA) id ; Thu, 12 Nov 2020 11:25:01 -0800 Received: from sw-mtx-036.mtx.labs.mlnx (172.20.13.39) by HQMAIL107.nvidia.com (172.20.187.13) with Microsoft SMTP Server (TLS) id 15.0.1473.3; Thu, 12 Nov 2020 19:24:55 +0000 From: Parav Pandit To: , , CC: , , , , , , , Vu Pham , Parav Pandit , Roi Dayan Subject: [PATCH net-next 09/13] net/mlx5: E-switch, Prepare eswitch to handle SF vport Date: Thu, 12 Nov 2020 21:24:19 +0200 Message-ID: <20201112192424.2742-10-parav@nvidia.com> X-Mailer: git-send-email 2.26.2 In-Reply-To: <20201112192424.2742-1-parav@nvidia.com> References: <20201112192424.2742-1-parav@nvidia.com> MIME-Version: 1.0 X-Originating-IP: [172.20.13.39] X-ClientProxiedBy: HQMAIL107.nvidia.com (172.20.187.13) To HQMAIL107.nvidia.com (172.20.187.13) DKIM-Signature: v=1; a=rsa-sha256; c=relaxed/relaxed; d=nvidia.com; s=n1; t=1605209101; bh=+QdD0DnoAEwckFSr4hAwtfSNQ5UmKtgLH+sUbnwKqEA=; h=From:To:CC:Subject:Date:Message-ID:X-Mailer:In-Reply-To: References:MIME-Version:Content-Transfer-Encoding:Content-Type: X-Originating-IP:X-ClientProxiedBy; b=lKVKPeiQ968zLaPe/y3n5OomvB01RDErZg/Ygneo0cV3YKQaUSdxrjSm0Xu7YTj3u f91nHz1o+kZaI8VzeLkK9V8Wfxr7IwVJQdmGYUgjEWY9ja711owLOdVNbdEEdcdMcf lsWdrR/pd0oBY5pQF9oEBP2IIRn/UwydbKDI1i7LtTOknn37cHKBn21cBBIVeCz8pV L+1SeCQi8RNQbv8PRsYT0WSPozSQVYMolJevxK6ydu1/qQQ2z4fNLKx7OFOEObncur VpbXV2ZvLJMqgS/e1vqJwtsE30zpE2Cg9KSXVnnbjH5tUHiuuW9E7W4sygNdDLF4/6 Nkjes6P9G0yxA== Precedence: bulk List-ID: X-Mailing-List: linux-rdma@vger.kernel.org From: Vu Pham Prepare eswitch to handle SF vport during (a) querying eswitch functions (b) egress ACL creation (c) account for SF vports in total vports calculation Assign a dedicated placeholder for SFs vports and their representors. They are placed after VFs vports and before ECPF vports as below: [PF,VF0,...,VFn,SF0,...SFm,ECPF,UPLINK]. Change functions to map SF's vport numbers to indices when accessing the vports or representors arrays, and vice versa. Signed-off-by: Vu Pham Signed-off-by: Parav Pandit Reviewed-by: Roi Dayan --- .../net/ethernet/mellanox/mlx5/core/Kconfig | 10 ++++ .../mellanox/mlx5/core/esw/acl/egress_ofld.c | 2 +- .../net/ethernet/mellanox/mlx5/core/eswitch.c | 11 +++- .../net/ethernet/mellanox/mlx5/core/eswitch.h | 55 +++++++++++++++++++ .../mellanox/mlx5/core/eswitch_offloads.c | 11 ++++ .../net/ethernet/mellanox/mlx5/core/sf/sf.h | 35 ++++++++++++ .../net/ethernet/mellanox/mlx5/core/vport.c | 3 +- 7 files changed, 123 insertions(+), 4 deletions(-) create mode 100644 drivers/net/ethernet/mellanox/mlx5/core/sf/sf.h diff --git a/drivers/net/ethernet/mellanox/mlx5/core/Kconfig b/drivers/net/ethernet/mellanox/mlx5/core/Kconfig index 10dfaf671c90..11d5e0e99bd6 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/Kconfig +++ b/drivers/net/ethernet/mellanox/mlx5/core/Kconfig @@ -211,3 +211,13 @@ config MLX5_SF Build support for subfuction device in the NIC. A Mellanox subfunction device can support RDMA, netdevice and vdpa device. It is similar to a SRIOV VF but it doesn't require SRIOV support. + +config MLX5_SF_MANAGER + bool + depends on MLX5_SF && MLX5_ESWITCH + default y + help + Build support for subfuction port in the NIC. A Mellanox subfunction + port is managed through devlink. A subfunction supports RDMA, netdevice + and vdpa device. It is similar to a SRIOV VF but it doesn't require + SRIOV support. diff --git a/drivers/net/ethernet/mellanox/mlx5/core/esw/acl/egress_ofld.c b/drivers/net/ethernet/mellanox/mlx5/core/esw/acl/egress_ofld.c index c3faae67e4d6..45758ff3c14e 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/esw/acl/egress_ofld.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/esw/acl/egress_ofld.c @@ -150,7 +150,7 @@ static void esw_acl_egress_ofld_groups_destroy(struct mlx5_vport *vport) static bool esw_acl_egress_needed(const struct mlx5_eswitch *esw, u16 vport_num) { - return mlx5_eswitch_is_vf_vport(esw, vport_num); + return mlx5_eswitch_is_vf_vport(esw, vport_num) || mlx5_esw_is_sf_vport(esw, vport_num); } int esw_acl_egress_ofld_setup(struct mlx5_eswitch *esw, struct mlx5_vport *vport) diff --git a/drivers/net/ethernet/mellanox/mlx5/core/eswitch.c b/drivers/net/ethernet/mellanox/mlx5/core/eswitch.c index b44f28fb5518..5b90f126b7f3 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/eswitch.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/eswitch.c @@ -1369,9 +1369,15 @@ const u32 *mlx5_esw_query_functions(struct mlx5_core_dev *dev) { int outlen = MLX5_ST_SZ_BYTES(query_esw_functions_out); u32 in[MLX5_ST_SZ_DW(query_esw_functions_in)] = {}; + u16 max_sf_vports; u32 *out; int err; + max_sf_vports = mlx5_sf_max_ports(dev); + /* Device interface is array of 64-bits */ + if (max_sf_vports) + outlen += DIV_ROUND_UP(max_sf_vports, BITS_PER_TYPE(__be64)) * sizeof(__be64); + out = kvzalloc(outlen, GFP_KERNEL); if (!out) return ERR_PTR(-ENOMEM); @@ -1379,7 +1385,7 @@ const u32 *mlx5_esw_query_functions(struct mlx5_core_dev *dev) MLX5_SET(query_esw_functions_in, in, opcode, MLX5_CMD_OP_QUERY_ESW_FUNCTIONS); - err = mlx5_cmd_exec_inout(dev, query_esw_functions, in, out); + err = mlx5_cmd_exec(dev, in, sizeof(in), out, outlen); if (!err) return out; @@ -1874,7 +1880,8 @@ static bool is_port_function_supported(const struct mlx5_eswitch *esw, u16 vport_num) { return vport_num == MLX5_VPORT_PF || - mlx5_eswitch_is_vf_vport(esw, vport_num); + mlx5_eswitch_is_vf_vport(esw, vport_num) || + mlx5_esw_is_sf_vport(esw, vport_num); } int mlx5_devlink_port_function_hw_addr_get(struct devlink *devlink, diff --git a/drivers/net/ethernet/mellanox/mlx5/core/eswitch.h b/drivers/net/ethernet/mellanox/mlx5/core/eswitch.h index cf87de94418f..2165bc065196 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/eswitch.h +++ b/drivers/net/ethernet/mellanox/mlx5/core/eswitch.h @@ -43,6 +43,7 @@ #include #include "lib/mpfs.h" #include "lib/fs_chains.h" +#include "sf/sf.h" #include "en/tc_ct.h" #ifdef CONFIG_MLX5_ESWITCH @@ -499,6 +500,45 @@ static inline u16 mlx5_eswitch_first_host_vport_num(struct mlx5_core_dev *dev) MLX5_VPORT_PF : MLX5_VPORT_FIRST_VF; } +static inline u16 mlx5_esw_sf_start_vport_num(const struct mlx5_core_dev *dev) +{ + return MLX5_CAP_GEN(dev, sf_base_id); +} + +static inline int mlx5_esw_sf_start_idx(const struct mlx5_eswitch *esw) +{ + /* PF and VF vports indices start from 0 to max_vfs */ + return MLX5_VPORT_PF_PLACEHOLDER + mlx5_core_max_vfs(esw->dev); +} + +static inline int mlx5_esw_sf_end_idx(const struct mlx5_eswitch *esw) +{ + return mlx5_esw_sf_start_idx(esw) + mlx5_sf_max_ports(esw->dev); +} + +static inline int +mlx5_esw_sf_vport_num_to_index(const struct mlx5_eswitch *esw, u16 vport_num) +{ + return vport_num - mlx5_esw_sf_start_vport_num(esw->dev) + + MLX5_VPORT_PF_PLACEHOLDER + mlx5_core_max_vfs(esw->dev); +} + +static inline u16 +mlx5_esw_sf_vport_index_to_num(const struct mlx5_eswitch *esw, int idx) +{ + return mlx5_esw_sf_start_vport_num(esw->dev) + idx - + (MLX5_VPORT_PF_PLACEHOLDER + mlx5_core_max_vfs(esw->dev)); +} + +static inline bool +mlx5_esw_is_sf_vport(const struct mlx5_eswitch *esw, u16 vport_num) +{ + return mlx5_sf_supported(esw->dev) && + vport_num >= mlx5_esw_sf_start_vport_num(esw->dev) && + (vport_num < (mlx5_esw_sf_start_vport_num(esw->dev) + + mlx5_sf_max_ports(esw->dev))); +} + static inline bool mlx5_eswitch_is_funcs_handler(const struct mlx5_core_dev *dev) { return mlx5_core_is_ecpf_esw_manager(dev); @@ -527,6 +567,10 @@ static inline int mlx5_eswitch_vport_num_to_index(struct mlx5_eswitch *esw, if (vport_num == MLX5_VPORT_UPLINK) return mlx5_eswitch_uplink_idx(esw); + if (mlx5_esw_is_sf_vport(esw, vport_num)) + return mlx5_esw_sf_vport_num_to_index(esw, vport_num); + + /* PF and VF vports start from 0 to max_vfs */ return vport_num; } @@ -540,6 +584,12 @@ static inline u16 mlx5_eswitch_index_to_vport_num(struct mlx5_eswitch *esw, if (index == mlx5_eswitch_uplink_idx(esw)) return MLX5_VPORT_UPLINK; + /* SF vports indices are after VFs and before ECPF */ + if (mlx5_sf_supported(esw->dev) && + index > mlx5_core_max_vfs(esw->dev)) + return mlx5_esw_sf_vport_index_to_num(esw, index); + + /* PF and VF vports start from 0 to max_vfs */ return index; } @@ -625,6 +675,11 @@ void mlx5e_tc_clean_fdb_peer_flows(struct mlx5_eswitch *esw); for ((vport) = (nvfs); \ (vport) >= (esw)->first_host_vport; (vport)--) +#define mlx5_esw_for_each_sf_rep(esw, i, rep) \ + for ((i) = mlx5_esw_sf_start_idx(esw); \ + (rep) = &(esw)->offloads.vport_reps[(i)], \ + (i) < mlx5_esw_sf_end_idx(esw); (i++)) + struct mlx5_eswitch *mlx5_devlink_eswitch_get(struct devlink *devlink); struct mlx5_vport *__must_check mlx5_eswitch_get_vport(struct mlx5_eswitch *esw, u16 vport_num); diff --git a/drivers/net/ethernet/mellanox/mlx5/core/eswitch_offloads.c b/drivers/net/ethernet/mellanox/mlx5/core/eswitch_offloads.c index 429dc613530b..01242afbfcce 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/eswitch_offloads.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/eswitch_offloads.c @@ -1801,11 +1801,22 @@ static void __esw_offloads_unload_rep(struct mlx5_eswitch *esw, esw->offloads.rep_ops[rep_type]->unload(rep); } +static void __unload_reps_sf_vport(struct mlx5_eswitch *esw, u8 rep_type) +{ + struct mlx5_eswitch_rep *rep; + int i; + + mlx5_esw_for_each_sf_rep(esw, i, rep) + __esw_offloads_unload_rep(esw, rep, rep_type); +} + static void __unload_reps_all_vport(struct mlx5_eswitch *esw, u8 rep_type) { struct mlx5_eswitch_rep *rep; int i; + __unload_reps_sf_vport(esw, rep_type); + mlx5_esw_for_each_vf_rep_reverse(esw, i, rep, esw->esw_funcs.num_vfs) __esw_offloads_unload_rep(esw, rep, rep_type); diff --git a/drivers/net/ethernet/mellanox/mlx5/core/sf/sf.h b/drivers/net/ethernet/mellanox/mlx5/core/sf/sf.h new file mode 100644 index 000000000000..7b9071a865ce --- /dev/null +++ b/drivers/net/ethernet/mellanox/mlx5/core/sf/sf.h @@ -0,0 +1,35 @@ +/* SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB */ +/* Copyright (c) 2020 Mellanox Technologies Ltd */ + +#ifndef __MLX5_SF_H__ +#define __MLX5_SF_H__ + +#include + +#ifdef CONFIG_MLX5_SF_MANAGER + +static inline bool mlx5_sf_supported(const struct mlx5_core_dev *dev) +{ + return MLX5_CAP_GEN(dev, sf); +} + +static inline u16 mlx5_sf_max_ports(const struct mlx5_core_dev *dev) +{ + return mlx5_sf_supported(dev) ? 1 << MLX5_CAP_GEN(dev, log_max_sf) : 0; +} + +#else + +static inline bool mlx5_sf_supported(const struct mlx5_core_dev *dev) +{ + return false; +} + +static inline u16 mlx5_sf_max_ports(const struct mlx5_core_dev *dev) +{ + return 0; +} + +#endif + +#endif diff --git a/drivers/net/ethernet/mellanox/mlx5/core/vport.c b/drivers/net/ethernet/mellanox/mlx5/core/vport.c index bdafc85fd874..233aa8242916 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/vport.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/vport.c @@ -36,6 +36,7 @@ #include #include #include "mlx5_core.h" +#include "sf/sf.h" /* Mutex to hold while enabling or disabling RoCE */ static DEFINE_MUTEX(mlx5_roce_en_lock); @@ -1160,6 +1161,6 @@ EXPORT_SYMBOL_GPL(mlx5_query_nic_system_image_guid); */ u16 mlx5_eswitch_get_total_vports(const struct mlx5_core_dev *dev) { - return MLX5_SPECIAL_VPORTS(dev) + mlx5_core_max_vfs(dev); + return MLX5_SPECIAL_VPORTS(dev) + mlx5_core_max_vfs(dev) + mlx5_sf_max_ports(dev); } EXPORT_SYMBOL_GPL(mlx5_eswitch_get_total_vports); From patchwork Thu Nov 12 19:24:20 2020 Content-Type: text/plain; charset="utf-8" MIME-Version: 1.0 Content-Transfer-Encoding: 7bit X-Patchwork-Submitter: Parav Pandit X-Patchwork-Id: 11901275 Return-Path: Received: from mail.kernel.org (pdx-korg-mail-1.web.codeaurora.org [172.30.200.123]) by pdx-korg-patchwork-2.web.codeaurora.org (Postfix) with ESMTP id 65391697 for ; Thu, 12 Nov 2020 19:25:15 +0000 (UTC) Received: from vger.kernel.org (vger.kernel.org [23.128.96.18]) by mail.kernel.org (Postfix) with ESMTP id 3AC772222F for ; Thu, 12 Nov 2020 19:25:15 +0000 (UTC) Authentication-Results: mail.kernel.org; dkim=pass (2048-bit key) header.d=nvidia.com header.i=@nvidia.com header.b="CAcvkw6y" Received: (majordomo@vger.kernel.org) by vger.kernel.org via listexpand id S1726876AbgKLTZK (ORCPT ); Thu, 12 Nov 2020 14:25:10 -0500 Received: from hqnvemgate24.nvidia.com ([216.228.121.143]:11362 "EHLO hqnvemgate24.nvidia.com" rhost-flags-OK-OK-OK-OK) by vger.kernel.org with ESMTP id S1726912AbgKLTY6 (ORCPT ); Thu, 12 Nov 2020 14:24:58 -0500 Received: from hqmail.nvidia.com (Not Verified[216.228.121.13]) by hqnvemgate24.nvidia.com (using TLS: TLSv1.2, AES256-SHA) id ; Thu, 12 Nov 2020 11:25:06 -0800 Received: from sw-mtx-036.mtx.labs.mlnx (172.20.13.39) by HQMAIL107.nvidia.com (172.20.187.13) with Microsoft SMTP Server (TLS) id 15.0.1473.3; Thu, 12 Nov 2020 19:24:57 +0000 From: Parav Pandit To: , , CC: , , , , , , , Parav Pandit , Vu Pham , Roi Dayan Subject: [PATCH net-next 10/13] net/mlx5: E-switch, Add eswitch helpers for SF vport Date: Thu, 12 Nov 2020 21:24:20 +0200 Message-ID: <20201112192424.2742-11-parav@nvidia.com> X-Mailer: git-send-email 2.26.2 In-Reply-To: <20201112192424.2742-1-parav@nvidia.com> References: <20201112192424.2742-1-parav@nvidia.com> MIME-Version: 1.0 X-Originating-IP: [172.20.13.39] X-ClientProxiedBy: HQMAIL107.nvidia.com (172.20.187.13) To HQMAIL107.nvidia.com (172.20.187.13) DKIM-Signature: v=1; a=rsa-sha256; c=relaxed/relaxed; d=nvidia.com; s=n1; t=1605209106; bh=X7wkyOLBY1v8uaw9RgSYUbc7p4gr/Rh3LJuyyQDy0gc=; h=From:To:CC:Subject:Date:Message-ID:X-Mailer:In-Reply-To: References:MIME-Version:Content-Transfer-Encoding:Content-Type: X-Originating-IP:X-ClientProxiedBy; b=CAcvkw6yVqcHbp8CvBdW4kbH1mjF5xY90jKZYFSfguRvTIfcTlYwoeZDD1ys923sy iO24Nopdvc8RvHRFfFujq80KQJH8GzvM/r2sWJhQPvXNcvS5xbUEPP2gWkn2j9q+cv FfL76fImB4Qqlb3qNpymq3lb979Ezbq8JapqZEJVokZAcU/xZNxcBIkuafgz4+qfVR Ue5oOx2zNcfBIYjgvU9qrgYvEEqivdCziIktsWumPvI8IC1TKVr1WLs7zrPMYWk+VY J70P6PD5l23bxgE0ChaGYAdCpxy5p0u6vQihqKfmKtesaI0li7cOssQpTU8w3tScjw d9P31y/0fZo4Q== Precedence: bulk List-ID: X-Mailing-List: linux-rdma@vger.kernel.org Add helpers to enable/disable eswitch port, register its devlink port and load its representor. Signed-off-by: Vu Pham Signed-off-by: Parav Pandit Reviewed-by: Roi Dayan --- .../mellanox/mlx5/core/esw/devlink_port.c | 41 +++++++++++++++++++ .../net/ethernet/mellanox/mlx5/core/eswitch.c | 10 +++++ .../net/ethernet/mellanox/mlx5/core/eswitch.h | 15 +++++++ .../mellanox/mlx5/core/eswitch_offloads.c | 36 +++++++++++++++- 4 files changed, 100 insertions(+), 2 deletions(-) diff --git a/drivers/net/ethernet/mellanox/mlx5/core/esw/devlink_port.c b/drivers/net/ethernet/mellanox/mlx5/core/esw/devlink_port.c index 88688b84513b..f361a896c278 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/esw/devlink_port.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/esw/devlink_port.c @@ -122,3 +122,44 @@ struct devlink_port *mlx5_esw_offloads_devlink_port(struct mlx5_eswitch *esw, u1 vport = mlx5_eswitch_get_vport(esw, vport_num); return IS_ERR(vport) ? ERR_CAST(vport) : vport->dl_port; } + +int mlx5_esw_devlink_sf_port_register(struct mlx5_eswitch *esw, struct devlink_port *dl_port, + u16 vport_num, u32 sfnum) +{ + struct mlx5_core_dev *dev = esw->dev; + struct netdev_phys_item_id ppid = {}; + unsigned int dl_port_index; + struct mlx5_vport *vport; + struct devlink *devlink; + u16 pfnum; + int err; + + vport = mlx5_eswitch_get_vport(esw, vport_num); + if (IS_ERR(vport)) + return PTR_ERR(vport); + + pfnum = PCI_FUNC(dev->pdev->devfn); + mlx5_esw_get_port_parent_id(dev, &ppid); + memcpy(dl_port->attrs.switch_id.id, &ppid.id[0], ppid.id_len); + dl_port->attrs.switch_id.id_len = ppid.id_len; + devlink_port_attrs_pci_sf_set(dl_port, 0, pfnum, sfnum, false); + devlink = priv_to_devlink(dev); + dl_port_index = mlx5_esw_vport_to_devlink_port_index(dev, vport_num); + err = devlink_port_register(devlink, dl_port, dl_port_index); + if (err) + return err; + + vport->dl_port = dl_port; + return 0; +} + +void mlx5_esw_devlink_sf_port_unregister(struct mlx5_eswitch *esw, u16 vport_num) +{ + struct mlx5_vport *vport; + + vport = mlx5_eswitch_get_vport(esw, vport_num); + if (IS_ERR(vport)) + return; + devlink_port_unregister(vport->dl_port); + vport->dl_port = NULL; +} diff --git a/drivers/net/ethernet/mellanox/mlx5/core/eswitch.c b/drivers/net/ethernet/mellanox/mlx5/core/eswitch.c index 5b90f126b7f3..d72766b78bd7 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/eswitch.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/eswitch.c @@ -1342,6 +1342,16 @@ static void esw_disable_vport(struct mlx5_eswitch *esw, u16 vport_num) mutex_unlock(&esw->state_lock); } +int mlx5_esw_vport_enable(struct mlx5_eswitch *esw, u16 vport_num) +{ + return esw_enable_vport(esw, vport_num, MLX5_VPORT_UC_ADDR_CHANGE); +} + +void mlx5_esw_vport_disable(struct mlx5_eswitch *esw, u16 vport_num) +{ + esw_disable_vport(esw, vport_num); +} + static int eswitch_vport_event(struct notifier_block *nb, unsigned long type, void *data) { diff --git a/drivers/net/ethernet/mellanox/mlx5/core/eswitch.h b/drivers/net/ethernet/mellanox/mlx5/core/eswitch.h index 2165bc065196..3a373f314a6b 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/eswitch.h +++ b/drivers/net/ethernet/mellanox/mlx5/core/eswitch.h @@ -711,6 +711,9 @@ esw_get_max_restore_tag(struct mlx5_eswitch *esw); int esw_offloads_load_rep(struct mlx5_eswitch *esw, u16 vport_num); void esw_offloads_unload_rep(struct mlx5_eswitch *esw, u16 vport_num); +int mlx5_esw_offloads_rep_load(struct mlx5_eswitch *esw, u16 vport_num); +void mlx5_esw_offloads_rep_unload(struct mlx5_eswitch *esw, u16 vport_num); + int mlx5_eswitch_load_vport(struct mlx5_eswitch *esw, u16 vport_num, enum mlx5_eswitch_vport_event enabled_events); void mlx5_eswitch_unload_vport(struct mlx5_eswitch *esw, u16 vport_num); @@ -722,6 +725,18 @@ void mlx5_eswitch_unload_vf_vports(struct mlx5_eswitch *esw, u16 num_vfs); int mlx5_esw_offloads_devlink_port_register(struct mlx5_eswitch *esw, u16 vport_num); void mlx5_esw_offloads_devlink_port_unregister(struct mlx5_eswitch *esw, u16 vport_num); struct devlink_port *mlx5_esw_offloads_devlink_port(struct mlx5_eswitch *esw, u16 vport_num); + +int mlx5_esw_devlink_sf_port_register(struct mlx5_eswitch *esw, struct devlink_port *dl_port, + u16 vport_num, u32 sfnum); +void mlx5_esw_devlink_sf_port_unregister(struct mlx5_eswitch *esw, u16 vport_num); + +int mlx5_esw_vport_enable(struct mlx5_eswitch *esw, u16 vport_num); +void mlx5_esw_vport_disable(struct mlx5_eswitch *esw, u16 vport_num); + +int mlx5_esw_offloads_sf_vport_enable(struct mlx5_eswitch *esw, struct devlink_port *dl_port, + u16 vport_num, u32 sfnum); +void mlx5_esw_offloads_sf_vport_disable(struct mlx5_eswitch *esw, u16 vport_num); + #else /* CONFIG_MLX5_ESWITCH */ /* eswitch API stubs */ static inline int mlx5_eswitch_init(struct mlx5_core_dev *dev) { return 0; } diff --git a/drivers/net/ethernet/mellanox/mlx5/core/eswitch_offloads.c b/drivers/net/ethernet/mellanox/mlx5/core/eswitch_offloads.c index 01242afbfcce..14f73c202adf 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/eswitch_offloads.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/eswitch_offloads.c @@ -1834,7 +1834,7 @@ static void __unload_reps_all_vport(struct mlx5_eswitch *esw, u8 rep_type) __esw_offloads_unload_rep(esw, rep, rep_type); } -static int mlx5_esw_offloads_rep_load(struct mlx5_eswitch *esw, u16 vport_num) +int mlx5_esw_offloads_rep_load(struct mlx5_eswitch *esw, u16 vport_num) { struct mlx5_eswitch_rep *rep; int rep_type; @@ -1858,7 +1858,7 @@ static int mlx5_esw_offloads_rep_load(struct mlx5_eswitch *esw, u16 vport_num) return err; } -static void mlx5_esw_offloads_rep_unload(struct mlx5_eswitch *esw, u16 vport_num) +void mlx5_esw_offloads_rep_unload(struct mlx5_eswitch *esw, u16 vport_num) { struct mlx5_eswitch_rep *rep; int rep_type; @@ -2842,3 +2842,35 @@ u32 mlx5_eswitch_get_vport_metadata_for_match(struct mlx5_eswitch *esw, return vport->metadata << (32 - ESW_SOURCE_PORT_METADATA_BITS); } EXPORT_SYMBOL(mlx5_eswitch_get_vport_metadata_for_match); + +int mlx5_esw_offloads_sf_vport_enable(struct mlx5_eswitch *esw, struct devlink_port *dl_port, + u16 vport_num, u32 sfnum) +{ + int err; + + err = mlx5_esw_vport_enable(esw, vport_num); + if (err) + return err; + + err = mlx5_esw_devlink_sf_port_register(esw, dl_port, vport_num, sfnum); + if (err) + goto devlink_err; + + err = mlx5_esw_offloads_rep_load(esw, vport_num); + if (err) + goto rep_err; + return 0; + +rep_err: + mlx5_esw_devlink_sf_port_unregister(esw, vport_num); +devlink_err: + mlx5_esw_vport_disable(esw, vport_num); + return err; +} + +void mlx5_esw_offloads_sf_vport_disable(struct mlx5_eswitch *esw, u16 vport_num) +{ + mlx5_esw_offloads_rep_unload(esw, vport_num); + mlx5_esw_devlink_sf_port_unregister(esw, vport_num); + mlx5_esw_vport_disable(esw, vport_num); +} From patchwork Thu Nov 12 19:24:21 2020 Content-Type: text/plain; charset="utf-8" MIME-Version: 1.0 Content-Transfer-Encoding: 7bit X-Patchwork-Submitter: Parav Pandit X-Patchwork-Id: 11901273 Return-Path: Received: from mail.kernel.org (pdx-korg-mail-1.web.codeaurora.org [172.30.200.123]) by pdx-korg-patchwork-2.web.codeaurora.org (Postfix) with ESMTP id 5571F697 for ; Thu, 12 Nov 2020 19:25:11 +0000 (UTC) Received: from vger.kernel.org (vger.kernel.org [23.128.96.18]) by mail.kernel.org (Postfix) with ESMTP id 2A56421D7F for ; Thu, 12 Nov 2020 19:25:11 +0000 (UTC) Authentication-Results: mail.kernel.org; dkim=pass (2048-bit key) header.d=nvidia.com header.i=@nvidia.com header.b="bx/iCrUn" Received: (majordomo@vger.kernel.org) by vger.kernel.org via listexpand id S1726970AbgKLTZJ (ORCPT ); Thu, 12 Nov 2020 14:25:09 -0500 Received: from hqnvemgate25.nvidia.com ([216.228.121.64]:11890 "EHLO hqnvemgate25.nvidia.com" rhost-flags-OK-OK-OK-OK) by vger.kernel.org with ESMTP id S1726884AbgKLTY7 (ORCPT ); Thu, 12 Nov 2020 14:24:59 -0500 Received: from hqmail.nvidia.com (Not Verified[216.228.121.13]) by hqnvemgate25.nvidia.com (using TLS: TLSv1.2, AES256-SHA) id ; Thu, 12 Nov 2020 11:24:53 -0800 Received: from sw-mtx-036.mtx.labs.mlnx (172.20.13.39) by HQMAIL107.nvidia.com (172.20.187.13) with Microsoft SMTP Server (TLS) id 15.0.1473.3; Thu, 12 Nov 2020 19:24:58 +0000 From: Parav Pandit To: , , CC: , , , , , , , Vu Pham , Parav Pandit Subject: [PATCH net-next 11/13] net/mlx5: SF, Add SF configuration hardware commands Date: Thu, 12 Nov 2020 21:24:21 +0200 Message-ID: <20201112192424.2742-12-parav@nvidia.com> X-Mailer: git-send-email 2.26.2 In-Reply-To: <20201112192424.2742-1-parav@nvidia.com> References: <20201112192424.2742-1-parav@nvidia.com> MIME-Version: 1.0 X-Originating-IP: [172.20.13.39] X-ClientProxiedBy: HQMAIL107.nvidia.com (172.20.187.13) To HQMAIL107.nvidia.com (172.20.187.13) DKIM-Signature: v=1; a=rsa-sha256; c=relaxed/relaxed; d=nvidia.com; s=n1; t=1605209093; bh=5i1PQZ1/8jjIYq9/gILm63zgTWgNxBLAtrpxiC4y5bQ=; h=From:To:CC:Subject:Date:Message-ID:X-Mailer:In-Reply-To: References:MIME-Version:Content-Transfer-Encoding:Content-Type: X-Originating-IP:X-ClientProxiedBy; b=bx/iCrUn6h+eWSj6AdY0eOX5CC7KOXUAxU62W4DLGjdTBCvPHo5ynkIqLIMlYK/Wx D2NFnEZkzpm5mp5r1pDOU2yQAdPH3j6m3D3BskLyZfvXuxASw/XM2FB0vVIholsVxJ yoSEM8RDdlkUY5IIceg+L5zfflFOun5y77zZfToJ3x4PdJJ9SP4mQ5niNSWrk6DOO9 /IELPr5UyWEjfW438Tt3JqEZW1HPhDcf9KkPZBoqkCmrgK2ywiVG79qTZ62cMchmzm faBgHYw67x/jGBmFwgClN6pcIR6DTDd7qgSxXYK2YMhI1nsaZE8wO/nGTs0n9rN+Nf xZHbVQU/wBFFA== Precedence: bulk List-ID: X-Mailing-List: linux-rdma@vger.kernel.org From: Vu Pham Add command helpers to access SF port and function in device. Enable SF HCA port capability when such configuration is enabled and supported in a device. Use them in subsequent patches. Signed-off-by: Vu Pham Signed-off-by: Parav Pandit --- .../net/ethernet/mellanox/mlx5/core/Makefile | 5 ++ drivers/net/ethernet/mellanox/mlx5/core/cmd.c | 4 ++ .../net/ethernet/mellanox/mlx5/core/main.c | 5 ++ .../net/ethernet/mellanox/mlx5/core/sf/cmd.c | 48 +++++++++++++++++++ .../net/ethernet/mellanox/mlx5/core/sf/priv.h | 14 ++++++ 5 files changed, 76 insertions(+) create mode 100644 drivers/net/ethernet/mellanox/mlx5/core/sf/cmd.c create mode 100644 drivers/net/ethernet/mellanox/mlx5/core/sf/priv.h diff --git a/drivers/net/ethernet/mellanox/mlx5/core/Makefile b/drivers/net/ethernet/mellanox/mlx5/core/Makefile index 7dd5be49fb9e..911e7bb43b23 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/Makefile +++ b/drivers/net/ethernet/mellanox/mlx5/core/Makefile @@ -89,3 +89,8 @@ mlx5_core-$(CONFIG_MLX5_SW_STEERING) += steering/dr_domain.o steering/dr_table.o # SF device # mlx5_core-$(CONFIG_MLX5_SF) += sf/dev/dev.o sf/dev/driver.o + +# +# SF manager +# +mlx5_core-$(CONFIG_MLX5_SF_MANAGER) += sf/cmd.o diff --git a/drivers/net/ethernet/mellanox/mlx5/core/cmd.c b/drivers/net/ethernet/mellanox/mlx5/core/cmd.c index e49387dbef98..7de8139bc167 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/cmd.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/cmd.c @@ -333,6 +333,7 @@ static int mlx5_internal_err_ret_value(struct mlx5_core_dev *dev, u16 op, case MLX5_CMD_OP_DEALLOC_MEMIC: case MLX5_CMD_OP_PAGE_FAULT_RESUME: case MLX5_CMD_OP_QUERY_ESW_FUNCTIONS: + case MLX5_CMD_OP_DEALLOC_SF: return MLX5_CMD_STAT_OK; case MLX5_CMD_OP_QUERY_HCA_CAP: @@ -464,6 +465,7 @@ static int mlx5_internal_err_ret_value(struct mlx5_core_dev *dev, u16 op, case MLX5_CMD_OP_ALLOC_MEMIC: case MLX5_CMD_OP_MODIFY_XRQ: case MLX5_CMD_OP_RELEASE_XRQ_ERROR: + case MLX5_CMD_OP_ALLOC_SF: *status = MLX5_DRIVER_STATUS_ABORTED; *synd = MLX5_DRIVER_SYND; return -EIO; @@ -657,6 +659,8 @@ const char *mlx5_command_str(int command) MLX5_COMMAND_STR_CASE(DESTROY_UMEM); MLX5_COMMAND_STR_CASE(RELEASE_XRQ_ERROR); MLX5_COMMAND_STR_CASE(MODIFY_XRQ); + MLX5_COMMAND_STR_CASE(ALLOC_SF); + MLX5_COMMAND_STR_CASE(DEALLOC_SF); default: return "unknown command opcode"; } } diff --git a/drivers/net/ethernet/mellanox/mlx5/core/main.c b/drivers/net/ethernet/mellanox/mlx5/core/main.c index adfa21de938e..bd414d93f70e 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/main.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/main.c @@ -567,6 +567,11 @@ static int handle_hca_cap(struct mlx5_core_dev *dev, void *set_ctx) if (MLX5_CAP_GEN_MAX(dev, mkey_by_name)) MLX5_SET(cmd_hca_cap, set_hca_cap, mkey_by_name, 1); +#ifdef CONFIG_MLX5_SF_MANAGER + if (MLX5_CAP_GEN_MAX(dev, sf) && MLX5_ESWITCH_MANAGER(dev)) + MLX5_SET(cmd_hca_cap, set_hca_cap, sf, 1); +#endif + return set_caps(dev, set_ctx, MLX5_SET_HCA_CAP_OP_MOD_GENERAL_DEVICE); } diff --git a/drivers/net/ethernet/mellanox/mlx5/core/sf/cmd.c b/drivers/net/ethernet/mellanox/mlx5/core/sf/cmd.c new file mode 100644 index 000000000000..8dd44a2b2467 --- /dev/null +++ b/drivers/net/ethernet/mellanox/mlx5/core/sf/cmd.c @@ -0,0 +1,48 @@ +// SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB +/* Copyright (c) 2020 Mellanox Technologies Ltd */ + +#include + +int mlx5_cmd_alloc_sf(struct mlx5_core_dev *dev, u16 function_id) +{ + u32 out[MLX5_ST_SZ_DW(alloc_sf_out)] = {}; + u32 in[MLX5_ST_SZ_DW(alloc_sf_in)] = {}; + + MLX5_SET(alloc_sf_in, in, opcode, MLX5_CMD_OP_ALLOC_SF); + MLX5_SET(alloc_sf_in, in, function_id, function_id); + + return mlx5_cmd_exec(dev, in, sizeof(in), out, sizeof(out)); +} + +int mlx5_cmd_dealloc_sf(struct mlx5_core_dev *dev, u16 function_id) +{ + u32 out[MLX5_ST_SZ_DW(dealloc_sf_out)] = {}; + u32 in[MLX5_ST_SZ_DW(dealloc_sf_in)] = {}; + + MLX5_SET(dealloc_sf_in, in, opcode, MLX5_CMD_OP_DEALLOC_SF); + MLX5_SET(dealloc_sf_in, in, function_id, function_id); + + return mlx5_cmd_exec(dev, in, sizeof(in), out, sizeof(out)); +} + +int mlx5_cmd_sf_enable_hca(struct mlx5_core_dev *dev, u16 func_id) +{ + u32 out[MLX5_ST_SZ_DW(enable_hca_out)] = {}; + u32 in[MLX5_ST_SZ_DW(enable_hca_in)] = {}; + + MLX5_SET(enable_hca_in, in, opcode, MLX5_CMD_OP_ENABLE_HCA); + MLX5_SET(enable_hca_in, in, function_id, func_id); + MLX5_SET(enable_hca_in, in, embedded_cpu_function, 0); + return mlx5_cmd_exec(dev, &in, sizeof(in), &out, sizeof(out)); +} + +int mlx5_cmd_sf_disable_hca(struct mlx5_core_dev *dev, u16 func_id) +{ + u32 out[MLX5_ST_SZ_DW(disable_hca_out)] = {}; + u32 in[MLX5_ST_SZ_DW(disable_hca_in)] = {}; + + MLX5_SET(disable_hca_in, in, opcode, MLX5_CMD_OP_DISABLE_HCA); + MLX5_SET(disable_hca_in, in, function_id, func_id); + MLX5_SET(enable_hca_in, in, embedded_cpu_function, 0); + return mlx5_cmd_exec(dev, in, sizeof(in), out, sizeof(out)); +} diff --git a/drivers/net/ethernet/mellanox/mlx5/core/sf/priv.h b/drivers/net/ethernet/mellanox/mlx5/core/sf/priv.h new file mode 100644 index 000000000000..0e39df9f297e --- /dev/null +++ b/drivers/net/ethernet/mellanox/mlx5/core/sf/priv.h @@ -0,0 +1,14 @@ +/* SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB */ +/* Copyright (c) 2020 Mellanox Technologies Ltd */ + +#ifndef __MLX5_SF_PRIV_H__ +#define __MLX5_SF_PRIV_H__ + +#include + +int mlx5_cmd_alloc_sf(struct mlx5_core_dev *dev, u16 function_id); +int mlx5_cmd_dealloc_sf(struct mlx5_core_dev *dev, u16 function_id); +int mlx5_cmd_sf_enable_hca(struct mlx5_core_dev *dev, u16 func_id); +int mlx5_cmd_sf_disable_hca(struct mlx5_core_dev *dev, u16 func_id); + +#endif From patchwork Thu Nov 12 19:24:22 2020 Content-Type: text/plain; charset="utf-8" MIME-Version: 1.0 Content-Transfer-Encoding: 7bit X-Patchwork-Submitter: Parav Pandit X-Patchwork-Id: 11901271 Return-Path: Received: from mail.kernel.org (pdx-korg-mail-1.web.codeaurora.org [172.30.200.123]) by pdx-korg-patchwork-2.web.codeaurora.org (Postfix) with ESMTP id 75DE116C1 for ; Thu, 12 Nov 2020 19:25:05 +0000 (UTC) Received: from vger.kernel.org (vger.kernel.org [23.128.96.18]) by mail.kernel.org (Postfix) with ESMTP id 33DBD2223F for ; Thu, 12 Nov 2020 19:25:05 +0000 (UTC) Authentication-Results: mail.kernel.org; dkim=pass (2048-bit key) header.d=nvidia.com header.i=@nvidia.com header.b="TWi2UBrO" Received: (majordomo@vger.kernel.org) by vger.kernel.org via listexpand id S1726955AbgKLTZE (ORCPT ); Thu, 12 Nov 2020 14:25:04 -0500 Received: from hqnvemgate26.nvidia.com ([216.228.121.65]:14179 "EHLO hqnvemgate26.nvidia.com" rhost-flags-OK-OK-OK-OK) by vger.kernel.org with ESMTP id S1726918AbgKLTZC (ORCPT ); Thu, 12 Nov 2020 14:25:02 -0500 Received: from hqmail.nvidia.com (Not Verified[216.228.121.13]) by hqnvemgate26.nvidia.com (using TLS: TLSv1.2, AES256-SHA) id ; Thu, 12 Nov 2020 11:25:04 -0800 Received: from sw-mtx-036.mtx.labs.mlnx (172.20.13.39) by HQMAIL107.nvidia.com (172.20.187.13) with Microsoft SMTP Server (TLS) id 15.0.1473.3; Thu, 12 Nov 2020 19:24:59 +0000 From: Parav Pandit To: , , CC: , , , , , , , Parav Pandit , Vu Pham Subject: [PATCH net-next 12/13] net/mlx5: SF, Add port add delete functionality Date: Thu, 12 Nov 2020 21:24:22 +0200 Message-ID: <20201112192424.2742-13-parav@nvidia.com> X-Mailer: git-send-email 2.26.2 In-Reply-To: <20201112192424.2742-1-parav@nvidia.com> References: <20201112192424.2742-1-parav@nvidia.com> MIME-Version: 1.0 X-Originating-IP: [172.20.13.39] X-ClientProxiedBy: HQMAIL107.nvidia.com (172.20.187.13) To HQMAIL107.nvidia.com (172.20.187.13) DKIM-Signature: v=1; a=rsa-sha256; c=relaxed/relaxed; d=nvidia.com; s=n1; t=1605209104; bh=5gstZLtO626K6kOP6F234msF+sAiFuprZh30W0/Blzg=; h=From:To:CC:Subject:Date:Message-ID:X-Mailer:In-Reply-To: References:MIME-Version:Content-Transfer-Encoding:Content-Type: X-Originating-IP:X-ClientProxiedBy; b=TWi2UBrOXbE4NB/EZ2a5XilrRw8nQ0o5bfaKtOdXOBwnkC4UBcnBfin9E2f99a6qX Fh3oHOHVslzQ4H7M9EhLMMVaOdDnR+y9BQJ4QViqz0VttRsgJ//bv27lf2SZttXM/Q cpZrQbE9BhEWHWT7nqP6S3CXLKiBIPsPxyZKqsOXOVf4wxRBWEaAoV8NursG7JzVmW jX9Bo5zMO3/MMH+gMEFKfLeNQr8AIZCEbTWfWzCjb5lV1cMZDgRfkTVEdIGUAaCnrG 9Qg/hDLj+TipV11bVC5tRXvjt7nvF027nwj3Q8CI7AFVFieeIjxW/oScH+TfKuZ3DO OgUlPIjoTkfwQ== Precedence: bulk List-ID: X-Mailing-List: linux-rdma@vger.kernel.org To handle SF port management outside of the eswitch as independent software layer, introduce eswitch notifier APIs so that upper layer who wish to support sf port management in switchdev mode can perform its task whenever eswitch mode is set to switchdev or before eswitch is disabled. (a) Initialize sf port table on such eswitch event. (b) Add SF port add and delete functionality in switchdev mode. (c) Destroy all SF ports when eswitch is disabled. (d) Expose SF port add and delete to user via devlink commands. $ devlink dev eswitch set pci/0000:06:00.0 mode switchdev $ devlink port show pci/0000:06:00.0/65535: type eth netdev ens2f0np0 flavour physical port 0 splittable false $ devlink port add pci/0000:06:00.0 flavour pcisf pfnum 0 sfnum 88 $ devlink port show ens2f0npf0sf88 pci/0000:06:00.0/32768: type eth netdev ens2f0npf0sf88 flavour pcisf controller 0 pfnum 0 sfnum 88 external false splittable false function: hw_addr 00:00:00:00:88:88 state inactive opstate detached $ devlink port show ens2f0npf0sf88 -jp { "port": { "pci/0000:06:00.0/32768": { "type": "eth", "netdev": "ens2f0npf0sf88", "flavour": "pcisf", "controller": 0, "pfnum": 0, "sfnum": 88, "external": false, "splittable": false, "function": { "hw_addr": "00:00:00:00:88:88", "state": "inactive", "opstate": "detached" } } } } Signed-off-by: Parav Pandit Reviewed-by: Vu Pham --- .../net/ethernet/mellanox/mlx5/core/Makefile | 2 +- .../net/ethernet/mellanox/mlx5/core/devlink.c | 5 + .../net/ethernet/mellanox/mlx5/core/eswitch.c | 25 ++ .../net/ethernet/mellanox/mlx5/core/eswitch.h | 12 + .../net/ethernet/mellanox/mlx5/core/main.c | 9 + .../net/ethernet/mellanox/mlx5/core/sf/sf.c | 370 ++++++++++++++++++ .../net/ethernet/mellanox/mlx5/core/sf/sf.h | 17 + include/linux/mlx5/driver.h | 4 + 8 files changed, 443 insertions(+), 1 deletion(-) create mode 100644 drivers/net/ethernet/mellanox/mlx5/core/sf/sf.c diff --git a/drivers/net/ethernet/mellanox/mlx5/core/Makefile b/drivers/net/ethernet/mellanox/mlx5/core/Makefile index 911e7bb43b23..b1d7f193375a 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/Makefile +++ b/drivers/net/ethernet/mellanox/mlx5/core/Makefile @@ -93,4 +93,4 @@ mlx5_core-$(CONFIG_MLX5_SF) += sf/dev/dev.o sf/dev/driver.o # # SF manager # -mlx5_core-$(CONFIG_MLX5_SF_MANAGER) += sf/cmd.o +mlx5_core-$(CONFIG_MLX5_SF_MANAGER) += sf/cmd.o sf/sf.o diff --git a/drivers/net/ethernet/mellanox/mlx5/core/devlink.c b/drivers/net/ethernet/mellanox/mlx5/core/devlink.c index aeffb6b135ee..7ad8dc26cb74 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/devlink.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/devlink.c @@ -7,6 +7,7 @@ #include "fw_reset.h" #include "fs_core.h" #include "eswitch.h" +#include "sf/sf.h" static int mlx5_devlink_flash_update(struct devlink *devlink, struct devlink_flash_update_params *params, @@ -187,6 +188,10 @@ static const struct devlink_ops mlx5_devlink_ops = { .eswitch_encap_mode_get = mlx5_devlink_eswitch_encap_mode_get, .port_function_hw_addr_get = mlx5_devlink_port_function_hw_addr_get, .port_function_hw_addr_set = mlx5_devlink_port_function_hw_addr_set, +#endif +#ifdef CONFIG_MLX5_SF_MANAGER + .port_new = mlx5_devlink_sf_port_new, + .port_del = mlx5_devlink_sf_port_del, #endif .flash_update = mlx5_devlink_flash_update, .info_get = mlx5_devlink_info_get, diff --git a/drivers/net/ethernet/mellanox/mlx5/core/eswitch.c b/drivers/net/ethernet/mellanox/mlx5/core/eswitch.c index d72766b78bd7..25f8c0918fca 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/eswitch.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/eswitch.c @@ -1585,6 +1585,15 @@ mlx5_eswitch_update_num_of_vfs(struct mlx5_eswitch *esw, int num_vfs) kvfree(out); } +static void mlx5_esw_mode_change_notify(struct mlx5_eswitch *esw, u16 mode) +{ + struct mlx5_esw_event_info info = {}; + + info.new_mode = mode; + + blocking_notifier_call_chain(&esw->n_head, 0, &info); +} + /** * mlx5_eswitch_enable_locked - Enable eswitch * @esw: Pointer to eswitch @@ -1645,6 +1654,8 @@ int mlx5_eswitch_enable_locked(struct mlx5_eswitch *esw, int mode, int num_vfs) mode == MLX5_ESWITCH_LEGACY ? "LEGACY" : "OFFLOADS", esw->esw_funcs.num_vfs, esw->enabled_vports); + mlx5_esw_mode_change_notify(esw, mode); + return 0; abort: @@ -1701,6 +1712,11 @@ void mlx5_eswitch_disable_locked(struct mlx5_eswitch *esw, bool clear_vf) esw->mode == MLX5_ESWITCH_LEGACY ? "LEGACY" : "OFFLOADS", esw->esw_funcs.num_vfs, esw->enabled_vports); + /* Notify eswitch users that it is exiting from current mode. + * So that it can do necessary cleanup before the eswitch is disabled. + */ + mlx5_esw_mode_change_notify(esw, MLX5_ESWITCH_NONE); + mlx5_eswitch_event_handlers_unregister(esw); if (esw->mode == MLX5_ESWITCH_LEGACY) @@ -1801,6 +1817,7 @@ int mlx5_eswitch_init(struct mlx5_core_dev *dev) esw->offloads.inline_mode = MLX5_INLINE_MODE_NONE; dev->priv.eswitch = esw; + BLOCKING_INIT_NOTIFIER_HEAD(&esw->n_head); return 0; abort: if (esw->work_queue) @@ -2493,4 +2510,12 @@ bool mlx5_esw_multipath_prereq(struct mlx5_core_dev *dev0, dev1->priv.eswitch->mode == MLX5_ESWITCH_OFFLOADS); } +int mlx5_esw_event_notifier_register(struct mlx5_eswitch *esw, struct notifier_block *nb) +{ + return blocking_notifier_chain_register(&esw->n_head, nb); +} +void mlx5_esw_event_notifier_unregister(struct mlx5_eswitch *esw, struct notifier_block *nb) +{ + blocking_notifier_chain_unregister(&esw->n_head, nb); +} diff --git a/drivers/net/ethernet/mellanox/mlx5/core/eswitch.h b/drivers/net/ethernet/mellanox/mlx5/core/eswitch.h index 3a373f314a6b..fb26690a0ad1 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/eswitch.h +++ b/drivers/net/ethernet/mellanox/mlx5/core/eswitch.h @@ -278,6 +278,7 @@ struct mlx5_eswitch { struct { u32 large_group_num; } params; + struct blocking_notifier_head n_head; }; void esw_offloads_disable(struct mlx5_eswitch *esw); @@ -737,6 +738,17 @@ int mlx5_esw_offloads_sf_vport_enable(struct mlx5_eswitch *esw, struct devlink_p u16 vport_num, u32 sfnum); void mlx5_esw_offloads_sf_vport_disable(struct mlx5_eswitch *esw, u16 vport_num); +/** + * mlx5_esw_event_info - Indicates eswitch mode changed/changing. + * + * @new_mode: New mode of eswitch. + */ +struct mlx5_esw_event_info { + u16 new_mode; +}; + +int mlx5_esw_event_notifier_register(struct mlx5_eswitch *esw, struct notifier_block *n); +void mlx5_esw_event_notifier_unregister(struct mlx5_eswitch *esw, struct notifier_block *n); #else /* CONFIG_MLX5_ESWITCH */ /* eswitch API stubs */ static inline int mlx5_eswitch_init(struct mlx5_core_dev *dev) { return 0; } diff --git a/drivers/net/ethernet/mellanox/mlx5/core/main.c b/drivers/net/ethernet/mellanox/mlx5/core/main.c index bd414d93f70e..f6570ce9393f 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/main.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/main.c @@ -889,6 +889,12 @@ static int mlx5_init_once(struct mlx5_core_dev *dev) goto err_eswitch_cleanup; } + err = mlx5_sf_table_init(dev); + if (err) { + mlx5_core_err(dev, "Failed to init SF table %d\n", err); + goto err_sf_table_cleanup; + } + err = mlx5_sf_dev_table_init(dev); if (err) { mlx5_core_err(dev, "Failed to init SF device table %d\n", err); @@ -906,6 +912,8 @@ static int mlx5_init_once(struct mlx5_core_dev *dev) return 0; err_sf_dev_cleanup: + mlx5_sf_table_cleanup(dev); +err_sf_table_cleanup: mlx5_fpga_cleanup(dev); err_eswitch_cleanup: mlx5_eswitch_cleanup(dev->priv.eswitch); @@ -939,6 +947,7 @@ static void mlx5_cleanup_once(struct mlx5_core_dev *dev) mlx5_fw_tracer_destroy(dev->tracer); mlx5_dm_cleanup(dev); mlx5_sf_dev_table_cleanup(dev); + mlx5_sf_table_cleanup(dev); mlx5_fpga_cleanup(dev); mlx5_eswitch_cleanup(dev->priv.eswitch); mlx5_sriov_cleanup(dev); diff --git a/drivers/net/ethernet/mellanox/mlx5/core/sf/sf.c b/drivers/net/ethernet/mellanox/mlx5/core/sf/sf.c new file mode 100644 index 000000000000..dff44ab5057d --- /dev/null +++ b/drivers/net/ethernet/mellanox/mlx5/core/sf/sf.c @@ -0,0 +1,370 @@ +// SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB +/* Copyright (c) 2020 Mellanox Technologies Ltd */ + +#include +#include "eswitch.h" +#include "priv.h" + +struct mlx5_sf { + struct devlink_port dl_port; + unsigned int port_index; + u32 usr_sfnum; + u16 sw_id; + u16 hw_fn_id; +}; + +struct mlx5_sf_table { + struct mlx5_core_dev *dev; /* To refer from notifier context. */ + struct xarray port_indices; /* port index based lookup. */ + struct ida fn_ida; /* allocator based on firmware limits. */ + int ida_max_ports; + refcount_t refcount; + struct completion disable_complete; + struct notifier_block esw_nb; +}; + +static u16 mlx5_sf_sw_to_hw_id(const struct mlx5_core_dev *dev, u16 sw_id) +{ + return sw_id + mlx5_esw_sf_start_vport_num(dev); +} + +static int mlx5_sf_id_alloc(struct mlx5_sf_table *table, struct mlx5_sf *sf) +{ + u16 hw_fn_id; + int sw_id; + int err; + + if (!table->ida_max_ports) + return -EOPNOTSUPP; + sw_id = ida_alloc_max(&table->fn_ida, table->ida_max_ports - 1, GFP_KERNEL); + if (sw_id < 0) + return sw_id; + sf->sw_id = sw_id; + + hw_fn_id = mlx5_sf_sw_to_hw_id(table->dev, sw_id); + err = mlx5_cmd_alloc_sf(table->dev, hw_fn_id); + if (err) + goto cmd_err; + + sf->hw_fn_id = hw_fn_id; + return 0; + +cmd_err: + ida_free(&table->fn_ida, sf->sw_id); + return err; +} + +static void mlx5_sf_id_free(struct mlx5_sf_table *table, struct mlx5_sf *sf) +{ + mlx5_cmd_dealloc_sf(table->dev, sf->hw_fn_id); + ida_free(&table->fn_ida, sf->sw_id); +} + +static struct mlx5_sf * +mlx5_sf_lookup_by_index(struct mlx5_sf_table *table, unsigned int port_index) +{ + return xa_load(&table->port_indices, port_index); +} + +static struct mlx5_sf * +mlx5_sf_lookup_by_sfnum(struct mlx5_sf_table *table, u32 usr_sfnum) +{ + unsigned long index; + struct mlx5_sf *sf; + + xa_for_each(&table->port_indices, index, sf) { + if (sf->usr_sfnum == usr_sfnum) + return sf; + } + return NULL; +} + +static int mlx5_sf_id_insert(struct mlx5_sf_table *table, struct mlx5_sf *sf) +{ + int err; + + err = xa_insert(&table->port_indices, sf->port_index, sf, GFP_KERNEL); + /* After this stage, SF can be queried by devlink user by it port index. */ + return err; +} + +static void mlx5_sf_id_erase(struct mlx5_sf_table *table, struct mlx5_sf *sf) +{ + xa_erase(&table->port_indices, sf->port_index); +} + +static struct mlx5_sf * +mlx5_sf_alloc(struct mlx5_sf_table *table, u32 sfnum, struct netlink_ext_ack *extack) +{ + unsigned int dl_port_index; + struct mlx5_sf *sf; + int err; + + sf = mlx5_sf_lookup_by_sfnum(table, sfnum); + if (sf) { + NL_SET_ERR_MSG_MOD(extack, "SF already exist. Choose different sfnum"); + err = -EEXIST; + goto err; + } + sf = kzalloc(sizeof(*sf), GFP_KERNEL); + if (!sf) { + err = -ENOMEM; + goto err; + } + err = mlx5_sf_id_alloc(table, sf); + if (err) + goto id_err; + + dl_port_index = mlx5_esw_vport_to_devlink_port_index(table->dev, sf->hw_fn_id); + sf->port_index = dl_port_index; + sf->usr_sfnum = sfnum; + + err = mlx5_sf_id_insert(table, sf); + if (err) + goto insert_err; + + return sf; + +insert_err: + mlx5_sf_id_free(table, sf); +id_err: + kfree(sf); +err: + return ERR_PTR(err); +} + +static void mlx5_sf_free(struct mlx5_sf_table *table, struct mlx5_sf *sf) +{ + mlx5_sf_id_erase(table, sf); + mlx5_sf_id_free(table, sf); + kfree(sf); +} + +static struct mlx5_sf_table *mlx5_sf_table_try_get(struct mlx5_core_dev *dev) +{ + struct mlx5_sf_table *table = dev->priv.sf_table; + + if (!table) + return NULL; + + return refcount_inc_not_zero(&table->refcount) ? table : NULL; +} + +static void mlx5_sf_table_put(struct mlx5_sf_table *table) +{ + if (refcount_dec_and_test(&table->refcount)) + complete(&table->disable_complete); +} + +static int mlx5_sf_add(struct mlx5_core_dev *dev, struct mlx5_sf_table *table, + const struct devlink_port_new_attrs *new_attr, + struct netlink_ext_ack *extack) +{ + struct mlx5_eswitch *esw = dev->priv.eswitch; + struct mlx5_sf *sf; + u16 hw_fn_id; + int err; + + sf = mlx5_sf_alloc(table, new_attr->sfnum, extack); + if (IS_ERR(sf)) + return PTR_ERR(sf); + + hw_fn_id = sf->hw_fn_id; + err = mlx5_esw_offloads_sf_vport_enable(esw, &sf->dl_port, hw_fn_id, new_attr->sfnum); + if (err) + goto esw_err; + return 0; + +esw_err: + mlx5_sf_free(table, sf); + return err; +} + +static void mlx5_sf_del(struct mlx5_core_dev *dev, struct mlx5_sf_table *table, struct mlx5_sf *sf) +{ + struct mlx5_eswitch *esw = dev->priv.eswitch; + + mlx5_esw_offloads_sf_vport_disable(esw, sf->hw_fn_id); + mlx5_sf_free(table, sf); +} + +static int +mlx5_sf_new_check_attr(struct mlx5_core_dev *dev, const struct devlink_port_new_attrs *new_attr, + struct netlink_ext_ack *extack) +{ + if (new_attr->flavour != DEVLINK_PORT_FLAVOUR_PCI_SF) { + NL_SET_ERR_MSG_MOD(extack, "Driver supports only SF port addition."); + return -EOPNOTSUPP; + } + if (new_attr->port_index_valid) { + NL_SET_ERR_MSG_MOD(extack, + "Driver does not support user defined port index assignment."); + return -EOPNOTSUPP; + } + if (!new_attr->sfnum_valid) { + NL_SET_ERR_MSG_MOD(extack, + "User must provide unique sfnum. Driver does not support auto assignment."); + return -EOPNOTSUPP; + } + if (new_attr->controller_valid && new_attr->controller) { + NL_SET_ERR_MSG_MOD(extack, "External controller is unsupported."); + return -EOPNOTSUPP; + } + if (new_attr->pfnum != PCI_FUNC(dev->pdev->devfn)) { + NL_SET_ERR_MSG_MOD(extack, "Invalid pfnum supplied."); + return -EOPNOTSUPP; + } + return 0; +} + +int mlx5_devlink_sf_port_new(struct devlink *devlink, const struct devlink_port_new_attrs *new_attr, + struct netlink_ext_ack *extack) +{ + struct mlx5_core_dev *dev = devlink_priv(devlink); + struct mlx5_sf_table *table; + int err; + + err = mlx5_sf_new_check_attr(dev, new_attr, extack); + if (err) + return err; + + table = mlx5_sf_table_try_get(dev); + if (!table) { + NL_SET_ERR_MSG_MOD(extack, + "Port add is only supported in eswitch switchdev mode or SF ports are disabled."); + return -EOPNOTSUPP; + } + err = mlx5_sf_add(dev, table, new_attr, extack); + mlx5_sf_table_put(table); + return err; +} + +int mlx5_devlink_sf_port_del(struct devlink *devlink, unsigned int port_index, + struct netlink_ext_ack *extack) +{ + struct mlx5_core_dev *dev = devlink_priv(devlink); + struct mlx5_sf_table *table; + struct mlx5_sf *sf; + int err = 0; + + table = mlx5_sf_table_try_get(dev); + if (!table) { + NL_SET_ERR_MSG_MOD(extack, + "Port del is only supported in eswitch switchdev mode or SF ports are disabled."); + return -EOPNOTSUPP; + } + sf = mlx5_sf_lookup_by_index(table, port_index); + if (!sf) { + err = -ENODEV; + goto sf_err; + } + + mlx5_sf_del(dev, table, sf); +sf_err: + mlx5_sf_table_put(table); + return err; +} + +static void mlx5_sf_destroy_all(struct mlx5_sf_table *table) +{ + struct mlx5_core_dev *dev = table->dev; + unsigned long index; + struct mlx5_sf *sf; + + xa_for_each(&table->port_indices, index, sf) + mlx5_sf_del(dev, table, sf); +} + +static void mlx5_sf_table_enable(struct mlx5_sf_table *table) +{ + if (!mlx5_sf_max_ports(table->dev)) + return; + + xa_init(&table->port_indices); + table->ida_max_ports = mlx5_sf_max_ports(table->dev); + init_completion(&table->disable_complete); + refcount_set(&table->refcount, 1); +} + +void mlx5_sf_table_disable(struct mlx5_sf_table *table) +{ + if (!mlx5_sf_max_ports(table->dev)) + return; + + if (!refcount_read(&table->refcount)) + return; + + /* Balances with refcount_set; drop the reference so that new user cmd cannot start. */ + mlx5_sf_table_put(table); + wait_for_completion(&table->disable_complete); + + /* At this point, no new user commands can start. + * It is safe to destroy all user created SFs. + */ + mlx5_sf_destroy_all(table); + WARN_ON(!xa_empty(&table->port_indices)); +} + +static int mlx5_sf_esw_event(struct notifier_block *nb, unsigned long event, void *data) +{ + struct mlx5_sf_table *table = container_of(nb, struct mlx5_sf_table, esw_nb); + const struct mlx5_esw_event_info *mode = data; + + switch (mode->new_mode) { + case MLX5_ESWITCH_OFFLOADS: + mlx5_sf_table_enable(table); + break; + case MLX5_ESWITCH_NONE: + mlx5_sf_table_disable(table); + break; + default: + break; + }; + + return 0; +} + +static bool mlx5_sf_table_supported(const struct mlx5_core_dev *dev) +{ + return dev->priv.eswitch && MLX5_ESWITCH_MANAGER(dev) && mlx5_sf_supported(dev); +} + +int mlx5_sf_table_init(struct mlx5_core_dev *dev) +{ + struct mlx5_sf_table *table; + int err; + + if (!mlx5_sf_table_supported(dev)) + return 0; + + table = kzalloc(sizeof(*table), GFP_KERNEL); + if (!table) + return -ENOMEM; + + table->dev = dev; + ida_init(&table->fn_ida); + dev->priv.sf_table = table; + table->esw_nb.notifier_call = mlx5_sf_esw_event; + err = mlx5_esw_event_notifier_register(dev->priv.eswitch, &table->esw_nb); + if (err) + goto reg_err; + return 0; + +reg_err: + kfree(table); + dev->priv.sf_table = NULL; + return err; +} + +void mlx5_sf_table_cleanup(struct mlx5_core_dev *dev) +{ + struct mlx5_sf_table *table = dev->priv.sf_table; + + if (!table) + return; + + mlx5_esw_event_notifier_unregister(dev->priv.eswitch, &table->esw_nb); + WARN_ON(refcount_read(&table->refcount)); + WARN_ON(!ida_is_empty(&table->fn_ida)); + kfree(table); +} diff --git a/drivers/net/ethernet/mellanox/mlx5/core/sf/sf.h b/drivers/net/ethernet/mellanox/mlx5/core/sf/sf.h index 7b9071a865ce..555b19a5880d 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/sf/sf.h +++ b/drivers/net/ethernet/mellanox/mlx5/core/sf/sf.h @@ -18,6 +18,14 @@ static inline u16 mlx5_sf_max_ports(const struct mlx5_core_dev *dev) return mlx5_sf_supported(dev) ? 1 << MLX5_CAP_GEN(dev, log_max_sf) : 0; } +int mlx5_sf_table_init(struct mlx5_core_dev *dev); +void mlx5_sf_table_cleanup(struct mlx5_core_dev *dev); + +int mlx5_devlink_sf_port_new(struct devlink *devlink, const struct devlink_port_new_attrs *add_attr, + struct netlink_ext_ack *extack); +int mlx5_devlink_sf_port_del(struct devlink *devlink, unsigned int port_index, + struct netlink_ext_ack *extack); + #else static inline bool mlx5_sf_supported(const struct mlx5_core_dev *dev) @@ -30,6 +38,15 @@ static inline u16 mlx5_sf_max_ports(const struct mlx5_core_dev *dev) return 0; } +static inline int mlx5_sf_table_init(struct mlx5_core_dev *dev) +{ + return 0; +} + +static inline void mlx5_sf_table_cleanup(struct mlx5_core_dev *dev) +{ +} + #endif #endif diff --git a/include/linux/mlx5/driver.h b/include/linux/mlx5/driver.h index f3104b50ade5..e625cb20ad76 100644 --- a/include/linux/mlx5/driver.h +++ b/include/linux/mlx5/driver.h @@ -509,6 +509,7 @@ struct mlx5_fw_reset; struct mlx5_eq_table; struct mlx5_irq_table; struct mlx5_sf_dev_table; +struct mlx5_sf_table; struct mlx5_rate_limit { u32 rate; @@ -609,6 +610,9 @@ struct mlx5_priv { struct mlx5_sf_dev_table *sf_dev_table; struct mlx5_core_dev *parent_mdev; #endif +#ifdef CONFIG_MLX5_SF_MANAGER + struct mlx5_sf_table *sf_table; +#endif }; enum mlx5_device_state { From patchwork Thu Nov 12 19:24:23 2020 Content-Type: text/plain; charset="utf-8" MIME-Version: 1.0 Content-Transfer-Encoding: 7bit X-Patchwork-Submitter: Parav Pandit X-Patchwork-Id: 11901269 Return-Path: Received: from mail.kernel.org (pdx-korg-mail-1.web.codeaurora.org [172.30.200.123]) by pdx-korg-patchwork-2.web.codeaurora.org (Postfix) with ESMTP id E51961391 for ; Thu, 12 Nov 2020 19:25:04 +0000 (UTC) Received: from vger.kernel.org (vger.kernel.org [23.128.96.18]) by mail.kernel.org (Postfix) with ESMTP id B599A2222F for ; Thu, 12 Nov 2020 19:25:04 +0000 (UTC) Authentication-Results: mail.kernel.org; dkim=pass (2048-bit key) header.d=nvidia.com header.i=@nvidia.com header.b="OZk/cVCJ" Received: (majordomo@vger.kernel.org) by vger.kernel.org via listexpand id S1726950AbgKLTZD (ORCPT ); Thu, 12 Nov 2020 14:25:03 -0500 Received: from hqnvemgate24.nvidia.com ([216.228.121.143]:11377 "EHLO hqnvemgate24.nvidia.com" rhost-flags-OK-OK-OK-OK) by vger.kernel.org with ESMTP id S1726930AbgKLTZB (ORCPT ); Thu, 12 Nov 2020 14:25:01 -0500 Received: from hqmail.nvidia.com (Not Verified[216.228.121.13]) by hqnvemgate24.nvidia.com (using TLS: TLSv1.2, AES256-SHA) id ; Thu, 12 Nov 2020 11:25:09 -0800 Received: from sw-mtx-036.mtx.labs.mlnx (172.20.13.39) by HQMAIL107.nvidia.com (172.20.187.13) with Microsoft SMTP Server (TLS) id 15.0.1473.3; Thu, 12 Nov 2020 19:25:00 +0000 From: Parav Pandit To: , , CC: , , , , , , , Parav Pandit , Vu Pham Subject: [PATCH net-next 13/13] net/mlx5: SF, Port function state change support Date: Thu, 12 Nov 2020 21:24:23 +0200 Message-ID: <20201112192424.2742-14-parav@nvidia.com> X-Mailer: git-send-email 2.26.2 In-Reply-To: <20201112192424.2742-1-parav@nvidia.com> References: <20201112192424.2742-1-parav@nvidia.com> MIME-Version: 1.0 X-Originating-IP: [172.20.13.39] X-ClientProxiedBy: HQMAIL107.nvidia.com (172.20.187.13) To HQMAIL107.nvidia.com (172.20.187.13) DKIM-Signature: v=1; a=rsa-sha256; c=relaxed/relaxed; d=nvidia.com; s=n1; t=1605209109; bh=BPkdJ1ifA/bO+m9fBGi+nxds8HnjhAL3G/KQIhHvcAk=; h=From:To:CC:Subject:Date:Message-ID:X-Mailer:In-Reply-To: References:MIME-Version:Content-Transfer-Encoding:Content-Type: X-Originating-IP:X-ClientProxiedBy; b=OZk/cVCJmKcI2fb+0Z1Zi/h9ZLrga56KrZ1PPsPA/XOoZquIfCXKE3Yb89P4C+3Sj rvdYa/mgRK0gJTLj7pKFgL6tNhQvIcOkYm4f5W1m1w6GjD4zpDOoHTubxSgrxohRVG A0kmfjVR0TYRR+37fx93TK2GApcAXIHPs6My0knGuYne0okLUMkT5b9LG2LzRTkCLb xjI+GrOZ7J/f2YaD9Q/X9YdEUkygeEWfA00HFIcXUWsk0sKnehWtg79xc5jKwgvSpA +JaWRtdJ/z3g8IzTK2d7qEYcFqWPbVo+ocW0Sw2pE2lzNUhWRBgGM7IjdsS9SoH02G 1g4oaVJdekcdg== Precedence: bulk List-ID: X-Mailing-List: linux-rdma@vger.kernel.org Support changing the state of the SF port's function through devlink. When activating the SF port's function, enable the hca in the device followed by adding its auxiliary device. When deactivating the SF port's function, delete its auxiliary device followed by disabling the HCA. Port function attributes get/set callbacks are invoked with devlink instance lock held. Such callbacks need to synchronize with sf port table getting disabled. These callbacks while operating on the devlink port, synchronize with table disable context by holding table refcount. $ devlink dev eswitch set pci/0000:06:00.0 mode switchdev $ devlink port show pci/0000:06:00.0/65535: type eth netdev ens2f0np0 flavour physical port 0 splittable false $ devlink port add pci/0000:06:00.0 flavour pcisf pfnum 0 sfnum 88 $ devlink port show ens2f0npf0sf88 pci/0000:06:00.0/32768: type eth netdev ens2f0npf0sf88 flavour pcisf controller 0 pfnum 0 sfnum 88 external false splittable false function: hw_addr 00:00:00:00:88:88 state inactive opstate detached $ devlink port function set pci/0000:06:00.0/32768 hw_addr 00:00:00:00:88:88 state active $ devlink port show ens2f0npf0sf88 -jp { "port": { "pci/0000:06:00.0/32768": { "type": "eth", "netdev": "ens2f0npf0sf88", "flavour": "pcisf", "controller": 0, "pfnum": 0, "sfnum": 88, "external": false, "splittable": false, "function": { "hw_addr": "00:00:00:00:88:88", "state": "active", "opstate": "attached" } } } } On port function activation, an auxiliary device is created in below example. $ devlink dev show devlink dev show auxiliary/mlx5_core.sf.0 $ devlink port show auxiliary/mlx5_core.sf.0/1 auxiliary/mlx5_core.sf.0/1: type eth netdev p0sf88 flavour virtual port 0 splittable false Signed-off-by: Parav Pandit Reviewed-by: Vu Pham --- .../net/ethernet/mellanox/mlx5/core/devlink.c | 2 + .../net/ethernet/mellanox/mlx5/core/sf/sf.c | 128 ++++++++++++++++++ .../net/ethernet/mellanox/mlx5/core/sf/sf.h | 7 + 3 files changed, 137 insertions(+) diff --git a/drivers/net/ethernet/mellanox/mlx5/core/devlink.c b/drivers/net/ethernet/mellanox/mlx5/core/devlink.c index 7ad8dc26cb74..22d22959e6f6 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/devlink.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/devlink.c @@ -192,6 +192,8 @@ static const struct devlink_ops mlx5_devlink_ops = { #ifdef CONFIG_MLX5_SF_MANAGER .port_new = mlx5_devlink_sf_port_new, .port_del = mlx5_devlink_sf_port_del, + .port_function_state_get = mlx5_devlink_sf_port_fn_state_get, + .port_function_state_set = mlx5_devlink_sf_port_fn_state_set, #endif .flash_update = mlx5_devlink_flash_update, .info_get = mlx5_devlink_info_get, diff --git a/drivers/net/ethernet/mellanox/mlx5/core/sf/sf.c b/drivers/net/ethernet/mellanox/mlx5/core/sf/sf.c index dff44ab5057d..7e90629fd910 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/sf/sf.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/sf/sf.c @@ -4,6 +4,7 @@ #include #include "eswitch.h" #include "priv.h" +#include "sf/dev/dev.h" struct mlx5_sf { struct devlink_port dl_port; @@ -11,6 +12,7 @@ struct mlx5_sf { u32 usr_sfnum; u16 sw_id; u16 hw_fn_id; + enum devlink_port_function_state state; }; struct mlx5_sf_table { @@ -115,6 +117,7 @@ mlx5_sf_alloc(struct mlx5_sf_table *table, u32 sfnum, struct netlink_ext_ack *ex if (err) goto id_err; + sf->state = DEVLINK_PORT_FUNCTION_STATE_INACTIVE; dl_port_index = mlx5_esw_vport_to_devlink_port_index(table->dev, sf->hw_fn_id); sf->port_index = dl_port_index; sf->usr_sfnum = sfnum; @@ -156,6 +159,126 @@ static void mlx5_sf_table_put(struct mlx5_sf_table *table) complete(&table->disable_complete); } +static int +mlx5_sf_state_get(struct mlx5_core_dev *dev, struct mlx5_sf *sf, + enum devlink_port_function_state *state, + enum devlink_port_function_opstate *opstate) +{ + int err = 0; + + *state = sf->state; + switch (sf->state) { + case DEVLINK_PORT_FUNCTION_STATE_ACTIVE: + *opstate = DEVLINK_PORT_FUNCTION_OPSTATE_ATTACHED; + break; + case DEVLINK_PORT_FUNCTION_STATE_INACTIVE: + *opstate = DEVLINK_PORT_FUNCTION_OPSTATE_DETACHED; + break; + default: + err = -EINVAL; + break; + } + return err; +} + +int mlx5_devlink_sf_port_fn_state_get(struct devlink *devlink, struct devlink_port *dl_port, + enum devlink_port_function_state *state, + enum devlink_port_function_opstate *opstate, + struct netlink_ext_ack *extack) +{ + struct mlx5_core_dev *dev = devlink_priv(devlink); + struct mlx5_sf_table *table; + int err = -EOPNOTSUPP; + struct mlx5_sf *sf; + + table = mlx5_sf_table_try_get(dev); + if (!table) + return -EOPNOTSUPP; + + sf = mlx5_sf_lookup_by_index(table, dl_port->index); + if (!sf) + goto sf_err; + err = mlx5_sf_state_get(dev, sf, state, opstate); +sf_err: + mlx5_sf_table_put(table); + return err; +} + +static int mlx5_sf_activate(struct mlx5_core_dev *dev, struct mlx5_sf *sf) +{ + int err; + + err = mlx5_cmd_sf_enable_hca(dev, sf->hw_fn_id); + if (err) + return err; + + err = mlx5_sf_dev_add(dev, sf->sw_id, sf->usr_sfnum); + if (err) + goto dev_err; + + sf->state = DEVLINK_PORT_FUNCTION_STATE_ACTIVE; + return 0; + +dev_err: + mlx5_cmd_sf_disable_hca(dev, sf->hw_fn_id); + return err; +} + +static int mlx5_sf_deactivate(struct mlx5_core_dev *dev, struct mlx5_sf *sf) +{ + int err; + + mlx5_sf_dev_del(dev, sf->sw_id); + err = mlx5_cmd_sf_disable_hca(dev, sf->hw_fn_id); + if (err) + return err; + sf->state = DEVLINK_PORT_FUNCTION_STATE_INACTIVE; + return 0; +} + +static int mlx5_sf_state_set(struct mlx5_core_dev *dev, struct mlx5_sf *sf, + enum devlink_port_function_state state) +{ + int err; + + if (sf->state == state) + return 0; + if (state == DEVLINK_PORT_FUNCTION_STATE_ACTIVE) + err = mlx5_sf_activate(dev, sf); + else if (state == DEVLINK_PORT_FUNCTION_STATE_INACTIVE) + err = mlx5_sf_deactivate(dev, sf); + else + err = -EINVAL; + return err; +} + +int mlx5_devlink_sf_port_fn_state_set(struct devlink *devlink, struct devlink_port *dl_port, + enum devlink_port_function_state state, + struct netlink_ext_ack *extack) +{ + struct mlx5_core_dev *dev = devlink_priv(devlink); + struct mlx5_sf_table *table; + struct mlx5_sf *sf; + int err; + + table = mlx5_sf_table_try_get(dev); + if (!table) { + NL_SET_ERR_MSG_MOD(extack, + "Port state set is only supported in eswitch switchdev mode or SF ports are disabled."); + return -EOPNOTSUPP; + } + sf = mlx5_sf_lookup_by_index(table, dl_port->index); + if (!sf) { + err = -ENODEV; + goto out; + } + + err = mlx5_sf_state_set(dev, sf, state); +out: + mlx5_sf_table_put(table); + return err; +} + static int mlx5_sf_add(struct mlx5_core_dev *dev, struct mlx5_sf_table *table, const struct devlink_port_new_attrs *new_attr, struct netlink_ext_ack *extack) @@ -184,6 +307,10 @@ static void mlx5_sf_del(struct mlx5_core_dev *dev, struct mlx5_sf_table *table, { struct mlx5_eswitch *esw = dev->priv.eswitch; + if (sf->state == DEVLINK_PORT_FUNCTION_STATE_ACTIVE) { + mlx5_sf_dev_del(dev, sf->sw_id); + mlx5_cmd_sf_disable_hca(dev, sf->hw_fn_id); + } mlx5_esw_offloads_sf_vport_disable(esw, sf->hw_fn_id); mlx5_sf_free(table, sf); } @@ -343,6 +470,7 @@ int mlx5_sf_table_init(struct mlx5_core_dev *dev) table->dev = dev; ida_init(&table->fn_ida); + refcount_set(&table->refcount, 0); dev->priv.sf_table = table; table->esw_nb.notifier_call = mlx5_sf_esw_event; err = mlx5_esw_event_notifier_register(dev->priv.eswitch, &table->esw_nb); diff --git a/drivers/net/ethernet/mellanox/mlx5/core/sf/sf.h b/drivers/net/ethernet/mellanox/mlx5/core/sf/sf.h index 555b19a5880d..3d1c459b9936 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/sf/sf.h +++ b/drivers/net/ethernet/mellanox/mlx5/core/sf/sf.h @@ -25,6 +25,13 @@ int mlx5_devlink_sf_port_new(struct devlink *devlink, const struct devlink_port_ struct netlink_ext_ack *extack); int mlx5_devlink_sf_port_del(struct devlink *devlink, unsigned int port_index, struct netlink_ext_ack *extack); +int mlx5_devlink_sf_port_fn_state_get(struct devlink *devlink, struct devlink_port *dl_port, + enum devlink_port_function_state *state, + enum devlink_port_function_opstate *opstate, + struct netlink_ext_ack *extack); +int mlx5_devlink_sf_port_fn_state_set(struct devlink *devlink, struct devlink_port *dl_port, + enum devlink_port_function_state state, + struct netlink_ext_ack *extack); #else