From patchwork Tue Jun 7 11:32:43 2022
X-Patchwork-Submitter: Leon Romanovsky
X-Patchwork-Id: 12871797
From: Leon Romanovsky
To: Jason Gunthorpe
Cc: Patrisious Haddad, linux-rdma@vger.kernel.org, Mark Zhang
Subject: [PATCH rdma-next v2 1/2] RDMA/core: Add an rb_tree that stores cm_ids sorted by ifindex and remote IP
Date: Tue, 7 Jun 2022 14:32:43 +0300
Message-Id: <2fac52c86cc918c634ab24b3867d4aed992f54ec.1654601342.git.leonro@nvidia.com>
X-Mailing-List: linux-rdma@vger.kernel.org

From: Patrisious Haddad

Add to the cma a tree that keeps track of all rdma_id_private channels
created while in RoCE mode. The IDs are sorted first by their netdevice
ifindex, then by their destination IP; IDs with a matching destination
IP share a single tree node, since each node's data is a list of all
IDs with that destination IP.

The tree allows fast and efficient lookup of IDs by ifindex and IP
address, which is useful for promptly identifying relevant net events.
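For illustration, here is a minimal standalone userspace sketch of the
comparison order described above: ifindex first, then address family,
then the raw destination IP bytes. All names in this sketch are
illustrative only and do not appear in the patch:

#include <stdio.h>
#include <string.h>
#include <arpa/inet.h>

struct demo_key {
	int ifindex;
	struct sockaddr_storage dst;
};

/* Mirrors the ordering used by the tree: ifindex, family, raw IP. */
static int demo_compare(const struct demo_key *a, const struct demo_key *b)
{
	const struct sockaddr *sa = (const struct sockaddr *)&a->dst;
	const struct sockaddr *sb = (const struct sockaddr *)&b->dst;

	if (a->ifindex != b->ifindex)
		return a->ifindex > b->ifindex ? 1 : -1;
	if (sa->sa_family != sb->sa_family)
		return sa->sa_family - sb->sa_family;
	if (sa->sa_family == AF_INET)
		return memcmp(&((const struct sockaddr_in *)sa)->sin_addr,
			      &((const struct sockaddr_in *)sb)->sin_addr,
			      sizeof(struct in_addr));
	return memcmp(&((const struct sockaddr_in6 *)sa)->sin6_addr,
		      &((const struct sockaddr_in6 *)sb)->sin6_addr,
		      sizeof(struct in6_addr));
}

int main(void)
{
	struct demo_key a = { .ifindex = 2 }, b = { .ifindex = 2 };
	struct sockaddr_in *sa = (struct sockaddr_in *)&a.dst;
	struct sockaddr_in *sb = (struct sockaddr_in *)&b.dst;

	sa->sin_family = sb->sin_family = AF_INET;
	inet_pton(AF_INET, "192.0.2.1", &sa->sin_addr);
	inet_pton(AF_INET, "192.0.2.2", &sb->sin_addr);

	/* Same ifindex and family, so the raw IP bytes decide: prints < 0. */
	printf("demo_compare(a, b) = %d\n", demo_compare(&a, &b));
	return 0;
}

IDs that compare equal under this key land in the same tree node and are
chained on that node's list, which is why the kernel comparison below
takes a whole id_table_entry rather than a second address.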
Signed-off-by: Patrisious Haddad
Reviewed-by: Mark Zhang
Signed-off-by: Leon Romanovsky
---
 drivers/infiniband/core/cma.c      | 149 ++++++++++++++++++++++++++---
 drivers/infiniband/core/cma_priv.h |   1 +
 2 files changed, 138 insertions(+), 12 deletions(-)

diff --git a/drivers/infiniband/core/cma.c b/drivers/infiniband/core/cma.c
index fabca5e51e3d..0a17b1bb9547 100644
--- a/drivers/infiniband/core/cma.c
+++ b/drivers/infiniband/core/cma.c
@@ -11,6 +11,7 @@
 #include 
 #include 
 #include 
+#include <linux/rbtree.h>
 #include 
 #include 
 #include 
@@ -168,6 +169,9 @@ static struct ib_sa_client sa_client;
 static LIST_HEAD(dev_list);
 static LIST_HEAD(listen_any_list);
 static DEFINE_MUTEX(lock);
+static struct rb_root id_table = RB_ROOT;
+/* Serialize operations of id_table tree */
+static DEFINE_SPINLOCK(id_table_lock);
 static struct workqueue_struct *cma_wq;
 static unsigned int cma_pernet_id;
@@ -202,6 +206,11 @@ struct xarray *cma_pernet_xa(struct net *net, enum rdma_ucm_port_space ps)
 	}
 }
 
+struct id_table_entry {
+	struct list_head id_list;
+	struct rb_node rb_node;
+};
+
 struct cma_device {
 	struct list_head list;
 	struct ib_device *device;
@@ -420,11 +429,21 @@ static inline u8 cma_get_ip_ver(const struct cma_hdr *hdr)
 	return hdr->ip_version >> 4;
 }
 
-static inline void cma_set_ip_ver(struct cma_hdr *hdr, u8 ip_ver)
+static void cma_set_ip_ver(struct cma_hdr *hdr, u8 ip_ver)
 {
 	hdr->ip_version = (ip_ver << 4) | (hdr->ip_version & 0xF);
 }
 
+static struct sockaddr *cma_src_addr(struct rdma_id_private *id_priv)
+{
+	return (struct sockaddr *)&id_priv->id.route.addr.src_addr;
+}
+
+static inline struct sockaddr *cma_dst_addr(struct rdma_id_private *id_priv)
+{
+	return (struct sockaddr *)&id_priv->id.route.addr.dst_addr;
+}
+
 static int cma_igmp_send(struct net_device *ndev, union ib_gid *mgid, bool join)
 {
 	struct in_device *in_dev = NULL;
@@ -445,6 +464,117 @@ static int cma_igmp_send(struct net_device *ndev, union ib_gid *mgid, bool join)
 	return (in_dev) ? 0 : -ENODEV;
 }
 
+static int compare_netdev_and_ip(int ifindex_a, struct sockaddr *sa,
+				 struct id_table_entry *entry_b)
+{
+	struct rdma_id_private *id_priv = list_first_entry(
+		&entry_b->id_list, struct rdma_id_private, id_list_entry);
+	int ifindex_b = id_priv->id.route.addr.dev_addr.bound_dev_if;
+	struct sockaddr *sb = cma_dst_addr(id_priv);
+
+	if (ifindex_a != ifindex_b)
+		return (ifindex_a > ifindex_b) ?
+			1 : -1;
+
+	if (sa->sa_family != sb->sa_family)
+		return sa->sa_family - sb->sa_family;
+
+	if (sa->sa_family == AF_INET)
+		return memcmp((char *)&((struct sockaddr_in *)sa)->sin_addr,
+			      (char *)&((struct sockaddr_in *)sb)->sin_addr,
+			      sizeof(((struct sockaddr_in *)sa)->sin_addr));
+
+	return ipv6_addr_cmp(&((struct sockaddr_in6 *)sa)->sin6_addr,
+			     &((struct sockaddr_in6 *)sb)->sin6_addr);
+}
+
+static int cma_add_id_to_tree(struct rdma_id_private *node_id_priv)
+{
+	struct rb_node **new, *parent = NULL;
+	struct id_table_entry *this, *node;
+	unsigned long flags;
+	int result;
+
+	node = kzalloc(sizeof(*node), GFP_KERNEL);
+	if (!node)
+		return -ENOMEM;
+
+	spin_lock_irqsave(&id_table_lock, flags);
+	new = &id_table.rb_node;
+	while (*new) {
+		this = container_of(*new, struct id_table_entry, rb_node);
+		result = compare_netdev_and_ip(
+			node_id_priv->id.route.addr.dev_addr.bound_dev_if,
+			cma_dst_addr(node_id_priv), this);
+
+		parent = *new;
+		if (result < 0)
+			new = &((*new)->rb_left);
+		else if (result > 0)
+			new = &((*new)->rb_right);
+		else {
+			list_add_tail(&node_id_priv->id_list_entry,
+				      &this->id_list);
+			kfree(node);
+			goto unlock;
+		}
+	}
+
+	INIT_LIST_HEAD(&node->id_list);
+	list_add_tail(&node_id_priv->id_list_entry, &node->id_list);
+
+	rb_link_node(&node->rb_node, parent, new);
+	rb_insert_color(&node->rb_node, &id_table);
+
+unlock:
+	spin_unlock_irqrestore(&id_table_lock, flags);
+	return 0;
+}
+
+static struct id_table_entry *
+node_from_ndev_ip(struct rb_root *root, int ifindex, struct sockaddr *sa)
+{
+	struct rb_node *node = root->rb_node;
+	struct id_table_entry *data;
+	int result;
+
+	while (node) {
+		data = container_of(node, struct id_table_entry, rb_node);
+		result = compare_netdev_and_ip(ifindex, sa, data);
+		if (result < 0)
+			node = node->rb_left;
+		else if (result > 0)
+			node = node->rb_right;
+		else
+			return data;
+	}
+
+	return NULL;
+}
+
+static void cma_remove_id_from_tree(struct rdma_id_private *id_priv)
+{
+	struct id_table_entry *data;
+	unsigned long flags;
+
+	spin_lock_irqsave(&id_table_lock, flags);
+	if (list_empty(&id_priv->id_list_entry))
+		goto out;
+
+	data = node_from_ndev_ip(&id_table,
+				 id_priv->id.route.addr.dev_addr.bound_dev_if,
+				 cma_dst_addr(id_priv));
+	if (!data)
+		goto out;
+
+	list_del_init(&id_priv->id_list_entry);
+	if (list_empty(&data->id_list)) {
+		rb_erase(&data->rb_node, &id_table);
+		kfree(data);
+	}
out:
+	spin_unlock_irqrestore(&id_table_lock, flags);
+}
+
 static void _cma_attach_to_dev(struct rdma_id_private *id_priv,
 			       struct cma_device *cma_dev)
 {
@@ -481,16 +611,6 @@ static void cma_release_dev(struct rdma_id_private *id_priv)
 	mutex_unlock(&lock);
 }
 
-static inline struct sockaddr *cma_src_addr(struct rdma_id_private *id_priv)
-{
-	return (struct sockaddr *) &id_priv->id.route.addr.src_addr;
-}
-
-static inline struct sockaddr *cma_dst_addr(struct rdma_id_private *id_priv)
-{
-	return (struct sockaddr *) &id_priv->id.route.addr.dst_addr;
-}
-
 static inline unsigned short cma_family(struct rdma_id_private *id_priv)
 {
 	return id_priv->id.route.addr.src_addr.ss_family;
@@ -861,6 +981,7 @@ __rdma_create_id(struct net *net, rdma_cm_event_handler event_handler,
 	refcount_set(&id_priv->refcount, 1);
 	mutex_init(&id_priv->handler_mutex);
 	INIT_LIST_HEAD(&id_priv->device_item);
+	INIT_LIST_HEAD(&id_priv->id_list_entry);
 	INIT_LIST_HEAD(&id_priv->listen_list);
 	INIT_LIST_HEAD(&id_priv->mc_list);
 	get_random_bytes(&id_priv->seq_num, sizeof id_priv->seq_num);
@@ -1883,6 +2004,7 @@ static void _destroy_id(struct rdma_id_private *id_priv,
 	cma_cancel_operation(id_priv, state);
 
 	rdma_restrack_del(&id_priv->res);
+	cma_remove_id_from_tree(id_priv);
 	if (id_priv->cma_dev) {
 		if (rdma_cap_ib_cm(id_priv->id.device, 1)) {
 			if (id_priv->cm_id.ib)
@@ -3172,8 +3294,11 @@ int rdma_resolve_route(struct rdma_cm_id *id, unsigned long timeout_ms)
 	cma_id_get(id_priv);
 	if (rdma_cap_ib_sa(id->device, id->port_num))
 		ret = cma_resolve_ib_route(id_priv, timeout_ms);
-	else if (rdma_protocol_roce(id->device, id->port_num))
+	else if (rdma_protocol_roce(id->device, id->port_num)) {
 		ret = cma_resolve_iboe_route(id_priv);
+		if (!ret)
+			cma_add_id_to_tree(id_priv);
+	}
 	else if (rdma_protocol_iwarp(id->device, id->port_num))
 		ret = cma_resolve_iw_route(id_priv);
 	else
diff --git a/drivers/infiniband/core/cma_priv.h b/drivers/infiniband/core/cma_priv.h
index 757a0ef79872..b7354c94cf1b 100644
--- a/drivers/infiniband/core/cma_priv.h
+++ b/drivers/infiniband/core/cma_priv.h
@@ -64,6 +64,7 @@ struct rdma_id_private {
 		struct list_head listen_item;
 		struct list_head listen_list;
 	};
+	struct list_head id_list_entry;
 	struct cma_device *cma_dev;
 	struct list_head mc_list;

From patchwork Tue Jun 7 11:32:44 2022
X-Patchwork-Submitter: Leon Romanovsky
X-Patchwork-Id: 12871798
From: Leon Romanovsky
To: Jason Gunthorpe
Cc: Patrisious Haddad, linux-rdma@vger.kernel.org, Mark Zhang
Subject: [PATCH rdma-next v2 2/2] RDMA/core: Add a netevent notifier to cma
Date: Tue, 7 Jun 2022 14:32:44 +0300
X-Mailing-List: linux-rdma@vger.kernel.org

From: Patrisious Haddad

Add a netevent callback for cma, mainly to catch NETEVENT_NEIGH_UPDATE.

Previously, when a system with a failover MAC mechanism changed its MAC
address during a CM connection attempt, the RDMA-CM would take a long
time to disconnect and time out because of the stale MAC address.

Now, when we get a NETEVENT_NEIGH_UPDATE, we check whether it is due to
a failover MAC change and, if so, instantly destroy the CM ID and notify
the user, sparing the unnecessary wait for the timeout.
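The overall notifier shape is the standard netevent one. As a minimal
sketch of registration and the NETEVENT_NEIGH_UPDATE hook (a hypothetical
self-contained module with illustrative names, not this patch's code):

#include <linux/module.h>
#include <net/netevent.h>
#include <net/neighbour.h>

static int demo_netevent_cb(struct notifier_block *self,
			    unsigned long event, void *ctx)
{
	struct neighbour *neigh = ctx;

	/* Called from atomic context; keep this path short and non-blocking. */
	if (event != NETEVENT_NEIGH_UPDATE)
		return NOTIFY_DONE;

	pr_debug("neigh update on %s\n", neigh->dev->name);
	return NOTIFY_DONE;
}

static struct notifier_block demo_nb = {
	.notifier_call = demo_netevent_cb,
};

static int __init demo_init(void)
{
	return register_netevent_notifier(&demo_nb);
}

static void __exit demo_exit(void)
{
	unregister_netevent_notifier(&demo_nb);
}

module_init(demo_init);
module_exit(demo_exit);
MODULE_LICENSE("GPL");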
Signed-off-by: Patrisious Haddad
Reviewed-by: Mark Zhang
Signed-off-by: Leon Romanovsky
---
 drivers/infiniband/core/cma.c | 81 +++++++++++++++++++++++++++++++++++
 include/rdma/rdma_cm.h        |  1 +
 2 files changed, 82 insertions(+)

diff --git a/drivers/infiniband/core/cma.c b/drivers/infiniband/core/cma.c
index 0a17b1bb9547..46d06678dfbe 100644
--- a/drivers/infiniband/core/cma.c
+++ b/drivers/infiniband/core/cma.c
@@ -21,6 +21,7 @@
 #include 
 #include 
+#include <net/netevent.h>
 #include 
 #include 
 #include 
@@ -5047,10 +5048,87 @@ static int cma_netdev_callback(struct notifier_block *self, unsigned long event,
 	return ret;
 }
 
+static void cma_netevent_work_handler(struct work_struct *_work)
+{
+	struct rdma_id_private *id_priv =
+		container_of(_work, struct rdma_id_private, id.net_work);
+	struct rdma_cm_event event = {};
+
+	mutex_lock(&id_priv->handler_mutex);
+
+	if (READ_ONCE(id_priv->state) == RDMA_CM_DESTROYING ||
+	    READ_ONCE(id_priv->state) == RDMA_CM_DEVICE_REMOVAL)
+		goto out_unlock;
+
+	event.event = RDMA_CM_EVENT_UNREACHABLE;
+	event.status = -ETIMEDOUT;
+
+	if (cma_cm_event_handler(id_priv, &event)) {
+		__acquire(&id_priv->handler_mutex);
+		id_priv->cm_id.ib = NULL;
+		cma_id_put(id_priv);
+		destroy_id_handler_unlock(id_priv);
+		return;
+	}
+
+out_unlock:
+	mutex_unlock(&id_priv->handler_mutex);
+	cma_id_put(id_priv);
+}
+
+static int cma_netevent_callback(struct notifier_block *self,
+				 unsigned long event, void *ctx)
+{
+	struct id_table_entry *ips_node = NULL;
+	struct rdma_id_private *current_id;
+	struct neighbour *neigh = ctx;
+	unsigned long flags;
+
+	if (event != NETEVENT_NEIGH_UPDATE)
+		return NOTIFY_DONE;
+
+	spin_lock_irqsave(&id_table_lock, flags);
+	if (neigh->tbl->family == AF_INET6) {
+		struct sockaddr_in6 neigh_sock_6;
+
+		neigh_sock_6.sin6_family = AF_INET6;
+		neigh_sock_6.sin6_addr = *(struct in6_addr *)neigh->primary_key;
+		ips_node = node_from_ndev_ip(&id_table, neigh->dev->ifindex,
+					     (struct sockaddr *)&neigh_sock_6);
+	} else if (neigh->tbl->family == AF_INET) {
+		struct sockaddr_in neigh_sock_4;
+
+		neigh_sock_4.sin_family = AF_INET;
+		neigh_sock_4.sin_addr.s_addr = *(__be32 *)(neigh->primary_key);
+		ips_node = node_from_ndev_ip(&id_table, neigh->dev->ifindex,
+					     (struct sockaddr *)&neigh_sock_4);
+	} else
+		goto out;
+
+	if (!ips_node)
+		goto out;
+
+	list_for_each_entry(current_id, &ips_node->id_list, id_list_entry) {
+		if (!memcmp(current_id->id.route.addr.dev_addr.dst_dev_addr,
+			    neigh->ha, ETH_ALEN))
+			continue;
+		INIT_WORK(&current_id->id.net_work, cma_netevent_work_handler);
+		cma_id_get(current_id);
+		queue_work(cma_wq, &current_id->id.net_work);
+	}
out:
+	spin_unlock_irqrestore(&id_table_lock, flags);
+	return NOTIFY_DONE;
+}
+
 static struct notifier_block cma_nb = {
 	.notifier_call = cma_netdev_callback
 };
 
+static struct notifier_block cma_netevent_cb = {
+	.notifier_call = cma_netevent_callback
+};
+
 static void cma_send_device_removal_put(struct rdma_id_private *id_priv)
 {
 	struct rdma_cm_event event = { .event = RDMA_CM_EVENT_DEVICE_REMOVAL };
@@ -5273,6 +5351,7 @@ static int __init cma_init(void)
 
 	ib_sa_register_client(&sa_client);
 	register_netdevice_notifier(&cma_nb);
+	register_netevent_notifier(&cma_netevent_cb);
 
 	ret = ib_register_client(&cma_client);
 	if (ret)
@@ -5287,6 +5366,7 @@ static int __init cma_init(void)
 err_ib:
 	ib_unregister_client(&cma_client);
 err:
+	unregister_netevent_notifier(&cma_netevent_cb);
 	unregister_netdevice_notifier(&cma_nb);
 	ib_sa_unregister_client(&sa_client);
 	unregister_pernet_subsys(&cma_pernet_operations);
@@ -5299,6 +5379,7 @@ static void __exit cma_cleanup(void)
 {
 	cma_configfs_exit();
 	ib_unregister_client(&cma_client);
+	unregister_netevent_notifier(&cma_netevent_cb);
 	unregister_netdevice_notifier(&cma_nb);
 	ib_sa_unregister_client(&sa_client);
 	unregister_pernet_subsys(&cma_pernet_operations);
diff --git a/include/rdma/rdma_cm.h b/include/rdma/rdma_cm.h
index d989f030fae0..5b18e2e36ee6 100644
--- a/include/rdma/rdma_cm.h
+++ b/include/rdma/rdma_cm.h
@@ -108,6 +108,7 @@ struct rdma_cm_id {
 	enum rdma_ucm_port_space ps;
 	enum ib_qp_type qp_type;
 	u32 port_num;
+	struct work_struct net_work;
 };
 
 struct rdma_cm_id *
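A note on the deferral in cma_netevent_callback() above: the netevent
notifier runs in atomic context (and the ID list is walked under
id_table_lock), so each matching ID is handed to cma_wq via queue_work()
and the blocking event delivery happens later in
cma_netevent_work_handler(). A minimal sketch of that pattern, as a
hypothetical self-contained module with illustrative names:

#include <linux/module.h>
#include <linux/workqueue.h>

static struct workqueue_struct *demo_wq;
static struct work_struct demo_work;

static void demo_work_handler(struct work_struct *work)
{
	/* Process context: taking mutexes or calling handlers is safe here. */
	pr_info("demo: event handled outside atomic context\n");
}

/* Imagine this being called under a spinlock, as in the notifier. */
static void demo_event_in_atomic_context(void)
{
	queue_work(demo_wq, &demo_work);
}

static int __init demo_init(void)
{
	demo_wq = alloc_ordered_workqueue("demo_wq", 0);
	if (!demo_wq)
		return -ENOMEM;
	INIT_WORK(&demo_work, demo_work_handler);
	demo_event_in_atomic_context();
	return 0;
}

static void __exit demo_exit(void)
{
	destroy_workqueue(demo_wq);	/* drains pending work first */
}

module_init(demo_init);
module_exit(demo_exit);
MODULE_LICENSE("GPL");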