From patchwork Tue Dec 20 03:21:41 2022 Content-Type: text/plain; charset="utf-8" MIME-Version: 1.0 Content-Transfer-Encoding: 7bit X-Patchwork-Submitter: Wen Gu X-Patchwork-Id: 13077485 X-Patchwork-Delegate: kuba@kernel.org Return-Path: X-Spam-Checker-Version: SpamAssassin 3.4.0 (2014-02-07) on aws-us-west-2-korg-lkml-1.web.codeaurora.org Received: from vger.kernel.org (vger.kernel.org [23.128.96.18]) by smtp.lore.kernel.org (Postfix) with ESMTP id 86E22C4332F for ; Tue, 20 Dec 2022 03:22:25 +0000 (UTC) Received: (majordomo@vger.kernel.org) by vger.kernel.org via listexpand id S233035AbiLTDWW (ORCPT ); Mon, 19 Dec 2022 22:22:22 -0500 Received: from lindbergh.monkeyblade.net ([23.128.96.19]:52892 "EHLO lindbergh.monkeyblade.net" rhost-flags-OK-OK-OK-OK) by vger.kernel.org with ESMTP id S233036AbiLTDWB (ORCPT ); Mon, 19 Dec 2022 22:22:01 -0500 Received: from out30-57.freemail.mail.aliyun.com (out30-57.freemail.mail.aliyun.com [115.124.30.57]) by lindbergh.monkeyblade.net (Postfix) with ESMTPS id ADE3E13F15; Mon, 19 Dec 2022 19:21:58 -0800 (PST) X-Alimail-AntiSpam: AC=PASS;BC=-1|-1;BR=01201311R531e4;CH=green;DM=||false|;DS=||;FP=0|-1|-1|-1|0|-1|-1|-1;HT=ay29a033018046050;MF=guwen@linux.alibaba.com;NM=1;PH=DS;RN=10;SR=0;TI=SMTPD_---0VXjiYcG_1671506513; Received: from localhost(mailfrom:guwen@linux.alibaba.com fp:SMTPD_---0VXjiYcG_1671506513) by smtp.aliyun-inc.com; Tue, 20 Dec 2022 11:21:55 +0800 From: Wen Gu To: kgraul@linux.ibm.com, wenjia@linux.ibm.com, jaka@linux.ibm.com, davem@davemloft.net, edumazet@google.com, kuba@kernel.org, pabeni@redhat.com Cc: linux-s390@vger.kernel.org, netdev@vger.kernel.org, linux-kernel@vger.kernel.org Subject: [RFC PATCH net-next v2 1/5] net/smc: introduce SMC-D loopback device Date: Tue, 20 Dec 2022 11:21:41 +0800 Message-Id: <1671506505-104676-2-git-send-email-guwen@linux.alibaba.com> X-Mailer: git-send-email 1.8.3.1 In-Reply-To: <1671506505-104676-1-git-send-email-guwen@linux.alibaba.com> References: 
<1671506505-104676-1-git-send-email-guwen@linux.alibaba.com> Precedence: bulk List-ID: X-Mailing-List: netdev@vger.kernel.org X-Patchwork-Delegate: kuba@kernel.org X-Patchwork-State: RFC This patch introduces a kind of loopback device for SMC-D, thus enabling the SMC communication between two local sockets in one kernel. The loopback device supports basic capabilities defined by SMC-D, including registering DMB, unregistering DMB and moving data. Considering that there is no ism device on other servers except IBM z13, the loopback device can be used as a dummy device to test SMC-D logic for the broad community. Signed-off-by: Wen Gu --- include/net/smc.h | 1 + net/smc/Makefile | 2 +- net/smc/af_smc.c | 12 ++- net/smc/smc_cdc.c | 6 ++ net/smc/smc_cdc.h | 1 + net/smc/smc_loopback.c | 282 +++++++++++++++++++++++++++++++++++++++++++++++++ net/smc/smc_loopback.h | 59 +++++++++++ 7 files changed, 361 insertions(+), 2 deletions(-) create mode 100644 net/smc/smc_loopback.c create mode 100644 net/smc/smc_loopback.h diff --git a/include/net/smc.h b/include/net/smc.h index c926d33..7699f97 100644 --- a/include/net/smc.h +++ b/include/net/smc.h @@ -93,6 +93,7 @@ struct smcd_dev { atomic_t lgr_cnt; wait_queue_head_t lgrs_deleted; u8 going_away : 1; + u8 is_loopback : 1; }; struct smcd_dev *smcd_alloc_dev(struct device *parent, const char *name, diff --git a/net/smc/Makefile b/net/smc/Makefile index 875efcd..a8c3711 100644 --- a/net/smc/Makefile +++ b/net/smc/Makefile @@ -4,5 +4,5 @@ obj-$(CONFIG_SMC) += smc.o obj-$(CONFIG_SMC_DIAG) += smc_diag.o smc-y := af_smc.o smc_pnet.o smc_ib.o smc_clc.o smc_core.o smc_wr.o smc_llc.o smc-y += smc_cdc.o smc_tx.o smc_rx.o smc_close.o smc_ism.o smc_netlink.o smc_stats.o -smc-y += smc_tracepoint.o +smc-y += smc_tracepoint.o smc_loopback.o smc-$(CONFIG_SYSCTL) += smc_sysctl.o diff --git a/net/smc/af_smc.c b/net/smc/af_smc.c index e12d4fa..9546c02 100644 --- a/net/smc/af_smc.c +++ b/net/smc/af_smc.c @@ -52,6 +52,7 @@ #include "smc_stats.h"
#include "smc_tracepoint.h" #include "smc_sysctl.h" +#include "smc_loopback.h" static DEFINE_MUTEX(smc_server_lgr_pending); /* serialize link group * creation on server @@ -3451,15 +3452,23 @@ static int __init smc_init(void) goto out_sock; } + rc = smc_loopback_init(); + if (rc) { + pr_err("%s: smc_loopback_init fails with %d\n", __func__, rc); + goto out_ib; + } + rc = tcp_register_ulp(&smc_ulp_ops); if (rc) { pr_err("%s: tcp_ulp_register fails with %d\n", __func__, rc); - goto out_ib; + goto out_lo; } static_branch_enable(&tcp_have_smc); return 0; +out_lo: + smc_loopback_exit(); out_ib: smc_ib_unregister_client(); out_sock: @@ -3494,6 +3503,7 @@ static void __exit smc_exit(void) tcp_unregister_ulp(&smc_ulp_ops); sock_unregister(PF_SMC); smc_core_exit(); + smc_loopback_exit(); smc_ib_unregister_client(); destroy_workqueue(smc_close_wq); destroy_workqueue(smc_tcp_ls_wq); diff --git a/net/smc/smc_cdc.c b/net/smc/smc_cdc.c index 53f63bf..61f5ff7 100644 --- a/net/smc/smc_cdc.c +++ b/net/smc/smc_cdc.c @@ -408,6 +408,12 @@ static void smc_cdc_msg_recv(struct smc_sock *smc, struct smc_cdc_msg *cdc) static void smcd_cdc_rx_tsklet(struct tasklet_struct *t) { struct smc_connection *conn = from_tasklet(conn, t, rx_tsklet); + + smcd_cdc_rx_handler(conn); +} + +void smcd_cdc_rx_handler(struct smc_connection *conn) +{ struct smcd_cdc_msg *data_cdc; struct smcd_cdc_msg cdc; struct smc_sock *smc; diff --git a/net/smc/smc_cdc.h b/net/smc/smc_cdc.h index 696cc11..11559d4 100644 --- a/net/smc/smc_cdc.h +++ b/net/smc/smc_cdc.h @@ -301,5 +301,6 @@ int smcr_cdc_msg_send_validation(struct smc_connection *conn, struct smc_wr_buf *wr_buf); int smc_cdc_init(void) __init; void smcd_cdc_rx_init(struct smc_connection *conn); +void smcd_cdc_rx_handler(struct smc_connection *conn); #endif /* SMC_CDC_H */ diff --git a/net/smc/smc_loopback.c b/net/smc/smc_loopback.c new file mode 100644 index 0000000..973382a --- /dev/null +++ b/net/smc/smc_loopback.c @@ -0,0 +1,282 @@ +// 
SPDX-License-Identifier: GPL-2.0 +/* + * Shared Memory Communications Direct over loopback device. + * + * Provide a SMC-D loopback dummy device. + * + * Copyright (c) 2022, Alibaba Inc. + * + * Author: Wen Gu + * Tony Lu + * + */ + +#include +#include +#include + +#include "smc_cdc.h" +#include "smc_loopback.h" + +#define DRV_NAME "smc_lodev" + +struct lo_dev *lo_dev; + +static struct lo_systemeid LO_SYSTEM_EID = { + .seid_string = "SMC-SYSZ-LOSEID000000000", + .serial_number = "0000", + .type = "0000", +}; + +static int lo_query_rgid(struct smcd_dev *smcd, u64 rgid, u32 vid_valid, + u32 vid) +{ + struct lo_dev *ldev = smcd->priv; + + /* return local gid */ + if (!ldev || rgid != ldev->lgid) + return -ENETUNREACH; + return 0; +} + +static int lo_register_dmb(struct smcd_dev *smcd, struct smcd_dmb *dmb) +{ + struct lo_dev *ldev = smcd->priv; + struct lo_dmb_node *dmb_node; + int sba_idx, rc; + + /* check space for new dmb */ + for_each_clear_bit(sba_idx, ldev->sba_idx_mask, LODEV_MAX_DMBS) { + if (!test_and_set_bit(sba_idx, ldev->sba_idx_mask)) + break; + } + if (sba_idx == LODEV_MAX_DMBS) + return -ENOSPC; + + dmb_node = kzalloc(sizeof(*dmb_node), GFP_KERNEL); + if (!dmb_node) { + rc = -ENOMEM; + goto err_bit; + } + + dmb_node->sba_idx = sba_idx; + dmb_node->cpu_addr = kzalloc(dmb->dmb_len, GFP_KERNEL | + __GFP_NOWARN | __GFP_NORETRY | + __GFP_NOMEMALLOC); + if (!dmb_node->cpu_addr) { + rc = -ENOMEM; + goto err_node; + } + dmb_node->len = dmb->dmb_len; + + /* TODO: token is random but not exclusive ! + * suppose to find token in dmb hask table, if has this token + * already, then generate another one. 
+ */ + /* add new dmb into hash table */ + get_random_bytes(&dmb_node->token, sizeof(dmb_node->token)); + write_lock(&ldev->dmb_ht_lock); + hash_add(ldev->dmb_ht, &dmb_node->list, dmb_node->token); + write_unlock(&ldev->dmb_ht_lock); + + dmb->sba_idx = dmb_node->sba_idx; + dmb->dmb_tok = dmb_node->token; + dmb->cpu_addr = dmb_node->cpu_addr; + dmb->dma_addr = dmb_node->dma_addr; + dmb->dmb_len = dmb_node->len; + + return 0; + +err_node: + kfree(dmb_node); +err_bit: + clear_bit(sba_idx, ldev->sba_idx_mask); + return rc; +} + +static int lo_unregister_dmb(struct smcd_dev *smcd, struct smcd_dmb *dmb) +{ + struct lo_dmb_node *dmb_node = NULL, *tmp_node; + struct lo_dev *ldev = smcd->priv; + + /* remove dmb from hash table */ + write_lock(&ldev->dmb_ht_lock); + hash_for_each_possible(ldev->dmb_ht, tmp_node, list, dmb->dmb_tok) { + if (tmp_node->token == dmb->dmb_tok) { + dmb_node = tmp_node; + break; + } + } + if (!dmb_node) { + write_unlock(&ldev->dmb_ht_lock); + return -EINVAL; + } + hash_del(&dmb_node->list); + write_unlock(&ldev->dmb_ht_lock); + + clear_bit(dmb_node->sba_idx, ldev->sba_idx_mask); + kfree(dmb_node->cpu_addr); + kfree(dmb_node); + + return 0; +} + +static int lo_add_vlan_id(struct smcd_dev *smcd, u64 vlan_id) +{ + return 0; +} + +static int lo_del_vlan_id(struct smcd_dev *smcd, u64 vlan_id) +{ + return 0; +} + +static int lo_set_vlan_required(struct smcd_dev *smcd) +{ + return 0; +} + +static int lo_reset_vlan_required(struct smcd_dev *smcd) +{ + return 0; +} + +static int lo_signal_ieq(struct smcd_dev *smcd, u64 rgid, u32 trigger_irq, + u32 event_code, u64 info) +{ + return 0; +} + +static int lo_move_data(struct smcd_dev *smcd, u64 dmb_tok, unsigned int idx, + bool sf, unsigned int offset, void *data, + unsigned int size) +{ + struct lo_dmb_node *rmb_node = NULL, *tmp_node; + struct lo_dev *ldev = smcd->priv; + + read_lock(&ldev->dmb_ht_lock); + hash_for_each_possible(ldev->dmb_ht, tmp_node, list, dmb_tok) { + if (tmp_node->token == dmb_tok) { + 
rmb_node = tmp_node; + break; + } + } + if (!rmb_node) { + read_unlock(&ldev->dmb_ht_lock); + return -EINVAL; + } + read_unlock(&ldev->dmb_ht_lock); + + memcpy((char *)rmb_node->cpu_addr + offset, data, size); + + if (sf) { + struct smc_connection *conn = + smcd->conn[rmb_node->sba_idx]; + + if (conn && !conn->killed) + smcd_cdc_rx_handler(conn); + } + return 0; +} + +static u8 *lo_get_system_eid(void) +{ + return &LO_SYSTEM_EID.seid_string[0]; +} + +static u16 lo_get_chid(struct smcd_dev *smcd) +{ + return 0; +} + +static const struct smcd_ops lo_ops = { + .query_remote_gid = lo_query_rgid, + .register_dmb = lo_register_dmb, + .unregister_dmb = lo_unregister_dmb, + .add_vlan_id = lo_add_vlan_id, + .del_vlan_id = lo_del_vlan_id, + .set_vlan_required = lo_set_vlan_required, + .reset_vlan_required = lo_reset_vlan_required, + .signal_event = lo_signal_ieq, + .move_data = lo_move_data, + .get_system_eid = lo_get_system_eid, + .get_chid = lo_get_chid, +}; + +static int lo_dev_init(struct lo_dev *ldev) +{ + struct smcd_dev *smcd = ldev->smcd; + + /* smcd related */ + smcd->is_loopback = 1; + smcd->priv = ldev; + get_random_bytes(&smcd->local_gid, sizeof(smcd->local_gid)); + + /* ldev related */ + /* TODO: lgid is random but not exclusive ! 
+ */ + ldev->lgid = smcd->local_gid; + rwlock_init(&ldev->dmb_ht_lock); + hash_init(ldev->dmb_ht); + + return smcd_register_dev(smcd); +} + +static int lo_dev_probe(void) +{ + struct lo_dev *ldev; + int ret; + + ldev = kzalloc(sizeof(*ldev), GFP_KERNEL); + if (!ldev) + return -ENOMEM; + + ldev->smcd = smcd_alloc_dev(NULL, "smcd-loopback-dev", + &lo_ops, LODEV_MAX_DMBS); + if (!ldev->smcd) { + ret = -ENOMEM; + goto err_ldev; + } + + ret = lo_dev_init(ldev); + if (ret) + goto err_smcd; + + lo_dev = ldev; + return 0; + +err_smcd: + smcd_free_dev(ldev->smcd); +err_ldev: + kfree(ldev); + return ret; +} + +static void lo_dev_exit(struct lo_dev *ldev) +{ + smcd_unregister_dev(ldev->smcd); +} + +static void lo_dev_remove(void) +{ + if (!lo_dev) + return; + + lo_dev_exit(lo_dev); + smcd_free_dev(lo_dev->smcd); + kfree(lo_dev); +} + +int smc_loopback_init(void) +{ + /* TODO: now lo_dev is a global device shared by + * the whole kernel, and can't be referred to by + * smc-tools command 'smcd dev'. Need to be improved. + */ + return lo_dev_probe(); +} + +void smc_loopback_exit(void) +{ + lo_dev_remove(); +} diff --git a/net/smc/smc_loopback.h b/net/smc/smc_loopback.h new file mode 100644 index 0000000..d7f7815 --- /dev/null +++ b/net/smc/smc_loopback.h @@ -0,0 +1,59 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +/* + * Shared Memory Communications Direct over loopback device. + * + * Provide a SMC-D loopback dummy device. + * + * Copyright (c) 2022, Alibaba Inc. 
+ * + * Author: Wen Gu + * Tony Lu + * + */ + +#ifndef _SMC_LOOPBACK_H +#define _SMC_LOOPBACK_H + +#include +#include +#include +#include +#include + +#include "smc_core.h" + +#define LODEV_MAX_DMBS 5000 +#define LODEV_MAX_DMBS_BUCKETS 16 + +struct lo_dmb_node { + struct hlist_node list; + u64 token; + u32 len; + u32 sba_idx; + void *cpu_addr; + dma_addr_t dma_addr; +}; + +struct lo_dev { + struct smcd_dev *smcd; + /* priv data */ + u64 lgid; + DECLARE_BITMAP(sba_idx_mask, LODEV_MAX_DMBS); + rwlock_t dmb_ht_lock; + DECLARE_HASHTABLE(dmb_ht, LODEV_MAX_DMBS_BUCKETS); +}; + +struct lo_systemeid { + u8 seid_string[24]; + u8 serial_number[4]; + u8 type[4]; +}; + +/* smcd loopback dev*/ +extern struct lo_dev *lo_dev; + +int smc_loopback_init(void); +void smc_loopback_exit(void); + +#endif /* _SMC_LOOPBACK_H */ + From patchwork Tue Dec 20 03:21:42 2022 Content-Type: text/plain; charset="utf-8" MIME-Version: 1.0 Content-Transfer-Encoding: 7bit X-Patchwork-Submitter: Wen Gu X-Patchwork-Id: 13077484 X-Patchwork-Delegate: kuba@kernel.org Return-Path: X-Spam-Checker-Version: SpamAssassin 3.4.0 (2014-02-07) on aws-us-west-2-korg-lkml-1.web.codeaurora.org Received: from vger.kernel.org (vger.kernel.org [23.128.96.18]) by smtp.lore.kernel.org (Postfix) with ESMTP id C0E91C4708D for ; Tue, 20 Dec 2022 03:22:23 +0000 (UTC) Received: (majordomo@vger.kernel.org) by vger.kernel.org via listexpand id S233015AbiLTDWS (ORCPT ); Mon, 19 Dec 2022 22:22:18 -0500 Received: from lindbergh.monkeyblade.net ([23.128.96.19]:52868 "EHLO lindbergh.monkeyblade.net" rhost-flags-OK-OK-OK-OK) by vger.kernel.org with ESMTP id S233053AbiLTDWC (ORCPT ); Mon, 19 Dec 2022 22:22:02 -0500 Received: from out30-8.freemail.mail.aliyun.com (out30-8.freemail.mail.aliyun.com [115.124.30.8]) by lindbergh.monkeyblade.net (Postfix) with ESMTPS id 0D560A1A1; Mon, 19 Dec 2022 19:22:00 -0800 (PST) X-Alimail-AntiSpam: 
AC=PASS;BC=-1|-1;BR=01201311R101e4;CH=green;DM=||false|;DS=||;FP=0|-1|-1|-1|0|-1|-1|-1;HT=ay29a033018045170;MF=guwen@linux.alibaba.com;NM=1;PH=DS;RN=10;SR=0;TI=SMTPD_---0VXjKPrz_1671506516; Received: from localhost(mailfrom:guwen@linux.alibaba.com fp:SMTPD_---0VXjKPrz_1671506516) by smtp.aliyun-inc.com; Tue, 20 Dec 2022 11:21:58 +0800 From: Wen Gu To: kgraul@linux.ibm.com, wenjia@linux.ibm.com, jaka@linux.ibm.com, davem@davemloft.net, edumazet@google.com, kuba@kernel.org, pabeni@redhat.com Cc: linux-s390@vger.kernel.org, netdev@vger.kernel.org, linux-kernel@vger.kernel.org Subject: [RFC PATCH net-next v2 2/5] net/smc: choose loopback device in SMC-D communication Date: Tue, 20 Dec 2022 11:21:42 +0800 Message-Id: <1671506505-104676-3-git-send-email-guwen@linux.alibaba.com> X-Mailer: git-send-email 1.8.3.1 In-Reply-To: <1671506505-104676-1-git-send-email-guwen@linux.alibaba.com> References: <1671506505-104676-1-git-send-email-guwen@linux.alibaba.com> Precedence: bulk List-ID: X-Mailing-List: netdev@vger.kernel.org X-Patchwork-Delegate: kuba@kernel.org X-Patchwork-State: RFC This patch allows SMC-D to use the loopback device. Note, however, that the implementation here is quite simple and informal. The loopback device is always chosen first, and fallback happens if loopback communication is impossible. It still needs to be discussed how the client indicates to the peer that multiple SMC-D devices are available and how the server picks a suitable one. Signed-off-by: Wen Gu --- net/smc/af_smc.c | 55 +++++++++++++++++++++++++++++++++++++++++++++++++------ net/smc/smc_clc.c | 4 +++- net/smc/smc_ism.c | 3 ++- 3 files changed, 54 insertions(+), 8 deletions(-) diff --git a/net/smc/af_smc.c b/net/smc/af_smc.c index 9546c02..b9884c8 100644 --- a/net/smc/af_smc.c +++ b/net/smc/af_smc.c @@ -979,6 +979,28 @@ static int smc_find_ism_device(struct smc_sock *smc, struct smc_init_info *ini) return 0; } +/* check if there is a lo device available for this connection.
*/ +static int smc_find_lo_device(struct smc_sock *smc, struct smc_init_info *ini) +{ + struct smcd_dev *sdev; + + mutex_lock(&smcd_dev_list.mutex); + list_for_each_entry(sdev, &smcd_dev_list.list, list) { + if (sdev->is_loopback && !sdev->going_away && + (!ini->ism_peer_gid[0] || + !smc_ism_cantalk(ini->ism_peer_gid[0], ini->vlan_id, + sdev))) { + ini->ism_dev[0] = sdev; + break; + } + } + mutex_unlock(&smcd_dev_list.mutex); + if (!ini->ism_dev[0]) + return SMC_CLC_DECL_NOSMCDDEV; + ini->ism_chid[0] = smc_ism_get_chid(ini->ism_dev[0]); + return 0; +} + /* is chid unique for the ism devices that are already determined? */ static bool smc_find_ism_v2_is_unique_chid(u16 chid, struct smc_init_info *ini, int cnt) @@ -1044,10 +1066,20 @@ static int smc_find_proposal_devices(struct smc_sock *smc, { int rc = 0; - /* check if there is an ism device available */ + /* TODO: + * How to indicate to peer if ism device and loopback + * device are both available ? + * + * The RFC patch hasn't resolved this, just simply always + * chooses loopback device first, and fallback if loopback + * communication is impossible. + * + */ + /* check if there is an ism or loopback device available */ if (!(ini->smcd_version & SMC_V1) || - smc_find_ism_device(smc, ini) || - smc_connect_ism_vlan_setup(smc, ini)) + (smc_find_lo_device(smc, ini) && + (smc_find_ism_device(smc, ini) || + smc_connect_ism_vlan_setup(smc, ini)))) ini->smcd_version &= ~SMC_V1; /* else ISM V1 is supported for this connection */ @@ -2135,9 +2167,20 @@ static void smc_find_ism_v1_device_serv(struct smc_sock *new_smc, goto not_found; ini->is_smcd = true; /* prepare ISM check */ ini->ism_peer_gid[0] = ntohll(pclc_smcd->ism.gid); - rc = smc_find_ism_device(new_smc, ini); - if (rc) - goto not_found; + + /* TODO: + * How to know that peer has both loopback and ism device ? + * + * The RFC patch hasn't resolved this, simply tries loopback + * device first, then ism device. 
+ */ + /* find available loopback or ism device */ + if (smc_find_lo_device(new_smc, ini)) { + rc = smc_find_ism_device(new_smc, ini); + if (rc) + goto not_found; + } + ini->ism_selected = 0; rc = smc_listen_ism_init(new_smc, ini); if (!rc) diff --git a/net/smc/smc_clc.c b/net/smc/smc_clc.c index dfb9797..3887692 100644 --- a/net/smc/smc_clc.c +++ b/net/smc/smc_clc.c @@ -486,7 +486,9 @@ static int smc_clc_prfx_set4_rcu(struct dst_entry *dst, __be32 ipv4, return -ENODEV; in_dev_for_each_ifa_rcu(ifa, in_dev) { - if (!inet_ifa_match(ipv4, ifa)) + /* add loopback support */ + if (inet_addr_type(dev_net(dst->dev), ipv4) != RTN_LOCAL && + !inet_ifa_match(ipv4, ifa)) continue; prop->prefix_len = inet_mask_len(ifa->ifa_mask); prop->outgoing_subnet = ifa->ifa_address & ifa->ifa_mask; diff --git a/net/smc/smc_ism.c b/net/smc/smc_ism.c index 911fe08..1d10435 100644 --- a/net/smc/smc_ism.c +++ b/net/smc/smc_ism.c @@ -227,7 +227,8 @@ static int smc_nl_handle_smcd_dev(struct smcd_dev *smcd, if (nla_put_u8(skb, SMC_NLA_DEV_IS_CRIT, use_cnt > 0)) goto errattr; memset(&smc_pci_dev, 0, sizeof(smc_pci_dev)); - smc_set_pci_values(to_pci_dev(smcd->dev.parent), &smc_pci_dev); + if (!smcd->is_loopback) + smc_set_pci_values(to_pci_dev(smcd->dev.parent), &smc_pci_dev); if (nla_put_u32(skb, SMC_NLA_DEV_PCI_FID, smc_pci_dev.pci_fid)) goto errattr; if (nla_put_u16(skb, SMC_NLA_DEV_PCI_CHID, smc_pci_dev.pci_pchid)) From patchwork Tue Dec 20 03:21:43 2022 Content-Type: text/plain; charset="utf-8" MIME-Version: 1.0 Content-Transfer-Encoding: 7bit X-Patchwork-Submitter: Wen Gu X-Patchwork-Id: 13077487 X-Patchwork-Delegate: kuba@kernel.org Return-Path: X-Spam-Checker-Version: SpamAssassin 3.4.0 (2014-02-07) on aws-us-west-2-korg-lkml-1.web.codeaurora.org Received: from vger.kernel.org (vger.kernel.org [23.128.96.18]) by smtp.lore.kernel.org (Postfix) with ESMTP id 0B36EC4167B for ; Tue, 20 Dec 2022 03:22:30 +0000 (UTC) Received: (majordomo@vger.kernel.org) by vger.kernel.org via listexpand id 
S232912AbiLTDW1 (ORCPT ); Mon, 19 Dec 2022 22:22:27 -0500 Received: from lindbergh.monkeyblade.net ([23.128.96.19]:52928 "EHLO lindbergh.monkeyblade.net" rhost-flags-OK-OK-OK-OK) by vger.kernel.org with ESMTP id S233079AbiLTDWF (ORCPT ); Mon, 19 Dec 2022 22:22:05 -0500 Received: from out30-8.freemail.mail.aliyun.com (out30-8.freemail.mail.aliyun.com [115.124.30.8]) by lindbergh.monkeyblade.net (Postfix) with ESMTPS id 1548813CFB; Mon, 19 Dec 2022 19:22:02 -0800 (PST) X-Alimail-AntiSpam: AC=PASS;BC=-1|-1;BR=01201311R121e4;CH=green;DM=||false|;DS=||;FP=0|-1|-1|-1|0|-1|-1|-1;HT=ay29a033018045192;MF=guwen@linux.alibaba.com;NM=1;PH=DS;RN=10;SR=0;TI=SMTPD_---0VXjTtLD_1671506518; Received: from localhost(mailfrom:guwen@linux.alibaba.com fp:SMTPD_---0VXjTtLD_1671506518) by smtp.aliyun-inc.com; Tue, 20 Dec 2022 11:22:00 +0800 From: Wen Gu To: kgraul@linux.ibm.com, wenjia@linux.ibm.com, jaka@linux.ibm.com, davem@davemloft.net, edumazet@google.com, kuba@kernel.org, pabeni@redhat.com Cc: linux-s390@vger.kernel.org, netdev@vger.kernel.org, linux-kernel@vger.kernel.org Subject: [RFC PATCH net-next v2 3/5] net/smc: add dmb attach and detach interface Date: Tue, 20 Dec 2022 11:21:43 +0800 Message-Id: <1671506505-104676-4-git-send-email-guwen@linux.alibaba.com> X-Mailer: git-send-email 1.8.3.1 In-Reply-To: <1671506505-104676-1-git-send-email-guwen@linux.alibaba.com> References: <1671506505-104676-1-git-send-email-guwen@linux.alibaba.com> Precedence: bulk List-ID: X-Mailing-List: netdev@vger.kernel.org X-Patchwork-Delegate: kuba@kernel.org X-Patchwork-State: RFC This patch extends smcd_ops with two more operations for SMC-D devices: - attach_dmb: Attach an already registered dmb to a specific buf_desc, so that we can refer to the dmb through this buf_desc. - detach_dmb: Reverse operation of attach_dmb. Detach the dmb from the buf_desc. This interface extension prepares for avoiding the data copy from sndbuf to RMB on the SMC-D loopback device.
Signed-off-by: Wen Gu --- include/net/smc.h | 2 ++ net/smc/smc_ism.c | 36 ++++++++++++++++++++++++++ net/smc/smc_ism.h | 2 ++ net/smc/smc_loopback.c | 69 ++++++++++++++++++++++++++++++++++++++++++++++++++ net/smc/smc_loopback.h | 4 +++ 5 files changed, 113 insertions(+) diff --git a/include/net/smc.h b/include/net/smc.h index 7699f97..60a96f7 100644 --- a/include/net/smc.h +++ b/include/net/smc.h @@ -63,6 +63,8 @@ struct smcd_ops { u32 vid); int (*register_dmb)(struct smcd_dev *dev, struct smcd_dmb *dmb); int (*unregister_dmb)(struct smcd_dev *dev, struct smcd_dmb *dmb); + int (*attach_dmb)(struct smcd_dev *dev, struct smcd_dmb *dmb); + int (*detach_dmb)(struct smcd_dev *dev, u64 token); int (*add_vlan_id)(struct smcd_dev *dev, u64 vlan_id); int (*del_vlan_id)(struct smcd_dev *dev, u64 vlan_id); int (*set_vlan_required)(struct smcd_dev *dev); diff --git a/net/smc/smc_ism.c b/net/smc/smc_ism.c index 1d10435..2049388 100644 --- a/net/smc/smc_ism.c +++ b/net/smc/smc_ism.c @@ -202,6 +202,42 @@ int smc_ism_register_dmb(struct smc_link_group *lgr, int dmb_len, return rc; } +int smc_ism_attach_dmb(struct smcd_dev *dev, u64 token, + struct smc_buf_desc *dmb_desc) +{ + struct smcd_dmb dmb; + int rc = 0; + + memset(&dmb, 0, sizeof(dmb)); + dmb.dmb_tok = token; + + /* only support loopback device now */ + if (!dev->is_loopback) + return -EINVAL; + if (!dev->ops->attach_dmb) + return -EINVAL; + + rc = dev->ops->attach_dmb(dev, &dmb); + if (!rc) { + dmb_desc->sba_idx = dmb.sba_idx; + dmb_desc->token = dmb.dmb_tok; + dmb_desc->cpu_addr = dmb.cpu_addr; + dmb_desc->dma_addr = dmb.dma_addr; + dmb_desc->len = dmb.dmb_len; + } + return rc; +} + +int smc_ism_detach_dmb(struct smcd_dev *dev, u64 token) +{ + if (!dev->is_loopback) + return -EINVAL; + if (!dev->ops->detach_dmb) + return -EINVAL; + + return dev->ops->detach_dmb(dev, token); +} + static int smc_nl_handle_smcd_dev(struct smcd_dev *smcd, struct sk_buff *skb, struct netlink_callback *cb) diff --git a/net/smc/smc_ism.h 
b/net/smc/smc_ism.h index d6b2db6..9022979 100644 --- a/net/smc/smc_ism.h +++ b/net/smc/smc_ism.h @@ -38,6 +38,8 @@ struct smc_ism_vlanid { /* VLAN id set on ISM device */ int smc_ism_register_dmb(struct smc_link_group *lgr, int buf_size, struct smc_buf_desc *dmb_desc); int smc_ism_unregister_dmb(struct smcd_dev *dev, struct smc_buf_desc *dmb_desc); +int smc_ism_attach_dmb(struct smcd_dev *dev, u64 token, struct smc_buf_desc *dmb_desc); +int smc_ism_detach_dmb(struct smcd_dev *dev, u64 token); int smc_ism_signal_shutdown(struct smc_link_group *lgr); void smc_ism_get_system_eid(u8 **eid); u16 smc_ism_get_chid(struct smcd_dev *dev); diff --git a/net/smc/smc_loopback.c b/net/smc/smc_loopback.c index 973382a..bc3ff82 100644 --- a/net/smc/smc_loopback.c +++ b/net/smc/smc_loopback.c @@ -68,6 +68,7 @@ static int lo_register_dmb(struct smcd_dev *smcd, struct smcd_dmb *dmb) goto err_node; } dmb_node->len = dmb->dmb_len; + refcount_set(&dmb_node->refcnt, 1); /* TODO: token is random but not exclusive ! 
* suppose to find token in dmb hask table, if has this token @@ -78,6 +79,7 @@ static int lo_register_dmb(struct smcd_dev *smcd, struct smcd_dmb *dmb) write_lock(&ldev->dmb_ht_lock); hash_add(ldev->dmb_ht, &dmb_node->list, dmb_node->token); write_unlock(&ldev->dmb_ht_lock); + atomic_inc(&ldev->dmb_cnt); dmb->sba_idx = dmb_node->sba_idx; dmb->dmb_tok = dmb_node->token; @@ -115,9 +117,69 @@ static int lo_unregister_dmb(struct smcd_dev *smcd, struct smcd_dmb *dmb) write_unlock(&ldev->dmb_ht_lock); clear_bit(dmb_node->sba_idx, ldev->sba_idx_mask); + + /* wait for dmb refcnt equal to 0 */ + if (!refcount_dec_and_test(&dmb_node->refcnt)) + wait_event(ldev->dmbs_release, !refcount_read(&dmb_node->refcnt)); kfree(dmb_node->cpu_addr); kfree(dmb_node); + if (atomic_dec_and_test(&ldev->dmb_cnt)) + wake_up(&ldev->ldev_release); + + return 0; +} + +static int lo_attach_dmb(struct smcd_dev *smcd, struct smcd_dmb *dmb) +{ + struct lo_dmb_node *dmb_node = NULL, *tmp_node; + struct lo_dev *ldev = smcd->priv; + + /* find dmb_node according to dmb->dmb_tok */ + read_lock(&ldev->dmb_ht_lock); + hash_for_each_possible(ldev->dmb_ht, tmp_node, list, dmb->dmb_tok) { + if (tmp_node->token == dmb->dmb_tok) { + dmb_node = tmp_node; + break; + } + } + if (!dmb_node) { + read_unlock(&ldev->dmb_ht_lock); + return -EINVAL; + } + read_unlock(&ldev->dmb_ht_lock); + refcount_inc(&dmb_node->refcnt); + + /* provide dmb information */ + dmb->sba_idx = dmb_node->sba_idx; + dmb->dmb_tok = dmb_node->token; + dmb->cpu_addr = dmb_node->cpu_addr; + dmb->dma_addr = dmb_node->dma_addr; + dmb->dmb_len = dmb_node->len; + return 0; +} + +static int lo_detach_dmb(struct smcd_dev *smcd, u64 token) +{ + struct lo_dmb_node *dmb_node = NULL, *tmp_node; + struct lo_dev *ldev = smcd->priv; + + /* find dmb_node according to dmb->dmb_tok */ + read_lock(&ldev->dmb_ht_lock); + hash_for_each_possible(ldev->dmb_ht, tmp_node, list, token) { + if (tmp_node->token == token) { + dmb_node = tmp_node; + break; + } + } + if 
(!dmb_node) { + read_unlock(&ldev->dmb_ht_lock); + return -EINVAL; + } + read_unlock(&ldev->dmb_ht_lock); + + if (refcount_dec_and_test(&dmb_node->refcnt)) + wake_up_all(&ldev->dmbs_release); return 0; } @@ -193,6 +255,8 @@ static u16 lo_get_chid(struct smcd_dev *smcd) .query_remote_gid = lo_query_rgid, .register_dmb = lo_register_dmb, .unregister_dmb = lo_unregister_dmb, + .attach_dmb = lo_attach_dmb, + .detach_dmb = lo_detach_dmb, .add_vlan_id = lo_add_vlan_id, .del_vlan_id = lo_del_vlan_id, .set_vlan_required = lo_set_vlan_required, @@ -218,6 +282,9 @@ static int lo_dev_init(struct lo_dev *ldev) ldev->lgid = smcd->local_gid; rwlock_init(&ldev->dmb_ht_lock); hash_init(ldev->dmb_ht); + atomic_set(&ldev->dmb_cnt, 0); + init_waitqueue_head(&ldev->dmbs_release); + init_waitqueue_head(&ldev->ldev_release); return smcd_register_dev(smcd); } @@ -255,6 +322,8 @@ static int lo_dev_probe(void) static void lo_dev_exit(struct lo_dev *ldev) { smcd_unregister_dev(ldev->smcd); + if (atomic_read(&ldev->dmb_cnt)) + wait_event(ldev->ldev_release, !atomic_read(&ldev->dmb_cnt)); } static void lo_dev_remove(void) diff --git a/net/smc/smc_loopback.h b/net/smc/smc_loopback.h index d7f7815..f4122be 100644 --- a/net/smc/smc_loopback.h +++ b/net/smc/smc_loopback.h @@ -32,6 +32,7 @@ struct lo_dmb_node { u32 sba_idx; void *cpu_addr; dma_addr_t dma_addr; + refcount_t refcnt; }; struct lo_dev { @@ -41,6 +42,9 @@ struct lo_dev { DECLARE_BITMAP(sba_idx_mask, LODEV_MAX_DMBS); rwlock_t dmb_ht_lock; DECLARE_HASHTABLE(dmb_ht, LODEV_MAX_DMBS_BUCKETS); + atomic_t dmb_cnt; + wait_queue_head_t dmbs_release; + wait_queue_head_t ldev_release; }; struct lo_systemeid { From patchwork Tue Dec 20 03:21:44 2022 Content-Type: text/plain; charset="utf-8" MIME-Version: 1.0 Content-Transfer-Encoding: 7bit X-Patchwork-Submitter: Wen Gu X-Patchwork-Id: 13077486 X-Patchwork-Delegate: kuba@kernel.org Return-Path: X-Spam-Checker-Version: SpamAssassin 3.4.0 (2014-02-07) on aws-us-west-2-korg-lkml-1.web.codeaurora.org 
Received: from vger.kernel.org (vger.kernel.org [23.128.96.18]) by smtp.lore.kernel.org (Postfix) with ESMTP id 7E2A7C4167B for ; Tue, 20 Dec 2022 03:22:27 +0000 (UTC) Received: (majordomo@vger.kernel.org) by vger.kernel.org via listexpand id S233007AbiLTDWZ (ORCPT ); Mon, 19 Dec 2022 22:22:25 -0500 Received: from lindbergh.monkeyblade.net ([23.128.96.19]:52950 "EHLO lindbergh.monkeyblade.net" rhost-flags-OK-OK-OK-OK) by vger.kernel.org with ESMTP id S233111AbiLTDWI (ORCPT ); Mon, 19 Dec 2022 22:22:08 -0500 Received: from out199-15.us.a.mail.aliyun.com (out199-15.us.a.mail.aliyun.com [47.90.199.15]) by lindbergh.monkeyblade.net (Postfix) with ESMTPS id 2CF9AA1A1; Mon, 19 Dec 2022 19:22:05 -0800 (PST) X-Alimail-AntiSpam: AC=PASS;BC=-1|-1;BR=01201311R801e4;CH=green;DM=||false|;DS=||;FP=0|-1|-1|-1|0|-1|-1|-1;HT=ay29a033018045176;MF=guwen@linux.alibaba.com;NM=1;PH=DS;RN=10;SR=0;TI=SMTPD_---0VXjWttj_1671506520; Received: from localhost(mailfrom:guwen@linux.alibaba.com fp:SMTPD_---0VXjWttj_1671506520) by smtp.aliyun-inc.com; Tue, 20 Dec 2022 11:22:02 +0800 From: Wen Gu To: kgraul@linux.ibm.com, wenjia@linux.ibm.com, jaka@linux.ibm.com, davem@davemloft.net, edumazet@google.com, kuba@kernel.org, pabeni@redhat.com Cc: linux-s390@vger.kernel.org, netdev@vger.kernel.org, linux-kernel@vger.kernel.org Subject: [RFC PATCH net-next v2 4/5] net/smc: avoid data copy from sndbuf to peer RMB in SMC-D loopback Date: Tue, 20 Dec 2022 11:21:44 +0800 Message-Id: <1671506505-104676-5-git-send-email-guwen@linux.alibaba.com> X-Mailer: git-send-email 1.8.3.1 In-Reply-To: <1671506505-104676-1-git-send-email-guwen@linux.alibaba.com> References: <1671506505-104676-1-git-send-email-guwen@linux.alibaba.com> Precedence: bulk List-ID: X-Mailing-List: netdev@vger.kernel.org X-Patchwork-Delegate: kuba@kernel.org X-Patchwork-State: RFC This patch aims to improve SMC-D loopback performance by avoiding data copy from local sndbuf to peer RMB. 
The main idea is to let local sndbuf and peer RMB share the same physical memory. +----------+ +----------+ | socket A | | socket B | +----------+ +----------+ | ^ | +---------+ | regard as | | ----------| local sndbuf | B's | regard as | | RMB | local RMB |-------> | | +---------+ For connections using smcd loopback device: 1. Only create and maintain local RMB. a. Create or reuse RMB when create connection; b. Free RMB when lgr free; 2. Attach local sndbuf to peer RMB. a. sndbuf_desc describes the same memory region as peer rmb_desc. b. sndbuf_desc is exclusive to specific connection and won't be added to lgr buffer pool for reuse. c. sndbuf is attached to peer RMB when receive remote token after CLC accept/confirm message. d. sndbuf is detached from peer RMB when connection is freed. Therefore, the data copied from the userspace to local sndbuf directly reaches the peer RMB. Signed-off-by: Wen Gu --- net/smc/af_smc.c | 23 +++++++++++++++++++- net/smc/smc_core.c | 62 ++++++++++++++++++++++++++++++++++++++++++++++++++++++ net/smc/smc_core.h | 2 ++ 3 files changed, 86 insertions(+), 1 deletion(-) diff --git a/net/smc/af_smc.c b/net/smc/af_smc.c index b9884c8..c7de566 100644 --- a/net/smc/af_smc.c +++ b/net/smc/af_smc.c @@ -1073,7 +1073,6 @@ static int smc_find_proposal_devices(struct smc_sock *smc, * The RFC patch hasn't resolved this, just simply always * chooses loopback device first, and fallback if loopback * communication is impossible. - * */ /* check if there is an ism or loopback device available */ if (!(ini->smcd_version & SMC_V1) || @@ -1397,6 +1396,17 @@ static int smc_connect_ism(struct smc_sock *smc, } smc_conn_save_peer_info(smc, aclc); + + /* special for smcd loopback + * conns above smcd loopback dev only create their rmbs. + * their sndbufs are 'maps' of peer rmbs. 
+ */ + if (smc->conn.lgr->smcd->is_loopback) { + rc = smcd_buf_attach(&smc->conn); + if (rc) + goto connect_abort; + smc->sk.sk_sndbuf = 2 * (smc->conn.sndbuf_desc->len); + } smc_close_init(smc); smc_rx_init(smc); smc_tx_init(smc); @@ -2464,6 +2474,17 @@ static void smc_listen_work(struct work_struct *work) mutex_unlock(&smc_server_lgr_pending); } smc_conn_save_peer_info(new_smc, cclc); + + /* special for smcd loopback + * conns above smcd loopback dev only create their rmbs. + * their sndbufs are 'maps' of peer rmbs. + */ + if (ini->is_smcd && new_smc->conn.lgr->smcd->is_loopback) { + rc = smcd_buf_attach(&new_smc->conn); + if (rc) + goto out_decl; + new_smc->sk.sk_sndbuf = 2 * (new_smc->conn.sndbuf_desc->len); + } smc_listen_out_connected(new_smc); SMC_STAT_SERV_SUCC_INC(sock_net(newclcsock->sk), ini); goto out_free; diff --git a/net/smc/smc_core.c b/net/smc/smc_core.c index c305d8d..bf40ad3 100644 --- a/net/smc/smc_core.c +++ b/net/smc/smc_core.c @@ -1171,6 +1171,10 @@ void smc_conn_free(struct smc_connection *conn) if (!list_empty(&lgr->list)) smc_ism_unset_conn(conn); tasklet_kill(&conn->rx_tsklet); + + /* detach sndbuf from peer rmb */ + if (lgr->smcd->is_loopback) + smcd_buf_detach(conn); } else { smc_cdc_wait_pend_tx_wr(conn); if (current_work() != &conn->abort_work) @@ -2423,6 +2427,14 @@ int smc_buf_create(struct smc_sock *smc, bool is_smcd) { int rc; + if (is_smcd && smc->conn.lgr->smcd->is_loopback) { + /* Conns above smcd loopback device only create and maintain + * their RMBs. The sndbufs will be attached to peer RMBs once + * getting the tokens. + */ + return __smc_buf_create(smc, is_smcd, true); + } + /* create send buffer */ rc = __smc_buf_create(smc, is_smcd, false); if (rc) @@ -2439,6 +2451,56 @@ int smc_buf_create(struct smc_sock *smc, bool is_smcd) return rc; } +/* for smcd loopback conns, attach local sndbuf to peer RMB. + * The data copy to sndbuf is equal to data copy to peer RMB. 
+ */ +int smcd_buf_attach(struct smc_connection *conn) +{ + struct smcd_dev *smcd = conn->lgr->smcd; + u64 peer_token = conn->peer_token; + struct smc_buf_desc *buf_desc; + int rc; + + buf_desc = kzalloc(sizeof(*buf_desc), GFP_KERNEL); + if (!buf_desc) + return -ENOMEM; + rc = smc_ism_attach_dmb(smcd, peer_token, buf_desc); + if (rc) { + rc = SMC_CLC_DECL_ERR_RTOK; + goto free; + } + + /* attach local sndbuf to peer RMB. + * refer to local sndbuf is equal to refer to peer RMB. + */ + /* align with peer rmb */ + buf_desc->cpu_addr = (u8 *)buf_desc->cpu_addr + sizeof(struct smcd_cdc_msg); + buf_desc->len -= sizeof(struct smcd_cdc_msg); + conn->sndbuf_desc = buf_desc; + conn->sndbuf_desc->used = 1; + //smc->sk.sk_sndbuf = 2 * (smc->conn->sndbuf_desc->len); + atomic_set(&conn->sndbuf_space, conn->sndbuf_desc->len); + return 0; + +free: + kfree(buf_desc); + return rc; +} + +void smcd_buf_detach(struct smc_connection *conn) +{ + struct smcd_dev *smcd = conn->lgr->smcd; + u64 peer_token = conn->peer_token; + + if (!conn->sndbuf_desc) + return; + + smc_ism_detach_dmb(smcd, peer_token); + + kfree(conn->sndbuf_desc); + conn->sndbuf_desc = NULL; +} + static inline int smc_rmb_reserve_rtoken_idx(struct smc_link_group *lgr) { int i; diff --git a/net/smc/smc_core.h b/net/smc/smc_core.h index 285f9bd..b51b020 100644 --- a/net/smc/smc_core.h +++ b/net/smc/smc_core.h @@ -518,6 +518,8 @@ void smc_smcd_terminate(struct smcd_dev *dev, u64 peer_gid, void smc_smcd_terminate_all(struct smcd_dev *dev); void smc_smcr_terminate_all(struct smc_ib_device *smcibdev); int smc_buf_create(struct smc_sock *smc, bool is_smcd); +int smcd_buf_attach(struct smc_connection *conn); +void smcd_buf_detach(struct smc_connection *conn); int smc_uncompress_bufsize(u8 compressed); int smc_rmb_rtoken_handling(struct smc_connection *conn, struct smc_link *link, struct smc_clc_msg_accept_confirm *clc); From patchwork Tue Dec 20 03:21:45 2022 Content-Type: text/plain; charset="utf-8" MIME-Version: 1.0 
Content-Transfer-Encoding: 7bit X-Patchwork-Submitter: Wen Gu X-Patchwork-Id: 13077482 X-Patchwork-Delegate: kuba@kernel.org Return-Path: X-Spam-Checker-Version: SpamAssassin 3.4.0 (2014-02-07) on aws-us-west-2-korg-lkml-1.web.codeaurora.org Received: from vger.kernel.org (vger.kernel.org [23.128.96.18]) by smtp.lore.kernel.org (Postfix) with ESMTP id E22F6C4167B for ; Tue, 20 Dec 2022 03:22:16 +0000 (UTC) Received: (majordomo@vger.kernel.org) by vger.kernel.org via listexpand id S232858AbiLTDWP (ORCPT ); Mon, 19 Dec 2022 22:22:15 -0500 Received: from lindbergh.monkeyblade.net ([23.128.96.19]:52952 "EHLO lindbergh.monkeyblade.net" rhost-flags-OK-OK-OK-OK) by vger.kernel.org with ESMTP id S233118AbiLTDWI (ORCPT ); Mon, 19 Dec 2022 22:22:08 -0500 Received: from out30-44.freemail.mail.aliyun.com (out30-44.freemail.mail.aliyun.com [115.124.30.44]) by lindbergh.monkeyblade.net (Postfix) with ESMTPS id DBE4810FF4; Mon, 19 Dec 2022 19:22:06 -0800 (PST) X-Alimail-AntiSpam: AC=PASS;BC=-1|-1;BR=01201311R141e4;CH=green;DM=||false|;DS=||;FP=0|-1|-1|-1|0|-1|-1|-1;HT=ay29a033018046050;MF=guwen@linux.alibaba.com;NM=1;PH=DS;RN=10;SR=0;TI=SMTPD_---0VXjWtup_1671506522; Received: from localhost(mailfrom:guwen@linux.alibaba.com fp:SMTPD_---0VXjWtup_1671506522) by smtp.aliyun-inc.com; Tue, 20 Dec 2022 11:22:04 +0800 From: Wen Gu To: kgraul@linux.ibm.com, wenjia@linux.ibm.com, jaka@linux.ibm.com, davem@davemloft.net, edumazet@google.com, kuba@kernel.org, pabeni@redhat.com Cc: linux-s390@vger.kernel.org, netdev@vger.kernel.org, linux-kernel@vger.kernel.org Subject: [RFC PATCH net-next v2 5/5] net/smc: logic of cursors update in SMC-D loopback connections Date: Tue, 20 Dec 2022 11:21:45 +0800 Message-Id: <1671506505-104676-6-git-send-email-guwen@linux.alibaba.com> X-Mailer: git-send-email 1.8.3.1 In-Reply-To: <1671506505-104676-1-git-send-email-guwen@linux.alibaba.com> References: <1671506505-104676-1-git-send-email-guwen@linux.alibaba.com> Precedence: bulk List-ID: X-Mailing-List: 
netdev@vger.kernel.org X-Patchwork-Delegate: kuba@kernel.org X-Patchwork-State: RFC Since the local sndbuf of an SMC-D loopback connection shares the same physical memory region with the peer RMB, the cursor update logic needs to be adapted. The main difference from the original implementation is the need to ensure that the data copied to the local sndbuf won't overwrite the peer's unconsumed data. So, for SMC-D loopback connections: 1. TX a. don't update fin_curs when sending out a cdc msg. b. the fin_curs and sndbuf_space updates are deferred until the peer's cons_curs update is received. 2. RX a. same as before. peer sndbuf is as large as local rmb, which guarantees that prod_curs will be behind prep_curs. Signed-off-by: Wen Gu --- net/smc/smc_cdc.c | 53 +++++++++++++++++++++++++++++++++++++++----------- net/smc/smc_loopback.c | 7 +++++++ 2 files changed, 49 insertions(+), 11 deletions(-) diff --git a/net/smc/smc_cdc.c b/net/smc/smc_cdc.c index 61f5ff7..586472a 100644 --- a/net/smc/smc_cdc.c +++ b/net/smc/smc_cdc.c @@ -253,17 +253,26 @@ int smcd_cdc_msg_send(struct smc_connection *conn) return rc; smc_curs_copy(&conn->rx_curs_confirmed, &curs, conn); conn->local_rx_ctrl.prod_flags.cons_curs_upd_req = 0; - /* Calculate transmitted data and increment free send buffer space */ - diff = smc_curs_diff(conn->sndbuf_desc->len, &conn->tx_curs_fin, - &conn->tx_curs_sent); - /* increased by confirmed number of bytes */ - smp_mb__before_atomic(); - atomic_add(diff, &conn->sndbuf_space); - /* guarantee 0 <= sndbuf_space <= sndbuf_desc->len */ - smp_mb__after_atomic(); - smc_curs_copy(&conn->tx_curs_fin, &conn->tx_curs_sent, conn); + if (!conn->lgr->smcd->is_loopback) { + /* Note: + * For smcd loopback device: + * + * Don't update the fin_curs and sndbuf_space here. + * Update fin_curs when peer consumes the data in RMB.
+ */ - smc_tx_sndbuf_nonfull(smc); + /* Calculate transmitted data and increment free send buffer space */ + diff = smc_curs_diff(conn->sndbuf_desc->len, &conn->tx_curs_fin, + &conn->tx_curs_sent); + /* increased by confirmed number of bytes */ + smp_mb__before_atomic(); + atomic_add(diff, &conn->sndbuf_space); + /* guarantee 0 <= sndbuf_space <= sndbuf_desc->len */ + smp_mb__after_atomic(); + smc_curs_copy(&conn->tx_curs_fin, &conn->tx_curs_sent, conn); + + smc_tx_sndbuf_nonfull(smc); + } return rc; } @@ -321,7 +330,7 @@ static void smc_cdc_msg_recv_action(struct smc_sock *smc, { union smc_host_cursor cons_old, prod_old; struct smc_connection *conn = &smc->conn; - int diff_cons, diff_prod; + int diff_cons, diff_prod, diff_tx; smc_curs_copy(&prod_old, &conn->local_rx_ctrl.prod, conn); smc_curs_copy(&cons_old, &conn->local_rx_ctrl.cons, conn); @@ -337,6 +346,28 @@ static void smc_cdc_msg_recv_action(struct smc_sock *smc, atomic_add(diff_cons, &conn->peer_rmbe_space); /* guarantee 0 <= peer_rmbe_space <= peer_rmbe_size */ smp_mb__after_atomic(); + + /* For smcd loopback device: + * Update of peer cons_curs indicates that + * 1. peer rmbe space increases. + * 2. local sndbuf space increases. + * + * So local sndbuf fin_curs should be equal to peer RMB cons_curs. 
+ */ + if (conn->lgr->is_smcd && + conn->lgr->smcd->is_loopback) { + /* calculate peer rmb consumed data */ + diff_tx = smc_curs_diff(conn->sndbuf_desc->len, &conn->tx_curs_fin, + &conn->local_rx_ctrl.cons); + /* increase local sndbuf space and fin_curs */ + smp_mb__before_atomic(); + atomic_add(diff_tx, &conn->sndbuf_space); + /* guarantee 0 <= sndbuf_space <= sndbuf_desc->len */ + smp_mb__after_atomic(); + smc_curs_copy(&conn->tx_curs_fin, &conn->local_rx_ctrl.cons, conn); + + smc_tx_sndbuf_nonfull(smc); + } } diff_prod = smc_curs_diff(conn->rmb_desc->len, &prod_old, diff --git a/net/smc/smc_loopback.c b/net/smc/smc_loopback.c index bc3ff82..43f0287 100644 --- a/net/smc/smc_loopback.c +++ b/net/smc/smc_loopback.c @@ -216,6 +216,13 @@ static int lo_move_data(struct smcd_dev *smcd, u64 dmb_tok, unsigned int idx, struct lo_dmb_node *rmb_node = NULL, *tmp_node; struct lo_dev *ldev = smcd->priv; + if (!sf) { + /* no need to move data. + * sndbuf is equal to peer rmb. + */ + return 0; + } + read_lock(&ldev->dmb_ht_lock); hash_for_each_possible(ldev->dmb_ht, tmp_node, list, dmb_tok) { if (tmp_node->token == dmb_tok) {