@@ -1073,7 +1073,6 @@ static int smc_find_proposal_devices(struct smc_sock *smc,
* The RFC patch hasn't resolved this, just simply always
* chooses loopback device first, and fallback if loopback
* communication is impossible.
- *
*/
/* check if there is an ism or loopback device available */
if (!(ini->smcd_version & SMC_V1) ||
@@ -1397,6 +1396,17 @@ static int smc_connect_ism(struct smc_sock *smc,
}
smc_conn_save_peer_info(smc, aclc);
+
+ /* special for smcd loopback
+ * conns above smcd loopback dev only create their rmbs.
+ * their sndbufs are 'maps' of peer rmbs.
+ */
+ if (smc->conn.lgr->smcd->is_loopback) {
+ rc = smcd_buf_attach(&smc->conn);
+ if (rc)
+ goto connect_abort;
+ smc->sk.sk_sndbuf = 2 * (smc->conn.sndbuf_desc->len);
+ }
smc_close_init(smc);
smc_rx_init(smc);
smc_tx_init(smc);
@@ -2464,6 +2474,17 @@ static void smc_listen_work(struct work_struct *work)
mutex_unlock(&smc_server_lgr_pending);
}
smc_conn_save_peer_info(new_smc, cclc);
+
+ /* special for smcd loopback
+ * conns above smcd loopback dev only create their rmbs.
+ * their sndbufs are 'maps' of peer rmbs.
+ */
+ if (ini->is_smcd && new_smc->conn.lgr->smcd->is_loopback) {
+ rc = smcd_buf_attach(&new_smc->conn);
+ if (rc)
+ goto out_decl;
+ new_smc->sk.sk_sndbuf = 2 * (new_smc->conn.sndbuf_desc->len);
+ }
smc_listen_out_connected(new_smc);
SMC_STAT_SERV_SUCC_INC(sock_net(newclcsock->sk), ini);
goto out_free;
@@ -1171,6 +1171,10 @@ void smc_conn_free(struct smc_connection *conn)
if (!list_empty(&lgr->list))
smc_ism_unset_conn(conn);
tasklet_kill(&conn->rx_tsklet);
+
+ /* detach sndbuf from peer rmb */
+ if (lgr->smcd->is_loopback)
+ smcd_buf_detach(conn);
} else {
smc_cdc_wait_pend_tx_wr(conn);
if (current_work() != &conn->abort_work)
@@ -2423,6 +2427,14 @@ int smc_buf_create(struct smc_sock *smc, bool is_smcd)
{
int rc;
+ if (is_smcd && smc->conn.lgr->smcd->is_loopback) {
+ /* Conns above smcd loopback device only create and maintain
+ * their RMBs. The sndbufs will be attached to peer RMBs once
+ * getting the tokens.
+ */
+ return __smc_buf_create(smc, is_smcd, true);
+ }
+
/* create send buffer */
rc = __smc_buf_create(smc, is_smcd, false);
if (rc)
@@ -2439,6 +2451,56 @@ int smc_buf_create(struct smc_sock *smc, bool is_smcd)
return rc;
}
+/* For SMC-D loopback conns: attach the local sndbuf to the peer RMB, so that
+ * data copied to the sndbuf reaches the peer RMB. Returns 0 on success.
+ */
+int smcd_buf_attach(struct smc_connection *conn)
+{
+ struct smcd_dev *smcd = conn->lgr->smcd;
+ u64 peer_token = conn->peer_token;
+ struct smc_buf_desc *buf_desc;
+ int rc;
+
+ buf_desc = kzalloc(sizeof(*buf_desc), GFP_KERNEL);
+ if (!buf_desc)
+ return -ENOMEM;
+ rc = smc_ism_attach_dmb(smcd, peer_token, buf_desc);
+ if (rc) {
+ rc = SMC_CLC_DECL_ERR_RTOK; /* any attach failure is reported as this code */
+ goto free;
+ }
+
+ /* buf_desc was filled in by the attach above using the peer's token;
+ * from here on it serves as this connection's private sndbuf descriptor.
+ */
+ /* skip the leading struct smcd_cdc_msg area and shrink len to match */
+ buf_desc->cpu_addr = (u8 *)buf_desc->cpu_addr + sizeof(struct smcd_cdc_msg);
+ buf_desc->len -= sizeof(struct smcd_cdc_msg);
+ conn->sndbuf_desc = buf_desc;
+ conn->sndbuf_desc->used = 1;
+ /* sk_sndbuf is set by the callers after a successful attach */
+ atomic_set(&conn->sndbuf_space, conn->sndbuf_desc->len);
+ return 0;
+
+free:
+ kfree(buf_desc);
+ return rc;
+}
+
+void smcd_buf_detach(struct smc_connection *conn)
+{
+ struct smcd_dev *smcd = conn->lgr->smcd;
+ u64 peer_token = conn->peer_token;
+
+ if (!conn->sndbuf_desc) /* no sndbuf descriptor set: nothing to undo */
+ return;
+
+ smc_ism_detach_dmb(smcd, peer_token); /* same token that was used to attach */
+
+ kfree(conn->sndbuf_desc); /* descriptor was kzalloc'ed in smcd_buf_attach() */
+ conn->sndbuf_desc = NULL; /* a repeated call then returns early above */
+}
+
static inline int smc_rmb_reserve_rtoken_idx(struct smc_link_group *lgr)
{
int i;
@@ -518,6 +518,8 @@ void smc_smcd_terminate(struct smcd_dev *dev, u64 peer_gid,
void smc_smcd_terminate_all(struct smcd_dev *dev);
void smc_smcr_terminate_all(struct smc_ib_device *smcibdev);
int smc_buf_create(struct smc_sock *smc, bool is_smcd);
+int smcd_buf_attach(struct smc_connection *conn);
+void smcd_buf_detach(struct smc_connection *conn);
int smc_uncompress_bufsize(u8 compressed);
int smc_rmb_rtoken_handling(struct smc_connection *conn, struct smc_link *link,
struct smc_clc_msg_accept_confirm *clc);
This patch aims to improve SMC-D loopback performance by avoiding the data
copy from the local sndbuf to the peer RMB. The main idea is to let the local
sndbuf and the peer RMB share the same physical memory.

 +----------+                     +----------+
 | socket A |                     | socket B |
 +----------+                     +----------+
       |                               ^
       |         +---------+           |
  regard as      |         | ----------|
  local sndbuf   |  B's    |     regard as
       |         |  RMB    |     local RMB
       |-------> |         |
                 +---------+

For connections using the smcd loopback device:

1. Only create and maintain the local RMB.
   a. Create or reuse an RMB when the connection is created;
   b. Free the RMB when the link group is freed;

2. Attach the local sndbuf to the peer RMB.
   a. sndbuf_desc describes the same memory region as the peer's rmb_desc.
   b. sndbuf_desc is exclusive to a specific connection and won't be
      added to the lgr buffer pool for reuse.
   c. The sndbuf is attached to the peer RMB when the remote token is
      received in the CLC accept/confirm message.
   d. The sndbuf is detached from the peer RMB when the connection is freed.

Therefore, the data copied from userspace to the local sndbuf directly
reaches the peer RMB.

Signed-off-by: Wen Gu <guwen@linux.alibaba.com>
---
 net/smc/af_smc.c   | 23 +++++++++++++++++++-
 net/smc/smc_core.c | 62 ++++++++++++++++++++++++++++++++++++++++++++++++++++++
 net/smc/smc_core.h |  2 ++
 3 files changed, 86 insertions(+), 1 deletion(-)