Message ID | 20241113071405.67421-4-alibuda@linux.alibaba.com (mailing list archive) |
---|---|
State | Changes Requested |
Delegated to: | Netdev Maintainers |
Headers | show |
Series | Reduce locks scope of-smc_xxx_lgr_pending | expand |
On 2024-11-13 15:14:05, D. Wythe wrote: >It is widely known that SMC introduced a global lock to protect the >creation of the first connection. This lock not only brings performance >issues but also poses a serious security risk. In a multi-tenant >container environment, malicious tenants can construct attacks that keep >the lock occupied for an extended period, thereby affecting the >connections of other tenants. > >Considering that this lock is essentially meant to protect the QP, which >belongs to a device, we can limit the scope of the lock to within the >device rather than having it be global. This way, when a container >exclusively occupies the device, it can avoid being affected by other >malicious tenants. > >Also make on impact on SMC-D since the path of SMC-D is shorter. > >Signed-off-by: D. Wythe <alibuda@linux.alibaba.com> >--- > net/smc/af_smc.c | 18 ++++++++++-------- > net/smc/smc_ib.c | 2 ++ > net/smc/smc_ib.h | 2 ++ > 3 files changed, 14 insertions(+), 8 deletions(-) > >diff --git a/net/smc/af_smc.c b/net/smc/af_smc.c >index 19480d8affb0..d5b9ea7661db 100644 >--- a/net/smc/af_smc.c >+++ b/net/smc/af_smc.c >@@ -56,11 +56,8 @@ > #include "smc_loopback.h" > #include "smc_inet.h" > >-static DEFINE_MUTEX(smc_server_lgr_pending); /* serialize link group >- * creation on server >- */ >-static DEFINE_MUTEX(smc_client_lgr_pending); /* serialize link group >- * creation on client >+static DEFINE_MUTEX(smcd_server_lgr_pending); /* serialize link group >+ * creation on server for SMC-D > */ Why not move the smcd_server_lgr_pending lock to the smcd_device level as well ? > > static struct workqueue_struct *smc_tcp_ls_wq; /* wq for tcp listen work */ >@@ -1251,7 +1248,9 @@ static int smc_connect_rdma(struct smc_sock *smc, > if (reason_code) > return reason_code; > >- smc_lgr_pending_lock(ini, &smc_client_lgr_pending); >+ smc_lgr_pending_lock(ini, (ini->smcr_version & SMC_V2) ? >+ &ini->smcrv2.ib_dev_v2->smc_server_lgr_pending : >+ &ini->ib_dev->smc_server_lgr_pending); > reason_code = smc_conn_create(smc, ini); > if (reason_code) { > smc_lgr_pending_unlock(ini); >@@ -1412,7 +1411,7 @@ static int smc_connect_ism(struct smc_sock *smc, > ini->ism_peer_gid[ini->ism_selected].gid = ntohll(aclc->d0.gid); > > /* there is only one lgr role for SMC-D; use server lock */ >- smc_lgr_pending_lock(ini, &smc_server_lgr_pending); >+ smc_lgr_pending_lock(ini, &smcd_server_lgr_pending); > rc = smc_conn_create(smc, ini); > if (rc) { > smc_lgr_pending_unlock(ini); >@@ -2044,6 +2043,9 @@ static int smc_listen_rdma_init(struct smc_sock *new_smc, > { > int rc; > >+ smc_lgr_pending_lock(ini, (ini->smcr_version & SMC_V2) ? >+ &ini->smcrv2.ib_dev_v2->smc_server_lgr_pending : >+ &ini->ib_dev->smc_server_lgr_pending); > /* allocate connection / link group */ > rc = smc_conn_create(new_smc, ini); > if (rc) >@@ -2064,6 +2066,7 @@ static int smc_listen_ism_init(struct smc_sock *new_smc, > { > int rc; > >+ smc_lgr_pending_lock(ini, &smcd_server_lgr_pending); > rc = smc_conn_create(new_smc, ini); > if (rc) > return rc; >@@ -2478,7 +2481,6 @@ static void smc_listen_work(struct work_struct *work) > if (rc) > goto out_decl; > >- smc_lgr_pending_lock(ini, &smc_server_lgr_pending); > smc_close_init(new_smc); > smc_rx_init(new_smc); > smc_tx_init(new_smc); >diff --git a/net/smc/smc_ib.c b/net/smc/smc_ib.c >index 9c563cdbea90..fb8b81b628b8 100644 >--- a/net/smc/smc_ib.c >+++ b/net/smc/smc_ib.c >@@ -952,6 +952,8 @@ static int smc_ib_add_dev(struct ib_device *ibdev) > init_waitqueue_head(&smcibdev->lnks_deleted); > mutex_init(&smcibdev->mutex); > mutex_lock(&smc_ib_devices.mutex); >+ mutex_init(&smcibdev->smc_server_lgr_pending); >+ mutex_init(&smcibdev->smc_client_lgr_pending); > list_add_tail(&smcibdev->list, &smc_ib_devices.list); > mutex_unlock(&smc_ib_devices.mutex); > ib_set_client_data(ibdev, &smc_ib_client, smcibdev); >diff --git a/net/smc/smc_ib.h b/net/smc/smc_ib.h >index ef8ac2b7546d..322547a5a23d 100644 >--- a/net/smc/smc_ib.h >+++ b/net/smc/smc_ib.h >@@ -57,6 +57,8 @@ struct smc_ib_device { /* ib-device infos for smc */ > atomic_t lnk_cnt_by_port[SMC_MAX_PORTS]; > /* number of links per port */ > int ndev_ifidx[SMC_MAX_PORTS]; /* ndev if indexes */ >+ struct mutex smc_server_lgr_pending; /* serialize link group creation on server */ >+ struct mutex smc_client_lgr_pending; /* serialize link group creation on client */ Align please. Best regards, Dust
diff --git a/net/smc/af_smc.c b/net/smc/af_smc.c index 19480d8affb0..d5b9ea7661db 100644 --- a/net/smc/af_smc.c +++ b/net/smc/af_smc.c @@ -56,11 +56,8 @@ #include "smc_loopback.h" #include "smc_inet.h" -static DEFINE_MUTEX(smc_server_lgr_pending); /* serialize link group - * creation on server - */ -static DEFINE_MUTEX(smc_client_lgr_pending); /* serialize link group - * creation on client +static DEFINE_MUTEX(smcd_server_lgr_pending); /* serialize link group + * creation on server for SMC-D */ static struct workqueue_struct *smc_tcp_ls_wq; /* wq for tcp listen work */ @@ -1251,7 +1248,9 @@ static int smc_connect_rdma(struct smc_sock *smc, if (reason_code) return reason_code; - smc_lgr_pending_lock(ini, &smc_client_lgr_pending); + smc_lgr_pending_lock(ini, (ini->smcr_version & SMC_V2) ? + &ini->smcrv2.ib_dev_v2->smc_server_lgr_pending : + &ini->ib_dev->smc_server_lgr_pending); reason_code = smc_conn_create(smc, ini); if (reason_code) { smc_lgr_pending_unlock(ini); @@ -1412,7 +1411,7 @@ static int smc_connect_ism(struct smc_sock *smc, ini->ism_peer_gid[ini->ism_selected].gid = ntohll(aclc->d0.gid); /* there is only one lgr role for SMC-D; use server lock */ - smc_lgr_pending_lock(ini, &smc_server_lgr_pending); + smc_lgr_pending_lock(ini, &smcd_server_lgr_pending); rc = smc_conn_create(smc, ini); if (rc) { smc_lgr_pending_unlock(ini); @@ -2044,6 +2043,9 @@ static int smc_listen_rdma_init(struct smc_sock *new_smc, { int rc; + smc_lgr_pending_lock(ini, (ini->smcr_version & SMC_V2) ? + &ini->smcrv2.ib_dev_v2->smc_server_lgr_pending : + &ini->ib_dev->smc_server_lgr_pending); /* allocate connection / link group */ rc = smc_conn_create(new_smc, ini); if (rc) @@ -2064,6 +2066,7 @@ static int smc_listen_ism_init(struct smc_sock *new_smc, { int rc; + smc_lgr_pending_lock(ini, &smcd_server_lgr_pending); rc = smc_conn_create(new_smc, ini); if (rc) return rc; @@ -2478,7 +2481,6 @@ static void smc_listen_work(struct work_struct *work) if (rc) goto out_decl; - smc_lgr_pending_lock(ini, &smc_server_lgr_pending); smc_close_init(new_smc); smc_rx_init(new_smc); smc_tx_init(new_smc); diff --git a/net/smc/smc_ib.c b/net/smc/smc_ib.c index 9c563cdbea90..fb8b81b628b8 100644 --- a/net/smc/smc_ib.c +++ b/net/smc/smc_ib.c @@ -952,6 +952,8 @@ static int smc_ib_add_dev(struct ib_device *ibdev) init_waitqueue_head(&smcibdev->lnks_deleted); mutex_init(&smcibdev->mutex); mutex_lock(&smc_ib_devices.mutex); + mutex_init(&smcibdev->smc_server_lgr_pending); + mutex_init(&smcibdev->smc_client_lgr_pending); list_add_tail(&smcibdev->list, &smc_ib_devices.list); mutex_unlock(&smc_ib_devices.mutex); ib_set_client_data(ibdev, &smc_ib_client, smcibdev); diff --git a/net/smc/smc_ib.h b/net/smc/smc_ib.h index ef8ac2b7546d..322547a5a23d 100644 --- a/net/smc/smc_ib.h +++ b/net/smc/smc_ib.h @@ -57,6 +57,8 @@ struct smc_ib_device { /* ib-device infos for smc */ atomic_t lnk_cnt_by_port[SMC_MAX_PORTS]; /* number of links per port */ int ndev_ifidx[SMC_MAX_PORTS]; /* ndev if indexes */ + struct mutex smc_server_lgr_pending; /* serialize link group creation on server */ + struct mutex smc_client_lgr_pending; /* serialize link group creation on client */ }; static inline __be32 smc_ib_gid_to_ipv4(u8 gid[SMC_GID_SIZE])
It is widely known that SMC introduced a global lock to protect the creation of the first connection. This lock not only brings performance issues but also poses a serious security risk. In a multi-tenant container environment, malicious tenants can construct attacks that keep the lock occupied for an extended period, thereby affecting the connections of other tenants. Considering that this lock is essentially meant to protect the QP, which belongs to a device, we can limit the scope of the lock to within the device rather than having it be global. This way, when a container exclusively occupies the device, it can avoid being affected by other malicious tenants. Also make on impact on SMC-D since the path of SMC-D is shorter. Signed-off-by: D. Wythe <alibuda@linux.alibaba.com> --- net/smc/af_smc.c | 18 ++++++++++-------- net/smc/smc_ib.c | 2 ++ net/smc/smc_ib.h | 2 ++ 3 files changed, 14 insertions(+), 8 deletions(-)