[dlm/next,1/8] dlm: using rcu to avoid rsb lookup again

Message ID 20240603215558.2722969-2-aahringo@redhat.com (mailing list archive)
State Handled Elsewhere, archived
Series dlm: md: introduce DLM_LSFL_SOFTIRQ_SAFE

Commit Message

Alexander Aring June 3, 2024, 9:55 p.m. UTC
This patch converts the rsb to be protected under RCU: while the RCU
read lock is held, the rsb cannot be freed. In combination with the
newly introduced flag RSB_HASHED we can check whether the rsb is still
part of the ls_rsbtbl hashtable (it cannot be part of any other table).
If it is still part of ls_rsbtbl, we can avoid a second
dlm_search_rsb_tree() call.
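
For illustration, here is a minimal sketch of the locking pattern this
enables. The names tbl_lock, lookup(), obj_flag() and OBJ_HASHED are
hypothetical stand-ins for the dlm equivalents in the patch below, not
the patch's actual code:

	rcu_read_lock();	/* the object cannot be freed while held */

	read_lock_bh(&tbl_lock);
	obj = lookup(&tbl, name);	/* the only hash lookup */
	if (!obj) {
		read_unlock_bh(&tbl_lock);
		rcu_read_unlock();
		return -ENOENT;
	}
	read_unlock_bh(&tbl_lock);

	write_lock_bh(&tbl_lock);
	/* between the read unlock and the write lock the object may
	 * have been removed from the hash, but it cannot have been
	 * freed: the RCU-deferred free cannot complete inside this
	 * rcu read critical section. A flag set on insert and cleared
	 * on remove (both under the write lock) therefore replaces a
	 * second lookup.
	 */
	if (!obj_flag(obj, OBJ_HASHED)) {
		/* somebody else removed it; treat as not found */
		write_unlock_bh(&tbl_lock);
		rcu_read_unlock();
		return -ENOENT;
	}
	/* ... operate on obj under the write lock ... */
	write_unlock_bh(&tbl_lock);
	rcu_read_unlock();

This is also why dlm_free_rsb() is changed to defer the actual free
with call_rcu(): an rsb removed from the hash stays valid until all
concurrent rcu read critical sections have completed.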

Signed-off-by: Alexander Aring <aahringo@redhat.com>
---
 fs/dlm/dlm_internal.h |   3 ++
 fs/dlm/lock.c         | 117 ++++++++++++++++++++++++++++++++++--------
 fs/dlm/memory.c       |  15 +++++-
 3 files changed, 112 insertions(+), 23 deletions(-)

Patch

diff --git a/fs/dlm/dlm_internal.h b/fs/dlm/dlm_internal.h
index 9e68e68bf0cf..6f578527c6d8 100644
--- a/fs/dlm/dlm_internal.h
+++ b/fs/dlm/dlm_internal.h
@@ -333,6 +333,7 @@  struct dlm_rsb {
 	struct list_head	res_recover_list;   /* used for recovery */
 	struct list_head	res_toss_q_list;
 	int			res_recover_locks_count;
+	struct rcu_head		rcu;
 
 	char			*res_lvbptr;
 	char			res_name[DLM_RESNAME_MAXLEN+1];
@@ -366,6 +367,8 @@  enum rsb_flags {
 	RSB_RECOVER_GRANT,
 	RSB_RECOVER_LVB_INVAL,
 	RSB_TOSS,
+	/* set while the rsb is part of the ls_rsbtbl hash */
+	RSB_HASHED,
 };
 
 static inline void rsb_set_flag(struct dlm_rsb *r, enum rsb_flags flag)
diff --git a/fs/dlm/lock.c b/fs/dlm/lock.c
index a29de48849ef..e1e990b09068 100644
--- a/fs/dlm/lock.c
+++ b/fs/dlm/lock.c
@@ -582,6 +582,7 @@  void dlm_rsb_toss_timer(struct timer_list *timer)
 		list_del(&r->res_rsbs_list);
 		rhashtable_remove_fast(&ls->ls_rsbtbl, &r->res_node,
 				       dlm_rhash_rsb_params);
+		rsb_clear_flag(r, RSB_HASHED);
 
 		/* not necessary to held the ls_rsbtbl_lock when
 		 * calling send_remove()
@@ -658,8 +659,14 @@  int dlm_search_rsb_tree(struct rhashtable *rhash, const void *name, int len,
 
 static int rsb_insert(struct dlm_rsb *rsb, struct rhashtable *rhash)
 {
-	return rhashtable_insert_fast(rhash, &rsb->res_node,
-				      dlm_rhash_rsb_params);
+	int rv;
+
+	rv = rhashtable_insert_fast(rhash, &rsb->res_node,
+				    dlm_rhash_rsb_params);
+	if (!rv)
+		rsb_set_flag(rsb, RSB_HASHED);
+
+	return rv;
 }
 
 /*
@@ -773,12 +780,24 @@  static int find_rsb_dir(struct dlm_ls *ls, const void *name, int len,
 
  do_toss:
 	write_lock_bh(&ls->ls_rsbtbl_lock);
-
-	/* retry lookup under write lock to see if its still in toss state
-	 * if not it's in keep state and we relookup - unlikely path.
+	/* toss handling requires ls_rsbtbl_lock to be held in write
+	 * lock state. However, during the transition from the previous
+	 * read lock to the write lock the rsb can be removed from the
+	 * ls_rsbtbl hash or can change its state back to keep. If
+	 * RSB_HASHED is no longer set, somebody else removed the entry
+	 * from the ls_rsbtbl hash. The lookup is protected by the rcu
+	 * read lock and runs inside an rcu read critical section, so a
+	 * free of the rsb (which usually occurs when an rsb is removed
+	 * from the hash) cannot happen between the read to write lock
+	 * transition: the RCU-deferred free of the rsb will not run
+	 * during this rcu read critical section. For this reason a
+	 * check of RSB_HASHED is enough to detect that situation. The
+	 * recheck of RSB_TOSS is required because somebody else could
+	 * have taken the rsb out of RSB_TOSS state; in that very
+	 * unlikely case we do a relookup under read lock. The
+	 * RSB_HASHED case can happen and we need to be prepared for it.
 	 */
-	error = dlm_search_rsb_tree(&ls->ls_rsbtbl, name, len, &r);
-	if (!error) {
+	if (rsb_flag(r, RSB_HASHED)) {
 		if (!rsb_flag(r, RSB_TOSS)) {
 			write_unlock_bh(&ls->ls_rsbtbl_lock);
 			goto retry;
@@ -950,11 +969,24 @@  static int find_rsb_nodir(struct dlm_ls *ls, const void *name, int len,
  do_toss:
 	write_lock_bh(&ls->ls_rsbtbl_lock);
 
-	/* retry lookup under write lock to see if its still in toss state
-	 * if not it's in keep state and we relookup - unlikely path.
+	/* toss handling requires ls_rsbtbl_lock to be held in write
+	 * lock state. However, during the transition from the previous
+	 * read lock to the write lock the rsb can be removed from the
+	 * ls_rsbtbl hash or can change its state back to keep. If
+	 * RSB_HASHED is no longer set, somebody else removed the entry
+	 * from the ls_rsbtbl hash. The lookup is protected by the rcu
+	 * read lock and runs inside an rcu read critical section, so a
+	 * free of the rsb (which usually occurs when an rsb is removed
+	 * from the hash) cannot happen between the read to write lock
+	 * transition: the RCU-deferred free of the rsb will not run
+	 * during this rcu read critical section. For this reason a
+	 * check of RSB_HASHED is enough to detect that situation. The
+	 * recheck of RSB_TOSS is required because somebody else could
+	 * have taken the rsb out of RSB_TOSS state; in that very
+	 * unlikely case we do a relookup under read lock. The
+	 * RSB_HASHED case can happen and we need to be prepared for it.
 	 */
-	error = dlm_search_rsb_tree(&ls->ls_rsbtbl, name, len, &r);
-	if (!error) {
+	if (rsb_flag(r, RSB_HASHED)) {
 		if (!rsb_flag(r, RSB_TOSS)) {
 			write_unlock_bh(&ls->ls_rsbtbl_lock);
 			goto retry;
@@ -1046,6 +1078,7 @@  static int find_rsb(struct dlm_ls *ls, const void *name, int len,
 {
 	int dir_nodeid;
 	uint32_t hash;
+	int rv;
 
 	if (len > DLM_RESNAME_MAXLEN)
 		return -EINVAL;
@@ -1053,12 +1086,20 @@  static int find_rsb(struct dlm_ls *ls, const void *name, int len,
 	hash = jhash(name, len, 0);
 	dir_nodeid = dlm_hash2nodeid(ls, hash);
 
+	/* hold the rcu read lock here to prevent freeing of the rsb
+	 * while looking it up; there is currently an optimization
+	 * that avoids a second rsb lookup by instead checking whether
+	 * the rsb is still part of the rsbtbl hash.
+	 */
+	rcu_read_lock();
 	if (dlm_no_directory(ls))
-		return find_rsb_nodir(ls, name, len, hash, dir_nodeid,
-				      from_nodeid, flags, r_ret);
-	else
-		return find_rsb_dir(ls, name, len, hash, dir_nodeid,
+		rv = find_rsb_nodir(ls, name, len, hash, dir_nodeid,
 				    from_nodeid, flags, r_ret);
+	else
+		rv = find_rsb_dir(ls, name, len, hash, dir_nodeid,
+				  from_nodeid, flags, r_ret);
+	rcu_read_unlock();
+	return rv;
 }
 
 /* we have received a request and found that res_master_nodeid != our_nodeid,
@@ -1215,8 +1256,9 @@  static void __dlm_master_lookup(struct dlm_ls *ls, struct dlm_rsb *r, int our_no
  * . dlm_master_lookup RECOVER_MASTER (fix_master 1, from_master 0)
  */
 
-int dlm_master_lookup(struct dlm_ls *ls, int from_nodeid, const char *name,
-		      int len, unsigned int flags, int *r_nodeid, int *result)
+static int _dlm_master_lookup(struct dlm_ls *ls, int from_nodeid,
+			      const char *name, int len, unsigned int flags,
+			      int *r_nodeid, int *result)
 {
 	struct dlm_rsb *r = NULL;
 	uint32_t hash;
@@ -1243,7 +1285,6 @@  int dlm_master_lookup(struct dlm_ls *ls, int from_nodeid, const char *name,
 	}
 
  retry:
-
 	/* check if the rsb is in keep state under read lock - likely path */
 	read_lock_bh(&ls->ls_rsbtbl_lock);
 	error = dlm_search_rsb_tree(&ls->ls_rsbtbl, name, len, &r);
@@ -1278,11 +1319,24 @@  int dlm_master_lookup(struct dlm_ls *ls, int from_nodeid, const char *name,
 	/* unlikely path - relookup under write */
 	write_lock_bh(&ls->ls_rsbtbl_lock);
 
-	/* rsb_mod_timer() requires to held ls_rsbtbl_lock in write lock
-	 * check if the rsb is still in toss state, if not relookup
+	/* toss handling requires ls_rsbtbl_lock to be held in write
+	 * lock state. However, during the transition from the previous
+	 * read lock to the write lock the rsb can be removed from the
+	 * ls_rsbtbl hash or can change its state back to keep. If
+	 * RSB_HASHED is no longer set, somebody else removed the entry
+	 * from the ls_rsbtbl hash. The lookup is protected by the rcu
+	 * read lock and runs inside an rcu read critical section, so a
+	 * free of the rsb (which usually occurs when an rsb is removed
+	 * from the hash) cannot happen between the read to write lock
+	 * transition: the RCU-deferred free of the rsb will not run
+	 * during this rcu read critical section. For this reason a
+	 * check of RSB_HASHED is enough to detect that situation. The
+	 * recheck of RSB_TOSS is required because somebody else could
+	 * have taken the rsb out of RSB_TOSS state; in that very
+	 * unlikely case we do a relookup under read lock. The
+	 * RSB_HASHED case can happen and we need to be prepared for it.
 	 */
-	error = dlm_search_rsb_tree(&ls->ls_rsbtbl, name, len, &r);
-	if (!error) {
+	if (rsb_flag(r, RSB_HASHED)) {
 		if (!rsb_flag(r, RSB_TOSS)) {
 			write_unlock_bh(&ls->ls_rsbtbl_lock);
 			/* something as changed, very unlikely but
@@ -1290,6 +1344,7 @@  int dlm_master_lookup(struct dlm_ls *ls, int from_nodeid, const char *name,
 			 */
 			goto retry;
 		}
+
 	} else {
 		write_unlock_bh(&ls->ls_rsbtbl_lock);
 		goto not_found;
@@ -1346,6 +1401,23 @@  int dlm_master_lookup(struct dlm_ls *ls, int from_nodeid, const char *name,
 	return error;
 }
 
+int dlm_master_lookup(struct dlm_ls *ls, int from_nodeid, const char *name,
+		      int len, unsigned int flags, int *r_nodeid, int *result)
+{
+	int rv;
+
+	/* hold the rcu read lock here to prevent freeing of the rsb
+	 * while looking it up; there is currently an optimization
+	 * that avoids a second rsb lookup by instead checking whether
+	 * the rsb is still part of the rsbtbl hash.
+	 */
+	rcu_read_lock();
+	rv = _dlm_master_lookup(ls, from_nodeid, name, len, flags, r_nodeid,
+				result);
+	rcu_read_unlock();
+	return rv;
+}
+
 static void dlm_dump_rsb_hash(struct dlm_ls *ls, uint32_t hash)
 {
 	struct dlm_rsb *r;
@@ -4308,6 +4380,7 @@  static void receive_remove(struct dlm_ls *ls, const struct dlm_message *ms)
 	list_del(&r->res_rsbs_list);
 	rhashtable_remove_fast(&ls->ls_rsbtbl, &r->res_node,
 			       dlm_rhash_rsb_params);
+	rsb_clear_flag(r, RSB_HASHED);
 	write_unlock_bh(&ls->ls_rsbtbl_lock);
 
 	free_toss_rsb(r);
diff --git a/fs/dlm/memory.c b/fs/dlm/memory.c
index 15a8b1cee433..d6cb7a1c700d 100644
--- a/fs/dlm/memory.c
+++ b/fs/dlm/memory.c
@@ -101,13 +101,26 @@  struct dlm_rsb *dlm_allocate_rsb(struct dlm_ls *ls)
 	return r;
 }
 
-void dlm_free_rsb(struct dlm_rsb *r)
+static void __dlm_free_rsb(struct rcu_head *rcu)
 {
+	struct dlm_rsb *r = container_of(rcu, struct dlm_rsb, rcu);
+
 	if (r->res_lvbptr)
 		dlm_free_lvb(r->res_lvbptr);
 	kmem_cache_free(rsb_cache, r);
 }
 
+void dlm_free_rsb(struct dlm_rsb *r)
+{
+	/* wait until all current rcu read critical sections have
+	 * completed before calling __dlm_free_rsb. Currently this
+	 * is used to avoid a use-after-free when checking whether
+	 * the rsb is still part of ls_rsbtbl by testing the
+	 * RSB_HASHED flag.
+	 */
+	call_rcu(&r->rcu, __dlm_free_rsb);
+}
+
 struct dlm_lkb *dlm_allocate_lkb(struct dlm_ls *ls)
 {
 	struct dlm_lkb *lkb;