diff mbox series

[23/27] afs: Fix the by-UUID server tree to allow servers with the same UUID

Message ID 159078977077.679399.159019304395285864.stgit@warthog.procyon.org.uk (mailing list archive)
State New, archived
Headers show
Series afs: Improvements | expand

Commit Message

David Howells May 29, 2020, 10:02 p.m. UTC
Whilst it shouldn't happen, it is possible for multiple fileservers to
share a UUID, particularly if an entire cell has been duplicated, UUIDs and
all.  In such a case, it's not necessarily possible to map the effect of
the CB.InitCallBackState3 incoming RPC to a specific server unambiguously
by UUID and thus to a specific cell.

Indeed, there's a problem whereby multiple server records may need to
occupy the same spot in the rb_tree rooted in the afs_net struct.

Fix this by allowing servers to form a list, with the head of the list in
the tree.  When the front entry in the list is removed, the second in the
list just replaces it.  afs_init_callback_state() then just goes down the
line, poking each server in the list.

This means that some servers will be unnecessarily poked, unfortunately.
An alternative would be to route by call parameters.

Reported-by: Jeffrey Altman <jaltman@auristor.com>
Signed-off-by: David Howells <dhowells@redhat.com>
Fixes: d2ddc776a458 ("afs: Overhaul volume and server record caching and fileserver rotation")
---

 fs/afs/callback.c |   10 ++++++++-
 fs/afs/internal.h |    4 +++-
 fs/afs/server.c   |   56 +++++++++++++++++++++++++++++++++++++++++++++--------
 3 files changed, 59 insertions(+), 11 deletions(-)
diff mbox series

Patch

diff --git a/fs/afs/callback.c b/fs/afs/callback.c
index b4cb9bb63f0a..7d9b23d981bf 100644
--- a/fs/afs/callback.c
+++ b/fs/afs/callback.c
@@ -21,11 +21,17 @@ 
 #include "internal.h"
 
 /*
- * allow the fileserver to request callback state (re-)initialisation
+ * Allow the fileserver to request callback state (re-)initialisation.
+ * Unfortunately, UUIDs are not guaranteed unique.
  */
 void afs_init_callback_state(struct afs_server *server)
 {
-	server->cb_s_break++;
+	rcu_read_lock();
+	do {
+		server->cb_s_break++;
+		server = rcu_dereference(server->uuid_next);
+	} while (0);
+	rcu_read_unlock();
 }
 
 /*
diff --git a/fs/afs/internal.h b/fs/afs/internal.h
index c64c2b47ece7..e0dc14d4d8b9 100644
--- a/fs/afs/internal.h
+++ b/fs/afs/internal.h
@@ -486,7 +486,9 @@  struct afs_server {
 
 	struct afs_addr_list	__rcu *addresses;
 	struct afs_cell		*cell;		/* Cell to which belongs (pins ref) */
-	struct rb_node		uuid_rb;	/* Link in cell->fs_servers */
+	struct rb_node		uuid_rb;	/* Link in net->fs_servers */
+	struct afs_server __rcu	*uuid_next;	/* Next server with same UUID */
+	struct afs_server	*uuid_prev;	/* Previous server with same UUID */
 	struct list_head	probe_link;	/* Link in net->fs_probe_list */
 	struct hlist_node	addr4_link;	/* Link in net->fs_addresses4 */
 	struct hlist_node	addr6_link;	/* Link in net->fs_addresses6 */
diff --git a/fs/afs/server.c b/fs/afs/server.c
index c51039a077cd..88593ffcb54e 100644
--- a/fs/afs/server.c
+++ b/fs/afs/server.c
@@ -130,13 +130,15 @@  struct afs_server *afs_find_server_by_uuid(struct afs_net *net, const uuid_t *uu
 }
 
 /*
- * Install a server record in the namespace tree
+ * Install a server record in the namespace tree.  If there's a clash, we stick
+ * it into a list anchored on whichever afs_server struct is actually in the
+ * tree.
  */
 static struct afs_server *afs_install_server(struct afs_cell *cell,
 					     struct afs_server *candidate)
 {
 	const struct afs_addr_list *alist;
-	struct afs_server *server;
+	struct afs_server *server, *next;
 	struct afs_net *net = cell->net;
 	struct rb_node **pp, *p;
 	int diff;
@@ -153,12 +155,30 @@  static struct afs_server *afs_install_server(struct afs_cell *cell,
 		_debug("- consider %p", p);
 		server = rb_entry(p, struct afs_server, uuid_rb);
 		diff = memcmp(&candidate->uuid, &server->uuid, sizeof(uuid_t));
-		if (diff < 0)
+		if (diff < 0) {
 			pp = &(*pp)->rb_left;
-		else if (diff > 0)
+		} else if (diff > 0) {
 			pp = &(*pp)->rb_right;
-		else
-			goto exists;
+		} else {
+			if (server->cell == cell)
+				goto exists;
+
+			/* We have the same UUID representing servers in
+			 * different cells.  Append the new server to the list.
+			 */
+			for (;;) {
+				next = rcu_dereference_protected(
+					server->uuid_next,
+					lockdep_is_held(&net->fs_lock.lock));
+				if (!next)
+					break;
+				server = next;
+			}
+			rcu_assign_pointer(server->uuid_next, candidate);
+			candidate->uuid_prev = server;
+			server = candidate;
+			goto added_dup;
+		}
 	}
 
 	server = candidate;
@@ -166,6 +186,7 @@  static struct afs_server *afs_install_server(struct afs_cell *cell,
 	rb_insert_color(&server->uuid_rb, &net->fs_servers);
 	hlist_add_head_rcu(&server->proc_link, &net->fs_proc);
 
+added_dup:
 	write_seqlock(&net->fs_addr_lock);
 	alist = rcu_dereference_protected(server->addresses,
 					  lockdep_is_held(&net->fs_addr_lock.lock));
@@ -453,7 +474,7 @@  static void afs_destroy_server(struct afs_net *net, struct afs_server *server)
  */
 static void afs_gc_servers(struct afs_net *net, struct afs_server *gc_list)
 {
-	struct afs_server *server;
+	struct afs_server *server, *next, *prev;
 	int active;
 
 	while ((server = gc_list)) {
@@ -465,7 +486,26 @@  static void afs_gc_servers(struct afs_net *net, struct afs_server *gc_list)
 		if (active == 0) {
 			trace_afs_server(server, atomic_read(&server->ref),
 					 active, afs_server_trace_gc);
-			rb_erase(&server->uuid_rb, &net->fs_servers);
+			next = rcu_dereference_protected(
+				server->uuid_next, lockdep_is_held(&net->fs_lock.lock));
+			prev = server->uuid_prev;
+			if (!prev) {
+				/* The one at the front is in the tree */
+				if (!next) {
+					rb_erase(&server->uuid_rb, &net->fs_servers);
+				} else {
+					rb_replace_node_rcu(&server->uuid_rb,
+							    &next->uuid_rb,
+							    &net->fs_servers);
+					next->uuid_prev = NULL;
+				}
+			} else {
+				/* This server is not at the front */
+				rcu_assign_pointer(prev->uuid_next, next);
+				if (next)
+					next->uuid_prev = prev;
+			}
+
 			list_del(&server->probe_link);
 			hlist_del_rcu(&server->proc_link);
 			if (!hlist_unhashed(&server->addr4_link))