diff mbox series

afs: Fix use-after-free due to get/remove race in volume tree

Message ID 1659974.1703167051@warthog.procyon.org.uk (mailing list archive)
State New
Headers show
Series afs: Fix use-after-free due to get/remove race in volume tree | expand

Commit Message

David Howells Dec. 21, 2023, 1:57 p.m. UTC
Hi Linus,

Could you apply this fix please?

David
---
When an afs_volume struct is put, its refcount is reduced to 0 before the
cell->volume_lock is taken and the volume removed from the cell->volumes
tree.  Unfortunately, this means that the lookup code can race and see a
volume with a zero ref in the tree, resulting in a use-after-free:

        refcount_t: addition on 0; use-after-free.
        WARNING: CPU: 3 PID: 130782 at lib/refcount.c:25 refcount_warn_saturate+0x7a/0xda
        ...
        RIP: 0010:refcount_warn_saturate+0x7a/0xda
        ...
        Call Trace:
         <TASK>
         ? __warn+0x8b/0xf5
         ? report_bug+0xbf/0x11b
         ? refcount_warn_saturate+0x7a/0xda
         ? handle_bug+0x3c/0x5b
         ? exc_invalid_op+0x13/0x59
         ? asm_exc_invalid_op+0x16/0x20
         ? refcount_warn_saturate+0x7a/0xda
         ? refcount_warn_saturate+0x7a/0xda
         afs_get_volume+0x3d/0x55
         afs_create_volume+0x126/0x1de
         afs_validate_fc+0xfe/0x130
         afs_get_tree+0x20/0x2e5
         vfs_get_tree+0x1d/0xc9
         do_new_mount+0x13b/0x22e
         do_mount+0x5d/0x8a
         __do_sys_mount+0x100/0x12a
         do_syscall_64+0x3a/0x94
         entry_SYSCALL_64_after_hwframe+0x62/0x6a

Fix this by:

 (1) When putting, use a flag to indicate if the volume has been removed
     from the tree and skip the rb_erase if it has.

 (2) When looking up, use a conditional ref increment and if it fails
     because the refcount is 0, replace the node in the tree and set the
     removal flag.

Fixes: 20325960f875 ("afs: Reorganise volume and server trees to be rooted on the cell")
Signed-off-by: David Howells <dhowells@redhat.com>
Reviewed-by: Jeffrey Altman <jaltman@auristor.com>
cc: Marc Dionne <marc.dionne@auristor.com>
cc: linux-afs@lists.infradead.org
---
 fs/afs/internal.h |    2 ++
 fs/afs/volume.c   |   26 +++++++++++++++++++++++---
 2 files changed, 25 insertions(+), 3 deletions(-)
diff mbox series

Patch

diff --git a/fs/afs/internal.h b/fs/afs/internal.h
index a812952be1c9..7385d62c8cf5 100644
--- a/fs/afs/internal.h
+++ b/fs/afs/internal.h
@@ -586,6 +586,7 @@  struct afs_volume {
 #define AFS_VOLUME_OFFLINE	4	/* - T if volume offline notice given */
 #define AFS_VOLUME_BUSY		5	/* - T if volume busy notice given */
 #define AFS_VOLUME_MAYBE_NO_IBULK 6	/* - T if some servers don't have InlineBulkStatus */
+#define AFS_VOLUME_RM_TREE	7	/* - Set if volume removed from cell->volumes */
 #ifdef CONFIG_AFS_FSCACHE
 	struct fscache_volume	*cache;		/* Caching cookie */
 #endif
@@ -1513,6 +1514,7 @@  extern struct afs_vlserver_list *afs_extract_vlserver_list(struct afs_cell *,
 extern struct afs_volume *afs_create_volume(struct afs_fs_context *);
 extern int afs_activate_volume(struct afs_volume *);
 extern void afs_deactivate_volume(struct afs_volume *);
+bool afs_try_get_volume(struct afs_volume *volume, enum afs_volume_trace reason);
 extern struct afs_volume *afs_get_volume(struct afs_volume *, enum afs_volume_trace);
 extern void afs_put_volume(struct afs_net *, struct afs_volume *, enum afs_volume_trace);
 extern int afs_check_volume_status(struct afs_volume *, struct afs_operation *);
diff --git a/fs/afs/volume.c b/fs/afs/volume.c
index 29d483c80281..115c081a8e2c 100644
--- a/fs/afs/volume.c
+++ b/fs/afs/volume.c
@@ -32,8 +32,13 @@  static struct afs_volume *afs_insert_volume_into_cell(struct afs_cell *cell,
 		} else if (p->vid > volume->vid) {
 			pp = &(*pp)->rb_right;
 		} else {
-			volume = afs_get_volume(p, afs_volume_trace_get_cell_insert);
-			goto found;
+			if (afs_try_get_volume(p, afs_volume_trace_get_cell_insert)) {
+				volume = p;
+				goto found;
+			}
+
+			set_bit(AFS_VOLUME_RM_TREE, &volume->flags);
+			rb_replace_node_rcu(&p->cell_node, &volume->cell_node, &cell->volumes);
 		}
 	}
 
@@ -56,7 +61,8 @@  static void afs_remove_volume_from_cell(struct afs_volume *volume)
 				 afs_volume_trace_remove);
 		write_seqlock(&cell->volume_lock);
 		hlist_del_rcu(&volume->proc_link);
-		rb_erase(&volume->cell_node, &cell->volumes);
+		if (!test_and_set_bit(AFS_VOLUME_RM_TREE, &volume->flags))
+			rb_erase(&volume->cell_node, &cell->volumes);
 		write_sequnlock(&cell->volume_lock);
 	}
 }
@@ -231,6 +237,20 @@  static void afs_destroy_volume(struct afs_net *net, struct afs_volume *volume)
 	_leave(" [destroyed]");
 }
 
+/*
+ * Try to get a reference on a volume record.
+ */
+bool afs_try_get_volume(struct afs_volume *volume, enum afs_volume_trace reason)
+{
+	int r;
+
+	if (__refcount_inc_not_zero(&volume->ref, &r)) {
+		trace_afs_volume(volume->vid, r + 1, reason);
+		return true;
+	}
+	return false;
+}
+
 /*
  * Get a reference on a volume record.
  */