diff mbox series

[27/28] rwsem: introduce down/up_write_non_owner

Message ID 20191031234618.15403-28-david@fromorbit.com (mailing list archive)
State Deferred, archived
Headers show
Series mm, xfs: non-blocking inode reclaim | expand

Commit Message

Dave Chinner Oct. 31, 2019, 11:46 p.m. UTC
From: Dave Chinner <dchinner@redhat.com>

To serialise freeing of inodes against unreferenced lookups, XFS
wants to hold the inode locked from the reclaim context that queues
it for RCU freeing until the grace period that actually frees the
inode. This means the inode is being unlocked by a context that
didn't lock it, and that makes lockdep unhappy.

This is a very special use case - inodes can be found once marked
for reclaim because of lockless RCU lookups, so we need some
synchronisation that will prevent such inodes from being locked.  To
access an unreferenced inode we need to take the ILOCK rwsem without
blocking and still under rcu_read_lock() to hold off reclaim of the
inode. If the inode has been reclaimed and is queued for freeing,
holding the ILOCK rwsem until the RCU grace period expires means
no lookup that finds it in that grace period will be able to lock it
and use it. Once the grace period expires we are guaranteed that
nothing will ever find the inode again, and we can unlock it and
free it.

This requires down_write_trylock_non_owner() in the reclaim context
before we mark the inode as reclaimed and run call_rcu() to free it.
It requires up_write_non_owner() in the RCU callback before we free
the inode.

Signed-off-by: Dave Chinner <dchinner@redhat.com>
---
 include/linux/rwsem.h  |  6 ++++++
 kernel/locking/rwsem.c | 23 +++++++++++++++++++++++
 2 files changed, 29 insertions(+)
diff mbox series

Patch

diff --git a/include/linux/rwsem.h b/include/linux/rwsem.h
index 00d6054687dd..e557bd994d0e 100644
--- a/include/linux/rwsem.h
+++ b/include/linux/rwsem.h
@@ -191,6 +191,9 @@  do {								\
  */
 extern void down_read_non_owner(struct rw_semaphore *sem);
 extern void up_read_non_owner(struct rw_semaphore *sem);
+extern void down_write_non_owner(struct rw_semaphore *sem);
+extern int down_write_trylock_non_owner(struct rw_semaphore *sem);
+extern void up_write_non_owner(struct rw_semaphore *sem);
 #else
 # define down_read_nested(sem, subclass)		down_read(sem)
 # define down_write_nest_lock(sem, nest_lock)	down_write(sem)
@@ -198,6 +201,9 @@  extern void up_read_non_owner(struct rw_semaphore *sem);
 # define down_write_killable_nested(sem, subclass)	down_write_killable(sem)
 # define down_read_non_owner(sem)		down_read(sem)
 # define up_read_non_owner(sem)			up_read(sem)
+# define down_write_non_owner(sem)		down_write(sem)
+# define down_write_trylock_non_owner(sem)	down_write_trylock(sem)
+# define up_write_non_owner(sem)		up_write(sem)
 #endif
 
 #endif /* _LINUX_RWSEM_H */
diff --git a/kernel/locking/rwsem.c b/kernel/locking/rwsem.c
index eef04551eae7..36162d42fe09 100644
--- a/kernel/locking/rwsem.c
+++ b/kernel/locking/rwsem.c
@@ -1654,4 +1654,27 @@  void up_read_non_owner(struct rw_semaphore *sem)
 }
 EXPORT_SYMBOL(up_read_non_owner);
 
+void down_write_non_owner(struct rw_semaphore *sem)	/* take @sem for writing; intended to pair with up_write_non_owner() */
+{
+	might_sleep();
+	__down_write(sem);	/* NOTE(review): called directly; no lockdep acquire annotation is visible in this function */
+}
+EXPORT_SYMBOL(down_write_non_owner);
+
+/*
+ * non-owner trylock for writing -- never blocks; returns 1 if acquired, 0 if contended
+ */
+int down_write_trylock_non_owner(struct rw_semaphore *sem)
+{
+	return __down_write_trylock(sem);
+}
+EXPORT_SYMBOL(down_write_trylock_non_owner);
+
+void up_write_non_owner(struct rw_semaphore *sem)	/* release a write-held @sem from a context that did not take it */
+{
+	rwsem_set_owner(sem);	/* NOTE(review): presumably records the releasing context as owner before __up_write() -- confirm */
+	__up_write(sem);
+}
+EXPORT_SYMBOL(up_write_non_owner);
+
 #endif