@@ -234,32 +234,25 @@ static inline int arch_test_and_change_bit(unsigned long nr,
}
#ifdef CONFIG_PPC64
-static inline unsigned long
-clear_bit_unlock_return_word(int nr, volatile unsigned long *addr)
+static inline bool arch_xor_unlock_is_negative_byte(unsigned long mask,
+ volatile unsigned long *p)
{
unsigned long old, t;
- unsigned long *p = (unsigned long *)addr + BIT_WORD(nr);
- unsigned long mask = BIT_MASK(nr);
__asm__ __volatile__ (
PPC_RELEASE_BARRIER
"1:" PPC_LLARX "%0,0,%3,0\n"
- "andc %1,%0,%2\n"
+ "xor %1,%0,%2\n"
PPC_STLCX "%1,0,%3\n"
"bne- 1b\n"
: "=&r" (old), "=&r" (t)
: "r" (mask), "r" (p)
: "cc", "memory");
- return old;
+ return (old & BIT_MASK(7)) != 0;
}
-/*
- * This is a special function for mm/filemap.c
- * Bit 7 corresponds to PG_waiters.
- */
-#define arch_clear_bit_unlock_is_negative_byte(nr, addr) \
- (clear_bit_unlock_return_word(nr, addr) & BIT_MASK(7))
+#define arch_xor_unlock_is_negative_byte arch_xor_unlock_is_negative_byte
#endif /* CONFIG_PPC64 */
@@ -94,18 +94,17 @@ arch___clear_bit(unsigned long nr, volatile unsigned long *addr)
asm volatile(__ASM_SIZE(btr) " %1,%0" : : ADDR, "Ir" (nr) : "memory");
}
-static __always_inline bool
-arch_clear_bit_unlock_is_negative_byte(long nr, volatile unsigned long *addr)
+static __always_inline bool arch_xor_unlock_is_negative_byte(unsigned long mask,
+ volatile unsigned long *addr)
{
bool negative;
- asm volatile(LOCK_PREFIX "andb %2,%1"
+ asm volatile(LOCK_PREFIX "xorb %2,%1"
CC_SET(s)
: CC_OUT(s) (negative), WBYTE_ADDR(addr)
- : "ir" ((char) ~(1 << nr)) : "memory");
+ : "iq" ((char)mask) : "memory");
return negative;
}
-#define arch_clear_bit_unlock_is_negative_byte \
- arch_clear_bit_unlock_is_negative_byte
+#define arch_xor_unlock_is_negative_byte arch_xor_unlock_is_negative_byte
static __always_inline void
arch___clear_bit_unlock(long nr, volatile unsigned long *addr)
@@ -58,27 +58,30 @@ static inline bool test_and_set_bit_lock(long nr, volatile unsigned long *addr)
return arch_test_and_set_bit_lock(nr, addr);
}
-#if defined(arch_clear_bit_unlock_is_negative_byte)
+#if defined(arch_xor_unlock_is_negative_byte)
/**
- * clear_bit_unlock_is_negative_byte - Clear a bit in memory and test if bottom
- * byte is negative, for unlock.
- * @nr: the bit to clear
- * @addr: the address to start counting from
+ * xor_unlock_is_negative_byte - XOR a single byte in memory and test if
+ * it is negative, for unlock.
+ * @mask: Change the bits which are set in this mask.
+ * @addr: The address of the word containing the byte to change.
*
+ * Changes some of bits 0-6 in the word pointed to by @addr.
* This operation is atomic and provides release barrier semantics.
+ * Used to optimise some folio operations which are commonly paired
+ * with an unlock or end of writeback. Bit 7 is used as PG_waiters to
+ * indicate whether anybody is waiting for the unlock.
*
- * This is a bit of a one-trick-pony for the filemap code, which clears
- * PG_locked and tests PG_waiters,
+ * Return: Whether the top bit of the byte is set.
*/
-static inline bool
-clear_bit_unlock_is_negative_byte(long nr, volatile unsigned long *addr)
+static inline bool xor_unlock_is_negative_byte(unsigned long mask,
+ volatile unsigned long *addr)
{
kcsan_release();
- instrument_atomic_write(addr + BIT_WORD(nr), sizeof(long));
- return arch_clear_bit_unlock_is_negative_byte(nr, addr);
+ instrument_atomic_write(addr, sizeof(long));
+ return arch_xor_unlock_is_negative_byte(mask, addr);
}
/* Let everybody know we have it. */
-#define clear_bit_unlock_is_negative_byte clear_bit_unlock_is_negative_byte
+#define xor_unlock_is_negative_byte xor_unlock_is_negative_byte
#endif
#endif /* _ASM_GENERIC_BITOPS_INSTRUMENTED_LOCK_H */
@@ -66,27 +66,16 @@ arch___clear_bit_unlock(unsigned int nr, volatile unsigned long *p)
raw_atomic_long_set_release((atomic_long_t *)p, old);
}
-/**
- * arch_clear_bit_unlock_is_negative_byte - Clear a bit in memory and test if bottom
- * byte is negative, for unlock.
- * @nr: the bit to clear
- * @addr: the address to start counting from
- *
- * This is a bit of a one-trick-pony for the filemap code, which clears
- * PG_locked and tests PG_waiters,
- */
-#ifndef arch_clear_bit_unlock_is_negative_byte
-static inline bool arch_clear_bit_unlock_is_negative_byte(unsigned int nr,
- volatile unsigned long *p)
+#ifndef arch_xor_unlock_is_negative_byte
+static inline bool arch_xor_unlock_is_negative_byte(unsigned long mask,
+ volatile unsigned long *p)
{
long old;
- unsigned long mask = BIT_MASK(nr);
- p += BIT_WORD(nr);
- old = raw_atomic_long_fetch_andnot_release(mask, (atomic_long_t *)p);
+ old = raw_atomic_long_fetch_xor_release(mask, (atomic_long_t *)p);
return !!(old & BIT(7));
}
-#define arch_clear_bit_unlock_is_negative_byte arch_clear_bit_unlock_is_negative_byte
+#define arch_xor_unlock_is_negative_byte arch_xor_unlock_is_negative_byte
#endif
#include <asm-generic/bitops/instrumented-lock.h>
@@ -700,10 +700,10 @@ static void test_barrier_nothreads(struct kunit *test)
KCSAN_EXPECT_RW_BARRIER(mutex_lock(&test_mutex), false);
KCSAN_EXPECT_RW_BARRIER(mutex_unlock(&test_mutex), true);
-#ifdef clear_bit_unlock_is_negative_byte
- KCSAN_EXPECT_READ_BARRIER(clear_bit_unlock_is_negative_byte(0, &test_var), true);
- KCSAN_EXPECT_WRITE_BARRIER(clear_bit_unlock_is_negative_byte(0, &test_var), true);
- KCSAN_EXPECT_RW_BARRIER(clear_bit_unlock_is_negative_byte(0, &test_var), true);
+#ifdef xor_unlock_is_negative_byte
+ KCSAN_EXPECT_READ_BARRIER(xor_unlock_is_negative_byte(1, &test_var), true);
+ KCSAN_EXPECT_WRITE_BARRIER(xor_unlock_is_negative_byte(1, &test_var), true);
+ KCSAN_EXPECT_RW_BARRIER(xor_unlock_is_negative_byte(1, &test_var), true);
#endif
kcsan_nestable_atomic_end();
}
@@ -228,10 +228,10 @@ static bool __init test_barrier(void)
spin_lock(&test_spinlock);
KCSAN_CHECK_RW_BARRIER(spin_unlock(&test_spinlock));
-#ifdef clear_bit_unlock_is_negative_byte
- KCSAN_CHECK_RW_BARRIER(clear_bit_unlock_is_negative_byte(0, &test_var));
- KCSAN_CHECK_READ_BARRIER(clear_bit_unlock_is_negative_byte(0, &test_var));
- KCSAN_CHECK_WRITE_BARRIER(clear_bit_unlock_is_negative_byte(0, &test_var));
+#ifdef xor_unlock_is_negative_byte
+ KCSAN_CHECK_RW_BARRIER(xor_unlock_is_negative_byte(1, &test_var));
+ KCSAN_CHECK_READ_BARRIER(xor_unlock_is_negative_byte(1, &test_var));
+ KCSAN_CHECK_WRITE_BARRIER(xor_unlock_is_negative_byte(1, &test_var));
#endif
kcsan_nestable_atomic_end();
@@ -1484,6 +1484,16 @@ void folio_add_wait_queue(struct folio *folio, wait_queue_entry_t *waiter)
}
EXPORT_SYMBOL_GPL(folio_add_wait_queue);
+#ifdef xor_unlock_is_negative_byte
+/*
+ * Clear bit @nr by XORing it out.  XOR only clears a bit that is currently
+ * set, so this is valid solely because the caller holds the lock bit.
+ * Arguments are parenthesised so that expression arguments expand correctly.
+ */
+#define clear_bit_unlock_is_negative_byte(nr, p)	\
+	xor_unlock_is_negative_byte(1UL << (nr), (p))
+#endif
+
#ifndef clear_bit_unlock_is_negative_byte
/*
@@ -1098,9 +1098,10 @@ static void kasan_bitops_test_and_modify(struct kunit *test, int nr, void *addr)
KUNIT_EXPECT_KASAN_FAIL(test, __test_and_change_bit(nr, addr));
KUNIT_EXPECT_KASAN_FAIL(test, kasan_int_result = test_bit(nr, addr));
-#if defined(clear_bit_unlock_is_negative_byte)
- KUNIT_EXPECT_KASAN_FAIL(test, kasan_int_result =
- clear_bit_unlock_is_negative_byte(nr, addr));
+#if defined(xor_unlock_is_negative_byte)
+ if (nr < 7)
+ KUNIT_EXPECT_KASAN_FAIL(test, kasan_int_result =
+ xor_unlock_is_negative_byte(1 << nr, addr));
#endif
}
Replace clear_bit_unlock_is_negative_byte() with xor_unlock_is_negative_byte(). We have a few places that like to lock a folio, set a flag and unlock it again. Allow for the possibility of combining the latter two operations for efficiency. We are guaranteed that the caller holds the lock, so it is safe to unlock it with the xor. The caller must guarantee that nobody else will set the flag without holding the lock; it is not safe to do this with the PG_dirty flag, for example.

Signed-off-by: Matthew Wilcox (Oracle) <willy@infradead.org>
---
 arch/powerpc/include/asm/bitops.h | 17 ++++--------
 arch/x86/include/asm/bitops.h | 11 ++++----
 .../asm-generic/bitops/instrumented-lock.h | 27 ++++++++++---------
 include/asm-generic/bitops/lock.h | 21 ++++-----------
 kernel/kcsan/kcsan_test.c | 8 +++---
 kernel/kcsan/selftest.c | 8 +++---
 mm/filemap.c | 5 ++++
 mm/kasan/kasan_test.c | 7 ++---
 8 files changed, 47 insertions(+), 57 deletions(-)