@@ -58,19 +58,99 @@ static void zram_free_page(struct zram *zram, size_t index);
static int zram_read_from_zspool(struct zram *zram, struct page *page,
u32 index);
-static int zram_slot_trylock(struct zram *zram, u32 index)
+static void zram_slot_lock_init(struct zram *zram, u32 index)
{
- return spin_trylock(&zram->table[index].lock);
+#ifdef CONFIG_DEBUG_LOCK_ALLOC
+ lockdep_init_map(&zram->table[index].dep_map,
+ "zram->table[index].lock",
+ &zram->lock_class, 0);
+#endif
+}
+
+#ifdef CONFIG_DEBUG_LOCK_ALLOC
+static inline bool __slot_trylock(struct zram *zram, u32 index)
+{
+ struct lockdep_map *dep_map = &zram->table[index].dep_map;
+ unsigned long *lock = &zram->table[index].flags;
+
+ if (!test_and_set_bit_lock(ZRAM_ENTRY_LOCK, lock)) {
+ mutex_acquire(dep_map, 0, 1, _RET_IP_);
+ lock_acquired(dep_map, _RET_IP_);
+ return true;
+ }
+
+ lock_contended(dep_map, _RET_IP_);
+ return false;
+}
+
+static inline void __slot_lock(struct zram *zram, u32 index)
+{
+ struct lockdep_map *dep_map = &zram->table[index].dep_map;
+ unsigned long *lock = &zram->table[index].flags;
+
+ mutex_acquire(dep_map, 0, 0, _RET_IP_);
+ wait_on_bit_lock(lock, ZRAM_ENTRY_LOCK, TASK_UNINTERRUPTIBLE);
+ lock_acquired(dep_map, _RET_IP_);
+}
+
+static inline void __slot_unlock(struct zram *zram, u32 index)
+{
+ struct lockdep_map *dep_map = &zram->table[index].dep_map;
+ unsigned long *lock = &zram->table[index].flags;
+
+ mutex_release(dep_map, _RET_IP_);
+ clear_and_wake_up_bit(ZRAM_ENTRY_LOCK, lock);
+}
+#else
+static inline bool __slot_trylock(struct zram *zram, u32 index)
+{
+ unsigned long *lock = &zram->table[index].flags;
+
+ if (!test_and_set_bit_lock(ZRAM_ENTRY_LOCK, lock))
+ return true;
+ return false;
+}
+
+static inline void __slot_lock(struct zram *zram, u32 index)
+{
+ unsigned long *lock = &zram->table[index].flags;
+
+ wait_on_bit_lock(lock, ZRAM_ENTRY_LOCK, TASK_UNINTERRUPTIBLE);
+}
+
+static inline void __slot_unlock(struct zram *zram, u32 index)
+{
+ unsigned long *lock = &zram->table[index].flags;
+
+ clear_and_wake_up_bit(ZRAM_ENTRY_LOCK, lock);
+}
+#endif /* CONFIG_DEBUG_LOCK_ALLOC */
+
+/*
+ * entry locking rules:
+ *
+ * 1) Lock is exclusive
+ *
+ * 2) lock() function can sleep waiting for the lock
+ *
+ * 3) Lock owner can sleep
+ *
+ * 4) Use TRY lock variant when in atomic context
+ *    - must check return value and handle locking failures
+ */
+static __must_check bool zram_slot_trylock(struct zram *zram, u32 index)
+{
+ return __slot_trylock(zram, index);
}
static void zram_slot_lock(struct zram *zram, u32 index)
{
- spin_lock(&zram->table[index].lock);
+ __slot_lock(zram, index);
}
static void zram_slot_unlock(struct zram *zram, u32 index)
{
- spin_unlock(&zram->table[index].lock);
+ __slot_unlock(zram, index);
}
static inline bool init_done(struct zram *zram)
@@ -93,7 +173,6 @@ static void zram_set_handle(struct zram *zram, u32 index, unsigned long handle)
zram->table[index].handle = handle;
}
-/* flag operations require table entry bit_spin_lock() being held */
static bool zram_test_flag(struct zram *zram, u32 index,
enum zram_pageflags flag)
{
@@ -1473,15 +1552,11 @@ static bool zram_meta_alloc(struct zram *zram, u64 disksize)
huge_class_size = zs_huge_class_size(zram->mem_pool);
for (index = 0; index < num_pages; index++)
- spin_lock_init(&zram->table[index].lock);
+ zram_slot_lock_init(zram, index);
+
return true;
}
-/*
- * To protect concurrent access to the same index entry,
- * caller should hold this table index entry's bit_spinlock to
- * indicate this index entry is accessing.
- */
static void zram_free_page(struct zram *zram, size_t index)
{
unsigned long handle;
@@ -2625,6 +2700,10 @@ static int zram_add(void)
if (ret)
goto out_cleanup_disk;
+#ifdef CONFIG_DEBUG_LOCK_ALLOC
+ lockdep_register_key(&zram->lock_class);
+#endif
+
zram_debugfs_register(zram);
pr_info("Added device: %s\n", zram->disk->disk_name);
return device_id;
@@ -2681,6 +2760,10 @@ static int zram_remove(struct zram *zram)
*/
zram_reset_device(zram);
+#ifdef CONFIG_DEBUG_LOCK_ALLOC
+ lockdep_unregister_key(&zram->lock_class);
+#endif
+
put_disk(zram->disk);
kfree(zram);
return 0;
@@ -28,7 +28,6 @@
#define ZRAM_SECTOR_PER_LOGICAL_BLOCK \
(1 << (ZRAM_LOGICAL_BLOCK_SHIFT - SECTOR_SHIFT))
-
/*
* ZRAM is mainly used for memory efficiency so we want to keep memory
* footprint small and thus squeeze size and zram pageflags into a flags
@@ -46,6 +45,7 @@
/* Flags for zram pages (table[page_no].flags) */
enum zram_pageflags {
ZRAM_SAME = ZRAM_FLAG_SHIFT, /* Page consists the same element */
+ ZRAM_ENTRY_LOCK, /* entry access lock bit */
ZRAM_WB, /* page is stored on backing_device */
ZRAM_PP_SLOT, /* Selected for post-processing */
ZRAM_HUGE, /* Incompressible page */
@@ -58,13 +58,18 @@ enum zram_pageflags {
__NR_ZRAM_PAGEFLAGS,
};
-/*-- Data structures */
-
-/* Allocated for each disk page */
+/*
+ * Allocated for each disk page. We use a bit-lock (the ZRAM_ENTRY_LOCK
+ * bit of flags) to save memory: there can be a large number of entries,
+ * and standard locking primitives (e.g. a mutex) would significantly
+ * increase the sizeof() of each entry and hence of the meta table.
+ */
struct zram_table_entry {
unsigned long handle;
- unsigned int flags;
- spinlock_t lock;
+ unsigned long flags;
+#ifdef CONFIG_DEBUG_LOCK_ALLOC
+ struct lockdep_map dep_map;
+#endif
#ifdef CONFIG_ZRAM_TRACK_ENTRY_ACTIME
ktime_t ac_time;
#endif
@@ -137,5 +142,8 @@ struct zram {
struct dentry *debugfs_dir;
#endif
atomic_t pp_in_progress;
+#ifdef CONFIG_DEBUG_LOCK_ALLOC
+ struct lock_class_key lock_class;
+#endif
};
#endif
Concurrent modifications of meta table entries are currently handled by a per-entry spin-lock. This has a number of shortcomings.

First, it imposes atomic requirements on compression backends. zram can call both zcomp_compress() and zcomp_decompress() under the entry spin-lock, which implies that we can only use compression algorithms that don't schedule/sleep/wait during compression and decompression. This, for instance, makes it impossible to use some of the ASYNC compression algorithm implementations (H/W compression, etc.).

Second, it can potentially trigger watchdogs. For example, entry re-compression with secondary algorithms is performed under the entry spin-lock. Given that we chain secondary compression algorithms, and that some of them can be configured for the best compression ratio (and the worst compression speed), zram can stay under the spin-lock for quite some time.

Having a per-entry mutex (or, for instance, an rw-semaphore) would significantly increase the sizeof() of each entry and hence of the meta table. Therefore entry locking returns to bit locking, as before; however, this time it is also preempt-rt friendly, because it waits on the bit instead of spinning on it. Lock owners are now also permitted to schedule, which is a first step on the path to making zram non-atomic.

Signed-off-by: Sergey Senozhatsky <senozhatsky@chromium.org>
---
 drivers/block/zram/zram_drv.c | 105 ++++++++++++++++++++++++++++++----
 drivers/block/zram/zram_drv.h |  20 +++++--
 2 files changed, 108 insertions(+), 17 deletions(-)
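
For illustration, below is a minimal caller-side sketch (not part of this patch; the zram_example_*() helpers are hypothetical) of how the new slot lock is intended to be used, following the locking rules documented above: the plain lock in sleepable context, the TRY variant in atomic context.

static void zram_example_slot_op(struct zram *zram, u32 index)
{
	/* Process context: zram_slot_lock() waits on the bit and may sleep. */
	zram_slot_lock(zram, index);

	/* ... access zram->table[index]; the lock owner may also sleep ... */

	zram_slot_unlock(zram, index);
}

static bool zram_example_slot_op_atomic(struct zram *zram, u32 index)
{
	/* Atomic context: only the TRY variant may be used. */
	if (!zram_slot_trylock(zram, index))
		return false;	/* caller must handle the locking failure */

	/* ... access zram->table[index]; no sleeping in atomic context ... */

	zram_slot_unlock(zram, index);
	return true;
}

With CONFIG_DEBUG_LOCK_ALLOC enabled, both paths go through the mutex_acquire()/mutex_release() annotations on the per-device lock_class, so lockdep tracks the bit-lock like a regular sleeping lock.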