diff mbox

[v2,4/4] lockdep: Assign a lock_class per gendisk used for wait_for_completion()

Message ID 1508396607-25362-5-git-send-email-byungchul.park@lge.com (mailing list archive)
State New, archived
Headers show

Commit Message

Byungchul Park Oct. 19, 2017, 7:03 a.m. UTC
Darrick and Dave Chinner posted the following warning:

> ======================================================
> WARNING: possible circular locking dependency detected
> 4.14.0-rc1-fixes #1 Tainted: G        W
> ------------------------------------------------------
> loop0/31693 is trying to acquire lock:
>  (&(&ip->i_mmaplock)->mr_lock){++++}, at: [<ffffffffa00f1b0c>] xfs_ilock+0x23c/0x330 [xfs]
>
> but now in release context of a crosslock acquired at the following:
>  ((complete)&ret.event){+.+.}, at: [<ffffffff81326c1f>] submit_bio_wait+0x7f/0xb0
>
> which lock already depends on the new lock.
>
> the existing dependency chain (in reverse order) is:
>
> -> #2 ((complete)&ret.event){+.+.}:
>        lock_acquire+0xab/0x200
>        wait_for_completion_io+0x4e/0x1a0
>        submit_bio_wait+0x7f/0xb0
>        blkdev_issue_zeroout+0x71/0xa0
>        xfs_bmapi_convert_unwritten+0x11f/0x1d0 [xfs]
>        xfs_bmapi_write+0x374/0x11f0 [xfs]
>        xfs_iomap_write_direct+0x2ac/0x430 [xfs]
>        xfs_file_iomap_begin+0x20d/0xd50 [xfs]
>        iomap_apply+0x43/0xe0
>        dax_iomap_rw+0x89/0xf0
>        xfs_file_dax_write+0xcc/0x220 [xfs]
>        xfs_file_write_iter+0xf0/0x130 [xfs]
>        __vfs_write+0xd9/0x150
>        vfs_write+0xc8/0x1c0
>        SyS_write+0x45/0xa0
>        entry_SYSCALL_64_fastpath+0x1f/0xbe
>
> -> #1 (&xfs_nondir_ilock_class){++++}:
>        lock_acquire+0xab/0x200
>        down_write_nested+0x4a/0xb0
>        xfs_ilock+0x263/0x330 [xfs]
>        xfs_setattr_size+0x152/0x370 [xfs]
>        xfs_vn_setattr+0x6b/0x90 [xfs]
>        notify_change+0x27d/0x3f0
>        do_truncate+0x5b/0x90
>        path_openat+0x237/0xa90
>        do_filp_open+0x8a/0xf0
>        do_sys_open+0x11c/0x1f0
>        entry_SYSCALL_64_fastpath+0x1f/0xbe
>
> -> #0 (&(&ip->i_mmaplock)->mr_lock){++++}:
>        up_write+0x1c/0x40
>        xfs_iunlock+0x1d0/0x310 [xfs]
>        xfs_file_fallocate+0x8a/0x310 [xfs]
>        loop_queue_work+0xb7/0x8d0
>        kthread_worker_fn+0xb9/0x1f0
>
> Chain exists of:
>   &(&ip->i_mmaplock)->mr_lock --> &xfs_nondir_ilock_class --> (complete)&ret.event
>
>  Possible unsafe locking scenario by crosslock:
>
>        CPU0                    CPU1
>        ----                    ----
>   lock(&xfs_nondir_ilock_class);
>   lock((complete)&ret.event);
>                                lock(&(&ip->i_mmaplock)->mr_lock);
>                                unlock((complete)&ret.event);
>
>                *** DEADLOCK ***

The warning is a false positive, caused by the fact that all
wait_for_completion()s in submit_bio_wait() are waiting with the same
lock class.

However, some bios have nothing to do with others, for example, the case
might happen while using loop devices, between bios of an upper device
and a lower device(=loop device).

The safest way to assign different lock classes to different devices is
to do it for each gendisk. In other words, this patch assigns a
lockdep_map per gendisk and uses it when initializing completion in
submit_bio_wait().

Of course, it might be too conservative. But, making it safest for now
and extended by block layer experts later is good, atm.

Signed-off-by: Byungchul Park <byungchul.park@lge.com>
---
 block/bio.c           |  2 +-
 block/genhd.c         | 13 +++++--------
 include/linux/genhd.h | 22 ++++++++++++++++++++--
 3 files changed, 26 insertions(+), 11 deletions(-)

Comments

kernel test robot Oct. 21, 2017, 7:17 p.m. UTC | #1
Hi Byungchul,

[auto build test ERROR on linus/master]
[also build test ERROR on v4.14-rc5 next-20171018]
[if your patch is applied to the wrong git tree, please drop us a note to help improve the system]

url:    https://github.com/0day-ci/linux/commits/Byungchul-Park/Fix-false-positives-by-cross-release-feature/20171022-022121
config: i386-randconfig-x002-201743 (attached as .config)
compiler: gcc-6 (Debian 6.2.0-3) 6.2.0 20160901
reproduce:
        # save the attached .config to linux build tree
        make ARCH=i386 

All errors (new ones prefixed by >>):

   block/genhd.c: In function '__alloc_disk_node':
>> block/genhd.c:1407:24: error: 'struct gendisk' has no member named 'lockdep_map'
     lockdep_init_map(&disk->lockdep_map, lock_name, key, 0);
                           ^~

vim +1407 block/genhd.c

  1356	
  1357	struct gendisk *__alloc_disk_node(int minors, int node_id, struct lock_class_key *key, const char *lock_name)
  1358	{
  1359		struct gendisk *disk;
  1360		struct disk_part_tbl *ptbl;
  1361	
  1362		if (minors > DISK_MAX_PARTS) {
  1363			printk(KERN_ERR
  1364				"block: can't allocated more than %d partitions\n",
  1365				DISK_MAX_PARTS);
  1366			minors = DISK_MAX_PARTS;
  1367		}
  1368	
  1369		disk = kzalloc_node(sizeof(struct gendisk), GFP_KERNEL, node_id);
  1370		if (disk) {
  1371			if (!init_part_stats(&disk->part0)) {
  1372				kfree(disk);
  1373				return NULL;
  1374			}
  1375			disk->node_id = node_id;
  1376			if (disk_expand_part_tbl(disk, 0)) {
  1377				free_part_stats(&disk->part0);
  1378				kfree(disk);
  1379				return NULL;
  1380			}
  1381			ptbl = rcu_dereference_protected(disk->part_tbl, 1);
  1382			rcu_assign_pointer(ptbl->part[0], &disk->part0);
  1383	
  1384			/*
  1385			 * set_capacity() and get_capacity() currently don't use
  1386			 * seqcounter to read/update the part0->nr_sects. Still init
  1387			 * the counter as we can read the sectors in IO submission
  1388			 * patch using seqence counters.
  1389			 *
  1390			 * TODO: Ideally set_capacity() and get_capacity() should be
  1391			 * converted to make use of bd_mutex and sequence counters.
  1392			 */
  1393			seqcount_init(&disk->part0.nr_sects_seq);
  1394			if (hd_ref_init(&disk->part0)) {
  1395				hd_free_part(&disk->part0);
  1396				kfree(disk);
  1397				return NULL;
  1398			}
  1399	
  1400			disk->minors = minors;
  1401			rand_initialize_disk(disk);
  1402			disk_to_dev(disk)->class = &block_class;
  1403			disk_to_dev(disk)->type = &disk_type;
  1404			device_initialize(disk_to_dev(disk));
  1405		}
  1406	
> 1407		lockdep_init_map(&disk->lockdep_map, lock_name, key, 0);
  1408	
  1409		return disk;
  1410	}
  1411	EXPORT_SYMBOL(__alloc_disk_node);
  1412	

---
0-DAY kernel test infrastructure                Open Source Technology Center
https://lists.01.org/pipermail/kbuild-all                   Intel Corporation
diff mbox

Patch

diff --git a/block/bio.c b/block/bio.c
index 101c2a9..6dd640d 100644
--- a/block/bio.c
+++ b/block/bio.c
@@ -945,7 +945,7 @@  int submit_bio_wait(struct bio *bio)
 {
 	struct submit_bio_ret ret;
 
-	init_completion(&ret.event);
+	init_completion_with_map(&ret.event, &bio->bi_disk->lockdep_map);
 	bio->bi_private = &ret;
 	bio->bi_end_io = submit_bio_wait_endio;
 	bio->bi_opf |= REQ_SYNC;
diff --git a/block/genhd.c b/block/genhd.c
index dd305c6..f195d22 100644
--- a/block/genhd.c
+++ b/block/genhd.c
@@ -1354,13 +1354,7 @@  dev_t blk_lookup_devt(const char *name, int partno)
 }
 EXPORT_SYMBOL(blk_lookup_devt);
 
-struct gendisk *alloc_disk(int minors)
-{
-	return alloc_disk_node(minors, NUMA_NO_NODE);
-}
-EXPORT_SYMBOL(alloc_disk);
-
-struct gendisk *alloc_disk_node(int minors, int node_id)
+struct gendisk *__alloc_disk_node(int minors, int node_id, struct lock_class_key *key, const char *lock_name)
 {
 	struct gendisk *disk;
 	struct disk_part_tbl *ptbl;
@@ -1409,9 +1403,12 @@  struct gendisk *alloc_disk_node(int minors, int node_id)
 		disk_to_dev(disk)->type = &disk_type;
 		device_initialize(disk_to_dev(disk));
 	}
+
+	lockdep_init_map(&disk->lockdep_map, lock_name, key, 0);
+
 	return disk;
 }
-EXPORT_SYMBOL(alloc_disk_node);
+EXPORT_SYMBOL(__alloc_disk_node);
 
 struct kobject *get_disk(struct gendisk *disk)
 {
diff --git a/include/linux/genhd.h b/include/linux/genhd.h
index 6d85a75..9832e3c 100644
--- a/include/linux/genhd.h
+++ b/include/linux/genhd.h
@@ -206,6 +206,9 @@  struct gendisk {
 #endif	/* CONFIG_BLK_DEV_INTEGRITY */
 	int node_id;
 	struct badblocks *bb;
+#ifdef CONFIG_LOCKDEP_COMPLETIONS
+	struct lockdep_map lockdep_map;
+#endif
 };
 
 static inline struct gendisk *part_to_disk(struct hd_struct *part)
@@ -590,8 +593,7 @@  extern struct hd_struct * __must_check add_partition(struct gendisk *disk,
 extern void delete_partition(struct gendisk *, int);
 extern void printk_all_partitions(void);
 
-extern struct gendisk *alloc_disk_node(int minors, int node_id);
-extern struct gendisk *alloc_disk(int minors);
+extern struct gendisk *__alloc_disk_node(int minors, int node_id, struct lock_class_key *key, const char *lock_name);
 extern struct kobject *get_disk(struct gendisk *disk);
 extern void put_disk(struct gendisk *disk);
 extern void blk_register_region(dev_t devt, unsigned long range,
@@ -615,6 +617,22 @@  extern ssize_t part_fail_store(struct device *dev,
 			       const char *buf, size_t count);
 #endif /* CONFIG_FAIL_MAKE_REQUEST */
 
+#ifdef CONFIG_LOCKDEP_COMPLETIONS
+#define alloc_disk_node(m, id) \
+({									\
+	static struct lock_class_key __key;				\
+	const char *__lock_name;					\
+									\
+	__lock_name = "(complete)"#m"("#id")";				\
+									\
+	__alloc_disk_node(m, id, &__key, __lock_name);			\
+})
+#else
+#define alloc_disk_node(m, id)	__alloc_disk_node(m, id, NULL, NULL)
+#endif
+
+#define alloc_disk(m)		alloc_disk_node(m, NUMA_NO_NODE)
+
 static inline int hd_ref_init(struct hd_struct *part)
 {
 	if (percpu_ref_init(&part->ref, __delete_partition, 0,