diff mbox series

[1/4] block: fix use-after-free on cached last_lookup partition

Message ID 20200507085239.1354854-2-ming.lei@redhat.com (mailing list archive)
State New, archived
Headers show
Series block: fix partition use-after-free and optimization | expand

Commit Message

Ming Lei May 7, 2020, 8:52 a.m. UTC
delete_partition() clears the cached last_lookup partition. However the
.last_lookup cache may be overwritten by one IO path after it is cleared
from delete_partition(). Then another IO path may use the cached deleting
partition after hd_struct_free() is called, then use-after-free is triggered
on the cached partition.

Fixes the issue by the following approach:

1) always get the partition's refcount via hd_struct_try_get() before
setting .last_lookup

2) move clearing .last_lookup from delete_partition() to hd_struct_free()
which is the release handle of the partition's percpu-refcount, so that no
IO path can overwrite .last_lookup after it is cleared in hd_struct_free().

It is one candidate approach of Yufen's patch[1] which adds overhead
in fast path by indirect lookup which may introduce one extra cacheline
in IO path. Also this patch relies on percpu-refcount's protection, and
it is easier to understand and verify.

[1] https://lore.kernel.org/linux-block/20200109013551.GB9655@ming.t460p/T/#t

Reported-by: Yufen Yu <yuyufen@huawei.com>
Cc: Christoph Hellwig <hch@infradead.org>
Cc: Hou Tao <houtao1@huawei.com>
Signed-off-by: Ming Lei <ming.lei@redhat.com>
---
 block/blk-core.c        | 12 ------------
 block/genhd.c           |  6 +++++-
 block/partitions/core.c | 12 +++++++++++-
 include/linux/genhd.h   |  1 +
 4 files changed, 17 insertions(+), 14 deletions(-)

Comments

Christoph Hellwig May 7, 2020, 2:16 p.m. UTC | #1
On Thu, May 07, 2020 at 04:52:36PM +0800, Ming Lei wrote:
>  	for (i = 1; i < ptbl->len; i++) {
>  		part = rcu_dereference(ptbl->part[i]);
>  
>  		if (part && sector_in_part(part, sector)) {
> +			if (!hd_struct_try_get(part))
> +				goto exit;

I think this needs the comment that was in blk_account_io_start to
explain the logic.  Also no need for the goto, this can just be a break.

> -	rcu_assign_pointer(ptbl->last_lookup, NULL);
>  	kobject_put(part->holder_dir);
>  	device_del(part_to_dev(part));
>  
> @@ -393,6 +402,7 @@ static struct hd_struct *add_partition(struct gendisk *disk, int partno,
>  	p->nr_sects = len;
>  	p->partno = partno;
>  	p->policy = get_disk_ro(disk);
> +	p->disk = disk;

Do we really need this pointer?  Can't we use part_to_disk in the
free path for some reason?
Ming Lei May 8, 2020, 1:54 a.m. UTC | #2
On Thu, May 07, 2020 at 07:16:26AM -0700, Christoph Hellwig wrote:
> On Thu, May 07, 2020 at 04:52:36PM +0800, Ming Lei wrote:
> >  	for (i = 1; i < ptbl->len; i++) {
> >  		part = rcu_dereference(ptbl->part[i]);
> >  
> >  		if (part && sector_in_part(part, sector)) {
> > +			if (!hd_struct_try_get(part))
> > +				goto exit;
> 
> I think this needs the comment that was in blk_account_io_start to
> explain the logic.  Also no need for the goto, this can just be a break.

OK, not did it because I thought the logic is straightforward.

> 
> > -	rcu_assign_pointer(ptbl->last_lookup, NULL);
> >  	kobject_put(part->holder_dir);
> >  	device_del(part_to_dev(part));
> >  
> > @@ -393,6 +402,7 @@ static struct hd_struct *add_partition(struct gendisk *disk, int partno,
> >  	p->nr_sects = len;
> >  	p->partno = partno;
> >  	p->policy = get_disk_ro(disk);
> > +	p->disk = disk;
> 
> Do we really need this pointer?  Can't we use part_to_disk in the
> free path for some reason?

Looks I did miss this function, :-)

Thanks, 
Ming
diff mbox series

Patch

diff --git a/block/blk-core.c b/block/blk-core.c
index ec50d7e6be21..826a8980997d 100644
--- a/block/blk-core.c
+++ b/block/blk-core.c
@@ -1363,18 +1363,6 @@  void blk_account_io_start(struct request *rq, bool new_io)
 		part_stat_inc(part, merges[rw]);
 	} else {
 		part = disk_map_sector_rcu(rq->rq_disk, blk_rq_pos(rq));
-		if (!hd_struct_try_get(part)) {
-			/*
-			 * The partition is already being removed,
-			 * the request will be accounted on the disk only
-			 *
-			 * We take a reference on disk->part0 although that
-			 * partition will never be deleted, so we can treat
-			 * it as any other partition.
-			 */
-			part = &rq->rq_disk->part0;
-			hd_struct_get(part);
-		}
 		part_inc_in_flight(rq->q, part, rw);
 		rq->part = part;
 	}
diff --git a/block/genhd.c b/block/genhd.c
index c05d509877fa..8b33f0e54356 100644
--- a/block/genhd.c
+++ b/block/genhd.c
@@ -359,17 +359,21 @@  struct hd_struct *disk_map_sector_rcu(struct gendisk *disk, sector_t sector)
 	ptbl = rcu_dereference(disk->part_tbl);
 
 	part = rcu_dereference(ptbl->last_lookup);
-	if (part && sector_in_part(part, sector))
+	if (part && sector_in_part(part, sector) && hd_struct_try_get(part))
 		return part;
 
 	for (i = 1; i < ptbl->len; i++) {
 		part = rcu_dereference(ptbl->part[i]);
 
 		if (part && sector_in_part(part, sector)) {
+			if (!hd_struct_try_get(part))
+				goto exit;
 			rcu_assign_pointer(ptbl->last_lookup, part);
 			return part;
 		}
 	}
+ exit:
+	hd_struct_get(&disk->part0);
 	return &disk->part0;
 }
 
diff --git a/block/partitions/core.c b/block/partitions/core.c
index c085bf85509b..b6cbc9b98426 100644
--- a/block/partitions/core.c
+++ b/block/partitions/core.c
@@ -288,6 +288,11 @@  static void hd_struct_free_work(struct work_struct *work)
 static void hd_struct_free(struct percpu_ref *ref)
 {
 	struct hd_struct *part = container_of(ref, struct hd_struct, ref);
+	struct disk_part_tbl *ptbl =
+		rcu_dereference_protected(part->disk->part_tbl, 1);
+
+	rcu_assign_pointer(ptbl->last_lookup, NULL);
+	put_device(disk_to_dev(part->disk));
 
 	INIT_RCU_WORK(&part->rcu_work, hd_struct_free_work);
 	queue_rcu_work(system_wq, &part->rcu_work);
@@ -309,8 +314,12 @@  void delete_partition(struct gendisk *disk, struct hd_struct *part)
 	struct disk_part_tbl *ptbl =
 		rcu_dereference_protected(disk->part_tbl, 1);
 
+	/*
+	 * make sure disk won't be gone because ->part_tbl is referenced in
+	 * this part's release handler
+	 */
+	get_device(disk_to_dev(disk));
 	rcu_assign_pointer(ptbl->part[part->partno], NULL);
-	rcu_assign_pointer(ptbl->last_lookup, NULL);
 	kobject_put(part->holder_dir);
 	device_del(part_to_dev(part));
 
@@ -393,6 +402,7 @@  static struct hd_struct *add_partition(struct gendisk *disk, int partno,
 	p->nr_sects = len;
 	p->partno = partno;
 	p->policy = get_disk_ro(disk);
+	p->disk = disk;
 
 	if (info) {
 		struct partition_meta_info *pinfo;
diff --git a/include/linux/genhd.h b/include/linux/genhd.h
index f9c226f9546a..c28d1d9bfa72 100644
--- a/include/linux/genhd.h
+++ b/include/linux/genhd.h
@@ -85,6 +85,7 @@  struct hd_struct {
 	struct disk_stats dkstats;
 #endif
 	struct percpu_ref ref;
+	struct gendisk *disk;
 	struct rcu_work rcu_work;
 };