diff mbox

[08/29] drivers, md: convert mddev.active from atomic_t to refcount_t

Message ID 1488810076-3754-9-git-send-email-elena.reshetova@intel.com (mailing list archive)
State Superseded
Headers show

Commit Message

Reshetova, Elena March 6, 2017, 2:20 p.m. UTC
refcount_t type and corresponding API should be
used instead of atomic_t when the variable is used as
a reference counter. This allows to avoid accidental
refcounter overflows that might lead to use-after-free
situations.

Signed-off-by: Elena Reshetova <elena.reshetova@intel.com>
Signed-off-by: Hans Liljestrand <ishkamiel@gmail.com>
Signed-off-by: Kees Cook <keescook@chromium.org>
Signed-off-by: David Windsor <dwindsor@gmail.com>
---
 drivers/md/md.c | 6 +++---
 drivers/md/md.h | 3 ++-
 2 files changed, 5 insertions(+), 4 deletions(-)

Comments

Shaohua Li March 7, 2017, 7:04 p.m. UTC | #1
On Mon, Mar 06, 2017 at 04:20:55PM +0200, Elena Reshetova wrote:
> refcount_t type and corresponding API should be
> used instead of atomic_t when the variable is used as
> a reference counter. This allows to avoid accidental
> refcounter overflows that might lead to use-after-free
> situations.

Looks good. Let me know how do you want to route the patch to upstream.
 
> Signed-off-by: Elena Reshetova <elena.reshetova@intel.com>
> Signed-off-by: Hans Liljestrand <ishkamiel@gmail.com>
> Signed-off-by: Kees Cook <keescook@chromium.org>
> Signed-off-by: David Windsor <dwindsor@gmail.com>
> ---
>  drivers/md/md.c | 6 +++---
>  drivers/md/md.h | 3 ++-
>  2 files changed, 5 insertions(+), 4 deletions(-)
> 
> diff --git a/drivers/md/md.c b/drivers/md/md.c
> index 985374f..94c8ebf 100644
> --- a/drivers/md/md.c
> +++ b/drivers/md/md.c
> @@ -449,7 +449,7 @@ EXPORT_SYMBOL(md_unplug);
>  
>  static inline struct mddev *mddev_get(struct mddev *mddev)
>  {
> -	atomic_inc(&mddev->active);
> +	refcount_inc(&mddev->active);
>  	return mddev;
>  }
>  
> @@ -459,7 +459,7 @@ static void mddev_put(struct mddev *mddev)
>  {
>  	struct bio_set *bs = NULL;
>  
> -	if (!atomic_dec_and_lock(&mddev->active, &all_mddevs_lock))
> +	if (!refcount_dec_and_lock(&mddev->active, &all_mddevs_lock))
>  		return;
>  	if (!mddev->raid_disks && list_empty(&mddev->disks) &&
>  	    mddev->ctime == 0 && !mddev->hold_active) {
> @@ -495,7 +495,7 @@ void mddev_init(struct mddev *mddev)
>  	INIT_LIST_HEAD(&mddev->all_mddevs);
>  	setup_timer(&mddev->safemode_timer, md_safemode_timeout,
>  		    (unsigned long) mddev);
> -	atomic_set(&mddev->active, 1);
> +	refcount_set(&mddev->active, 1);
>  	atomic_set(&mddev->openers, 0);
>  	atomic_set(&mddev->active_io, 0);
>  	spin_lock_init(&mddev->lock);
> diff --git a/drivers/md/md.h b/drivers/md/md.h
> index b8859cb..4811663 100644
> --- a/drivers/md/md.h
> +++ b/drivers/md/md.h
> @@ -22,6 +22,7 @@
>  #include <linux/list.h>
>  #include <linux/mm.h>
>  #include <linux/mutex.h>
> +#include <linux/refcount.h>
>  #include <linux/timer.h>
>  #include <linux/wait.h>
>  #include <linux/workqueue.h>
> @@ -360,7 +361,7 @@ struct mddev {
>  	 */
>  	struct mutex			open_mutex;
>  	struct mutex			reconfig_mutex;
> -	atomic_t			active;		/* general refcount */
> +	refcount_t			active;		/* general refcount */
>  	atomic_t			openers;	/* number of active opens */
>  
>  	int				changed;	/* True if we might need to
> -- 
> 2.7.4
> 
> --
> To unsubscribe from this list: send the line "unsubscribe linux-raid" in
> the body of a message to majordomo@vger.kernel.org
> More majordomo info at  http://vger.kernel.org/majordomo-info.html
--
To unsubscribe from this list: send the line "unsubscribe target-devel" in
the body of a message to majordomo@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Reshetova, Elena March 8, 2017, 9:42 a.m. UTC | #2
> On Mon, Mar 06, 2017 at 04:20:55PM +0200, Elena Reshetova wrote:
> > refcount_t type and corresponding API should be
> > used instead of atomic_t when the variable is used as
> > a reference counter. This allows to avoid accidental
> > refcounter overflows that might lead to use-after-free
> > situations.
> 
> Looks good. Let me know how do you want to route the patch to upstream.

Greg, you previously mentioned that driver's conversions can go via your tree. Does this still apply?
Or should I be asking maintainers to merge these patches via their trees? 
I am not sure about the correct (and easier for everyone) way, please suggest.  

Best Regards,
Elena.
> 
> > Signed-off-by: Elena Reshetova <elena.reshetova@intel.com>
> > Signed-off-by: Hans Liljestrand <ishkamiel@gmail.com>
> > Signed-off-by: Kees Cook <keescook@chromium.org>
> > Signed-off-by: David Windsor <dwindsor@gmail.com>
> > ---
> >  drivers/md/md.c | 6 +++---
> >  drivers/md/md.h | 3 ++-
> >  2 files changed, 5 insertions(+), 4 deletions(-)
> >
> > diff --git a/drivers/md/md.c b/drivers/md/md.c
> > index 985374f..94c8ebf 100644
> > --- a/drivers/md/md.c
> > +++ b/drivers/md/md.c
> > @@ -449,7 +449,7 @@ EXPORT_SYMBOL(md_unplug);
> >
> >  static inline struct mddev *mddev_get(struct mddev *mddev)
> >  {
> > -	atomic_inc(&mddev->active);
> > +	refcount_inc(&mddev->active);
> >  	return mddev;
> >  }
> >
> > @@ -459,7 +459,7 @@ static void mddev_put(struct mddev *mddev)
> >  {
> >  	struct bio_set *bs = NULL;
> >
> > -	if (!atomic_dec_and_lock(&mddev->active, &all_mddevs_lock))
> > +	if (!refcount_dec_and_lock(&mddev->active, &all_mddevs_lock))
> >  		return;
> >  	if (!mddev->raid_disks && list_empty(&mddev->disks) &&
> >  	    mddev->ctime == 0 && !mddev->hold_active) {
> > @@ -495,7 +495,7 @@ void mddev_init(struct mddev *mddev)
> >  	INIT_LIST_HEAD(&mddev->all_mddevs);
> >  	setup_timer(&mddev->safemode_timer, md_safemode_timeout,
> >  		    (unsigned long) mddev);
> > -	atomic_set(&mddev->active, 1);
> > +	refcount_set(&mddev->active, 1);
> >  	atomic_set(&mddev->openers, 0);
> >  	atomic_set(&mddev->active_io, 0);
> >  	spin_lock_init(&mddev->lock);
> > diff --git a/drivers/md/md.h b/drivers/md/md.h
> > index b8859cb..4811663 100644
> > --- a/drivers/md/md.h
> > +++ b/drivers/md/md.h
> > @@ -22,6 +22,7 @@
> >  #include <linux/list.h>
> >  #include <linux/mm.h>
> >  #include <linux/mutex.h>
> > +#include <linux/refcount.h>
> >  #include <linux/timer.h>
> >  #include <linux/wait.h>
> >  #include <linux/workqueue.h>
> > @@ -360,7 +361,7 @@ struct mddev {
> >  	 */
> >  	struct mutex			open_mutex;
> >  	struct mutex			reconfig_mutex;
> > -	atomic_t			active;
> 	/* general refcount */
> > +	refcount_t			active;
> 	/* general refcount */
> >  	atomic_t			openers;	/*
> number of active opens */
> >
> >  	int
> 	changed;	/* True if we might need to
> > --
> > 2.7.4
> >
> > --
> > To unsubscribe from this list: send the line "unsubscribe linux-raid" in
> > the body of a message to majordomo@vger.kernel.org
> > More majordomo info at  http://vger.kernel.org/majordomo-info.html
--
To unsubscribe from this list: send the line "unsubscribe target-devel" in
the body of a message to majordomo@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Greg Kroah-Hartman March 8, 2017, 10:19 a.m. UTC | #3
On Wed, Mar 08, 2017 at 09:42:09AM +0000, Reshetova, Elena wrote:
> > On Mon, Mar 06, 2017 at 04:20:55PM +0200, Elena Reshetova wrote:
> > > refcount_t type and corresponding API should be
> > > used instead of atomic_t when the variable is used as
> > > a reference counter. This allows to avoid accidental
> > > refcounter overflows that might lead to use-after-free
> > > situations.
> > 
> > Looks good. Let me know how do you want to route the patch to upstream.
> 
> Greg, you previously mentioned that driver's conversions can go via your tree. Does this still apply?
> Or should I be asking maintainers to merge these patches via their trees? 

You should ask them to take them through their trees, if they have them.
I'll be glad to scoop up all of the remaining ones that get missed, or
for subsystems that do not have trees.

thanks,

greg k-h
--
To unsubscribe from this list: send the line "unsubscribe target-devel" in
the body of a message to majordomo@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Michael Ellerman March 14, 2017, 12:11 p.m. UTC | #4
Elena Reshetova <elena.reshetova@intel.com> writes:

> refcount_t type and corresponding API should be
> used instead of atomic_t when the variable is used as
> a reference counter. This allows to avoid accidental
> refcounter overflows that might lead to use-after-free
> situations.
>
> Signed-off-by: Elena Reshetova <elena.reshetova@intel.com>
> Signed-off-by: Hans Liljestrand <ishkamiel@gmail.com>
> Signed-off-by: Kees Cook <keescook@chromium.org>
> Signed-off-by: David Windsor <dwindsor@gmail.com>
> ---
>  drivers/md/md.c | 6 +++---
>  drivers/md/md.h | 3 ++-
>  2 files changed, 5 insertions(+), 4 deletions(-)

When booting linux-next (specifically 5be4921c9958ec) I'm seeing the
backtrace below. I suspect this patch is just exposing an existing
issue?

cheers


[    0.230738] md: Waiting for all devices to be available before autodetect
[    0.230742] md: If you don't use raid, use raid=noautodetect
[    0.230962] refcount_t: increment on 0; use-after-free.
[    0.230988] ------------[ cut here ]------------
[    0.230996] WARNING: CPU: 0 PID: 1 at lib/refcount.c:114 .refcount_inc+0x5c/0x70
[    0.231001] Modules linked in:
[    0.231006] CPU: 0 PID: 1 Comm: swapper/0 Not tainted 4.11.0-rc1-gccN-next-20170310-g5be4921 #1
[    0.231012] task: c000000049400000 task.stack: c000000049440000
[    0.231016] NIP: c0000000005ac6bc LR: c0000000005ac6b8 CTR: c000000000743390
[    0.231021] REGS: c000000049443160 TRAP: 0700   Not tainted  (4.11.0-rc1-gccN-next-20170310-g5be4921)
[    0.231026] MSR: 8000000000029032 <SF,EE,ME,IR,DR,RI>
[    0.231033]   CR: 24024422  XER: 0000000c
[    0.231038] CFAR: c000000000a5356c SOFTE: 1 
[    0.231038] GPR00: c0000000005ac6b8 c0000000494433e0 c000000001079d00 000000000000002b 
[    0.231038] GPR04: 0000000000000000 00000000000000ef 0000000000000000 c0000000010418a0 
[    0.231038] GPR08: 000000004af80000 c000000000ecc9a8 c000000000ecc9a8 0000000000000000 
[    0.231038] GPR12: 0000000028024824 c000000006bb0000 0000000000000000 c000000049443a00 
[    0.231038] GPR16: 0000000000000000 c000000049443a10 0000000000000000 0000000000000000 
[    0.231038] GPR20: 0000000000000000 0000000000000000 c000000000f7dd20 0000000000000000 
[    0.231038] GPR24: 00000000014080c0 c0000000012060b8 c000000001206080 0000000000000009 
[    0.231038] GPR28: c000000000f7dde0 0000000000900000 0000000000000000 c0000000461ae800 
[    0.231100] NIP [c0000000005ac6bc] .refcount_inc+0x5c/0x70
[    0.231104] LR [c0000000005ac6b8] .refcount_inc+0x58/0x70
[    0.231108] Call Trace:
[    0.231112] [c0000000494433e0] [c0000000005ac6b8] .refcount_inc+0x58/0x70 (unreliable)
[    0.231120] [c000000049443450] [c00000000086c008] .mddev_find+0x1e8/0x430
[    0.231125] [c000000049443530] [c000000000872b6c] .md_open+0x2c/0x140
[    0.231132] [c0000000494435c0] [c0000000003962a4] .__blkdev_get+0xd4/0x520
[    0.231138] [c000000049443690] [c000000000396cc0] .blkdev_get+0x1c0/0x4f0
[    0.231145] [c000000049443790] [c000000000336d64] .do_dentry_open.isra.1+0x2a4/0x410
[    0.231152] [c000000049443830] [c0000000003523f4] .path_openat+0x624/0x1580
[    0.231157] [c000000049443990] [c000000000354ce4] .do_filp_open+0x84/0x120
[    0.231163] [c000000049443b10] [c000000000338d74] .do_sys_open+0x214/0x300
[    0.231170] [c000000049443be0] [c000000000da69ac] .md_run_setup+0xa0/0xec
[    0.231176] [c000000049443c60] [c000000000da4fbc] .prepare_namespace+0x60/0x240
[    0.231182] [c000000049443ce0] [c000000000da47a8] .kernel_init_freeable+0x330/0x36c
[    0.231190] [c000000049443db0] [c00000000000dc44] .kernel_init+0x24/0x160
[    0.231197] [c000000049443e30] [c00000000000badc] .ret_from_kernel_thread+0x58/0x7c
[    0.231202] Instruction dump:
[    0.231206] 60000000 3d22ffee 89296bfb 2f890000 409effdc 3c62ffc6 39200001 3d42ffee 
[    0.231216] 38630928 992a6bfb 484a6e79 60000000 <0fe00000> 4bffffb8 60000000 60000000 
[    0.231226] ---[ end trace 8c51f269ad91ffc2 ]---
[    0.231233] md: Autodetecting RAID arrays.
[    0.231236] md: autorun ...
[    0.231239] md: ... autorun DONE.
[    0.234188] EXT4-fs (sda4): mounting ext3 file system using the ext4 subsystem
[    0.250506] refcount_t: underflow; use-after-free.
[    0.250531] ------------[ cut here ]------------
[    0.250537] WARNING: CPU: 0 PID: 3 at lib/refcount.c:207 .refcount_dec_not_one+0x104/0x120
[    0.250542] Modules linked in:
[    0.250546] CPU: 0 PID: 3 Comm: kworker/0:0 Tainted: G        W       4.11.0-rc1-gccN-next-20170310-g5be4921 #1
[    0.250553] Workqueue: events .delayed_fput
[    0.250557] task: c000000049404900 task.stack: c000000049448000
[    0.250562] NIP: c0000000005ac964 LR: c0000000005ac960 CTR: c000000000743390
[    0.250567] REGS: c00000004944b530 TRAP: 0700   Tainted: G        W        (4.11.0-rc1-gccN-next-20170310-g5be4921)
[    0.250572] MSR: 8000000000029032 <SF,EE,ME,IR,DR,RI>
[    0.250578]   CR: 24002422  XER: 00000007
[    0.250584] CFAR: c000000000a5356c SOFTE: 1 
[    0.250584] GPR00: c0000000005ac960 c00000004944b7b0 c000000001079d00 0000000000000026 
[    0.250584] GPR04: 0000000000000000 0000000000000113 0000000000000000 c0000000010418a0 
[    0.250584] GPR08: 000000004af80000 c000000000ecc9a8 c000000000ecc9a8 0000000000000000 
[    0.250584] GPR12: 0000000022002824 c000000006bb0000 c0000000001116d0 c000000049050200 
[    0.250584] GPR16: 0000000000000000 0000000000000000 0000000000000000 0000000000000000 
[    0.250584] GPR20: 0000000000000001 0000000000000000 c000000048030a98 0000000000000001 
[    0.250584] GPR24: 000000000002001d 0000000000000000 0000000000000000 c0000000461af000 
[    0.250584] GPR28: 0000000000000000 c000000048030bd8 c0000000461aea08 c0000000012060b8 
[    0.250645] NIP [c0000000005ac964] .refcount_dec_not_one+0x104/0x120
[    0.250650] LR [c0000000005ac960] .refcount_dec_not_one+0x100/0x120
[    0.250654] Call Trace:
[    0.250658] [c00000004944b7b0] [c0000000005ac960] .refcount_dec_not_one+0x100/0x120 (unreliable)
[    0.250665] [c00000004944b820] [c0000000005ac9a0] .refcount_dec_and_lock+0x20/0xc0
[    0.250671] [c00000004944b8a0] [c000000000870fa4] .mddev_put+0x34/0x180
[    0.250677] [c00000004944b930] [c000000000396108] .__blkdev_put+0x288/0x350
[    0.250683] [c00000004944ba30] [c0000000003968f0] .blkdev_close+0x30/0x50
[    0.250689] [c00000004944bab0] [c00000000033e7d8] .__fput+0xc8/0x2a0
[    0.250695] [c00000004944bb60] [c00000000033ea08] .delayed_fput+0x58/0x80
[    0.250701] [c00000004944bbe0] [c000000000107ea0] .process_one_work+0x2a0/0x630
[    0.250707] [c00000004944bc80] [c0000000001082c8] .worker_thread+0x98/0x6a0
[    0.250713] [c00000004944bd70] [c000000000111868] .kthread+0x198/0x1a0
[    0.250719] [c00000004944be30] [c00000000000badc] .ret_from_kernel_thread+0x58/0x7c
[    0.250724] Instruction dump:
[    0.250728] 419e000c 38210070 4e800020 7c0802a6 3c62ffc6 39200001 3d42ffee 38630958 
[    0.250738] 992a6bfe f8010080 484a6bd1 60000000 <0fe00000> e8010080 38600001 7c0803a6 
[    0.250748] ---[ end trace 8c51f269ad91ffc3 ]---
[    0.262454] EXT4-fs (sda4): mounted filesystem with ordered data mode. Opts: (null)
--
To unsubscribe from this list: send the line "unsubscribe target-devel" in
the body of a message to majordomo@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Reshetova, Elena March 14, 2017, 12:29 p.m. UTC | #5
> Elena Reshetova <elena.reshetova@intel.com> writes:
> 
> > refcount_t type and corresponding API should be
> > used instead of atomic_t when the variable is used as
> > a reference counter. This allows to avoid accidental
> > refcounter overflows that might lead to use-after-free
> > situations.
> >
> > Signed-off-by: Elena Reshetova <elena.reshetova@intel.com>
> > Signed-off-by: Hans Liljestrand <ishkamiel@gmail.com>
> > Signed-off-by: Kees Cook <keescook@chromium.org>
> > Signed-off-by: David Windsor <dwindsor@gmail.com>
> > ---
> >  drivers/md/md.c | 6 +++---
> >  drivers/md/md.h | 3 ++-
> >  2 files changed, 5 insertions(+), 4 deletions(-)
> 
> When booting linux-next (specifically 5be4921c9958ec) I'm seeing the
> backtrace below. I suspect this patch is just exposing an existing
> issue?

Yes, we have actually been following this issue in the another thread. 
It looks like the object is re-used somehow, but I can't quite understand how just by reading the code. 
This was what I put into the previous thread:

"The log below indicates that you are using your refcounter in a bit weird way in mddev_find(). 
However, I can't find the place (just by reading the code) where you would increment refcounter from zero (vs. setting it to one).
It looks like you either iterate over existing nodes (and increment their counters, which should be >= 1 at the time of increment) or create a new node, but then mddev_init() sets the counter to 1. "

If you can help to understand what is going on with the object creation/destruction, would be appreciated!

Also Shaohua Li stopped this patch coming from his tree since the issue was caught at that time, so we are not going to merge this until we figure it out. 

Best Regards,
Elena.

> 
> cheers
> 
> 
> [    0.230738] md: Waiting for all devices to be available before autodetect
> [    0.230742] md: If you don't use raid, use raid=noautodetect
> [    0.230962] refcount_t: increment on 0; use-after-free.
> [    0.230988] ------------[ cut here ]------------
> [    0.230996] WARNING: CPU: 0 PID: 1 at lib/refcount.c:114
> .refcount_inc+0x5c/0x70
> [    0.231001] Modules linked in:
> [    0.231006] CPU: 0 PID: 1 Comm: swapper/0 Not tainted 4.11.0-rc1-gccN-next-
> 20170310-g5be4921 #1
> [    0.231012] task: c000000049400000 task.stack: c000000049440000
> [    0.231016] NIP: c0000000005ac6bc LR: c0000000005ac6b8 CTR:
> c000000000743390
> [    0.231021] REGS: c000000049443160 TRAP: 0700   Not tainted  (4.11.0-rc1-
> gccN-next-20170310-g5be4921)
> [    0.231026] MSR: 8000000000029032 <SF,EE,ME,IR,DR,RI>
> [    0.231033]   CR: 24024422  XER: 0000000c
> [    0.231038] CFAR: c000000000a5356c SOFTE: 1
> [    0.231038] GPR00: c0000000005ac6b8 c0000000494433e0 c000000001079d00
> 000000000000002b
> [    0.231038] GPR04: 0000000000000000 00000000000000ef 0000000000000000
> c0000000010418a0
> [    0.231038] GPR08: 000000004af80000 c000000000ecc9a8 c000000000ecc9a8
> 0000000000000000
> [    0.231038] GPR12: 0000000028024824 c000000006bb0000 0000000000000000
> c000000049443a00
> [    0.231038] GPR16: 0000000000000000 c000000049443a10 0000000000000000
> 0000000000000000
> [    0.231038] GPR20: 0000000000000000 0000000000000000 c000000000f7dd20
> 0000000000000000
> [    0.231038] GPR24: 00000000014080c0 c0000000012060b8 c000000001206080
> 0000000000000009
> [    0.231038] GPR28: c000000000f7dde0 0000000000900000 0000000000000000
> c0000000461ae800
> [    0.231100] NIP [c0000000005ac6bc] .refcount_inc+0x5c/0x70
> [    0.231104] LR [c0000000005ac6b8] .refcount_inc+0x58/0x70
> [    0.231108] Call Trace:
> [    0.231112] [c0000000494433e0] [c0000000005ac6b8] .refcount_inc+0x58/0x70
> (unreliable)
> [    0.231120] [c000000049443450] [c00000000086c008]
> .mddev_find+0x1e8/0x430
> [    0.231125] [c000000049443530] [c000000000872b6c] .md_open+0x2c/0x140
> [    0.231132] [c0000000494435c0] [c0000000003962a4]
> .__blkdev_get+0xd4/0x520
> [    0.231138] [c000000049443690] [c000000000396cc0] .blkdev_get+0x1c0/0x4f0
> [    0.231145] [c000000049443790] [c000000000336d64]
> .do_dentry_open.isra.1+0x2a4/0x410
> [    0.231152] [c000000049443830] [c0000000003523f4]
> .path_openat+0x624/0x1580
> [    0.231157] [c000000049443990] [c000000000354ce4]
> .do_filp_open+0x84/0x120
> [    0.231163] [c000000049443b10] [c000000000338d74]
> .do_sys_open+0x214/0x300
> [    0.231170] [c000000049443be0] [c000000000da69ac]
> .md_run_setup+0xa0/0xec
> [    0.231176] [c000000049443c60] [c000000000da4fbc]
> .prepare_namespace+0x60/0x240
> [    0.231182] [c000000049443ce0] [c000000000da47a8]
> .kernel_init_freeable+0x330/0x36c
> [    0.231190] [c000000049443db0] [c00000000000dc44] .kernel_init+0x24/0x160
> [    0.231197] [c000000049443e30] [c00000000000badc]
> .ret_from_kernel_thread+0x58/0x7c
> [    0.231202] Instruction dump:
> [    0.231206] 60000000 3d22ffee 89296bfb 2f890000 409effdc 3c62ffc6 39200001
> 3d42ffee
> [    0.231216] 38630928 992a6bfb 484a6e79 60000000 <0fe00000> 4bffffb8
> 60000000 60000000
> [    0.231226] ---[ end trace 8c51f269ad91ffc2 ]---
> [    0.231233] md: Autodetecting RAID arrays.
> [    0.231236] md: autorun ...
> [    0.231239] md: ... autorun DONE.
> [    0.234188] EXT4-fs (sda4): mounting ext3 file system using the ext4 subsystem
> [    0.250506] refcount_t: underflow; use-after-free.
> [    0.250531] ------------[ cut here ]------------
> [    0.250537] WARNING: CPU: 0 PID: 3 at lib/refcount.c:207
> .refcount_dec_not_one+0x104/0x120
> [    0.250542] Modules linked in:
> [    0.250546] CPU: 0 PID: 3 Comm: kworker/0:0 Tainted: G        W       4.11.0-rc1-
> gccN-next-20170310-g5be4921 #1
> [    0.250553] Workqueue: events .delayed_fput
> [    0.250557] task: c000000049404900 task.stack: c000000049448000
> [    0.250562] NIP: c0000000005ac964 LR: c0000000005ac960 CTR:
> c000000000743390
> [    0.250567] REGS: c00000004944b530 TRAP: 0700   Tainted: G        W        (4.11.0-
> rc1-gccN-next-20170310-g5be4921)
> [    0.250572] MSR: 8000000000029032 <SF,EE,ME,IR,DR,RI>
> [    0.250578]   CR: 24002422  XER: 00000007
> [    0.250584] CFAR: c000000000a5356c SOFTE: 1
> [    0.250584] GPR00: c0000000005ac960 c00000004944b7b0 c000000001079d00
> 0000000000000026
> [    0.250584] GPR04: 0000000000000000 0000000000000113 0000000000000000
> c0000000010418a0
> [    0.250584] GPR08: 000000004af80000 c000000000ecc9a8 c000000000ecc9a8
> 0000000000000000
> [    0.250584] GPR12: 0000000022002824 c000000006bb0000 c0000000001116d0
> c000000049050200
> [    0.250584] GPR16: 0000000000000000 0000000000000000 0000000000000000
> 0000000000000000
> [    0.250584] GPR20: 0000000000000001 0000000000000000 c000000048030a98
> 0000000000000001
> [    0.250584] GPR24: 000000000002001d 0000000000000000 0000000000000000
> c0000000461af000
> [    0.250584] GPR28: 0000000000000000 c000000048030bd8 c0000000461aea08
> c0000000012060b8
> [    0.250645] NIP [c0000000005ac964] .refcount_dec_not_one+0x104/0x120
> [    0.250650] LR [c0000000005ac960] .refcount_dec_not_one+0x100/0x120
> [    0.250654] Call Trace:
> [    0.250658] [c00000004944b7b0] [c0000000005ac960]
> .refcount_dec_not_one+0x100/0x120 (unreliable)
> [    0.250665] [c00000004944b820] [c0000000005ac9a0]
> .refcount_dec_and_lock+0x20/0xc0
> [    0.250671] [c00000004944b8a0] [c000000000870fa4] .mddev_put+0x34/0x180
> [    0.250677] [c00000004944b930] [c000000000396108]
> .__blkdev_put+0x288/0x350
> [    0.250683] [c00000004944ba30] [c0000000003968f0] .blkdev_close+0x30/0x50
> [    0.250689] [c00000004944bab0] [c00000000033e7d8] .__fput+0xc8/0x2a0
> [    0.250695] [c00000004944bb60] [c00000000033ea08]
> .delayed_fput+0x58/0x80
> [    0.250701] [c00000004944bbe0] [c000000000107ea0]
> .process_one_work+0x2a0/0x630
> [    0.250707] [c00000004944bc80] [c0000000001082c8]
> .worker_thread+0x98/0x6a0
> [    0.250713] [c00000004944bd70] [c000000000111868] .kthread+0x198/0x1a0
> [    0.250719] [c00000004944be30] [c00000000000badc]
> .ret_from_kernel_thread+0x58/0x7c
> [    0.250724] Instruction dump:
> [    0.250728] 419e000c 38210070 4e800020 7c0802a6 3c62ffc6 39200001
> 3d42ffee 38630958
> [    0.250738] 992a6bfe f8010080 484a6bd1 60000000 <0fe00000> e8010080
> 38600001 7c0803a6
> [    0.250748] ---[ end trace 8c51f269ad91ffc3 ]---
> [    0.262454] EXT4-fs (sda4): mounted filesystem with ordered data mode. Opts:
> (null)
--
To unsubscribe from this list: send the line "unsubscribe target-devel" in
the body of a message to majordomo@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
James Bottomley March 14, 2017, 2:58 p.m. UTC | #6
On Tue, 2017-03-14 at 12:29 +0000, Reshetova, Elena wrote:
> > Elena Reshetova <elena.reshetova@intel.com> writes:
> > 
> > > refcount_t type and corresponding API should be
> > > used instead of atomic_t when the variable is used as
> > > a reference counter. This allows to avoid accidental
> > > refcounter overflows that might lead to use-after-free
> > > situations.
> > > 
> > > Signed-off-by: Elena Reshetova <elena.reshetova@intel.com>
> > > Signed-off-by: Hans Liljestrand <ishkamiel@gmail.com>
> > > Signed-off-by: Kees Cook <keescook@chromium.org>
> > > Signed-off-by: David Windsor <dwindsor@gmail.com>
> > > ---
> > >  drivers/md/md.c | 6 +++---
> > >  drivers/md/md.h | 3 ++-
> > >  2 files changed, 5 insertions(+), 4 deletions(-)
> > 
> > When booting linux-next (specifically 5be4921c9958ec) I'm seeing
> > the
> > backtrace below. I suspect this patch is just exposing an existing
> > issue?
> 
> Yes, we have actually been following this issue in the another
> thread. 
> It looks like the object is re-used somehow, but I can't quite
> understand how just by reading the code. 
> This was what I put into the previous thread:
> 
> "The log below indicates that you are using your refcounter in a bit
> weird way in mddev_find(). 
> However, I can't find the place (just by reading the code) where you
> would increment refcounter from zero (vs. setting it to one).
> It looks like you either iterate over existing nodes (and increment
> their counters, which should be >= 1 at the time of increment) or
> create a new node, but then mddev_init() sets the counter to 1. "
> 
> If you can help to understand what is going on with the object
> creation/destruction, would be appreciated!
> 
> Also Shaohua Li stopped this patch coming from his tree since the 
> issue was caught at that time, so we are not going to merge this 
> until we figure it out. 

Asking on the correct list (dm-devel) would have got you the easy
answer:  The refcount behind mddev->active is a genuine atomic.  It has
refcount properties but only if the array fails to initialise (in that
case, final put kills it).  Once it's added to the system as a gendisk,
it cannot be freed until md_free().  Thus its ->active count can go to
zero (when it becomes inactive; usually because of an unmount). On a
simple allocation regardless of outcome, the last executed statement in
md_alloc is mddev_put(): that destroys the device if we didn't manage
to create it or returns 0 and adds an inactive device to the system
which the user can get with mddev_find().

James


--
To unsubscribe from this list: send the line "unsubscribe target-devel" in
the body of a message to majordomo@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Reshetova, Elena March 16, 2017, 6 p.m. UTC | #7
PiBPbiBUdWUsIDIwMTctMDMtMTQgYXQgMTI6MjkgKzAwMDAsIFJlc2hldG92YSwgRWxlbmEgd3Jv
dGU6DQo+ID4gPiBFbGVuYSBSZXNoZXRvdmEgPGVsZW5hLnJlc2hldG92YUBpbnRlbC5jb20+IHdy
aXRlczoNCj4gPiA+DQo+ID4gPiA+IHJlZmNvdW50X3QgdHlwZSBhbmQgY29ycmVzcG9uZGluZyBB
UEkgc2hvdWxkIGJlDQo+ID4gPiA+IHVzZWQgaW5zdGVhZCBvZiBhdG9taWNfdCB3aGVuIHRoZSB2
YXJpYWJsZSBpcyB1c2VkIGFzDQo+ID4gPiA+IGEgcmVmZXJlbmNlIGNvdW50ZXIuIFRoaXMgYWxs
b3dzIHRvIGF2b2lkIGFjY2lkZW50YWwNCj4gPiA+ID4gcmVmY291bnRlciBvdmVyZmxvd3MgdGhh
dCBtaWdodCBsZWFkIHRvIHVzZS1hZnRlci1mcmVlDQo+ID4gPiA+IHNpdHVhdGlvbnMuDQo+ID4g
PiA+DQo+ID4gPiA+IFNpZ25lZC1vZmYtYnk6IEVsZW5hIFJlc2hldG92YSA8ZWxlbmEucmVzaGV0
b3ZhQGludGVsLmNvbT4NCj4gPiA+ID4gU2lnbmVkLW9mZi1ieTogSGFucyBMaWxqZXN0cmFuZCA8
aXNoa2FtaWVsQGdtYWlsLmNvbT4NCj4gPiA+ID4gU2lnbmVkLW9mZi1ieTogS2VlcyBDb29rIDxr
ZWVzY29va0BjaHJvbWl1bS5vcmc+DQo+ID4gPiA+IFNpZ25lZC1vZmYtYnk6IERhdmlkIFdpbmRz
b3IgPGR3aW5kc29yQGdtYWlsLmNvbT4NCj4gPiA+ID4gLS0tDQo+ID4gPiA+ICBkcml2ZXJzL21k
L21kLmMgfCA2ICsrKy0tLQ0KPiA+ID4gPiAgZHJpdmVycy9tZC9tZC5oIHwgMyArKy0NCj4gPiA+
ID4gIDIgZmlsZXMgY2hhbmdlZCwgNSBpbnNlcnRpb25zKCspLCA0IGRlbGV0aW9ucygtKQ0KPiA+
ID4NCj4gPiA+IFdoZW4gYm9vdGluZyBsaW51eC1uZXh0IChzcGVjaWZpY2FsbHkgNWJlNDkyMWM5
OTU4ZWMpIEknbSBzZWVpbmcNCj4gPiA+IHRoZQ0KPiA+ID4gYmFja3RyYWNlIGJlbG93LiBJIHN1
c3BlY3QgdGhpcyBwYXRjaCBpcyBqdXN0IGV4cG9zaW5nIGFuIGV4aXN0aW5nDQo+ID4gPiBpc3N1
ZT8NCj4gPg0KPiA+IFllcywgd2UgaGF2ZSBhY3R1YWxseSBiZWVuIGZvbGxvd2luZyB0aGlzIGlz
c3VlIGluIHRoZSBhbm90aGVyDQo+ID4gdGhyZWFkLg0KPiA+IEl0IGxvb2tzIGxpa2UgdGhlIG9i
amVjdCBpcyByZS11c2VkIHNvbWVob3csIGJ1dCBJIGNhbid0IHF1aXRlDQo+ID4gdW5kZXJzdGFu
ZCBob3cganVzdCBieSByZWFkaW5nIHRoZSBjb2RlLg0KPiA+IFRoaXMgd2FzIHdoYXQgSSBwdXQg
aW50byB0aGUgcHJldmlvdXMgdGhyZWFkOg0KPiA+DQo+ID4gIlRoZSBsb2cgYmVsb3cgaW5kaWNh
dGVzIHRoYXQgeW91IGFyZSB1c2luZyB5b3VyIHJlZmNvdW50ZXIgaW4gYSBiaXQNCj4gPiB3ZWly
ZCB3YXkgaW4gbWRkZXZfZmluZCgpLg0KPiA+IEhvd2V2ZXIsIEkgY2FuJ3QgZmluZCB0aGUgcGxh
Y2UgKGp1c3QgYnkgcmVhZGluZyB0aGUgY29kZSkgd2hlcmUgeW91DQo+ID4gd291bGQgaW5jcmVt
ZW50IHJlZmNvdW50ZXIgZnJvbSB6ZXJvICh2cy4gc2V0dGluZyBpdCB0byBvbmUpLg0KPiA+IEl0
IGxvb2tzIGxpa2UgeW91IGVpdGhlciBpdGVyYXRlIG92ZXIgZXhpc3Rpbmcgbm9kZXMgKGFuZCBp
bmNyZW1lbnQNCj4gPiB0aGVpciBjb3VudGVycywgd2hpY2ggc2hvdWxkIGJlID49IDEgYXQgdGhl
IHRpbWUgb2YgaW5jcmVtZW50KSBvcg0KPiA+IGNyZWF0ZSBhIG5ldyBub2RlLCBidXQgdGhlbiBt
ZGRldl9pbml0KCkgc2V0cyB0aGUgY291bnRlciB0byAxLiAiDQo+ID4NCj4gPiBJZiB5b3UgY2Fu
IGhlbHAgdG8gdW5kZXJzdGFuZCB3aGF0IGlzIGdvaW5nIG9uIHdpdGggdGhlIG9iamVjdA0KPiA+
IGNyZWF0aW9uL2Rlc3RydWN0aW9uLCB3b3VsZCBiZSBhcHByZWNpYXRlZCENCj4gPg0KPiA+IEFs
c28gU2hhb2h1YSBMaSBzdG9wcGVkIHRoaXMgcGF0Y2ggY29taW5nIGZyb20gaGlzIHRyZWUgc2lu
Y2UgdGhlDQo+ID4gaXNzdWUgd2FzIGNhdWdodCBhdCB0aGF0IHRpbWUsIHNvIHdlIGFyZSBub3Qg
Z29pbmcgdG8gbWVyZ2UgdGhpcw0KPiA+IHVudGlsIHdlIGZpZ3VyZSBpdCBvdXQuDQo+IA0KPiBB
c2tpbmcgb24gdGhlIGNvcnJlY3QgbGlzdCAoZG0tZGV2ZWwpIHdvdWxkIGhhdmUgZ290IHlvdSB0
aGUgZWFzeQ0KPiBhbnN3ZXI6ICBUaGUgcmVmY291bnQgYmVoaW5kIG1kZGV2LT5hY3RpdmUgaXMg
YSBnZW51aW5lIGF0b21pYy4gIEl0IGhhcw0KPiByZWZjb3VudCBwcm9wZXJ0aWVzIGJ1dCBvbmx5
IGlmIHRoZSBhcnJheSBmYWlscyB0byBpbml0aWFsaXNlIChpbiB0aGF0DQo+IGNhc2UsIGZpbmFs
IHB1dCBraWxscyBpdCkuICBPbmNlIGl0J3MgYWRkZWQgdG8gdGhlIHN5c3RlbSBhcyBhIGdlbmRp
c2ssDQo+IGl0IGNhbm5vdCBiZSBmcmVlZCB1bnRpbCBtZF9mcmVlKCkuICBUaHVzIGl0cyAtPmFj
dGl2ZSBjb3VudCBjYW4gZ28gdG8NCj4gemVybyAod2hlbiBpdCBiZWNvbWVzIGluYWN0aXZlOyB1
c3VhbGx5IGJlY2F1c2Ugb2YgYW4gdW5tb3VudCkuIE9uIGENCj4gc2ltcGxlIGFsbG9jYXRpb24g
cmVnYXJkbGVzcyBvZiBvdXRjb21lLCB0aGUgbGFzdCBleGVjdXRlZCBzdGF0ZW1lbnQgaW4NCj4g
bWRfYWxsb2MgaXMgbWRkZXZfcHV0KCk6IHRoYXQgZGVzdHJveXMgdGhlIGRldmljZSBpZiB3ZSBk
aWRuJ3QgbWFuYWdlDQo+IHRvIGNyZWF0ZSBpdCBvciByZXR1cm5zIDAgYW5kIGFkZHMgYW4gaW5h
Y3RpdmUgZGV2aWNlIHRvIHRoZSBzeXN0ZW0NCj4gd2hpY2ggdGhlIHVzZXIgY2FuIGdldCB3aXRo
IG1kZGV2X2ZpbmQoKS4NCg0KVGhhbmsgeW91IEphbWVzIGZvciBleHBsYWluaW5nIHRoaXMhIEkg
Z3Vlc3MgaW4gdGhpcyBjYXNlLCB0aGUgY29udmVyc2lvbiBkb2Vzbid0IG1ha2Ugc2Vuc2UuIA0K
QW5kIHNvcnJ5IGFib3V0IG5vdCBhc2tpbmcgaW4gYSBjb3JyZWN0IHBsYWNlOiB3ZSBhcmUgaGFu
ZGxpbmcgbWFueSBzaW1pbGFyIHBhdGNoZXMgbm93IGFuZCB3aGlsZSBJIHRyeSB0byByZWFjaCB0
aGUgcmlnaHQgYXVkaWVuY2UgdXNpbmcgZ2V0X21haW50YWluZXIgc2NyaXB0LCBpdCBkb2Vzbid0
IGFsd2F5cyBzdWNjZWVkcy4gDQoNCkJlc3QgUmVnYXJkcywNCkVsZW5hLg0KDQo+IA0KPiBKYW1l
cw0KPiANCg0K
--
To unsubscribe from this list: send the line "unsubscribe target-devel" in
the body of a message to majordomo@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
diff mbox

Patch

diff --git a/drivers/md/md.c b/drivers/md/md.c
index 985374f..94c8ebf 100644
--- a/drivers/md/md.c
+++ b/drivers/md/md.c
@@ -449,7 +449,7 @@  EXPORT_SYMBOL(md_unplug);
 
 static inline struct mddev *mddev_get(struct mddev *mddev)
 {
-	atomic_inc(&mddev->active);
+	refcount_inc(&mddev->active);
 	return mddev;
 }
 
@@ -459,7 +459,7 @@  static void mddev_put(struct mddev *mddev)
 {
 	struct bio_set *bs = NULL;
 
-	if (!atomic_dec_and_lock(&mddev->active, &all_mddevs_lock))
+	if (!refcount_dec_and_lock(&mddev->active, &all_mddevs_lock))
 		return;
 	if (!mddev->raid_disks && list_empty(&mddev->disks) &&
 	    mddev->ctime == 0 && !mddev->hold_active) {
@@ -495,7 +495,7 @@  void mddev_init(struct mddev *mddev)
 	INIT_LIST_HEAD(&mddev->all_mddevs);
 	setup_timer(&mddev->safemode_timer, md_safemode_timeout,
 		    (unsigned long) mddev);
-	atomic_set(&mddev->active, 1);
+	refcount_set(&mddev->active, 1);
 	atomic_set(&mddev->openers, 0);
 	atomic_set(&mddev->active_io, 0);
 	spin_lock_init(&mddev->lock);
diff --git a/drivers/md/md.h b/drivers/md/md.h
index b8859cb..4811663 100644
--- a/drivers/md/md.h
+++ b/drivers/md/md.h
@@ -22,6 +22,7 @@ 
 #include <linux/list.h>
 #include <linux/mm.h>
 #include <linux/mutex.h>
+#include <linux/refcount.h>
 #include <linux/timer.h>
 #include <linux/wait.h>
 #include <linux/workqueue.h>
@@ -360,7 +361,7 @@  struct mddev {
 	 */
 	struct mutex			open_mutex;
 	struct mutex			reconfig_mutex;
-	atomic_t			active;		/* general refcount */
+	refcount_t			active;		/* general refcount */
 	atomic_t			openers;	/* number of active opens */
 
 	int				changed;	/* True if we might need to