diff mbox series

[V2] block: rename 'q->debugfs_dir' and 'q->blk_trace->dir' in blk_unregister_queue()

Message ID 20200213081252.32395-1-yukuai3@huawei.com (mailing list archive)
State New, archived
Headers show
Series [V2] block: rename 'q->debugfs_dir' and 'q->blk_trace->dir' in blk_unregister_queue() | expand

Commit Message

Yu Kuai Feb. 13, 2020, 8:12 a.m. UTC
syzbot is reporting use after free bug in debugfs_remove[1].

This is because in request_queue, 'q->debugfs_dir' and
'q->blk_trace->dir' could be the same dir. And in __blk_release_queue(),
blk_mq_debugfs_unregister() will remove everything inside the dir.

With futher investigation of the reporduce repro, the problem can be
reporduced by following procedure:

1. LOOP_CTL_ADD, create a request_queue q1, blk_mq_debugfs_register() will
create the dir.
2. LOOP_CTL_REMOVE, blk_release_queue() will add q1 to release queue.
3. LOOP_CTL_ADD, create another request_queue q2,blk_mq_debugfs_register()
will fail because the dir aready exist.
4. BLKTRACESETUP, create two files(msg and dropped) inside the dir.
5. call __blk_release_queue() for q1, debugfs_remove_recursive() will
delete the files created in step 4.
6. LOOP_CTL_REMOVE, blk_release_queue() will add q2 to release queue.
And when __blk_release_queue() is called for q2, blk_trace_shutdown() will
try to release the two files created in step 4, wich are aready released
in step 5.

thread1		         |kworker                   |thread2
 ----------------------- | ------------------------ | --------------------
loop_control_ioctl       |                          |
 loop_add                |                          |
  blk_mq_debugfs_register|                          |
   debugfs_create_dir    |                          |
loop_control_ioctl       |                          |
 loop_remove		 |                          |
  blk_release_queue      |                          |
   schedule_work         |                          |
                         |                          |loop_control_ioctl
                         |                          | loop_add
                         |                          |  ...
                         |                          |blk_trace_ioctl
                         |                          | __blk_trace_setup
                         |                          |   debugfs_create_file
                         |__blk_release_queue       |
                         | blk_mq_debugfs_unregister|
                         |  debugfs_remove_recursive|
                         |                          |loop_control_ioctl
                         |                          | loop_remove
                         |                          |  ...
                         |__blk_release_queue       |
                         | blk_trace_shutdown       |
                         |  debugfs_remove          |

commit dc9edc44de6c ("block: Fix a blk_exit_rl() regression") pushed the
final release of request_queue to a workqueue, so, when loop_add() is
called again(step 3), __blk_release_queue() might not been called yet,
which causes the problem.

Fix the problem by renaming 'q->debugfs_dir' or 'q->blk_trace->dir'
in blk_unregister_queue() if they exist.

[1] https://syzkaller.appspot.com/bug?extid=903b72a010ad6b7a40f2
References: CVE-2019-19770
Fixes: commit dc9edc44de6c ("block: Fix a blk_exit_rl() regression")
Reported-by: syzbot <syz...@syzkaller.appspotmail.com>
Signed-off-by: yu kuai <yukuai3@huawei.com>
---
Changes in V2:
 add device name to the new dir name
 fix compile err when 'CONFIG_BLK_DEBUG_FS' is not enabled
 add treatment of 'q->blk_trace->dir'

 block/blk-sysfs.c | 42 ++++++++++++++++++++++++++++++++++++++++++
 1 file changed, 42 insertions(+)

Comments

Yu Kuai Feb. 28, 2020, 10:14 a.m. UTC | #1
ping...

On 2020/2/13 16:12, yu kuai wrote:
> syzbot is reporting use after free bug in debugfs_remove[1].
> 
> This is because in request_queue, 'q->debugfs_dir' and
> 'q->blk_trace->dir' could be the same dir. And in __blk_release_queue(),
> blk_mq_debugfs_unregister() will remove everything inside the dir.
> 
> With futher investigation of the reporduce repro, the problem can be
> reporduced by following procedure:
> 
> 1. LOOP_CTL_ADD, create a request_queue q1, blk_mq_debugfs_register() will
> create the dir.
> 2. LOOP_CTL_REMOVE, blk_release_queue() will add q1 to release queue.
> 3. LOOP_CTL_ADD, create another request_queue q2,blk_mq_debugfs_register()
> will fail because the dir aready exist.
> 4. BLKTRACESETUP, create two files(msg and dropped) inside the dir.
> 5. call __blk_release_queue() for q1, debugfs_remove_recursive() will
> delete the files created in step 4.
> 6. LOOP_CTL_REMOVE, blk_release_queue() will add q2 to release queue.
> And when __blk_release_queue() is called for q2, blk_trace_shutdown() will
> try to release the two files created in step 4, wich are aready released
> in step 5.
> 
> thread1		         |kworker                   |thread2
>   ----------------------- | ------------------------ | --------------------
> loop_control_ioctl       |                          |
>   loop_add                |                          |
>    blk_mq_debugfs_register|                          |
>     debugfs_create_dir    |                          |
> loop_control_ioctl       |                          |
>   loop_remove		 |                          |
>    blk_release_queue      |                          |
>     schedule_work         |                          |
>                           |                          |loop_control_ioctl
>                           |                          | loop_add
>                           |                          |  ...
>                           |                          |blk_trace_ioctl
>                           |                          | __blk_trace_setup
>                           |                          |   debugfs_create_file
>                           |__blk_release_queue       |
>                           | blk_mq_debugfs_unregister|
>                           |  debugfs_remove_recursive|
>                           |                          |loop_control_ioctl
>                           |                          | loop_remove
>                           |                          |  ...
>                           |__blk_release_queue       |
>                           | blk_trace_shutdown       |
>                           |  debugfs_remove          |
> 
> commit dc9edc44de6c ("block: Fix a blk_exit_rl() regression") pushed the
> final release of request_queue to a workqueue, so, when loop_add() is
> called again(step 3), __blk_release_queue() might not been called yet,
> which causes the problem.
> 
> Fix the problem by renaming 'q->debugfs_dir' or 'q->blk_trace->dir'
> in blk_unregister_queue() if they exist.
> 
> [1] https://syzkaller.appspot.com/bug?extid=903b72a010ad6b7a40f2
> References: CVE-2019-19770
> Fixes: commit dc9edc44de6c ("block: Fix a blk_exit_rl() regression")
> Reported-by: syzbot <syz...@syzkaller.appspotmail.com>
> Signed-off-by: yu kuai <yukuai3@huawei.com>
> ---
> Changes in V2:
>   add device name to the new dir name
>   fix compile err when 'CONFIG_BLK_DEBUG_FS' is not enabled
>   add treatment of 'q->blk_trace->dir'
> 
>   block/blk-sysfs.c | 42 ++++++++++++++++++++++++++++++++++++++++++
>   1 file changed, 42 insertions(+)
> 
> diff --git a/block/blk-sysfs.c b/block/blk-sysfs.c
> index fca9b158f4a0..1da8d4a4915a 100644
> --- a/block/blk-sysfs.c
> +++ b/block/blk-sysfs.c
> @@ -11,6 +11,7 @@
>   #include <linux/blktrace_api.h>
>   #include <linux/blk-mq.h>
>   #include <linux/blk-cgroup.h>
> +#include <linux/debugfs.h>
>   
>   #include "blk.h"
>   #include "blk-mq.h"
> @@ -1011,6 +1012,46 @@ int blk_register_queue(struct gendisk *disk)
>   }
>   EXPORT_SYMBOL_GPL(blk_register_queue);
>   
> +#ifdef CONFIG_DEBUG_FS
> +/*
> + * blk_prepare_release_queue - rename q->debugfs_dir and q->blk_trace->dir
> + * @q: request_queue of which the dir to be renamed belong to.
> + *
> + * Because the final release of request_queue is in a workqueue, the
> + * cleanup might not been finished yet while the same device start to
> + * create. It's not correct if q->debugfs_dir still exist while trying
> + * to create a new one.
> + */
> +static void blk_prepare_release_queue(struct request_queue *q)
> +{
> +	struct dentry *new = NULL;
> +	struct dentry **old = NULL;
> +	char name[DNAME_INLINE_LEN];
> +	int i = 0;
> +
> +#ifdef CONFIG_BLK_DEBUG_FS
> +	if (!IS_ERR_OR_NULL(q->debugfs_dir))
> +		old = &q->debugfs_dir;
> +#endif
> +#ifdef CONFIG_BLK_DEV_IO_TRACE
> +	/* q->debugfs_dir and q->blk_trace->dir can't both exist */
> +	if (q->blk_trace && !IS_ERR_OR_NULL(q->blk_trace->dir))
> +		old = &q->blk_trace->dir;
> +#endif
> +	if (old == NULL)
> +		return;
> +	while (new == NULL) {
> +		sprintf(name, "ready_to_remove_%s_%d",
> +				kobject_name(q->kobj.parent), i++);
> +		new = debugfs_rename(blk_debugfs_root, *old,
> +				     blk_debugfs_root, name);
> +	}
> +	*old = new;
> +}
> +#else
> +#define blk_prepare_release_queue(q)		do { } while (0)
> +#endif
> +
>   /**
>    * blk_unregister_queue - counterpart of blk_register_queue()
>    * @disk: Disk of which the request queue should be unregistered from sysfs.
> @@ -1039,6 +1080,7 @@ void blk_unregister_queue(struct gendisk *disk)
>   	mutex_unlock(&q->sysfs_lock);
>   
>   	mutex_lock(&q->sysfs_dir_lock);
> +	blk_prepare_release_queue(q);
>   	/*
>   	 * Remove the sysfs attributes before unregistering the queue data
>   	 * structures that can be modified through sysfs.
>
Ming Lei Feb. 28, 2020, 11:15 p.m. UTC | #2
On Thu, Feb 13, 2020 at 04:12:52PM +0800, yu kuai wrote:
> syzbot is reporting use after free bug in debugfs_remove[1].
> 
> This is because in request_queue, 'q->debugfs_dir' and
> 'q->blk_trace->dir' could be the same dir. And in __blk_release_queue(),
> blk_mq_debugfs_unregister() will remove everything inside the dir.
> 
> With futher investigation of the reporduce repro, the problem can be
> reporduced by following procedure:
> 
> 1. LOOP_CTL_ADD, create a request_queue q1, blk_mq_debugfs_register() will
> create the dir.
> 2. LOOP_CTL_REMOVE, blk_release_queue() will add q1 to release queue.
> 3. LOOP_CTL_ADD, create another request_queue q2,blk_mq_debugfs_register()
> will fail because the dir aready exist.
> 4. BLKTRACESETUP, create two files(msg and dropped) inside the dir.
> 5. call __blk_release_queue() for q1, debugfs_remove_recursive() will
> delete the files created in step 4.
> 6. LOOP_CTL_REMOVE, blk_release_queue() will add q2 to release queue.
> And when __blk_release_queue() is called for q2, blk_trace_shutdown() will
> try to release the two files created in step 4, wich are aready released
> in step 5.
> 
> thread1		         |kworker                   |thread2
>  ----------------------- | ------------------------ | --------------------
> loop_control_ioctl       |                          |
>  loop_add                |                          |
>   blk_mq_debugfs_register|                          |
>    debugfs_create_dir    |                          |
> loop_control_ioctl       |                          |
>  loop_remove		 |                          |
>   blk_release_queue      |                          |
>    schedule_work         |                          |
>                          |                          |loop_control_ioctl
>                          |                          | loop_add
>                          |                          |  ...
>                          |                          |blk_trace_ioctl
>                          |                          | __blk_trace_setup
>                          |                          |   debugfs_create_file
>                          |__blk_release_queue       |
>                          | blk_mq_debugfs_unregister|
>                          |  debugfs_remove_recursive|
>                          |                          |loop_control_ioctl
>                          |                          | loop_remove
>                          |                          |  ...
>                          |__blk_release_queue       |
>                          | blk_trace_shutdown       |
>                          |  debugfs_remove          |
> 
> commit dc9edc44de6c ("block: Fix a blk_exit_rl() regression") pushed the
> final release of request_queue to a workqueue, so, when loop_add() is
> called again(step 3), __blk_release_queue() might not been called yet,
> which causes the problem.
> 
> Fix the problem by renaming 'q->debugfs_dir' or 'q->blk_trace->dir'
> in blk_unregister_queue() if they exist.
> 
> [1] https://syzkaller.appspot.com/bug?extid=903b72a010ad6b7a40f2
> References: CVE-2019-19770
> Fixes: commit dc9edc44de6c ("block: Fix a blk_exit_rl() regression")
> Reported-by: syzbot <syz...@syzkaller.appspotmail.com>
> Signed-off-by: yu kuai <yukuai3@huawei.com>
> ---
> Changes in V2:
>  add device name to the new dir name
>  fix compile err when 'CONFIG_BLK_DEBUG_FS' is not enabled
>  add treatment of 'q->blk_trace->dir'
> 
>  block/blk-sysfs.c | 42 ++++++++++++++++++++++++++++++++++++++++++
>  1 file changed, 42 insertions(+)
> 
> diff --git a/block/blk-sysfs.c b/block/blk-sysfs.c
> index fca9b158f4a0..1da8d4a4915a 100644
> --- a/block/blk-sysfs.c
> +++ b/block/blk-sysfs.c
> @@ -11,6 +11,7 @@
>  #include <linux/blktrace_api.h>
>  #include <linux/blk-mq.h>
>  #include <linux/blk-cgroup.h>
> +#include <linux/debugfs.h>
>  
>  #include "blk.h"
>  #include "blk-mq.h"
> @@ -1011,6 +1012,46 @@ int blk_register_queue(struct gendisk *disk)
>  }
>  EXPORT_SYMBOL_GPL(blk_register_queue);
>  
> +#ifdef CONFIG_DEBUG_FS
> +/*
> + * blk_prepare_release_queue - rename q->debugfs_dir and q->blk_trace->dir
> + * @q: request_queue of which the dir to be renamed belong to.
> + *
> + * Because the final release of request_queue is in a workqueue, the
> + * cleanup might not been finished yet while the same device start to
> + * create. It's not correct if q->debugfs_dir still exist while trying
> + * to create a new one.
> + */
> +static void blk_prepare_release_queue(struct request_queue *q)
> +{
> +	struct dentry *new = NULL;
> +	struct dentry **old = NULL;
> +	char name[DNAME_INLINE_LEN];
> +	int i = 0;
> +
> +#ifdef CONFIG_BLK_DEBUG_FS
> +	if (!IS_ERR_OR_NULL(q->debugfs_dir))
> +		old = &q->debugfs_dir;
> +#endif
> +#ifdef CONFIG_BLK_DEV_IO_TRACE
> +	/* q->debugfs_dir and q->blk_trace->dir can't both exist */
> +	if (q->blk_trace && !IS_ERR_OR_NULL(q->blk_trace->dir))
> +		old = &q->blk_trace->dir;
> +#endif

If blk_trace->dir isn't same with .debugfs_dir, you will just rename
blk_trace->dir, this way can't avoid the failure in step3, can it?

I understand that we just need to rename .debugfs_dir, meantime making
sure blktrace code removes correct debugfs dir, is that enough for fixing
this issue?

> +	if (old == NULL)
> +		return;
> +	while (new == NULL) {
> +		sprintf(name, "ready_to_remove_%s_%d",
> +				kobject_name(q->kobj.parent), i++);
> +		new = debugfs_rename(blk_debugfs_root, *old,
> +				     blk_debugfs_root, name);
> +	}

The above code can be run concurrently with blktrace shutdown, so race might
exit between the two code paths, then bt->dir may has been renamed or being
renamed in debugfs_remove(bt->dir), can this function work as expected or
correct?

And there is dead loop risk, so suggest to not rename this way.


Thanks, 
Ming
Yu Kuai Feb. 29, 2020, 2:50 a.m. UTC | #3
On 2020/2/29 7:15, Ming Lei wrote:
> If blk_trace->dir isn't same with .debugfs_dir, you will just rename
> blk_trace->dir, this way can't avoid the failure in step3, can it?
Thank you for your response!
It's true that the problem still exist if they are not the same(I
thougt they can't both exist). Perhaps I can do the renaming for both.
> 
> I understand that we just need to rename .debugfs_dir, meantime making
> sure blktrace code removes correct debugfs dir, is that enough for fixing
> this issue?
> 
>> +	if (old == NULL)
>> +		return;
>> +	while (new == NULL) {
>> +		sprintf(name, "ready_to_remove_%s_%d",
>> +				kobject_name(q->kobj.parent), i++);
>> +		new = debugfs_rename(blk_debugfs_root, *old,
>> +				     blk_debugfs_root, name);
>> +	}
> The above code can be run concurrently with blktrace shutdown, so race might
> exit between the two code paths, then bt->dir may has been renamed or being
> renamed in debugfs_remove(bt->dir), can this function work as expected or
> correct?
To avoid the race, I think we can take the lock 'blk_trace_mutex' while
renaming 'bt->dir'.
> 
> And there is dead loop risk, so suggest to not rename this way.
The deap loop will exist if 'debugfs_rename' keep failing for some
reason. I thougt about setting a limit for max loop count, however, It's
probably not a good solution.

Thanks!
Yu Kuai
>
diff mbox series

Patch

diff --git a/block/blk-sysfs.c b/block/blk-sysfs.c
index fca9b158f4a0..1da8d4a4915a 100644
--- a/block/blk-sysfs.c
+++ b/block/blk-sysfs.c
@@ -11,6 +11,7 @@ 
 #include <linux/blktrace_api.h>
 #include <linux/blk-mq.h>
 #include <linux/blk-cgroup.h>
+#include <linux/debugfs.h>
 
 #include "blk.h"
 #include "blk-mq.h"
@@ -1011,6 +1012,46 @@  int blk_register_queue(struct gendisk *disk)
 }
 EXPORT_SYMBOL_GPL(blk_register_queue);
 
+#ifdef CONFIG_DEBUG_FS
+/*
+ * blk_prepare_release_queue - rename q->debugfs_dir and q->blk_trace->dir
+ * @q: request_queue of which the dir to be renamed belong to.
+ *
+ * Because the final release of request_queue is in a workqueue, the
+ * cleanup might not been finished yet while the same device start to
+ * create. It's not correct if q->debugfs_dir still exist while trying
+ * to create a new one.
+ */
+static void blk_prepare_release_queue(struct request_queue *q)
+{
+	struct dentry *new = NULL;
+	struct dentry **old = NULL;
+	char name[DNAME_INLINE_LEN];
+	int i = 0;
+
+#ifdef CONFIG_BLK_DEBUG_FS
+	if (!IS_ERR_OR_NULL(q->debugfs_dir))
+		old = &q->debugfs_dir;
+#endif
+#ifdef CONFIG_BLK_DEV_IO_TRACE
+	/* q->debugfs_dir and q->blk_trace->dir can't both exist */
+	if (q->blk_trace && !IS_ERR_OR_NULL(q->blk_trace->dir))
+		old = &q->blk_trace->dir;
+#endif
+	if (old == NULL)
+		return;
+	while (new == NULL) {
+		sprintf(name, "ready_to_remove_%s_%d",
+				kobject_name(q->kobj.parent), i++);
+		new = debugfs_rename(blk_debugfs_root, *old,
+				     blk_debugfs_root, name);
+	}
+	*old = new;
+}
+#else
+#define blk_prepare_release_queue(q)		do { } while (0)
+#endif
+
 /**
  * blk_unregister_queue - counterpart of blk_register_queue()
  * @disk: Disk of which the request queue should be unregistered from sysfs.
@@ -1039,6 +1080,7 @@  void blk_unregister_queue(struct gendisk *disk)
 	mutex_unlock(&q->sysfs_lock);
 
 	mutex_lock(&q->sysfs_dir_lock);
+	blk_prepare_release_queue(q);
 	/*
 	 * Remove the sysfs attributes before unregistering the queue data
 	 * structures that can be modified through sysfs.