diff mbox series

[RESEND,v2] platform/chrome: cros_ec_debugfs: detach log reader wq from devm

Message ID 20220209051130.386175-1-tzungbi@google.com (mailing list archive)
State Accepted
Commit a9e896f70fbe2aa1df588e31f0a7d8b604e9deb7
Headers show
Series [RESEND,v2] platform/chrome: cros_ec_debugfs: detach log reader wq from devm | expand

Commit Message

Tzung-Bi Shih Feb. 9, 2022, 5:11 a.m. UTC
Debugfs console_log uses devm memory (e.g. debug_info in
cros_ec_console_log_poll()).  However, lifecycles of device and debugfs
are independent.  An use-after-free issue is observed if userland
program operates the debugfs after the memory has been freed.

The call trace:
 do_raw_spin_lock
 _raw_spin_lock_irqsave
 remove_wait_queue
 ep_unregister_pollwait
 ep_remove
 do_epoll_ctl

A Python example to reproduce the issue:
... import select
... p = select.epoll()
... f = open('/sys/kernel/debug/cros_scp/console_log')
... p.register(f, select.POLLIN)
... p.poll(1)
[(4, 1)]                    # 4=fd, 1=select.POLLIN

[ shutdown cros_scp at the point ]

... p.poll(1)
[(4, 16)]                   # 4=fd, 16=select.POLLHUP
... p.unregister(f)

An use-after-free issue raises here.  It called epoll_ctl with
EPOLL_CTL_DEL which in turn to use the workqueue in the devm (i.e.
log_wq).

Detaches log reader's workqueue from devm to make sure it is persistent
even if the device has been removed.

Signed-off-by: Tzung-Bi Shih <tzungbi@google.com>
---
As for 2 other related cases I could image:

Case 1. userland program opens the debugfs after the device has been removed

ENOENT.  cros_ec_debugfs_remove() calls debugfs_remove_recursive().

Case 2. userland program is reading when the device is removing

If the userland program is waiting in cros_ec_console_log_read(), the device
removal will wait for it.

See the calling stack for the case:
 wait_for_completion
 __debugfs_file_removed
 remove_one
 simple_recursive_removal
 debugfs_remove
 cros_ec_debugfs_remove
 platform_drv_remove
 device_release_driver_internal
 device_release_driver
 bus_remove_device
 device_del
 platform_device_del
 platform_device_unregister

Changes from v1:
- rebase to next-20211029.
- change the commit messages.

 drivers/platform/chrome/cros_ec_debugfs.c | 12 ++++++------
 1 file changed, 6 insertions(+), 6 deletions(-)

Comments

Guenter Roeck Feb. 9, 2022, 4:38 p.m. UTC | #1
On Tue, Feb 8, 2022 at 9:11 PM Tzung-Bi Shih <tzungbi@google.com> wrote:
>
> Debugfs console_log uses devm memory (e.g. debug_info in
> cros_ec_console_log_poll()).  However, lifecycles of device and debugfs
> are independent.  An use-after-free issue is observed if userland
> program operates the debugfs after the memory has been freed.
>
> The call trace:
>  do_raw_spin_lock
>  _raw_spin_lock_irqsave
>  remove_wait_queue
>  ep_unregister_pollwait
>  ep_remove
>  do_epoll_ctl
>
> A Python example to reproduce the issue:
> ... import select
> ... p = select.epoll()
> ... f = open('/sys/kernel/debug/cros_scp/console_log')
> ... p.register(f, select.POLLIN)
> ... p.poll(1)
> [(4, 1)]                    # 4=fd, 1=select.POLLIN
>
> [ shutdown cros_scp at the point ]
>
> ... p.poll(1)
> [(4, 16)]                   # 4=fd, 16=select.POLLHUP
> ... p.unregister(f)
>
> An use-after-free issue raises here.  It called epoll_ctl with
> EPOLL_CTL_DEL which in turn to use the workqueue in the devm (i.e.
> log_wq).
>
> Detaches log reader's workqueue from devm to make sure it is persistent
> even if the device has been removed.
>
> Signed-off-by: Tzung-Bi Shih <tzungbi@google.com>

Reviewed-by: Guenter Roeck <groeck@google.com>

> ---
> As for 2 other related cases I could image:
>
> Case 1. userland program opens the debugfs after the device has been removed
>
> ENOENT.  cros_ec_debugfs_remove() calls debugfs_remove_recursive().
>
> Case 2. userland program is reading when the device is removing
>
> If the userland program is waiting in cros_ec_console_log_read(), the device
> removal will wait for it.
>
> See the calling stack for the case:
>  wait_for_completion
>  __debugfs_file_removed
>  remove_one
>  simple_recursive_removal
>  debugfs_remove
>  cros_ec_debugfs_remove
>  platform_drv_remove
>  device_release_driver_internal
>  device_release_driver
>  bus_remove_device
>  device_del
>  platform_device_del
>  platform_device_unregister
>
> Changes from v1:
> - rebase to next-20211029.
> - change the commit messages.
>
>  drivers/platform/chrome/cros_ec_debugfs.c | 12 ++++++------
>  1 file changed, 6 insertions(+), 6 deletions(-)
>
> diff --git a/drivers/platform/chrome/cros_ec_debugfs.c b/drivers/platform/chrome/cros_ec_debugfs.c
> index 272c89837d74..0dbceee87a4b 100644
> --- a/drivers/platform/chrome/cros_ec_debugfs.c
> +++ b/drivers/platform/chrome/cros_ec_debugfs.c
> @@ -25,6 +25,9 @@
>
>  #define CIRC_ADD(idx, size, value)     (((idx) + (value)) & ((size) - 1))
>
> +/* waitqueue for log readers */
> +static DECLARE_WAIT_QUEUE_HEAD(cros_ec_debugfs_log_wq);
> +
>  /**
>   * struct cros_ec_debugfs - EC debugging information.
>   *
> @@ -33,7 +36,6 @@
>   * @log_buffer: circular buffer for console log information
>   * @read_msg: preallocated EC command and buffer to read console log
>   * @log_mutex: mutex to protect circular buffer
> - * @log_wq: waitqueue for log readers
>   * @log_poll_work: recurring task to poll EC for new console log data
>   * @panicinfo_blob: panicinfo debugfs blob
>   */
> @@ -44,7 +46,6 @@ struct cros_ec_debugfs {
>         struct circ_buf log_buffer;
>         struct cros_ec_command *read_msg;
>         struct mutex log_mutex;
> -       wait_queue_head_t log_wq;
>         struct delayed_work log_poll_work;
>         /* EC panicinfo */
>         struct debugfs_blob_wrapper panicinfo_blob;
> @@ -107,7 +108,7 @@ static void cros_ec_console_log_work(struct work_struct *__work)
>                         buf_space--;
>                 }
>
> -               wake_up(&debug_info->log_wq);
> +               wake_up(&cros_ec_debugfs_log_wq);
>         }
>
>         mutex_unlock(&debug_info->log_mutex);
> @@ -141,7 +142,7 @@ static ssize_t cros_ec_console_log_read(struct file *file, char __user *buf,
>
>                 mutex_unlock(&debug_info->log_mutex);
>
> -               ret = wait_event_interruptible(debug_info->log_wq,
> +               ret = wait_event_interruptible(cros_ec_debugfs_log_wq,
>                                         CIRC_CNT(cb->head, cb->tail, LOG_SIZE));
>                 if (ret < 0)
>                         return ret;
> @@ -173,7 +174,7 @@ static __poll_t cros_ec_console_log_poll(struct file *file,
>         struct cros_ec_debugfs *debug_info = file->private_data;
>         __poll_t mask = 0;
>
> -       poll_wait(file, &debug_info->log_wq, wait);
> +       poll_wait(file, &cros_ec_debugfs_log_wq, wait);
>
>         mutex_lock(&debug_info->log_mutex);
>         if (CIRC_CNT(debug_info->log_buffer.head,
> @@ -377,7 +378,6 @@ static int cros_ec_create_console_log(struct cros_ec_debugfs *debug_info)
>         debug_info->log_buffer.tail = 0;
>
>         mutex_init(&debug_info->log_mutex);
> -       init_waitqueue_head(&debug_info->log_wq);
>
>         debugfs_create_file("console_log", S_IFREG | 0444, debug_info->dir,
>                             debug_info, &cros_ec_console_log_fops);
> --
> 2.35.0.263.gb82422642f-goog
>
Prashant Malani Feb. 9, 2022, 9:30 p.m. UTC | #2
On Tue, Feb 8, 2022 at 9:11 PM Tzung-Bi Shih <tzungbi@google.com> wrote:
>
> Debugfs console_log uses devm memory (e.g. debug_info in
> cros_ec_console_log_poll()).  However, lifecycles of device and debugfs
> are independent.  An use-after-free issue is observed if userland
> program operates the debugfs after the memory has been freed.
>
> The call trace:
>  do_raw_spin_lock
>  _raw_spin_lock_irqsave
>  remove_wait_queue
>  ep_unregister_pollwait
>  ep_remove
>  do_epoll_ctl
>
> A Python example to reproduce the issue:
> ... import select
> ... p = select.epoll()
> ... f = open('/sys/kernel/debug/cros_scp/console_log')
> ... p.register(f, select.POLLIN)
> ... p.poll(1)
> [(4, 1)]                    # 4=fd, 1=select.POLLIN
>
> [ shutdown cros_scp at the point ]
>
> ... p.poll(1)
> [(4, 16)]                   # 4=fd, 16=select.POLLHUP
> ... p.unregister(f)
>
> An use-after-free issue raises here.  It called epoll_ctl with
> EPOLL_CTL_DEL which in turn to use the workqueue in the devm (i.e.
> log_wq).
>
> Detaches log reader's workqueue from devm to make sure it is persistent
> even if the device has been removed.
>
> Signed-off-by: Tzung-Bi Shih <tzungbi@google.com>
> ---
> As for 2 other related cases I could image:
>
> Case 1. userland program opens the debugfs after the device has been removed
>
> ENOENT.  cros_ec_debugfs_remove() calls debugfs_remove_recursive().
>
> Case 2. userland program is reading when the device is removing
>
> If the userland program is waiting in cros_ec_console_log_read(), the device
> removal will wait for it.
>
> See the calling stack for the case:
>  wait_for_completion
>  __debugfs_file_removed
>  remove_one
>  simple_recursive_removal
>  debugfs_remove
>  cros_ec_debugfs_remove
>  platform_drv_remove
>  device_release_driver_internal
>  device_release_driver
>  bus_remove_device
>  device_del
>  platform_device_del
>  platform_device_unregister
>
> Changes from v1:
> - rebase to next-20211029.
> - change the commit messages.
>
>  drivers/platform/chrome/cros_ec_debugfs.c | 12 ++++++------
>  1 file changed, 6 insertions(+), 6 deletions(-)
>
> diff --git a/drivers/platform/chrome/cros_ec_debugfs.c b/drivers/platform/chrome/cros_ec_debugfs.c
> index 272c89837d74..0dbceee87a4b 100644
> --- a/drivers/platform/chrome/cros_ec_debugfs.c
> +++ b/drivers/platform/chrome/cros_ec_debugfs.c
> @@ -25,6 +25,9 @@
>
>  #define CIRC_ADD(idx, size, value)     (((idx) + (value)) & ((size) - 1))
>
> +/* waitqueue for log readers */
> +static DECLARE_WAIT_QUEUE_HEAD(cros_ec_debugfs_log_wq);

Hmm, I'm always wary of introducing statics to address races like this.

Doesn't re-ordering cros_ec_debugfs_remove() avoid the race? :
diff --git a/drivers/platform/chrome/cros_ec_debugfs.c
b/drivers/platform/chrome/cros_ec_debugfs.c
index 272c89837d74..80bd898c3271 100644
--- a/drivers/platform/chrome/cros_ec_debugfs.c
+++ b/drivers/platform/chrome/cros_ec_debugfs.c
@@ -485,8 +485,8 @@ static int cros_ec_debugfs_remove(struct
platform_device *pd)
 {
        struct cros_ec_dev *ec = dev_get_drvdata(pd->dev.parent);

-       debugfs_remove_recursive(ec->debug_info->dir);
        cros_ec_cleanup_console_log(ec->debug_info);
+       debugfs_remove_recursive(ec->debug_info->dir);

        return 0;
 }
Tzung-Bi Shih Feb. 10, 2022, 3:11 a.m. UTC | #3
On Wed, Feb 09, 2022 at 01:30:14PM -0800, Prashant Malani wrote:
> On Tue, Feb 8, 2022 at 9:11 PM Tzung-Bi Shih <tzungbi@google.com> wrote:
> > +/* waitqueue for log readers */
> > +static DECLARE_WAIT_QUEUE_HEAD(cros_ec_debugfs_log_wq);
> 
> Hmm, I'm always wary of introducing statics to address races like this.
> 
> Doesn't re-ordering cros_ec_debugfs_remove() avoid the race? :
> diff --git a/drivers/platform/chrome/cros_ec_debugfs.c
> b/drivers/platform/chrome/cros_ec_debugfs.c
> index 272c89837d74..80bd898c3271 100644
> --- a/drivers/platform/chrome/cros_ec_debugfs.c
> +++ b/drivers/platform/chrome/cros_ec_debugfs.c
> @@ -485,8 +485,8 @@ static int cros_ec_debugfs_remove(struct
> platform_device *pd)
>  {
>         struct cros_ec_dev *ec = dev_get_drvdata(pd->dev.parent);
> 
> -       debugfs_remove_recursive(ec->debug_info->dir);
>         cros_ec_cleanup_console_log(ec->debug_info);
> +       debugfs_remove_recursive(ec->debug_info->dir);
> 
>         return 0;
>  }

I have tested the patch and it doesn't help.  The issue is: cros_ec_debugfs
provides a wait queue to fs at [1]; fs remembers the address at [2].  The wait
queue is in devm memory.  After the memory has freed (i.e. the device has
removed), fs still gets chance to access the memory (e.g. [3] in the case).

[1]: https://elixir.bootlin.com/linux/v5.17-rc3/source/drivers/platform/chrome/cros_ec_debugfs.c#L176
[2]: https://elixir.bootlin.com/linux/v5.17-rc3/source/fs/eventpoll.c#L1256
[3]: https://elixir.bootlin.com/linux/v5.17-rc3/source/fs/eventpoll.c#L550
Benson Leung Feb. 18, 2022, 7:28 p.m. UTC | #4
Hi Tzung-Bi,

On Wed, 9 Feb 2022 13:11:30 +0800, Tzung-Bi Shih wrote:
> Debugfs console_log uses devm memory (e.g. debug_info in
> cros_ec_console_log_poll()).  However, lifecycles of device and debugfs
> are independent.  An use-after-free issue is observed if userland
> program operates the debugfs after the memory has been freed.
> 
> The call trace:
>  do_raw_spin_lock
>  _raw_spin_lock_irqsave
>  remove_wait_queue
>  ep_unregister_pollwait
>  ep_remove
>  do_epoll_ctl
> 
> [...]

Applied, thanks!

[1/1] platform/chrome: cros_ec_debugfs: detach log reader wq from devm
      commit: a9e896f70fbe2aa1df588e31f0a7d8b604e9deb7

Best regards,
patchwork-bot+chrome-platform@kernel.org Feb. 18, 2022, 7:30 p.m. UTC | #5
Hello:

This patch was applied to chrome-platform/linux.git (for-kernelci)
by Benson Leung <bleung@chromium.org>:

On Wed,  9 Feb 2022 13:11:30 +0800 you wrote:
> Debugfs console_log uses devm memory (e.g. debug_info in
> cros_ec_console_log_poll()).  However, lifecycles of device and debugfs
> are independent.  An use-after-free issue is observed if userland
> program operates the debugfs after the memory has been freed.
> 
> The call trace:
>  do_raw_spin_lock
>  _raw_spin_lock_irqsave
>  remove_wait_queue
>  ep_unregister_pollwait
>  ep_remove
>  do_epoll_ctl
> 
> [...]

Here is the summary with links:
  - [RESEND,v2] platform/chrome: cros_ec_debugfs: detach log reader wq from devm
    https://git.kernel.org/chrome-platform/c/a9e896f70fbe

You are awesome, thank you!
patchwork-bot+chrome-platform@kernel.org March 31, 2022, 4:50 p.m. UTC | #6
Hello:

This patch was applied to chrome-platform/linux.git (for-next)
by Benson Leung <bleung@chromium.org>:

On Wed,  9 Feb 2022 13:11:30 +0800 you wrote:
> Debugfs console_log uses devm memory (e.g. debug_info in
> cros_ec_console_log_poll()).  However, lifecycles of device and debugfs
> are independent.  An use-after-free issue is observed if userland
> program operates the debugfs after the memory has been freed.
> 
> The call trace:
>  do_raw_spin_lock
>  _raw_spin_lock_irqsave
>  remove_wait_queue
>  ep_unregister_pollwait
>  ep_remove
>  do_epoll_ctl
> 
> [...]

Here is the summary with links:
  - [RESEND,v2] platform/chrome: cros_ec_debugfs: detach log reader wq from devm
    https://git.kernel.org/chrome-platform/c/a9e896f70fbe

You are awesome, thank you!
diff mbox series

Patch

diff --git a/drivers/platform/chrome/cros_ec_debugfs.c b/drivers/platform/chrome/cros_ec_debugfs.c
index 272c89837d74..0dbceee87a4b 100644
--- a/drivers/platform/chrome/cros_ec_debugfs.c
+++ b/drivers/platform/chrome/cros_ec_debugfs.c
@@ -25,6 +25,9 @@ 
 
 #define CIRC_ADD(idx, size, value)	(((idx) + (value)) & ((size) - 1))
 
+/* waitqueue for log readers */
+static DECLARE_WAIT_QUEUE_HEAD(cros_ec_debugfs_log_wq);
+
 /**
  * struct cros_ec_debugfs - EC debugging information.
  *
@@ -33,7 +36,6 @@ 
  * @log_buffer: circular buffer for console log information
  * @read_msg: preallocated EC command and buffer to read console log
  * @log_mutex: mutex to protect circular buffer
- * @log_wq: waitqueue for log readers
  * @log_poll_work: recurring task to poll EC for new console log data
  * @panicinfo_blob: panicinfo debugfs blob
  */
@@ -44,7 +46,6 @@  struct cros_ec_debugfs {
 	struct circ_buf log_buffer;
 	struct cros_ec_command *read_msg;
 	struct mutex log_mutex;
-	wait_queue_head_t log_wq;
 	struct delayed_work log_poll_work;
 	/* EC panicinfo */
 	struct debugfs_blob_wrapper panicinfo_blob;
@@ -107,7 +108,7 @@  static void cros_ec_console_log_work(struct work_struct *__work)
 			buf_space--;
 		}
 
-		wake_up(&debug_info->log_wq);
+		wake_up(&cros_ec_debugfs_log_wq);
 	}
 
 	mutex_unlock(&debug_info->log_mutex);
@@ -141,7 +142,7 @@  static ssize_t cros_ec_console_log_read(struct file *file, char __user *buf,
 
 		mutex_unlock(&debug_info->log_mutex);
 
-		ret = wait_event_interruptible(debug_info->log_wq,
+		ret = wait_event_interruptible(cros_ec_debugfs_log_wq,
 					CIRC_CNT(cb->head, cb->tail, LOG_SIZE));
 		if (ret < 0)
 			return ret;
@@ -173,7 +174,7 @@  static __poll_t cros_ec_console_log_poll(struct file *file,
 	struct cros_ec_debugfs *debug_info = file->private_data;
 	__poll_t mask = 0;
 
-	poll_wait(file, &debug_info->log_wq, wait);
+	poll_wait(file, &cros_ec_debugfs_log_wq, wait);
 
 	mutex_lock(&debug_info->log_mutex);
 	if (CIRC_CNT(debug_info->log_buffer.head,
@@ -377,7 +378,6 @@  static int cros_ec_create_console_log(struct cros_ec_debugfs *debug_info)
 	debug_info->log_buffer.tail = 0;
 
 	mutex_init(&debug_info->log_mutex);
-	init_waitqueue_head(&debug_info->log_wq);
 
 	debugfs_create_file("console_log", S_IFREG | 0444, debug_info->dir,
 			    debug_info, &cros_ec_console_log_fops);