diff mbox series

tty: tty_io: fix race between tty_fops and hung_up_tty_fops

Message ID a11e31ab-6ffc-453f-ba6a-b7f6e512c55e@I-love.SAKURA.ne.jp (mailing list archive)
State New
Headers show
Series tty: tty_io: fix race between tty_fops and hung_up_tty_fops | expand

Commit Message

Tetsuo Handa July 19, 2024, 1:37 p.m. UTC
syzbot is reporting data race between __tty_hangup() and __fput(), and
Dmitry Vyukov mentioned that this race has possibility of NULL pointer
dereference, for tty_fops implements e.g. splice_read callback whereas
hung_up_tty_fops does not.

  CPU0                                  CPU1
  ----                                  ----
  do_splice_read() {
                                        __tty_hangup() {
    // f_op->splice_read was copy_splice_read
    if (unlikely(!in->f_op->splice_read))
      return warn_unsupported(in, "read");
                                          filp->f_op = &hung_up_tty_fops;
    // f_op->splice_read is now NULL
    return in->f_op->splice_read(in, ppos, pipe, len, flags);
                                        }
  }

Fix possibility of NULL pointer dereference by implementing missing
callbacks, and suppress KCSAN messages by adding __data_racy qualifier
to "struct file"->f_op .

Reported-by: syzbot <syzbot+b7c3ba8cdc2f6cf83c21@syzkaller.appspotmail.com>
Closes: https://syzkaller.appspot.com/bug?extid=b7c3ba8cdc2f6cf83c21
Suggested-by: Linus Torvalds <torvalds@linux-foundation.org>
Cc: Dmitry Vyukov <dvyukov@google.com>
Cc: Marco Elver <elver@google.com>
Signed-off-by: Tetsuo Handa <penguin-kernel@I-love.SAKURA.ne.jp>
---
This patch has been tested using linux-next tree via my tomoyo tree since 20240611,
and there was no response on
  [fs] Are you OK with updating "struct file"->f_op value dynamically?
at https://lkml.kernel.org/r/b221d2cf-7dc0-4624-a040-85c131ed72a1@I-love.SAKURA.ne.jp .
Thus, I guess we can go with this approach.

 drivers/tty/tty_io.c | 34 ++++++++++++++++++++++++++++++++++
 include/linux/fs.h   |  2 +-
 2 files changed, 35 insertions(+), 1 deletion(-)

Comments

Christian Brauner July 22, 2024, 2:41 p.m. UTC | #1
On Fri, Jul 19, 2024 at 10:37:47PM GMT, Tetsuo Handa wrote:
> syzbot is reporting data race between __tty_hangup() and __fput(), and
> Dmitry Vyukov mentioned that this race has possibility of NULL pointer
> dereference, for tty_fops implements e.g. splice_read callback whereas
> hung_up_tty_fops does not.
> 
>   CPU0                                  CPU1
>   ----                                  ----
>   do_splice_read() {
>                                         __tty_hangup() {
>     // f_op->splice_read was copy_splice_read
>     if (unlikely(!in->f_op->splice_read))
>       return warn_unsupported(in, "read");
>                                           filp->f_op = &hung_up_tty_fops;
>     // f_op->splice_read is now NULL
>     return in->f_op->splice_read(in, ppos, pipe, len, flags);
>                                         }
>   }
> 
> Fix possibility of NULL pointer dereference by implementing missing
> callbacks, and suppress KCSAN messages by adding __data_racy qualifier
> to "struct file"->f_op .

This f_op replacing without synchronization seems really iffy imho.
Why can't the hangup just be recorded in tty_file_private and then
checked in the corresponding f_op->$method()?

And if that's not possible for some reason I'd be willing to sacrifice
one of the FMODE_* bits I recently freed up and add e.g.,
FMODE_TTY_HANGUP instead of this f_op raciness (Why wasn't this using
replace_fops anyway so it'd be easy to grep for it?).

Something like the completely untested below:

diff --git a/drivers/tty/tty_io.c b/drivers/tty/tty_io.c
index bc9aebcb873f..219bf6391fed 100644
--- a/drivers/tty/tty_io.c
+++ b/drivers/tty/tty_io.c
@@ -583,6 +583,26 @@ static struct file *tty_release_redirect(struct tty_struct *tty)
        return f;
 }

+static noinline void noinline tty_mark_hungup(struct file *file)
+{
+       fmode_t fmode = READ_ONCE(file->f_mode);
+
+       do {
+               if (fmode & FMODE_TTY_HUNGUP)
+                       break;
+       } while (!try_cmpxchg(&file->f_mode, &fmode, fmode | FMODE_TTY_HUNGUP));
+}
+
+static noinline void noinline tty_clear_hungup(struct file *file)
+{
+       fmode_t fmode = READ_ONCE(file->f_mode);
+
+       do {
+               if (!(fmode & FMODE_TTY_HUNGUP))
+                       break;
+       } while (!try_cmpxchg(&file->f_mode, &fmode, fmode & ~FMODE_TTY_HUNGUP));
+}
+
 /**
  * __tty_hangup - actual handler for hangup events
  * @tty: tty device
@@ -652,7 +672,7 @@ static void __tty_hangup(struct tty_struct *tty, int exit_session)
                        continue;
                closecount++;
                __tty_fasync(-1, filp, 0);      /* can't block */
-               filp->f_op = &hung_up_tty_fops;
+               tty_mark_hungup(filp);
        }
        spin_unlock(&tty->files_lock);

@@ -776,7 +796,7 @@ void tty_vhangup_session(struct tty_struct *tty)
  */
 int tty_hung_up_p(struct file *filp)
 {
-       return (filp && filp->f_op == &hung_up_tty_fops);
+       return (filp && (READ_ONCE(filp->f_mode) & FMODE_TTY_HUNGUP));
 }
 EXPORT_SYMBOL(tty_hung_up_p);

@@ -2204,7 +2224,7 @@ static int tty_open(struct inode *inode, struct file *filp)
                 * Need to reset f_op in case a hangup happened.
                 */
                if (tty_hung_up_p(filp))
-                       filp->f_op = &tty_fops;
+                       tty_clear_hungup(filp);
                goto retry_open;
        }
        clear_bit(TTY_HUPPED, &tty->flags);
@@ -2718,6 +2738,10 @@ long tty_ioctl(struct file *file, unsigned int cmd, unsigned long arg)
        int retval;
        struct tty_ldisc *ld;

+       /* Check whether the tty hung up. */
+       if (tty_hung_up_p(file))
+               return hung_up_tty_ioctl(file, cmd, arg);
+
        if (tty_paranoia_check(tty, file_inode(file), "tty_ioctl"))
                return -EINVAL;

diff --git a/include/linux/fs.h b/include/linux/fs.h
index ea3df718c53e..e96b86bab356 100644
--- a/include/linux/fs.h
+++ b/include/linux/fs.h
@@ -128,7 +128,8 @@ typedef int (dio_iodone_t)(struct kiocb *iocb, loff_t offset,
 /* File supports atomic writes */
 #define FMODE_CAN_ATOMIC_WRITE ((__force fmode_t)(1 << 7))

-/* FMODE_* bit 8 */
+/* File driver hung up (tty specific)  */
+#define FMODE_TTY_HUNGUP       ((__force fmode_t)(1 << 8))

 /* 32bit hashes as llseek() offset (for directories) */
 #define FMODE_32BITHASH         ((__force fmode_t)(1 << 9))


> 
> Reported-by: syzbot <syzbot+b7c3ba8cdc2f6cf83c21@syzkaller.appspotmail.com>
> Closes: https://syzkaller.appspot.com/bug?extid=b7c3ba8cdc2f6cf83c21
> Suggested-by: Linus Torvalds <torvalds@linux-foundation.org>
> Cc: Dmitry Vyukov <dvyukov@google.com>
> Cc: Marco Elver <elver@google.com>
> Signed-off-by: Tetsuo Handa <penguin-kernel@I-love.SAKURA.ne.jp>
> ---
> This patch has been tested using linux-next tree via my tomoyo tree since 20240611,
> and there was no response on
>   [fs] Are you OK with updating "struct file"->f_op value dynamically?
> at https://lkml.kernel.org/r/b221d2cf-7dc0-4624-a040-85c131ed72a1@I-love.SAKURA.ne.jp .
> Thus, I guess we can go with this approach.
> 
>  drivers/tty/tty_io.c | 34 ++++++++++++++++++++++++++++++++++
>  include/linux/fs.h   |  2 +-
>  2 files changed, 35 insertions(+), 1 deletion(-)
> 
> diff --git a/drivers/tty/tty_io.c b/drivers/tty/tty_io.c
> index 407b0d87b7c1..bc9aebcb873f 100644
> --- a/drivers/tty/tty_io.c
> +++ b/drivers/tty/tty_io.c
> @@ -430,6 +430,24 @@ static ssize_t hung_up_tty_write(struct kiocb *iocb, struct iov_iter *from)
>  	return -EIO;
>  }
>  
> +static ssize_t hung_up_copy_splice_read(struct file *in, loff_t *ppos,
> +					struct pipe_inode_info *pipe,
> +					size_t len, unsigned int flags)
> +{
> +	return -EINVAL;
> +}
> +
> +static ssize_t hung_up_iter_file_splice_write(struct pipe_inode_info *pipe, struct file *out,
> +					      loff_t *ppos, size_t len, unsigned int flags)
> +{
> +	return -EINVAL;
> +}
> +
> +static int hung_up_no_open(struct inode *inode, struct file *file)
> +{
> +	return -ENXIO;
> +}
> +
>  /* No kernel lock held - none needed ;) */
>  static __poll_t hung_up_tty_poll(struct file *filp, poll_table *wait)
>  {
> @@ -462,6 +480,12 @@ static void tty_show_fdinfo(struct seq_file *m, struct file *file)
>  }
>  
>  static const struct file_operations tty_fops = {
> +	/*
> +	 * WARNING: You must implement all callbacks defined in tty_fops in
> +	 * hung_up_tty_fops, for tty_fops and hung_up_tty_fops are toggled
> +	 * after "struct file" is published. Failure to synchronize has a risk
> +	 * of NULL pointer dereference bug.
> +	 */
>  	.llseek		= no_llseek,
>  	.read_iter	= tty_read,
>  	.write_iter	= tty_write,
> @@ -491,14 +515,24 @@ static const struct file_operations console_fops = {
>  };
>  
>  static const struct file_operations hung_up_tty_fops = {
> +	/*
> +	 * WARNING: You must implement all callbacks defined in hung_up_tty_fops
> +	 * in tty_fops, for tty_fops and hung_up_tty_fops are toggled after
> +	 * "struct file" is published. Failure to synchronize has a risk of
> +	 * NULL pointer dereference bug.
> +	 */
>  	.llseek		= no_llseek,
>  	.read_iter	= hung_up_tty_read,
>  	.write_iter	= hung_up_tty_write,
> +	.splice_read    = hung_up_copy_splice_read,
> +	.splice_write   = hung_up_iter_file_splice_write,
>  	.poll		= hung_up_tty_poll,
>  	.unlocked_ioctl	= hung_up_tty_ioctl,
>  	.compat_ioctl	= hung_up_tty_compat_ioctl,
> +	.open           = hung_up_no_open,
>  	.release	= tty_release,
>  	.fasync		= hung_up_tty_fasync,
> +	.show_fdinfo    = tty_show_fdinfo,
>  };
>  
>  static DEFINE_SPINLOCK(redirect_lock);
> diff --git a/include/linux/fs.h b/include/linux/fs.h
> index 0283cf366c2a..636bcc59a3f5 100644
> --- a/include/linux/fs.h
> +++ b/include/linux/fs.h
> @@ -1008,7 +1008,7 @@ struct file {
>  	struct file_ra_state	f_ra;
>  	struct path		f_path;
>  	struct inode		*f_inode;	/* cached value */
> -	const struct file_operations	*f_op;
> +	const struct file_operations	*__data_racy f_op;
>  
>  	u64			f_version;
>  #ifdef CONFIG_SECURITY
> -- 
> 2.43.5
>
Jan Kara July 22, 2024, 4:10 p.m. UTC | #2
On Mon 22-07-24 16:41:22, Christian Brauner wrote:
> On Fri, Jul 19, 2024 at 10:37:47PM GMT, Tetsuo Handa wrote:
> > syzbot is reporting data race between __tty_hangup() and __fput(), and
> > Dmitry Vyukov mentioned that this race has possibility of NULL pointer
> > dereference, for tty_fops implements e.g. splice_read callback whereas
> > hung_up_tty_fops does not.
> > 
> >   CPU0                                  CPU1
> >   ----                                  ----
> >   do_splice_read() {
> >                                         __tty_hangup() {
> >     // f_op->splice_read was copy_splice_read
> >     if (unlikely(!in->f_op->splice_read))
> >       return warn_unsupported(in, "read");
> >                                           filp->f_op = &hung_up_tty_fops;
> >     // f_op->splice_read is now NULL
> >     return in->f_op->splice_read(in, ppos, pipe, len, flags);
> >                                         }
> >   }
> > 
> > Fix possibility of NULL pointer dereference by implementing missing
> > callbacks, and suppress KCSAN messages by adding __data_racy qualifier
> > to "struct file"->f_op .
> 
> This f_op replacing without synchronization seems really iffy imho.

Yeah, when I saw this I was also going "ouch". I was just waiting whether a
tty maintainer will comment ;). Anyway this replacement of ops in file /
inode has proven problematic in almost every single case where it was used
leading to subtle issues.

> Why can't the hangup just be recorded in tty_file_private and then
> checked in the corresponding f_op->$method()?
> 
> And if that's not possible for some reason I'd be willing to sacrifice
> one of the FMODE_* bits I recently freed up and add e.g.,
> FMODE_TTY_HANGUP instead of this f_op raciness (Why wasn't this using
> replace_fops anyway so it'd be easy to grep for it?).

I don't think burning FMODE bit is really warranted here. Tty layer has its
own structure at filp->private_data and in fact
filp->private_data->tty->flags has TTY_HUPPING and TTY_HUPPED bits already
which we could presumably use.

								Honza

> diff --git a/drivers/tty/tty_io.c b/drivers/tty/tty_io.c
> index bc9aebcb873f..219bf6391fed 100644
> --- a/drivers/tty/tty_io.c
> +++ b/drivers/tty/tty_io.c
> @@ -583,6 +583,26 @@ static struct file *tty_release_redirect(struct tty_struct *tty)
>         return f;
>  }
> 
> +static noinline void noinline tty_mark_hungup(struct file *file)
> +{
> +       fmode_t fmode = READ_ONCE(file->f_mode);
> +
> +       do {
> +               if (fmode & FMODE_TTY_HUNGUP)
> +                       break;
> +       } while (!try_cmpxchg(&file->f_mode, &fmode, fmode | FMODE_TTY_HUNGUP));
> +}
> +
> +static noinline void noinline tty_clear_hungup(struct file *file)
> +{
> +       fmode_t fmode = READ_ONCE(file->f_mode);
> +
> +       do {
> +               if (!(fmode & FMODE_TTY_HUNGUP))
> +                       break;
> +       } while (!try_cmpxchg(&file->f_mode, &fmode, fmode & ~FMODE_TTY_HUNGUP));
> +}
> +
>  /**
>   * __tty_hangup - actual handler for hangup events
>   * @tty: tty device
> @@ -652,7 +672,7 @@ static void __tty_hangup(struct tty_struct *tty, int exit_session)
>                         continue;
>                 closecount++;
>                 __tty_fasync(-1, filp, 0);      /* can't block */
> -               filp->f_op = &hung_up_tty_fops;
> +               tty_mark_hungup(filp);
>         }
>         spin_unlock(&tty->files_lock);
> 
> @@ -776,7 +796,7 @@ void tty_vhangup_session(struct tty_struct *tty)
>   */
>  int tty_hung_up_p(struct file *filp)
>  {
> -       return (filp && filp->f_op == &hung_up_tty_fops);
> +       return (filp && (READ_ONCE(filp->f_mode) & FMODE_TTY_HUNGUP));
>  }
>  EXPORT_SYMBOL(tty_hung_up_p);
> 
> @@ -2204,7 +2224,7 @@ static int tty_open(struct inode *inode, struct file *filp)
>                  * Need to reset f_op in case a hangup happened.
>                  */
>                 if (tty_hung_up_p(filp))
> -                       filp->f_op = &tty_fops;
> +                       tty_clear_hungup(filp);
>                 goto retry_open;
>         }
>         clear_bit(TTY_HUPPED, &tty->flags);
> @@ -2718,6 +2738,10 @@ long tty_ioctl(struct file *file, unsigned int cmd, unsigned long arg)
>         int retval;
>         struct tty_ldisc *ld;
> 
> +       /* Check whether the tty hung up. */
> +       if (tty_hung_up_p(file))
> +               return hung_up_tty_ioctl(file, cmd, arg);
> +
>         if (tty_paranoia_check(tty, file_inode(file), "tty_ioctl"))
>                 return -EINVAL;
> 
> diff --git a/include/linux/fs.h b/include/linux/fs.h
> index ea3df718c53e..e96b86bab356 100644
> --- a/include/linux/fs.h
> +++ b/include/linux/fs.h
> @@ -128,7 +128,8 @@ typedef int (dio_iodone_t)(struct kiocb *iocb, loff_t offset,
>  /* File supports atomic writes */
>  #define FMODE_CAN_ATOMIC_WRITE ((__force fmode_t)(1 << 7))
> 
> -/* FMODE_* bit 8 */
> +/* File driver hung up (tty specific)  */
> +#define FMODE_TTY_HUNGUP       ((__force fmode_t)(1 << 8))
> 
>  /* 32bit hashes as llseek() offset (for directories) */
>  #define FMODE_32BITHASH         ((__force fmode_t)(1 << 9))
> 
> 
> > 
> > Reported-by: syzbot <syzbot+b7c3ba8cdc2f6cf83c21@syzkaller.appspotmail.com>
> > Closes: https://syzkaller.appspot.com/bug?extid=b7c3ba8cdc2f6cf83c21
> > Suggested-by: Linus Torvalds <torvalds@linux-foundation.org>
> > Cc: Dmitry Vyukov <dvyukov@google.com>
> > Cc: Marco Elver <elver@google.com>
> > Signed-off-by: Tetsuo Handa <penguin-kernel@I-love.SAKURA.ne.jp>
> > ---
> > This patch has been tested using linux-next tree via my tomoyo tree since 20240611,
> > and there was no response on
> >   [fs] Are you OK with updating "struct file"->f_op value dynamically?
> > at https://lkml.kernel.org/r/b221d2cf-7dc0-4624-a040-85c131ed72a1@I-love.SAKURA.ne.jp .
> > Thus, I guess we can go with this approach.
> > 
> >  drivers/tty/tty_io.c | 34 ++++++++++++++++++++++++++++++++++
> >  include/linux/fs.h   |  2 +-
> >  2 files changed, 35 insertions(+), 1 deletion(-)
> > 
> > diff --git a/drivers/tty/tty_io.c b/drivers/tty/tty_io.c
> > index 407b0d87b7c1..bc9aebcb873f 100644
> > --- a/drivers/tty/tty_io.c
> > +++ b/drivers/tty/tty_io.c
> > @@ -430,6 +430,24 @@ static ssize_t hung_up_tty_write(struct kiocb *iocb, struct iov_iter *from)
> >  	return -EIO;
> >  }
> >  
> > +static ssize_t hung_up_copy_splice_read(struct file *in, loff_t *ppos,
> > +					struct pipe_inode_info *pipe,
> > +					size_t len, unsigned int flags)
> > +{
> > +	return -EINVAL;
> > +}
> > +
> > +static ssize_t hung_up_iter_file_splice_write(struct pipe_inode_info *pipe, struct file *out,
> > +					      loff_t *ppos, size_t len, unsigned int flags)
> > +{
> > +	return -EINVAL;
> > +}
> > +
> > +static int hung_up_no_open(struct inode *inode, struct file *file)
> > +{
> > +	return -ENXIO;
> > +}
> > +
> >  /* No kernel lock held - none needed ;) */
> >  static __poll_t hung_up_tty_poll(struct file *filp, poll_table *wait)
> >  {
> > @@ -462,6 +480,12 @@ static void tty_show_fdinfo(struct seq_file *m, struct file *file)
> >  }
> >  
> >  static const struct file_operations tty_fops = {
> > +	/*
> > +	 * WARNING: You must implement all callbacks defined in tty_fops in
> > +	 * hung_up_tty_fops, for tty_fops and hung_up_tty_fops are toggled
> > +	 * after "struct file" is published. Failure to synchronize has a risk
> > +	 * of NULL pointer dereference bug.
> > +	 */
> >  	.llseek		= no_llseek,
> >  	.read_iter	= tty_read,
> >  	.write_iter	= tty_write,
> > @@ -491,14 +515,24 @@ static const struct file_operations console_fops = {
> >  };
> >  
> >  static const struct file_operations hung_up_tty_fops = {
> > +	/*
> > +	 * WARNING: You must implement all callbacks defined in hung_up_tty_fops
> > +	 * in tty_fops, for tty_fops and hung_up_tty_fops are toggled after
> > +	 * "struct file" is published. Failure to synchronize has a risk of
> > +	 * NULL pointer dereference bug.
> > +	 */
> >  	.llseek		= no_llseek,
> >  	.read_iter	= hung_up_tty_read,
> >  	.write_iter	= hung_up_tty_write,
> > +	.splice_read    = hung_up_copy_splice_read,
> > +	.splice_write   = hung_up_iter_file_splice_write,
> >  	.poll		= hung_up_tty_poll,
> >  	.unlocked_ioctl	= hung_up_tty_ioctl,
> >  	.compat_ioctl	= hung_up_tty_compat_ioctl,
> > +	.open           = hung_up_no_open,
> >  	.release	= tty_release,
> >  	.fasync		= hung_up_tty_fasync,
> > +	.show_fdinfo    = tty_show_fdinfo,
> >  };
> >  
> >  static DEFINE_SPINLOCK(redirect_lock);
> > diff --git a/include/linux/fs.h b/include/linux/fs.h
> > index 0283cf366c2a..636bcc59a3f5 100644
> > --- a/include/linux/fs.h
> > +++ b/include/linux/fs.h
> > @@ -1008,7 +1008,7 @@ struct file {
> >  	struct file_ra_state	f_ra;
> >  	struct path		f_path;
> >  	struct inode		*f_inode;	/* cached value */
> > -	const struct file_operations	*f_op;
> > +	const struct file_operations	*__data_racy f_op;
> >  
> >  	u64			f_version;
> >  #ifdef CONFIG_SECURITY
> > -- 
> > 2.43.5
> > 
>
Greg KH July 22, 2024, 4:24 p.m. UTC | #3
On Mon, Jul 22, 2024 at 06:10:41PM +0200, Jan Kara wrote:
> On Mon 22-07-24 16:41:22, Christian Brauner wrote:
> > On Fri, Jul 19, 2024 at 10:37:47PM GMT, Tetsuo Handa wrote:
> > > syzbot is reporting data race between __tty_hangup() and __fput(), and
> > > Dmitry Vyukov mentioned that this race has possibility of NULL pointer
> > > dereference, for tty_fops implements e.g. splice_read callback whereas
> > > hung_up_tty_fops does not.
> > > 
> > >   CPU0                                  CPU1
> > >   ----                                  ----
> > >   do_splice_read() {
> > >                                         __tty_hangup() {
> > >     // f_op->splice_read was copy_splice_read
> > >     if (unlikely(!in->f_op->splice_read))
> > >       return warn_unsupported(in, "read");
> > >                                           filp->f_op = &hung_up_tty_fops;
> > >     // f_op->splice_read is now NULL
> > >     return in->f_op->splice_read(in, ppos, pipe, len, flags);
> > >                                         }
> > >   }
> > > 
> > > Fix possibility of NULL pointer dereference by implementing missing
> > > callbacks, and suppress KCSAN messages by adding __data_racy qualifier
> > > to "struct file"->f_op .
> > 
> > This f_op replacing without synchronization seems really iffy imho.
> 
> Yeah, when I saw this I was also going "ouch". I was just waiting whether a
> tty maintainer will comment ;)

I really didn't want to :)

> Anyway this replacement of ops in file /
> inode has proven problematic in almost every single case where it was used
> leading to subtle issues.

Yeah, let's not do this.

Let me dig after -rc1 is out and see if there's a better way to handle
this contrived race condition...

thanks,

greg k-h
Tetsuo Handa July 22, 2024, 10:20 p.m. UTC | #4
On 2024/07/23 1:24, Greg Kroah-Hartman wrote:
> On Mon, Jul 22, 2024 at 06:10:41PM +0200, Jan Kara wrote:
>> On Mon 22-07-24 16:41:22, Christian Brauner wrote:
>>> On Fri, Jul 19, 2024 at 10:37:47PM GMT, Tetsuo Handa wrote:
>>>> syzbot is reporting data race between __tty_hangup() and __fput(), and
>>>> Dmitry Vyukov mentioned that this race has possibility of NULL pointer
>>>> dereference, for tty_fops implements e.g. splice_read callback whereas
>>>> hung_up_tty_fops does not.
>>>>
>>>>   CPU0                                  CPU1
>>>>   ----                                  ----
>>>>   do_splice_read() {
>>>>                                         __tty_hangup() {
>>>>     // f_op->splice_read was copy_splice_read
>>>>     if (unlikely(!in->f_op->splice_read))
>>>>       return warn_unsupported(in, "read");
>>>>                                           filp->f_op = &hung_up_tty_fops;
>>>>     // f_op->splice_read is now NULL
>>>>     return in->f_op->splice_read(in, ppos, pipe, len, flags);
>>>>                                         }
>>>>   }
>>>>
>>>> Fix possibility of NULL pointer dereference by implementing missing
>>>> callbacks, and suppress KCSAN messages by adding __data_racy qualifier
>>>> to "struct file"->f_op .
>>>
>>> This f_op replacing without synchronization seems really iffy imho.
>>
>> Yeah, when I saw this I was also going "ouch". I was just waiting whether a
>> tty maintainer will comment ;)
> 
> I really didn't want to :)
> 
>> Anyway this replacement of ops in file /
>> inode has proven problematic in almost every single case where it was used
>> leading to subtle issues.
> 
> Yeah, let's not do this.

https://lkml.kernel.org/r/18a58415-4aa9-4cba-97d2-b70384407313@I-love.SAKURA.ne.jp was a patch
that does not replace f_op, and
https://lkml.kernel.org/r/CAHk-=wgSOa_g+bxjNi+HQpC=6sHK2yKeoW-xOhb0-FVGMTDWjg@mail.gmail.com
was a comment from Linus.

> 
> Let me dig after -rc1 is out and see if there's a better way to handle
> this contrived race condition...
> 
> thanks,
> 
> greg k-h
Jan Kara July 25, 2024, 2:08 p.m. UTC | #5
On Tue 23-07-24 07:20:34, Tetsuo Handa wrote:
> On 2024/07/23 1:24, Greg Kroah-Hartman wrote:
> > On Mon, Jul 22, 2024 at 06:10:41PM +0200, Jan Kara wrote:
> >> On Mon 22-07-24 16:41:22, Christian Brauner wrote:
> >>> On Fri, Jul 19, 2024 at 10:37:47PM GMT, Tetsuo Handa wrote:
> >>>> syzbot is reporting data race between __tty_hangup() and __fput(), and
> >>>> Dmitry Vyukov mentioned that this race has possibility of NULL pointer
> >>>> dereference, for tty_fops implements e.g. splice_read callback whereas
> >>>> hung_up_tty_fops does not.
> >>>>
> >>>>   CPU0                                  CPU1
> >>>>   ----                                  ----
> >>>>   do_splice_read() {
> >>>>                                         __tty_hangup() {
> >>>>     // f_op->splice_read was copy_splice_read
> >>>>     if (unlikely(!in->f_op->splice_read))
> >>>>       return warn_unsupported(in, "read");
> >>>>                                           filp->f_op = &hung_up_tty_fops;
> >>>>     // f_op->splice_read is now NULL
> >>>>     return in->f_op->splice_read(in, ppos, pipe, len, flags);
> >>>>                                         }
> >>>>   }
> >>>>
> >>>> Fix possibility of NULL pointer dereference by implementing missing
> >>>> callbacks, and suppress KCSAN messages by adding __data_racy qualifier
> >>>> to "struct file"->f_op .
> >>>
> >>> This f_op replacing without synchronization seems really iffy imho.
> >>
> >> Yeah, when I saw this I was also going "ouch". I was just waiting whether a
> >> tty maintainer will comment ;)
> > 
> > I really didn't want to :)
> > 
> >> Anyway this replacement of ops in file /
> >> inode has proven problematic in almost every single case where it was used
> >> leading to subtle issues.
> > 
> > Yeah, let's not do this.
> 
> https://lkml.kernel.org/r/18a58415-4aa9-4cba-97d2-b70384407313@I-love.SAKURA.ne.jp was a patch
> that does not replace f_op, and
> https://lkml.kernel.org/r/CAHk-=wgSOa_g+bxjNi+HQpC=6sHK2yKeoW-xOhb0-FVGMTDWjg@mail.gmail.com
> was a comment from Linus.

OK, thanks for references. After doing some light audit of tty, I didn't
find a way how switching f_op could break the system. Still it is giving
me some creeps because usually there's some god-forgotten place somewhere
that caches some decision based on f_op content and when f_op change,
things go out of sync with strange results. And the splice operations
enabled by tty are complex enough to hide some potential problems.

In fact I'm not sure why tty switches f_op at all. The reliable check for
tty being hung up seems to be fetching ldisc pointer under a ldisc_lock and
most operations do this and handle it appropriately -> no need for special
f_op for hung up state for them. .ioctl notably might need some love but
overall it doesn't seem too hard to completely avoid changing f_op for tty.

								Honza
diff mbox series

Patch

diff --git a/drivers/tty/tty_io.c b/drivers/tty/tty_io.c
index 407b0d87b7c1..bc9aebcb873f 100644
--- a/drivers/tty/tty_io.c
+++ b/drivers/tty/tty_io.c
@@ -430,6 +430,24 @@  static ssize_t hung_up_tty_write(struct kiocb *iocb, struct iov_iter *from)
 	return -EIO;
 }
 
+static ssize_t hung_up_copy_splice_read(struct file *in, loff_t *ppos,
+					struct pipe_inode_info *pipe,
+					size_t len, unsigned int flags)
+{
+	return -EINVAL;
+}
+
+static ssize_t hung_up_iter_file_splice_write(struct pipe_inode_info *pipe, struct file *out,
+					      loff_t *ppos, size_t len, unsigned int flags)
+{
+	return -EINVAL;
+}
+
+static int hung_up_no_open(struct inode *inode, struct file *file)
+{
+	return -ENXIO;
+}
+
 /* No kernel lock held - none needed ;) */
 static __poll_t hung_up_tty_poll(struct file *filp, poll_table *wait)
 {
@@ -462,6 +480,12 @@  static void tty_show_fdinfo(struct seq_file *m, struct file *file)
 }
 
 static const struct file_operations tty_fops = {
+	/*
+	 * WARNING: You must implement all callbacks defined in tty_fops in
+	 * hung_up_tty_fops, for tty_fops and hung_up_tty_fops are toggled
+	 * after "struct file" is published. Failure to synchronize has a risk
+	 * of NULL pointer dereference bug.
+	 */
 	.llseek		= no_llseek,
 	.read_iter	= tty_read,
 	.write_iter	= tty_write,
@@ -491,14 +515,24 @@  static const struct file_operations console_fops = {
 };
 
 static const struct file_operations hung_up_tty_fops = {
+	/*
+	 * WARNING: You must implement all callbacks defined in hung_up_tty_fops
+	 * in tty_fops, for tty_fops and hung_up_tty_fops are toggled after
+	 * "struct file" is published. Failure to synchronize has a risk of
+	 * NULL pointer dereference bug.
+	 */
 	.llseek		= no_llseek,
 	.read_iter	= hung_up_tty_read,
 	.write_iter	= hung_up_tty_write,
+	.splice_read    = hung_up_copy_splice_read,
+	.splice_write   = hung_up_iter_file_splice_write,
 	.poll		= hung_up_tty_poll,
 	.unlocked_ioctl	= hung_up_tty_ioctl,
 	.compat_ioctl	= hung_up_tty_compat_ioctl,
+	.open           = hung_up_no_open,
 	.release	= tty_release,
 	.fasync		= hung_up_tty_fasync,
+	.show_fdinfo    = tty_show_fdinfo,
 };
 
 static DEFINE_SPINLOCK(redirect_lock);
diff --git a/include/linux/fs.h b/include/linux/fs.h
index 0283cf366c2a..636bcc59a3f5 100644
--- a/include/linux/fs.h
+++ b/include/linux/fs.h
@@ -1008,7 +1008,7 @@  struct file {
 	struct file_ra_state	f_ra;
 	struct path		f_path;
 	struct inode		*f_inode;	/* cached value */
-	const struct file_operations	*f_op;
+	const struct file_operations	*__data_racy f_op;
 
 	u64			f_version;
 #ifdef CONFIG_SECURITY