
inode: remove __I_DIO_WAKEUP

Message ID 20240816-vfs-misc-dio-v1-1-80fe21a2c710@kernel.org (mailing list archive)
State New
Series inode: remove __I_DIO_WAKEUP

Commit Message

Christian Brauner Aug. 16, 2024, 2:35 p.m. UTC
Afaict, we can just rely on inode->i_dio_count for waiting instead of
this awkward indirection through __I_DIO_WAKEUP. This survives LTP dio
and xfstests dio tests.

Signed-off-by: Christian Brauner <brauner@kernel.org>
---
---
 fs/inode.c         | 23 +++++++++++------------
 fs/netfs/locking.c | 18 +++---------------
 include/linux/fs.h |  9 ++++-----
 3 files changed, 18 insertions(+), 32 deletions(-)


---
base-commit: 5570f04d0bb1a34ebcb27caac76c797a7c9e36c9
change-id: 20240816-vfs-misc-dio-5cb07eaae155

Comments

Josef Bacik Aug. 16, 2024, 3:24 p.m. UTC | #1
On Fri, Aug 16, 2024 at 04:35:52PM +0200, Christian Brauner wrote:
> Afaict, we can just rely on inode->i_dio_count for waiting instead of
> this awkward indirection through __I_DIO_WAKEUP. This survives LTP dio
> and xfstests dio tests.
> 
> Signed-off-by: Christian Brauner <brauner@kernel.org>

Reviewed-by: Josef Bacik <josef@toxicpanda.com>

Thanks,

Josef
Jeff Layton Aug. 16, 2024, 3:30 p.m. UTC | #2
On Fri, 2024-08-16 at 16:35 +0200, Christian Brauner wrote:
> Afaict, we can just rely on inode->i_dio_count for waiting instead of
> this awkward indirection through __I_DIO_WAKEUP. This survives LTP dio
> and xfstests dio tests.
> 
> Signed-off-by: Christian Brauner <brauner@kernel.org>
> ---
> ---
>  fs/inode.c         | 23 +++++++++++------------
>  fs/netfs/locking.c | 18 +++---------------
>  include/linux/fs.h |  9 ++++-----
>  3 files changed, 18 insertions(+), 32 deletions(-)
> 
> diff --git a/fs/inode.c b/fs/inode.c
> index 7a4e27606fca..46bf05d826db 100644
> --- a/fs/inode.c
> +++ b/fs/inode.c
> @@ -2465,18 +2465,12 @@ EXPORT_SYMBOL(inode_owner_or_capable);
>  /*
>   * Direct i/o helper functions
>   */
> -static void __inode_dio_wait(struct inode *inode)
> +bool inode_dio_finished(const struct inode *inode)
>  {
> -	wait_queue_head_t *wq = bit_waitqueue(&inode->i_state, __I_DIO_WAKEUP);
> -	DEFINE_WAIT_BIT(q, &inode->i_state, __I_DIO_WAKEUP);
> -
> -	do {
> -		prepare_to_wait(wq, &q.wq_entry, TASK_UNINTERRUPTIBLE);
> -		if (atomic_read(&inode->i_dio_count))
> -			schedule();
> -	} while (atomic_read(&inode->i_dio_count));
> -	finish_wait(wq, &q.wq_entry);
> +	smp_mb__before_atomic();
> +	return atomic_read(&inode->i_dio_count) == 0;
>  }
> +EXPORT_SYMBOL(inode_dio_finished);
>  
>  /**
>   * inode_dio_wait - wait for outstanding DIO requests to finish
> @@ -2490,11 +2484,16 @@ static void __inode_dio_wait(struct inode *inode)
>   */
>  void inode_dio_wait(struct inode *inode)
>  {
> -	if (atomic_read(&inode->i_dio_count))
> -		__inode_dio_wait(inode);
> +	wait_var_event(&inode->i_dio_count, inode_dio_finished);
>  }
>  EXPORT_SYMBOL(inode_dio_wait);
>  
> +void inode_dio_wait_interruptible(struct inode *inode)
> +{
> +	wait_var_event_interruptible(&inode->i_dio_count, inode_dio_finished);
> +}
> +EXPORT_SYMBOL(inode_dio_wait_interruptible);
> +
>  /*
>   * inode_set_flags - atomically set some inode flags
>   *
> diff --git a/fs/netfs/locking.c b/fs/netfs/locking.c
> index 75dc52a49b3a..c2cfdda85230 100644
> --- a/fs/netfs/locking.c
> +++ b/fs/netfs/locking.c
> @@ -21,23 +21,11 @@
>   */
>  static int inode_dio_wait_interruptible(struct inode *inode)
>  {
> -	if (!atomic_read(&inode->i_dio_count))
> +	if (inode_dio_finished(inode))
>  		return 0;
>  
> -	wait_queue_head_t *wq = bit_waitqueue(&inode->i_state, __I_DIO_WAKEUP);
> -	DEFINE_WAIT_BIT(q, &inode->i_state, __I_DIO_WAKEUP);
> -
> -	for (;;) {
> -		prepare_to_wait(wq, &q.wq_entry, TASK_INTERRUPTIBLE);
> -		if (!atomic_read(&inode->i_dio_count))
> -			break;
> -		if (signal_pending(current))
> -			break;
> -		schedule();
> -	}
> -	finish_wait(wq, &q.wq_entry);
> -
> -	return atomic_read(&inode->i_dio_count) ? -ERESTARTSYS : 0;
> +	inode_dio_wait_interruptible(inode);
> +	return !inode_dio_finished(inode) ? -ERESTARTSYS : 0;
>  }
>  
>  /* Call with exclusively locked inode->i_rwsem */
> diff --git a/include/linux/fs.h b/include/linux/fs.h
> index b6f2e2a1e513..f744cd918259 100644
> --- a/include/linux/fs.h
> +++ b/include/linux/fs.h
> @@ -2380,8 +2380,6 @@ static inline void kiocb_clone(struct kiocb *kiocb, struct kiocb *kiocb_src,
>   *
>   * I_REFERENCED		Marks the inode as recently references on the LRU list.
>   *
> - * I_DIO_WAKEUP		Never set.  Only used as a key for wait_on_bit().
> - *
>   * I_WB_SWITCH		Cgroup bdi_writeback switching in progress.  Used to
>   *			synchronize competing switching instances and to tell
>   *			wb stat updates to grab the i_pages lock.  See
> @@ -2413,8 +2411,6 @@ static inline void kiocb_clone(struct kiocb *kiocb, struct kiocb *kiocb_src,
>  #define __I_SYNC		7
>  #define I_SYNC			(1 << __I_SYNC)
>  #define I_REFERENCED		(1 << 8)
> -#define __I_DIO_WAKEUP		9
> -#define I_DIO_WAKEUP		(1 << __I_DIO_WAKEUP)
>  #define I_LINKABLE		(1 << 10)
>  #define I_DIRTY_TIME		(1 << 11)
>  #define I_WB_SWITCH		(1 << 13)
> @@ -3230,6 +3226,7 @@ static inline ssize_t blockdev_direct_IO(struct kiocb *iocb,
>  #endif
>  
>  void inode_dio_wait(struct inode *inode);
> +void inode_dio_wait_interruptible(struct inode *inode);
>  
>  /**
>   * inode_dio_begin - signal start of a direct I/O requests
> @@ -3241,6 +3238,7 @@ void inode_dio_wait(struct inode *inode);
>  static inline void inode_dio_begin(struct inode *inode)
>  {
>  	atomic_inc(&inode->i_dio_count);
> +	smp_mb__after_atomic();
>  }
>  
>  /**
> @@ -3252,8 +3250,9 @@ static inline void inode_dio_begin(struct inode *inode)
>   */
>  static inline void inode_dio_end(struct inode *inode)
>  {
> +	smp_mb__before_atomic();
>  	if (atomic_dec_and_test(&inode->i_dio_count))
> -		wake_up_bit(&inode->i_state, __I_DIO_WAKEUP);
> +		wake_up_var(&inode->i_dio_count);
>  }
>  
>  extern void inode_set_flags(struct inode *inode, unsigned int flags,
> 
> ---
> base-commit: 5570f04d0bb1a34ebcb27caac76c797a7c9e36c9
> change-id: 20240816-vfs-misc-dio-5cb07eaae155
> 

Nice cleanup!

Reviewed-by: Jeff Layton <jlayton@kernel.org>
David Howells Aug. 16, 2024, 4:27 p.m. UTC | #3
cc'ing Christoph as he did the original (bd5fe6c5eb9c5).

Christian Brauner <brauner@kernel.org> wrote:

> Afaict, we can just rely on inode->i_dio_count for waiting instead of
> this awkward indirection through __I_DIO_WAKEUP. This survives LTP dio
> and xfstests dio tests.
> 
> Signed-off-by: Christian Brauner <brauner@kernel.org>
> ---
> ---
>  fs/inode.c         | 23 +++++++++++------------
>  fs/netfs/locking.c | 18 +++---------------
>  include/linux/fs.h |  9 ++++-----
>  3 files changed, 18 insertions(+), 32 deletions(-)
> 
> diff --git a/fs/inode.c b/fs/inode.c
> index 7a4e27606fca..46bf05d826db 100644
> --- a/fs/inode.c
> +++ b/fs/inode.c
> @@ -2465,18 +2465,12 @@ EXPORT_SYMBOL(inode_owner_or_capable);
>  /*
>   * Direct i/o helper functions
>   */
> -static void __inode_dio_wait(struct inode *inode)
> +bool inode_dio_finished(const struct inode *inode)
>  {
> -	wait_queue_head_t *wq = bit_waitqueue(&inode->i_state, __I_DIO_WAKEUP);
> -	DEFINE_WAIT_BIT(q, &inode->i_state, __I_DIO_WAKEUP);
> -
> -	do {
> -		prepare_to_wait(wq, &q.wq_entry, TASK_UNINTERRUPTIBLE);
> -		if (atomic_read(&inode->i_dio_count))
> -			schedule();
> -	} while (atomic_read(&inode->i_dio_count));
> -	finish_wait(wq, &q.wq_entry);
> +	smp_mb__before_atomic();
> +	return atomic_read(&inode->i_dio_count) == 0;

Can atomic_read_acquire() be used here?
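
For reference, the substitution being asked about would presumably look
something like this (sketch only, not part of the posted patch):

bool inode_dio_finished(const struct inode *inode)
{
	/* acquire ordering folded into the load instead of a separate barrier */
	return atomic_read_acquire(&inode->i_dio_count) == 0;
}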

>  }
> +EXPORT_SYMBOL(inode_dio_finished);
>  
>  /**
>   * inode_dio_wait - wait for outstanding DIO requests to finish
> @@ -2490,11 +2484,16 @@ static void __inode_dio_wait(struct inode *inode)
>   */
>  void inode_dio_wait(struct inode *inode)
>  {
> -	if (atomic_read(&inode->i_dio_count))
> -		__inode_dio_wait(inode);
> +	wait_var_event(&inode->i_dio_count, inode_dio_finished);
>  }
>  EXPORT_SYMBOL(inode_dio_wait);
>  
> +void inode_dio_wait_interruptible(struct inode *inode)
> +{
> +	wait_var_event_interruptible(&inode->i_dio_count, inode_dio_finished);
> +}
> +EXPORT_SYMBOL(inode_dio_wait_interruptible);
> +
>  /*
>   * inode_set_flags - atomically set some inode flags
>   *
> diff --git a/fs/netfs/locking.c b/fs/netfs/locking.c
> index 75dc52a49b3a..c2cfdda85230 100644
> --- a/fs/netfs/locking.c
> +++ b/fs/netfs/locking.c
> @@ -21,23 +21,11 @@
>   */
>  static int inode_dio_wait_interruptible(struct inode *inode)
>  {
> -	if (!atomic_read(&inode->i_dio_count))
> +	if (inode_dio_finished(inode))
>  		return 0;
>  
> -	wait_queue_head_t *wq = bit_waitqueue(&inode->i_state, __I_DIO_WAKEUP);
> -	DEFINE_WAIT_BIT(q, &inode->i_state, __I_DIO_WAKEUP);
> -
> -	for (;;) {
> -		prepare_to_wait(wq, &q.wq_entry, TASK_INTERRUPTIBLE);
> -		if (!atomic_read(&inode->i_dio_count))
> -			break;
> -		if (signal_pending(current))
> -			break;
> -		schedule();
> -	}
> -	finish_wait(wq, &q.wq_entry);
> -
> -	return atomic_read(&inode->i_dio_count) ? -ERESTARTSYS : 0;
> +	inode_dio_wait_interruptible(inode);
> +	return !inode_dio_finished(inode) ? -ERESTARTSYS : 0;
>  }
>  
>  /* Call with exclusively locked inode->i_rwsem */
> diff --git a/include/linux/fs.h b/include/linux/fs.h
> index b6f2e2a1e513..f744cd918259 100644
> --- a/include/linux/fs.h
> +++ b/include/linux/fs.h
> @@ -2380,8 +2380,6 @@ static inline void kiocb_clone(struct kiocb *kiocb, struct kiocb *kiocb_src,
>   *
>   * I_REFERENCED		Marks the inode as recently references on the LRU list.
>   *
> - * I_DIO_WAKEUP		Never set.  Only used as a key for wait_on_bit().
> - *
>   * I_WB_SWITCH		Cgroup bdi_writeback switching in progress.  Used to
>   *			synchronize competing switching instances and to tell
>   *			wb stat updates to grab the i_pages lock.  See
> @@ -2413,8 +2411,6 @@ static inline void kiocb_clone(struct kiocb *kiocb, struct kiocb *kiocb_src,
>  #define __I_SYNC		7
>  #define I_SYNC			(1 << __I_SYNC)
>  #define I_REFERENCED		(1 << 8)
> -#define __I_DIO_WAKEUP		9
> -#define I_DIO_WAKEUP		(1 << __I_DIO_WAKEUP)
>  #define I_LINKABLE		(1 << 10)
>  #define I_DIRTY_TIME		(1 << 11)
>  #define I_WB_SWITCH		(1 << 13)
> @@ -3230,6 +3226,7 @@ static inline ssize_t blockdev_direct_IO(struct kiocb *iocb,
>  #endif
>  
>  void inode_dio_wait(struct inode *inode);
> +void inode_dio_wait_interruptible(struct inode *inode);
>  
>  /**
>   * inode_dio_begin - signal start of a direct I/O requests
> @@ -3241,6 +3238,7 @@ void inode_dio_wait(struct inode *inode);
>  static inline void inode_dio_begin(struct inode *inode)
>  {
>  	atomic_inc(&inode->i_dio_count);
> +	smp_mb__after_atomic();
>  }
>  
>  /**
> @@ -3252,8 +3250,9 @@ static inline void inode_dio_begin(struct inode *inode)
>   */
>  static inline void inode_dio_end(struct inode *inode)
>  {
> +	smp_mb__before_atomic();
>  	if (atomic_dec_and_test(&inode->i_dio_count))

Doesn't atomic_dec_and_test() imply full barriering?  See atomic_t.txt,
"ORDERING":

 - RMW operations that have a return value are fully ordered;

> -		wake_up_bit(&inode->i_state, __I_DIO_WAKEUP);
> +		wake_up_var(&inode->i_dio_count);
>  }
>  
>  extern void inode_set_flags(struct inode *inode, unsigned int flags,
> 
> ---
> base-commit: 5570f04d0bb1a34ebcb27caac76c797a7c9e36c9
> change-id: 20240816-vfs-misc-dio-5cb07eaae155
>
Mateusz Guzik Aug. 17, 2024, 12:35 a.m. UTC | #4
On Fri, Aug 16, 2024 at 05:27:05PM +0100, David Howells wrote:
> Christian Brauner <brauner@kernel.org> wrote:
> 
> > Afaict, we can just rely on inode->i_dio_count for waiting instead of
> > this awkward indirection through __I_DIO_WAKEUP. This survives LTP dio
> > and xfstests dio tests.
> > 
> > Signed-off-by: Christian Brauner <brauner@kernel.org>
> > ---
> > ---
> >  fs/inode.c         | 23 +++++++++++------------
> >  fs/netfs/locking.c | 18 +++---------------
> >  include/linux/fs.h |  9 ++++-----
> >  3 files changed, 18 insertions(+), 32 deletions(-)
> > 
> > diff --git a/fs/inode.c b/fs/inode.c
> > index 7a4e27606fca..46bf05d826db 100644
> > --- a/fs/inode.c
> > +++ b/fs/inode.c
> > @@ -2465,18 +2465,12 @@ EXPORT_SYMBOL(inode_owner_or_capable);
> >  /*
> >   * Direct i/o helper functions
> >   */
> > -static void __inode_dio_wait(struct inode *inode)
> > +bool inode_dio_finished(const struct inode *inode)
> >  {
> > -	wait_queue_head_t *wq = bit_waitqueue(&inode->i_state, __I_DIO_WAKEUP);
> > -	DEFINE_WAIT_BIT(q, &inode->i_state, __I_DIO_WAKEUP);
> > -
> > -	do {
> > -		prepare_to_wait(wq, &q.wq_entry, TASK_UNINTERRUPTIBLE);
> > -		if (atomic_read(&inode->i_dio_count))
> > -			schedule();
> > -	} while (atomic_read(&inode->i_dio_count));
> > -	finish_wait(wq, &q.wq_entry);
> > +	smp_mb__before_atomic();
> > +	return atomic_read(&inode->i_dio_count) == 0;
> 
> Can atomic_read_acquire() be used here?
> 

I think the key point is that smp_mb__before_atomic is not usable with
atomic_read and atomic_set -- these merely expand to regular movs, not
providing a full fence (in contrast to lock-prefixed instructions on
amd64).

As for what ordering is needed here (and in particular would acquire do
the job), I don't know yet.
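
To illustrate the distinction (a sketch assuming the rules from
Documentation/atomic_t.txt, not tied to any particular arch):

	/*
	 * smp_mb__{before,after}_atomic() only upgrade the ordering of
	 * non-value-returning RMW atomics such as atomic_inc()/atomic_dec().
	 * Pairing one with a plain load buys nothing as far as the LKMM is
	 * concerned:
	 */
	smp_mb__before_atomic();			/* no RMW op follows */
	pending = atomic_read(&inode->i_dio_count);	/* still a relaxed load */

	/* an explicit fence would have to be spelled out as such: */
	smp_mb();
	pending = atomic_read(&inode->i_dio_count);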

> >  static inline void inode_dio_end(struct inode *inode)
> >  {
> > +	smp_mb__before_atomic();
> >  	if (atomic_dec_and_test(&inode->i_dio_count))
> 
> Doesn't atomic_dec_and_test() imply full barriering?  See atomic_t.txt,
> "ORDERING":
> 
>  - RMW operations that have a return value are fully ordered;
> 

smp_mb__before_atomic is indeed redundant here.

atomic_dec_and_test() and its variants are used for refcounting, relying on
the implicitly provided ordering (e.g., see fput()).
David Howells Aug. 17, 2024, 7:24 a.m. UTC | #5
Mateusz Guzik <mjguzik@gmail.com> wrote:

> ... atomic_read and atomic_set -- these merely expand to regular movs, ...

Ah, no, that's not necessarily true.  See arch/parisc/include/asm/atomic.h.

David
Mateusz Guzik Aug. 17, 2024, 9:09 a.m. UTC | #6
On Sat, Aug 17, 2024 at 9:25 AM David Howells <dhowells@redhat.com> wrote:
>
> Mateusz Guzik <mjguzik@gmail.com> wrote:
>
> > ... atomic_read and atomic_set -- these merely expand to regular movs, ...
>
> Ah, no, that's not necessarily true.  See arch/parisc/include/asm/atomic.h.
>

In that context I was talking about amd64.

atomic_read/set do happen to provide some fencing on some archs, but
there are no guarantees as far as LKMM is concerned -- for example
atomic_read is explicitly documented to only provide relaxed ordering
(aka no guarantees).

In particular, on amd64 the full memory barrier normally provided by
lock-prefixed instructions (and by xchg with a memory operand) is not
present here because atomic_read() merely expands to a regular mov, making
the code bogus if such a fence is indeed required.
Dave Chinner Aug. 17, 2024, 11:48 p.m. UTC | #7
On Fri, Aug 16, 2024 at 04:35:52PM +0200, Christian Brauner wrote:
> Afaict, we can just rely on inode->i_dio_count for waiting instead of
> this awkward indirection through __I_DIO_WAKEUP. This survives LTP dio
> and xfstests dio tests.
> 
> Signed-off-by: Christian Brauner <brauner@kernel.org>
> ---
> ---
>  fs/inode.c         | 23 +++++++++++------------
>  fs/netfs/locking.c | 18 +++---------------
>  include/linux/fs.h |  9 ++++-----
>  3 files changed, 18 insertions(+), 32 deletions(-)
> 
> diff --git a/fs/inode.c b/fs/inode.c
> index 7a4e27606fca..46bf05d826db 100644
> --- a/fs/inode.c
> +++ b/fs/inode.c
> @@ -2465,18 +2465,12 @@ EXPORT_SYMBOL(inode_owner_or_capable);
>  /*
>   * Direct i/o helper functions
>   */
> -static void __inode_dio_wait(struct inode *inode)
> +bool inode_dio_finished(const struct inode *inode)
>  {
> -	wait_queue_head_t *wq = bit_waitqueue(&inode->i_state, __I_DIO_WAKEUP);
> -	DEFINE_WAIT_BIT(q, &inode->i_state, __I_DIO_WAKEUP);
> -
> -	do {
> -		prepare_to_wait(wq, &q.wq_entry, TASK_UNINTERRUPTIBLE);
> -		if (atomic_read(&inode->i_dio_count))
> -			schedule();
> -	} while (atomic_read(&inode->i_dio_count));
> -	finish_wait(wq, &q.wq_entry);
> +	smp_mb__before_atomic();
> +	return atomic_read(&inode->i_dio_count) == 0;
>  }
> +EXPORT_SYMBOL(inode_dio_finished);

What is the memory barrier here for?  i_dio_count is not a reference
count - there are no dependent inode object changes before/after it
changes that it needs to be ordered against, so I'm not sure what
the purpose of this memory barrier is supposed to be...

Memory barriers -always- need a comment describing the race
condition they are avoiding.

>  /**
>   * inode_dio_wait - wait for outstanding DIO requests to finish
> @@ -2490,11 +2484,16 @@ static void __inode_dio_wait(struct inode *inode)
>   */
>  void inode_dio_wait(struct inode *inode)
>  {
> -	if (atomic_read(&inode->i_dio_count))
> -		__inode_dio_wait(inode);
> +	wait_var_event(&inode->i_dio_count, inode_dio_finished);
>  }
>  EXPORT_SYMBOL(inode_dio_wait);
>  
> +void inode_dio_wait_interruptible(struct inode *inode)
> +{
> +	wait_var_event_interruptible(&inode->i_dio_count, inode_dio_finished);
> +}
> +EXPORT_SYMBOL(inode_dio_wait_interruptible);

Keep in mind that the prepare_to_wait() call inside
wait_var_event_interruptible() takes the waitqueue head lock before
checking the condition. This provides the necessary memory barriers
to prevent wait/wakeup races checking the inode_dio_finished()
state. Hence, AFAICT, no memory barriers are needed in
inode_dio_finished() at all....
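
Roughly, the wait side boils down to the following (simplified sketch of
what the wait_var_event() machinery in include/linux/wait_bit.h expands to;
details elided):

	struct wait_queue_head *wq = __var_waitqueue(&inode->i_dio_count);
	struct wait_bit_queue_entry entry;

	init_wait_var_entry(&entry, &inode->i_dio_count, 0);
	for (;;) {
		/* takes wq->lock and sets the task state ... */
		prepare_to_wait_event(wq, &entry.wq_entry, TASK_UNINTERRUPTIBLE);
		/* ... so this check is ordered against the waker side */
		if (inode_dio_finished(inode))
			break;
		schedule();
	}
	finish_wait(wq, &entry.wq_entry);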

>  /*
>   * inode_set_flags - atomically set some inode flags
>   *
> diff --git a/fs/netfs/locking.c b/fs/netfs/locking.c
> index 75dc52a49b3a..c2cfdda85230 100644
> --- a/fs/netfs/locking.c
> +++ b/fs/netfs/locking.c
> @@ -21,23 +21,11 @@
>   */
>  static int inode_dio_wait_interruptible(struct inode *inode)
>  {
> -	if (!atomic_read(&inode->i_dio_count))
> +	if (inode_dio_finished(inode))
>  		return 0;
>  
> -	wait_queue_head_t *wq = bit_waitqueue(&inode->i_state, __I_DIO_WAKEUP);
> -	DEFINE_WAIT_BIT(q, &inode->i_state, __I_DIO_WAKEUP);
> -
> -	for (;;) {
> -		prepare_to_wait(wq, &q.wq_entry, TASK_INTERRUPTIBLE);
> -		if (!atomic_read(&inode->i_dio_count))
> -			break;
> -		if (signal_pending(current))
> -			break;
> -		schedule();
> -	}
> -	finish_wait(wq, &q.wq_entry);
> -
> -	return atomic_read(&inode->i_dio_count) ? -ERESTARTSYS : 0;
> +	inode_dio_wait_interruptible(inode);
> +	return !inode_dio_finished(inode) ? -ERESTARTSYS : 0;

That looks broken. We have a private static function calling an
exported function of the same name. I suspect that this static
function needs to be named "netfs_dio_wait_interruptible()"....
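
i.e. something along these lines (sketch of the suggested rename only; the
actual fix in the tree may differ):

static int netfs_dio_wait_interruptible(struct inode *inode)
{
	if (inode_dio_finished(inode))
		return 0;

	inode_dio_wait_interruptible(inode);
	return !inode_dio_finished(inode) ? -ERESTARTSYS : 0;
}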

> @@ -2413,8 +2411,6 @@ static inline void kiocb_clone(struct kiocb *kiocb, struct kiocb *kiocb_src,
>  #define __I_SYNC		7
>  #define I_SYNC			(1 << __I_SYNC)
>  #define I_REFERENCED		(1 << 8)
> -#define __I_DIO_WAKEUP		9
> -#define I_DIO_WAKEUP		(1 << __I_DIO_WAKEUP)
>  #define I_LINKABLE		(1 << 10)
>  #define I_DIRTY_TIME		(1 << 11)
>  #define I_WB_SWITCH		(1 << 13)
> @@ -3230,6 +3226,7 @@ static inline ssize_t blockdev_direct_IO(struct kiocb *iocb,
>  #endif
>  
>  void inode_dio_wait(struct inode *inode);
> +void inode_dio_wait_interruptible(struct inode *inode);
>  
>  /**
>   * inode_dio_begin - signal start of a direct I/O requests
> @@ -3241,6 +3238,7 @@ void inode_dio_wait(struct inode *inode);
>  static inline void inode_dio_begin(struct inode *inode)
>  {
>  	atomic_inc(&inode->i_dio_count);
> +	smp_mb__after_atomic();
>  }

Again I have no idea what this barrier is doing. Why does this
operation need quasi-release semantics?

>  /**
> @@ -3252,8 +3250,9 @@ static inline void inode_dio_begin(struct inode *inode)
>   */
>  static inline void inode_dio_end(struct inode *inode)
>  {
> +	smp_mb__before_atomic();
>  	if (atomic_dec_and_test(&inode->i_dio_count))
> -		wake_up_bit(&inode->i_state, __I_DIO_WAKEUP);
> +		wake_up_var(&inode->i_dio_count);
>  }

atomic_dec_and_test() is a RMW atomic operation with a return value,
so it has fully ordered semantics according to
Documentation/atomic_t.txt:

	 - RMW operations that have a return value are fully ordered.
	[...]
	Fully ordered primitives are ordered against everything prior and everything
	subsequent. Therefore a fully ordered primitive is like having an smp_mb()
	before and an smp_mb() after the primitive.

So there's never a need for explicit barriers before/after an
atomic_dec_and_test() operation, right?
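
If so, the end of the helper would presumably reduce to something like this
(a sketch following that reasoning, not code from the posted patch):

static inline void inode_dio_end(struct inode *inode)
{
	/* value-returning RMW: already fully ordered, no extra barrier needed */
	if (atomic_dec_and_test(&inode->i_dio_count))
		wake_up_var(&inode->i_dio_count);
}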

-Dave.
Christian Brauner Aug. 19, 2024, 9:25 a.m. UTC | #8
> > -	return atomic_read(&inode->i_dio_count) ? -ERESTARTSYS : 0;
> > +	inode_dio_wait_interruptible(inode);
> > +	return !inode_dio_finished(inode) ? -ERESTARTSYS : 0;
> 
> That looks broken. We have a private static function calling an
> exported function of the same name. I suspect that this static
> function needs to be named "netfs_dio_wait_interruptible()"....

Yep. I already fixed that in the tree.

> atomic_dec_and_test() is a RMW atomic operation with a return value,
> so has has fully ordered semanitcs according to
> Documentation/atomic_t.txt:
> 
> 	 - RMW operations that have a return value are fully ordered.
> 	[...]
> 	Fully ordered primitives are ordered against everything prior and everything
> 	subsequent. Therefore a fully ordered primitive is like having an smp_mb()
> 	before and an smp_mb() after the primitive.
> 
> So there's never a need for explicit barriers before/after an
> atomic_dec_and_test() operation, right?

Thanks for the comments on the barriers.

Frankly, I added the barriers because the internal implementation of the
wait_*() functions tends to confuse me, as they require memory barriers to
ensure that wait_var_event() sees the condition fulfilled or that
waitqueue_active() sees the waiter. And apparently I'm not the only one
confused by these APIs (see Neil's series).
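
For what it's worth, the pattern that keeps tripping people up is the one
documented above waitqueue_active() in include/linux/wait.h, roughly (sketch
of that documented race, not code from this series):

	/*
	 *      waker                           waiter
	 *      -----                           ------
	 *      cond = true;                    prepare_to_wait(&wq, &wait, state);
	 *      smp_mb();                       if (!cond)
	 *      if (waitqueue_active(&wq))              schedule();
	 *              wake_up(&wq);           finish_wait(&wq, &wait);
	 *
	 * Without the waker-side smp_mb() the store to cond and the
	 * waitqueue_active() load can be reordered against the waiter's
	 * prepare_to_wait()/condition check, and the wakeup can be missed.
	 */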

Patch

diff --git a/fs/inode.c b/fs/inode.c
index 7a4e27606fca..46bf05d826db 100644
--- a/fs/inode.c
+++ b/fs/inode.c
@@ -2465,18 +2465,12 @@  EXPORT_SYMBOL(inode_owner_or_capable);
 /*
  * Direct i/o helper functions
  */
-static void __inode_dio_wait(struct inode *inode)
+bool inode_dio_finished(const struct inode *inode)
 {
-	wait_queue_head_t *wq = bit_waitqueue(&inode->i_state, __I_DIO_WAKEUP);
-	DEFINE_WAIT_BIT(q, &inode->i_state, __I_DIO_WAKEUP);
-
-	do {
-		prepare_to_wait(wq, &q.wq_entry, TASK_UNINTERRUPTIBLE);
-		if (atomic_read(&inode->i_dio_count))
-			schedule();
-	} while (atomic_read(&inode->i_dio_count));
-	finish_wait(wq, &q.wq_entry);
+	smp_mb__before_atomic();
+	return atomic_read(&inode->i_dio_count) == 0;
 }
+EXPORT_SYMBOL(inode_dio_finished);
 
 /**
  * inode_dio_wait - wait for outstanding DIO requests to finish
@@ -2490,11 +2484,16 @@  static void __inode_dio_wait(struct inode *inode)
  */
 void inode_dio_wait(struct inode *inode)
 {
-	if (atomic_read(&inode->i_dio_count))
-		__inode_dio_wait(inode);
+	wait_var_event(&inode->i_dio_count, inode_dio_finished);
 }
 EXPORT_SYMBOL(inode_dio_wait);
 
+void inode_dio_wait_interruptible(struct inode *inode)
+{
+	wait_var_event_interruptible(&inode->i_dio_count, inode_dio_finished);
+}
+EXPORT_SYMBOL(inode_dio_wait_interruptible);
+
 /*
  * inode_set_flags - atomically set some inode flags
  *
diff --git a/fs/netfs/locking.c b/fs/netfs/locking.c
index 75dc52a49b3a..c2cfdda85230 100644
--- a/fs/netfs/locking.c
+++ b/fs/netfs/locking.c
@@ -21,23 +21,11 @@ 
  */
 static int inode_dio_wait_interruptible(struct inode *inode)
 {
-	if (!atomic_read(&inode->i_dio_count))
+	if (inode_dio_finished(inode))
 		return 0;
 
-	wait_queue_head_t *wq = bit_waitqueue(&inode->i_state, __I_DIO_WAKEUP);
-	DEFINE_WAIT_BIT(q, &inode->i_state, __I_DIO_WAKEUP);
-
-	for (;;) {
-		prepare_to_wait(wq, &q.wq_entry, TASK_INTERRUPTIBLE);
-		if (!atomic_read(&inode->i_dio_count))
-			break;
-		if (signal_pending(current))
-			break;
-		schedule();
-	}
-	finish_wait(wq, &q.wq_entry);
-
-	return atomic_read(&inode->i_dio_count) ? -ERESTARTSYS : 0;
+	inode_dio_wait_interruptible(inode);
+	return !inode_dio_finished(inode) ? -ERESTARTSYS : 0;
 }
 
 /* Call with exclusively locked inode->i_rwsem */
diff --git a/include/linux/fs.h b/include/linux/fs.h
index b6f2e2a1e513..f744cd918259 100644
--- a/include/linux/fs.h
+++ b/include/linux/fs.h
@@ -2380,8 +2380,6 @@  static inline void kiocb_clone(struct kiocb *kiocb, struct kiocb *kiocb_src,
  *
  * I_REFERENCED		Marks the inode as recently references on the LRU list.
  *
- * I_DIO_WAKEUP		Never set.  Only used as a key for wait_on_bit().
- *
  * I_WB_SWITCH		Cgroup bdi_writeback switching in progress.  Used to
  *			synchronize competing switching instances and to tell
  *			wb stat updates to grab the i_pages lock.  See
@@ -2413,8 +2411,6 @@  static inline void kiocb_clone(struct kiocb *kiocb, struct kiocb *kiocb_src,
 #define __I_SYNC		7
 #define I_SYNC			(1 << __I_SYNC)
 #define I_REFERENCED		(1 << 8)
-#define __I_DIO_WAKEUP		9
-#define I_DIO_WAKEUP		(1 << __I_DIO_WAKEUP)
 #define I_LINKABLE		(1 << 10)
 #define I_DIRTY_TIME		(1 << 11)
 #define I_WB_SWITCH		(1 << 13)
@@ -3230,6 +3226,7 @@  static inline ssize_t blockdev_direct_IO(struct kiocb *iocb,
 #endif
 
 void inode_dio_wait(struct inode *inode);
+void inode_dio_wait_interruptible(struct inode *inode);
 
 /**
  * inode_dio_begin - signal start of a direct I/O requests
@@ -3241,6 +3238,7 @@  void inode_dio_wait(struct inode *inode);
 static inline void inode_dio_begin(struct inode *inode)
 {
 	atomic_inc(&inode->i_dio_count);
+	smp_mb__after_atomic();
 }
 
 /**
@@ -3252,8 +3250,9 @@  static inline void inode_dio_begin(struct inode *inode)
  */
 static inline void inode_dio_end(struct inode *inode)
 {
+	smp_mb__before_atomic();
 	if (atomic_dec_and_test(&inode->i_dio_count))
-		wake_up_bit(&inode->i_state, __I_DIO_WAKEUP);
+		wake_up_var(&inode->i_dio_count);
 }
 
 extern void inode_set_flags(struct inode *inode, unsigned int flags,