diff mbox series

[v2] xfs: improve handling of prjquot ENOSPC

Message ID 20231216153522.52767-1-wenjianhn@gmail.com (mailing list archive)
State Superseded
Headers show
Series [v2] xfs: improve handling of prjquot ENOSPC | expand

Commit Message

Jian Wen Dec. 16, 2023, 3:35 p.m. UTC
Don't clear space of the whole fs when the project quota limit is
reached, since it affects the writing performance of files of
the directories that are under quota.

Changes since v1:
- use the want_blockgc_free_quota helper that written by Darrick

Signed-off-by: Jian Wen <wenjian1@xiaomi.com>
---
 fs/xfs/xfs_file.c | 19 ++++++++++++++++++-
 1 file changed, 18 insertions(+), 1 deletion(-)

Comments

Dave Chinner Dec. 18, 2023, 10 p.m. UTC | #1
On Sat, Dec 16, 2023 at 11:35:22PM +0800, Jian Wen wrote:
> Don't clear space of the whole fs when the project quota limit is
> reached, since it affects the writing performance of files of
> the directories that are under quota.
> 
> Changes since v1:
> - use the want_blockgc_free_quota helper that written by Darrick

I'm not convinced this is correct behaviour.

That is, we can get a real full filesystem ENOSPC even when project
quotas are on and the the project quota space is low. With this
change we will only flush project quotas rather than the whole
filesystem.

That seems like scope for real world ENOSPC regressions when project
quotas are enabled.

Hence my suggestion that we should be returning -EDQUOT from project
quotas and only converting it to -ENOSPC once the project quota has
been flushed and failed with EDQUOT a second time.

Keep in mind that I'm not interested in changing this code to
simplify it - this EDQUOT/ENOSPC flushing is replicated across
multiple fuinctions and so -all- of them need to change, not just
the buffered write path.

IOWs, I'm interested in having the code behave correctly in these
situations. If correctness means the code has to become more
complex, then so be it. However, with some simple refactoring, we
can isolate the complexity and make the code simpler.

> Signed-off-by: Jian Wen <wenjian1@xiaomi.com>
> ---
>  fs/xfs/xfs_file.c | 19 ++++++++++++++++++-
>  1 file changed, 18 insertions(+), 1 deletion(-)
> 
> diff --git a/fs/xfs/xfs_file.c b/fs/xfs/xfs_file.c
> index e33e5e13b95f..7764697e7822 100644
> --- a/fs/xfs/xfs_file.c
> +++ b/fs/xfs/xfs_file.c
> @@ -24,6 +24,9 @@
>  #include "xfs_pnfs.h"
>  #include "xfs_iomap.h"
>  #include "xfs_reflink.h"
> +#include "xfs_quota.h"
> +#include "xfs_dquot_item.h"
> +#include "xfs_dquot.h"
>  
>  #include <linux/dax.h>
>  #include <linux/falloc.h>
> @@ -761,6 +764,20 @@ xfs_file_dax_write(
>  	return ret;
>  }
>  
> +static inline bool want_blockgc_free_quota(struct xfs_inode *ip, int ret)
> +{
> +	if (ret == -EDQUOT)
> +		return true;
> +	if (ret != -ENOSPC)
> +		return false;
> +
> +	if (XFS_IS_PQUOTA_ENFORCED(ip->i_mount) &&
> +	    ip->i_pdquot && xfs_dquot_lowsp(ip->i_pdquot))
> +		return true;
> +
> +	return false;
> +}

>  STATIC ssize_t
>  xfs_file_buffered_write(
>  	struct kiocb		*iocb,
> @@ -796,7 +813,7 @@ xfs_file_buffered_write(
>  	 * running at the same time.  Use a synchronous scan to increase the
>  	 * effectiveness of the scan.
>  	 */
> -	if (ret == -EDQUOT && !cleared_space) {
> +	if (want_blockgc_free_quota(ip, ret) && !cleared_space) {
>  		xfs_iunlock(ip, iolock);
>  		xfs_blockgc_free_quota(ip, XFS_ICWALK_FLAG_SYNC);
>  		cleared_space = true;

IMO, this makes messy code even more messy, and makes it even more
inconsistent with all the EDQUOT/ENOSPC flushing that is done in the
xfs_trans_alloc_*() inode transaction setup helpers.

So, with the assumption that project quotas return EDQUOT and not
ENOSPC, we add this helper to fs/xfs/xfs_dquot.h:

static inline bool
xfs_dquot_is_enospc(
	struct xfs_dquot	*dqp)
{
	if (!dqp)
		return false;
	if (!xfs_dquot_is_enforced(dqp)
		return false;
	if (dqp->q_blk.hardlimit - dqp->q_blk.reserved > 0)
		return false;
	return true;
}

And this helper to fs/xfs/xfs_icache.c:

static void
xfs_blockgc_nospace_flush(
	struct xfs_inode	*ip,
	int			what)
{
	if (what == -EDQUOT) {
		xfs_blockgc_free_quota(ip, XFS_ICWALK_FLAG_SYNC);
		return;
	}
	ASSERT(what == -ENOSPC);

	xfs_flush_inodes(ip->i_mount);
	icw.icw_flags = XFS_ICWALK_FLAG_SYNC;
	xfs_blockgc_free_space(ip->i_mount, &icw);
}

The buffered write code ends up as:

	.....
	do {
		iolock = XFS_IOLOCK_EXCL;
		ret = xfs_ilock_iocb(iocb, iolock);
		if (ret)
			return ret;

		ret = xfs_file_write_checks(iocb, from, &iolock);
		if (ret)
			goto out;

		trace_xfs_file_buffered_write(iocb, from);
		ret = iomap_file_buffered_write(iocb, from,
				&xfs_buffered_write_iomap_ops);
		if (!(ret == -EDQUOT || ret = -ENOSPC))
			break;

		xfs_iunlock(ip, iolock);
		xfs_blockgc_nospace_flush(ip, ret);
	} while (retries++ == 0);

	if (ret == -EDQUOT && xfs_dquot_is_enospc(ip->i_pdquot))
		ret = -ENOSPC;
	.....

This factors out the actual flushing behaviour when an error occurs
from the write code, and removes the clunky goto from the code. It
now clearly loops on a single retry after a ENOSPC/EDQUOT error,
and the high level code transforms the EDQUOT project quota error
once the loop errors out completely.

We can then do the same transformation to xfs_trans_alloc_icreate(),
xfs_trans_alloc_inode(), xfs_trans_alloc_ichange() and
xfs_trans_alloc_dir() using xfs_blockgc_nospace_flush() and
xfs_dquot_is_enospc().

This will then give us consistent project quota only flushing on
project quota failure, as well as consistent full filesystem ENOSPC
flushing behaviour across all types of inode operations.

Cheers,

Dave.
Christoph Hellwig Dec. 19, 2023, 5:47 a.m. UTC | #2
On Tue, Dec 19, 2023 at 09:00:38AM +1100, Dave Chinner wrote:
> I'm not convinced this is correct behaviour.
> 
> That is, we can get a real full filesystem ENOSPC even when project
> quotas are on and the the project quota space is low. With this
> change we will only flush project quotas rather than the whole
> filesystem.

Yes.

> quotas are enabled.
> 
> Hence my suggestion that we should be returning -EDQUOT from project
> quotas and only converting it to -ENOSPC once the project quota has
> been flushed and failed with EDQUOT a second time.

FYI, my suggestion of turning cleared_space into a counter and still
falling back to the normal ENOSPC clearing would also work.  But in
the long run moving this pretty messy abuse of ENOSPC for out of qupta
in the low-level code into the highest syscall boudary is probably a
good thing for maintainability.
Jian Wen Dec. 19, 2023, 1:50 p.m. UTC | #3
On Tue, Dec 19, 2023 at 6:00 AM Dave Chinner <david@fromorbit.com> wrote:
>
>
> This will then give us consistent project quota only flushing on
> project quota failure, as well as consistent full filesystem ENOSPC
> flushing behaviour across all types of inode operations.

Thanks for the detailed explanation. I will try to make it consistent this week.
Jian Wen Dec. 23, 2023, 11 a.m. UTC | #4
On Tue, Dec 19, 2023 at 6:00 AM Dave Chinner <david@fromorbit.com> wrote:
>
> So, with the assumption that project quotas return EDQUOT and not
> ENOSPC, we add this helper to fs/xfs/xfs_dquot.h:
>
> static inline bool
> xfs_dquot_is_enospc(
>         struct xfs_dquot        *dqp)
> {
>         if (!dqp)
>                 return false;
>         if (!xfs_dquot_is_enforced(dqp)
>                 return false;
>         if (dqp->q_blk.hardlimit - dqp->q_blk.reserved > 0)
I need some help on how to improve the above enospc check.
It seems that we need a value that is larger than 0.

4.0K space is available.
# df -h ./
Filesystem      Size  Used Avail Use% Mounted on
/dev/vda1       100M  100M  4.0K 100% /tmp/roothome/vda

ENOSPC is expected, but gets EDQUOT.
# touch t
touch: cannot touch 't': Disk quota exceeded

>
> The buffered write code ends up as:
>
>         .....
>         do {
>                 iolock = XFS_IOLOCK_EXCL;
>                 ret = xfs_ilock_iocb(iocb, iolock);
>                 if (ret)
>                         return ret;
>
>                 ret = xfs_file_write_checks(iocb, from, &iolock);
>                 if (ret)
>                         goto out;
>
>                 trace_xfs_file_buffered_write(iocb, from);
>                 ret = iomap_file_buffered_write(iocb, from,
>                                 &xfs_buffered_write_iomap_ops);
>                 if (!(ret == -EDQUOT || ret = -ENOSPC))
>                         break;
>
>                 xfs_iunlock(ip, iolock);
xfs_iunlock() is called after the retry.
>                 xfs_blockgc_nospace_flush(ip, ret);
>         } while (retries++ == 0);
>
>         if (ret == -EDQUOT && xfs_dquot_is_enospc(ip->i_pdquot))
>                 ret = -ENOSPC;
>         .....
out:
        if (iolock)
                xfs_iunlock(ip, iolock);
Double xfs_iunlock().

Please take a look at the v3 patch.
Thanks.
diff mbox series

Patch

diff --git a/fs/xfs/xfs_file.c b/fs/xfs/xfs_file.c
index e33e5e13b95f..7764697e7822 100644
--- a/fs/xfs/xfs_file.c
+++ b/fs/xfs/xfs_file.c
@@ -24,6 +24,9 @@ 
 #include "xfs_pnfs.h"
 #include "xfs_iomap.h"
 #include "xfs_reflink.h"
+#include "xfs_quota.h"
+#include "xfs_dquot_item.h"
+#include "xfs_dquot.h"
 
 #include <linux/dax.h>
 #include <linux/falloc.h>
@@ -761,6 +764,20 @@  xfs_file_dax_write(
 	return ret;
 }
 
+static inline bool want_blockgc_free_quota(struct xfs_inode *ip, int ret)
+{
+	if (ret == -EDQUOT)
+		return true;
+	if (ret != -ENOSPC)
+		return false;
+
+	if (XFS_IS_PQUOTA_ENFORCED(ip->i_mount) &&
+	    ip->i_pdquot && xfs_dquot_lowsp(ip->i_pdquot))
+		return true;
+
+	return false;
+}
+
 STATIC ssize_t
 xfs_file_buffered_write(
 	struct kiocb		*iocb,
@@ -796,7 +813,7 @@  xfs_file_buffered_write(
 	 * running at the same time.  Use a synchronous scan to increase the
 	 * effectiveness of the scan.
 	 */
-	if (ret == -EDQUOT && !cleared_space) {
+	if (want_blockgc_free_quota(ip, ret) && !cleared_space) {
 		xfs_iunlock(ip, iolock);
 		xfs_blockgc_free_quota(ip, XFS_ICWALK_FLAG_SYNC);
 		cleared_space = true;