diff mbox series

[5/6] shmem: quota support

Message ID 20230713134848.249779-6-cem@kernel.org (mailing list archive)
State New
Headers show
Series shmem: Add user and group quota support for tmpfs | expand

Commit Message

Carlos Maiolino July 13, 2023, 1:48 p.m. UTC
From: Carlos Maiolino <cem@kernel.org>

Now the basic infra-structure is in place, enable quota support for tmpfs.

This offers user and group quotas to tmpfs (project quotas will be added
later). Also, as other filesystems, the tmpfs quota is not supported
within user namespaces yet, so idmapping is not translated.

Signed-off-by: Lukas Czerner <lczerner@redhat.com>
Signed-off-by: Carlos Maiolino <cmaiolino@redhat.com>
Reviewed-by: Jan Kara <jack@suse.cz>
---
 Documentation/filesystems/tmpfs.rst |  15 +++
 include/linux/shmem_fs.h            |   8 ++
 mm/shmem.c                          | 180 ++++++++++++++++++++++++++--
 3 files changed, 195 insertions(+), 8 deletions(-)

Comments

Christian Brauner July 14, 2023, 9:54 a.m. UTC | #1
On Thu, Jul 13, 2023 at 03:48:47PM +0200, cem@kernel.org wrote:
> From: Carlos Maiolino <cem@kernel.org>
> 
> Now the basic infra-structure is in place, enable quota support for tmpfs.
> 
> This offers user and group quotas to tmpfs (project quotas will be added
> later). Also, as other filesystems, the tmpfs quota is not supported
> within user namespaces yet, so idmapping is not translated.
> 
> Signed-off-by: Lukas Czerner <lczerner@redhat.com>
> Signed-off-by: Carlos Maiolino <cmaiolino@redhat.com>
> Reviewed-by: Jan Kara <jack@suse.cz>
> ---
>  Documentation/filesystems/tmpfs.rst |  15 +++
>  include/linux/shmem_fs.h            |   8 ++
>  mm/shmem.c                          | 180 ++++++++++++++++++++++++++--
>  3 files changed, 195 insertions(+), 8 deletions(-)
> 
> diff --git a/Documentation/filesystems/tmpfs.rst b/Documentation/filesystems/tmpfs.rst
> index f18f46be5c0c..0c7d8bd052f1 100644
> --- a/Documentation/filesystems/tmpfs.rst
> +++ b/Documentation/filesystems/tmpfs.rst
> @@ -130,6 +130,21 @@ for emergency or testing purposes. The values you can set for shmem_enabled are:
>      option, for testing
>  ==  ============================================================
>  
> +tmpfs also supports quota with the following mount options
> +
> +========  =============================================================
> +quota     User and group quota accounting and enforcement is enabled on
> +          the mount. Tmpfs is using hidden system quota files that are
> +          initialized on mount.
> +usrquota  User quota accounting and enforcement is enabled on the
> +          mount.
> +grpquota  Group quota accounting and enforcement is enabled on the
> +          mount.
> +========  =============================================================
> +
> +Note that tmpfs quotas do not support user namespaces so no uid/gid
> +translation is done if quotas are enabled inside user namespaces.
> +
>  tmpfs has a mount option to set the NUMA memory allocation policy for
>  all files in that instance (if CONFIG_NUMA is enabled) - which can be
>  adjusted on the fly via 'mount -o remount ...'
> diff --git a/include/linux/shmem_fs.h b/include/linux/shmem_fs.h
> index 7abfaf70b58a..1a568a0f542f 100644
> --- a/include/linux/shmem_fs.h
> +++ b/include/linux/shmem_fs.h
> @@ -31,6 +31,9 @@ struct shmem_inode_info {
>  	atomic_t		stop_eviction;	/* hold when working on inode */
>  	struct timespec64	i_crtime;	/* file creation time */
>  	unsigned int		fsflags;	/* flags for FS_IOC_[SG]ETFLAGS */
> +#ifdef CONFIG_TMPFS_QUOTA
> +	struct dquot		*i_dquot[MAXQUOTAS];
> +#endif
>  	struct inode		vfs_inode;
>  };
>  
> @@ -184,4 +187,9 @@ extern int shmem_mfill_atomic_pte(pmd_t *dst_pmd,
>  #define SHMEM_QUOTA_MAX_SPC_LIMIT 0x7fffffffffffffffLL /* 2^63-1 */
>  #define SHMEM_QUOTA_MAX_INO_LIMIT 0x7fffffffffffffffLL
>  
> +#ifdef CONFIG_TMPFS_QUOTA
> +extern const struct dquot_operations shmem_quota_operations;
> +extern struct quota_format_type shmem_quota_format;
> +#endif /* CONFIG_TMPFS_QUOTA */
> +
>  #endif
> diff --git a/mm/shmem.c b/mm/shmem.c
> index 2a7b8060b6f4..5022238dd68d 100644
> --- a/mm/shmem.c
> +++ b/mm/shmem.c
> @@ -78,6 +78,7 @@ static struct vfsmount *shm_mnt;
>  #include <uapi/linux/memfd.h>
>  #include <linux/rmap.h>
>  #include <linux/uuid.h>
> +#include <linux/quotaops.h>
>  
>  #include <linux/uaccess.h>
>  
> @@ -116,11 +117,13 @@ struct shmem_options {
>  	int huge;
>  	int seen;
>  	bool noswap;
> +	unsigned short quota_types;
>  #define SHMEM_SEEN_BLOCKS 1
>  #define SHMEM_SEEN_INODES 2
>  #define SHMEM_SEEN_HUGE 4
>  #define SHMEM_SEEN_INUMS 8
>  #define SHMEM_SEEN_NOSWAP 16
> +#define SHMEM_SEEN_QUOTA 32
>  };
>  
>  #ifdef CONFIG_TMPFS
> @@ -212,7 +215,16 @@ static inline int shmem_inode_acct_block(struct inode *inode, long pages)
>  		if (percpu_counter_compare(&sbinfo->used_blocks,
>  					   sbinfo->max_blocks - pages) > 0)
>  			goto unacct;
> +
> +		err = dquot_alloc_block_nodirty(inode, pages);
> +		if (err)
> +			goto unacct;
> +
>  		percpu_counter_add(&sbinfo->used_blocks, pages);
> +	} else {
> +		err = dquot_alloc_block_nodirty(inode, pages);
> +		if (err)
> +			goto unacct;
>  	}
>  
>  	return 0;
> @@ -227,6 +239,8 @@ static inline void shmem_inode_unacct_blocks(struct inode *inode, long pages)
>  	struct shmem_inode_info *info = SHMEM_I(inode);
>  	struct shmem_sb_info *sbinfo = SHMEM_SB(inode->i_sb);
>  
> +	dquot_free_block_nodirty(inode, pages);
> +
>  	if (sbinfo->max_blocks)
>  		percpu_counter_sub(&sbinfo->used_blocks, pages);
>  	shmem_unacct_blocks(info->flags, pages);
> @@ -255,6 +269,47 @@ bool vma_is_shmem(struct vm_area_struct *vma)
>  static LIST_HEAD(shmem_swaplist);
>  static DEFINE_MUTEX(shmem_swaplist_mutex);
>  
> +#ifdef CONFIG_TMPFS_QUOTA
> +
> +static int shmem_enable_quotas(struct super_block *sb,
> +			       unsigned short quota_types)
> +{
> +	int type, err = 0;
> +
> +	sb_dqopt(sb)->flags |= DQUOT_QUOTA_SYS_FILE | DQUOT_NOLIST_DIRTY;
> +	for (type = 0; type < SHMEM_MAXQUOTAS; type++) {
> +		if (!(quota_types & (1 << type)))
> +			continue;
> +		err = dquot_load_quota_sb(sb, type, QFMT_SHMEM,
> +					  DQUOT_USAGE_ENABLED |
> +					  DQUOT_LIMITS_ENABLED);
> +		if (err)
> +			goto out_err;
> +	}
> +	return 0;
> +
> +out_err:
> +	pr_warn("tmpfs: failed to enable quota tracking (type=%d, err=%d)\n",
> +		type, err);
> +	for (type--; type >= 0; type--)
> +		dquot_quota_off(sb, type);
> +	return err;
> +}
> +
> +static void shmem_disable_quotas(struct super_block *sb)
> +{
> +	int type;
> +
> +	for (type = 0; type < SHMEM_MAXQUOTAS; type++)
> +		dquot_quota_off(sb, type);
> +}
> +
> +static struct dquot **shmem_get_dquots(struct inode *inode)
> +{
> +	return SHMEM_I(inode)->i_dquot;
> +}
> +#endif /* CONFIG_TMPFS_QUOTA */
> +
>  /*
>   * shmem_reserve_inode() performs bookkeeping to reserve a shmem inode, and
>   * produces a novel ino for the newly allocated inode.
> @@ -361,7 +416,6 @@ static void shmem_recalc_inode(struct inode *inode)
>  	freed = info->alloced - info->swapped - inode->i_mapping->nrpages;
>  	if (freed > 0) {
>  		info->alloced -= freed;
> -		inode->i_blocks -= freed * BLOCKS_PER_PAGE;
>  		shmem_inode_unacct_blocks(inode, freed);
>  	}
>  }
> @@ -379,7 +433,6 @@ bool shmem_charge(struct inode *inode, long pages)
>  
>  	spin_lock_irqsave(&info->lock, flags);
>  	info->alloced += pages;
> -	inode->i_blocks += pages * BLOCKS_PER_PAGE;
>  	shmem_recalc_inode(inode);
>  	spin_unlock_irqrestore(&info->lock, flags);
>  
> @@ -395,7 +448,6 @@ void shmem_uncharge(struct inode *inode, long pages)
>  
>  	spin_lock_irqsave(&info->lock, flags);
>  	info->alloced -= pages;
> -	inode->i_blocks -= pages * BLOCKS_PER_PAGE;
>  	shmem_recalc_inode(inode);
>  	spin_unlock_irqrestore(&info->lock, flags);
>  
> @@ -1141,6 +1193,21 @@ static int shmem_setattr(struct mnt_idmap *idmap,
>  		}
>  	}
>  
> +	if (is_quota_modification(idmap, inode, attr)) {
> +		error = dquot_initialize(inode);
> +		if (error)
> +			return error;
> +	}
> +
> +	/* Transfer quota accounting */
> +	if (i_uid_needs_update(idmap, attr, inode) ||
> +	    i_gid_needs_update(idmap, attr, inode)) {
> +		error = dquot_transfer(idmap, inode, attr);
> +
> +		if (error)
> +			return error;
> +	}
> +
>  	setattr_copy(idmap, inode, attr);
>  	if (attr->ia_valid & ATTR_MODE)
>  		error = posix_acl_chmod(idmap, dentry, inode->i_mode);
> @@ -1187,6 +1254,10 @@ static void shmem_evict_inode(struct inode *inode)
>  	WARN_ON(inode->i_blocks);
>  	shmem_free_inode(inode->i_sb);
>  	clear_inode(inode);
> +#ifdef CONFIG_TMPFS_QUOTA
> +	dquot_free_inode(inode);
> +	dquot_drop(inode);
> +#endif
>  }
>  
>  static int shmem_find_swap_entries(struct address_space *mapping,
> @@ -1986,7 +2057,6 @@ static int shmem_get_folio_gfp(struct inode *inode, pgoff_t index,
>  
>  	spin_lock_irq(&info->lock);
>  	info->alloced += folio_nr_pages(folio);
> -	inode->i_blocks += (blkcnt_t)BLOCKS_PER_PAGE << folio_order(folio);
>  	shmem_recalc_inode(inode);
>  	spin_unlock_irq(&info->lock);
>  	alloced = true;
> @@ -2357,9 +2427,10 @@ static void shmem_set_inode_flags(struct inode *inode, unsigned int fsflags)
>  #define shmem_initxattrs NULL
>  #endif
>  
> -static struct inode *shmem_get_inode(struct mnt_idmap *idmap, struct super_block *sb,
> -				     struct inode *dir, umode_t mode, dev_t dev,
> -				     unsigned long flags)
> +static struct inode *__shmem_get_inode(struct mnt_idmap *idmap,
> +					     struct super_block *sb,
> +					     struct inode *dir, umode_t mode,
> +					     dev_t dev, unsigned long flags)
>  {
>  	struct inode *inode;
>  	struct shmem_inode_info *info;
> @@ -2436,6 +2507,43 @@ static struct inode *shmem_get_inode(struct mnt_idmap *idmap, struct super_block
>  	return inode;
>  }
>  
> +#ifdef CONFIG_TMPFS_QUOTA
> +static struct inode *shmem_get_inode(struct mnt_idmap *idmap,
> +				     struct super_block *sb, struct inode *dir,
> +				     umode_t mode, dev_t dev, unsigned long flags)
> +{
> +	int err;
> +	struct inode *inode;
> +
> +	inode = __shmem_get_inode(idmap, sb, dir, mode, dev, flags);
> +	if (IS_ERR(inode))
> +		return inode;
> +
> +	err = dquot_initialize(inode);
> +	if (err)
> +		goto errout;
> +
> +	err = dquot_alloc_inode(inode);
> +	if (err) {
> +		dquot_drop(inode);
> +		goto errout;
> +	}
> +	return inode;
> +
> +errout:
> +	inode->i_flags |= S_NOQUOTA;
> +	iput(inode);
> +	return ERR_PTR(err);
> +}
> +#else
> +static inline struct inode *shmem_get_inode(struct mnt_idmap *idmap,
> +				     struct super_block *sb, struct inode *dir,
> +				     umode_t mode, dev_t dev, unsigned long flags)
> +{
> +	return __shmem_get_inode(idmap, sb, dir, mode, dev, flags);
> +}
> +#endif /* CONFIG_TMPFS_QUOTA */
> +
>  #ifdef CONFIG_USERFAULTFD
>  int shmem_mfill_atomic_pte(pmd_t *dst_pmd,
>  			   struct vm_area_struct *dst_vma,
> @@ -2538,7 +2646,6 @@ int shmem_mfill_atomic_pte(pmd_t *dst_pmd,
>  
>  	spin_lock_irq(&info->lock);
>  	info->alloced++;
> -	inode->i_blocks += BLOCKS_PER_PAGE;
>  	shmem_recalc_inode(inode);
>  	spin_unlock_irq(&info->lock);
>  
> @@ -3516,6 +3623,7 @@ static ssize_t shmem_listxattr(struct dentry *dentry, char *buffer, size_t size)
>  
>  static const struct inode_operations shmem_short_symlink_operations = {
>  	.getattr	= shmem_getattr,
> +	.setattr	= shmem_setattr,
>  	.get_link	= simple_get_link,
>  #ifdef CONFIG_TMPFS_XATTR
>  	.listxattr	= shmem_listxattr,
> @@ -3524,6 +3632,7 @@ static const struct inode_operations shmem_short_symlink_operations = {
>  
>  static const struct inode_operations shmem_symlink_inode_operations = {
>  	.getattr	= shmem_getattr,
> +	.setattr	= shmem_setattr,
>  	.get_link	= shmem_get_link,
>  #ifdef CONFIG_TMPFS_XATTR
>  	.listxattr	= shmem_listxattr,
> @@ -3623,6 +3732,9 @@ enum shmem_param {
>  	Opt_inode32,
>  	Opt_inode64,
>  	Opt_noswap,
> +	Opt_quota,
> +	Opt_usrquota,
> +	Opt_grpquota,
>  };
>  
>  static const struct constant_table shmem_param_enums_huge[] = {
> @@ -3645,6 +3757,11 @@ const struct fs_parameter_spec shmem_fs_parameters[] = {
>  	fsparam_flag  ("inode32",	Opt_inode32),
>  	fsparam_flag  ("inode64",	Opt_inode64),
>  	fsparam_flag  ("noswap",	Opt_noswap),
> +#ifdef CONFIG_TMPFS_QUOTA
> +	fsparam_flag  ("quota",		Opt_quota),
> +	fsparam_flag  ("usrquota",	Opt_usrquota),
> +	fsparam_flag  ("grpquota",	Opt_grpquota),
> +#endif
>  	{}
>  };
>  
> @@ -3736,6 +3853,18 @@ static int shmem_parse_one(struct fs_context *fc, struct fs_parameter *param)
>  		ctx->noswap = true;
>  		ctx->seen |= SHMEM_SEEN_NOSWAP;
>  		break;
> +	case Opt_quota:
> +		ctx->seen |= SHMEM_SEEN_QUOTA;
> +		ctx->quota_types |= (QTYPE_MASK_USR | QTYPE_MASK_GRP);
> +		break;
> +	case Opt_usrquota:
> +		ctx->seen |= SHMEM_SEEN_QUOTA;
> +		ctx->quota_types |= QTYPE_MASK_USR;
> +		break;
> +	case Opt_grpquota:
> +		ctx->seen |= SHMEM_SEEN_QUOTA;
> +		ctx->quota_types |= QTYPE_MASK_GRP;
> +		break;
>  	}
>  	return 0;

I mentioned this in an earlier review; following the sequence:

if (ctx->seen & SHMEM_SEEN_QUOTA)
-> shmem_enable_quotas()
   -> dquot_load_quota_sb()

to then figure out that in dquot_load_quota_sb() we fail if
sb->s_user_ns != &init_user_ns is too subtle for a filesystem that's
mountable by unprivileged users. Every few months someone will end up
stumbling upon this code and wonder where it's blocked. There isn't even
a comment in the code.

Aside from that it's also really unfriendly to users because they may go
through setting up a tmpfs instances in the following way:

        fd_fs = fsopen("tmpfs");

User now enables quota:

        fsconfig(fd_fs, ..., "quota", ...) = 0

and goes on to set a bunch of other options:

        fsconfig(fd_fs, ..., "inode64", ...) = 0
        fsconfig(fd_fs, ..., "nr_inodes", ...) = 0
        fsconfig(fd_fs, ..., "nr_blocks", ...) = 0
        fsconfig(fd_fs, ..., "huge", ...) = 0
        fsconfig(fd_fs, ..., "mode", ...) = 0
        fsconfig(fd_fs, ..., "gid", ...) = 0

everything seems dandy and they create the superblock:

        fsconfig(fd_fs, FSCONFIG_CMD_CREATE, ...) = -EINVAL

which fails.

The user has not just performed 9 useless system calls they also have
zero clue what mount option caused the failure.

What this code really really should do is fail at:

        fsconfig(fd_fs, ..., "quota", ...) = -EINVAL

and log an error that the user can retrieve from the fs context. IOW,

diff --git a/mm/shmem.c b/mm/shmem.c
index 083ce6b478e7..baca8bf44569 100644
--- a/mm/shmem.c
+++ b/mm/shmem.c
@@ -3863,14 +3863,20 @@ static int shmem_parse_one(struct fs_context *fc, struct fs_parameter *param)
                ctx->seen |= SHMEM_SEEN_NOSWAP;
                break;
        case Opt_quota:
+               if (fc->user_ns != &init_user_ns)
+                       return invalfc(fc, "Quotas in unprivileged tmpfs mounts unsupported");
                ctx->seen |= SHMEM_SEEN_QUOTA;
                ctx->quota_types |= (QTYPE_MASK_USR | QTYPE_MASK_GRP);
                break;
        case Opt_usrquota:
+               if (fc->user_ns != &init_user_ns)
+                       return invalfc(fc, "Quotas in unprivileged tmpfs mounts unsupported");
                ctx->seen |= SHMEM_SEEN_QUOTA;
                ctx->quota_types |= QTYPE_MASK_USR;
                break;
        case Opt_grpquota:
+               if (fc->user_ns != &init_user_ns)
+                       return invalfc(fc, "Quotas in unprivileged tmpfs mounts unsupported");
                ctx->seen |= SHMEM_SEEN_QUOTA;
                ctx->quota_types |= QTYPE_MASK_GRP;
                break;

This exactly what we already to for the "noswap" option btw.

Could you fold these changes into the patch and resend, please?
I synced with Andrew earlier and I'll be taking this series.

---

And btw, the *_SEEN_* logic for mount options is broken - but that's not
specific to your patch. Imagine:

        fd_fs = fsopen("tmpfs");
        fsconfig(fd_fs, ..., "nr_inodes", 0, "1000") = 0

Now ctx->inodes == 1000 and ctx->seen |= SHMEM_SEEN_INODES.

Now the user does:

        fsconfig(fd_fs, ..., "nr_inodes", 0, "-1234") = -EINVAL

This fails, but:

        ctx->inodes = memparse(param->string, &rest);
        if (*rest)
                goto bad_value;

will set ctx->inodes to whatever memparse returns but leaves
SHMEM_SEEN_INODES raised in ctx->seen. Now superblock creation may
succeed with a garbage inode limit. This should affect other mount
options as well.
Carlos Maiolino July 14, 2023, 10:40 a.m. UTC | #2
Hi Christian.

> > @@ -3736,6 +3853,18 @@ static int shmem_parse_one(struct fs_context *fc, struct fs_parameter *param)
> >  		ctx->noswap = true;
> >  		ctx->seen |= SHMEM_SEEN_NOSWAP;
> >  		break;
> > +	case Opt_quota:
> > +		ctx->seen |= SHMEM_SEEN_QUOTA;
> > +		ctx->quota_types |= (QTYPE_MASK_USR | QTYPE_MASK_GRP);
> > +		break;
> > +	case Opt_usrquota:
> > +		ctx->seen |= SHMEM_SEEN_QUOTA;
> > +		ctx->quota_types |= QTYPE_MASK_USR;
> > +		break;
> > +	case Opt_grpquota:
> > +		ctx->seen |= SHMEM_SEEN_QUOTA;
> > +		ctx->quota_types |= QTYPE_MASK_GRP;
> > +		break;
> >  	}
> >  	return 0;
> 
> I mentioned this in an earlier review; following the sequence:

Ok, my apologies, I should have lost it in the noise.

> 
> if (ctx->seen & SHMEM_SEEN_QUOTA)
> -> shmem_enable_quotas()
>    -> dquot_load_quota_sb()
> 
> to then figure out that in dquot_load_quota_sb() we fail if
> sb->s_user_ns != &init_user_ns is too subtle for a filesystem that's
> mountable by unprivileged users. Every few months someone will end up
> stumbling upon this code and wonder where it's blocked. There isn't even
> a comment in the code.
> 
> Aside from that it's also really unfriendly to users because they may go
> through setting up a tmpfs instances in the following way:
> 
>         fd_fs = fsopen("tmpfs");
> 
> User now enables quota:
> 
>         fsconfig(fd_fs, ..., "quota", ...) = 0
> 
> and goes on to set a bunch of other options:
> 
>         fsconfig(fd_fs, ..., "inode64", ...) = 0
>         fsconfig(fd_fs, ..., "nr_inodes", ...) = 0
>         fsconfig(fd_fs, ..., "nr_blocks", ...) = 0
>         fsconfig(fd_fs, ..., "huge", ...) = 0
>         fsconfig(fd_fs, ..., "mode", ...) = 0
>         fsconfig(fd_fs, ..., "gid", ...) = 0
> 
> everything seems dandy and they create the superblock:
> 
>         fsconfig(fd_fs, FSCONFIG_CMD_CREATE, ...) = -EINVAL
> 
> which fails.
> 
> The user has not just performed 9 useless system calls they also have
> zero clue what mount option caused the failure.
> 
> What this code really really should do is fail at:
> 
>         fsconfig(fd_fs, ..., "quota", ...) = -EINVAL
> 
> and log an error that the user can retrieve from the fs context. IOW,
> 
> diff --git a/mm/shmem.c b/mm/shmem.c
> index 083ce6b478e7..baca8bf44569 100644
> --- a/mm/shmem.c
> +++ b/mm/shmem.c
> @@ -3863,14 +3863,20 @@ static int shmem_parse_one(struct fs_context *fc, struct fs_parameter *param)
>                 ctx->seen |= SHMEM_SEEN_NOSWAP;
>                 break;
>         case Opt_quota:
> +               if (fc->user_ns != &init_user_ns)
> +                       return invalfc(fc, "Quotas in unprivileged tmpfs mounts unsupported");
>                 ctx->seen |= SHMEM_SEEN_QUOTA;
>                 ctx->quota_types |= (QTYPE_MASK_USR | QTYPE_MASK_GRP);
>                 break;
>         case Opt_usrquota:
> +               if (fc->user_ns != &init_user_ns)
> +                       return invalfc(fc, "Quotas in unprivileged tmpfs mounts unsupported");
>                 ctx->seen |= SHMEM_SEEN_QUOTA;
>                 ctx->quota_types |= QTYPE_MASK_USR;
>                 break;
>         case Opt_grpquota:
> +               if (fc->user_ns != &init_user_ns)
> +                       return invalfc(fc, "Quotas in unprivileged tmpfs mounts unsupported");
>                 ctx->seen |= SHMEM_SEEN_QUOTA;
>                 ctx->quota_types |= QTYPE_MASK_GRP;
>                 break;
> 
> This exactly what we already to for the "noswap" option btw.
> 
> Could you fold these changes into the patch and resend, please?
> I synced with Andrew earlier and I'll be taking this series.

Thanks! I will sure do it, I'll update the patch, build, test and send it again
in a few minutes.

> 
> ---
> 
> And btw, the *_SEEN_* logic for mount options is broken - but that's not
> specific to your patch. Imagine:
> 
>         fd_fs = fsopen("tmpfs");
>         fsconfig(fd_fs, ..., "nr_inodes", 0, "1000") = 0
> 
> Now ctx->inodes == 1000 and ctx->seen |= SHMEM_SEEN_INODES.
> 
> Now the user does:
> 
>         fsconfig(fd_fs, ..., "nr_inodes", 0, "-1234") = -EINVAL
> 
> This fails, but:
> 
>         ctx->inodes = memparse(param->string, &rest);
>         if (*rest)
>                 goto bad_value;
> 
> will set ctx->inodes to whatever memparse returns but leaves
> SHMEM_SEEN_INODES raised in ctx->seen. Now superblock creation may
> succeed with a garbage inode limit. This should affect other mount
> options as well.

Interesting. Thanks for the heads up. I'll look in more details into it when I
start working for namespace support for quotas (as we spoke previously).

Cheers.
Carlos Maiolino July 14, 2023, 12:26 p.m. UTC | #3
> >
> > @@ -3736,6 +3853,18 @@ static int shmem_parse_one(struct fs_context *fc, struct fs_parameter *param)
> >  		ctx->noswap = true;
> >  		ctx->seen |= SHMEM_SEEN_NOSWAP;
> >  		break;
> > +	case Opt_quota:
> > +		ctx->seen |= SHMEM_SEEN_QUOTA;
> > +		ctx->quota_types |= (QTYPE_MASK_USR | QTYPE_MASK_GRP);
> > +		break;
> > +	case Opt_usrquota:
> > +		ctx->seen |= SHMEM_SEEN_QUOTA;
> > +		ctx->quota_types |= QTYPE_MASK_USR;
> > +		break;
> > +	case Opt_grpquota:
> > +		ctx->seen |= SHMEM_SEEN_QUOTA;
> > +		ctx->quota_types |= QTYPE_MASK_GRP;
> > +		break;
> >  	}
> >  	return 0;
> 
> I mentioned this in an earlier review; following the sequence:
> 
> if (ctx->seen & SHMEM_SEEN_QUOTA)
> -> shmem_enable_quotas()
>    -> dquot_load_quota_sb()
> 
> to then figure out that in dquot_load_quota_sb() we fail if
> sb->s_user_ns != &init_user_ns is too subtle for a filesystem that's
> mountable by unprivileged users. Every few months someone will end up
> stumbling upon this code and wonder where it's blocked. There isn't even
> a comment in the code.

I was just going to rebase these updated changes on top of linux-next, and I
realized the patches are already there. Wouldn't it be better if I send a
follow-up patch on top of linux-next, applying these changes, as a Fixes: tag?
Christian Brauner July 14, 2023, 1:48 p.m. UTC | #4
On Fri, Jul 14, 2023 at 02:26:44PM +0200, Carlos Maiolino wrote:
> > >
> > > @@ -3736,6 +3853,18 @@ static int shmem_parse_one(struct fs_context *fc, struct fs_parameter *param)
> > >  		ctx->noswap = true;
> > >  		ctx->seen |= SHMEM_SEEN_NOSWAP;
> > >  		break;
> > > +	case Opt_quota:
> > > +		ctx->seen |= SHMEM_SEEN_QUOTA;
> > > +		ctx->quota_types |= (QTYPE_MASK_USR | QTYPE_MASK_GRP);
> > > +		break;
> > > +	case Opt_usrquota:
> > > +		ctx->seen |= SHMEM_SEEN_QUOTA;
> > > +		ctx->quota_types |= QTYPE_MASK_USR;
> > > +		break;
> > > +	case Opt_grpquota:
> > > +		ctx->seen |= SHMEM_SEEN_QUOTA;
> > > +		ctx->quota_types |= QTYPE_MASK_GRP;
> > > +		break;
> > >  	}
> > >  	return 0;
> > 
> > I mentioned this in an earlier review; following the sequence:
> > 
> > if (ctx->seen & SHMEM_SEEN_QUOTA)
> > -> shmem_enable_quotas()
> >    -> dquot_load_quota_sb()
> > 
> > to then figure out that in dquot_load_quota_sb() we fail if
> > sb->s_user_ns != &init_user_ns is too subtle for a filesystem that's
> > mountable by unprivileged users. Every few months someone will end up
> > stumbling upon this code and wonder where it's blocked. There isn't even
> > a comment in the code.
> 
> I was just going to rebase these updated changes on top of linux-next, and I
> realized the patches are already there. Wouldn't it be better if I send a
> follow-up patch on top of linux-next, applying these changes, as a Fixes: tag?

I would just resend and fold the fix into the patch. There's no good
reason to make this a separate patch imho.
Carlos Maiolino July 14, 2023, 2:47 p.m. UTC | #5
On Fri, Jul 14, 2023 at 03:48:12PM +0200, Christian Brauner wrote:
> On Fri, Jul 14, 2023 at 02:26:44PM +0200, Carlos Maiolino wrote:
> > > >
> > > > @@ -3736,6 +3853,18 @@ static int shmem_parse_one(struct fs_context *fc, struct fs_parameter *param)
> > > >  		ctx->noswap = true;
> > > >  		ctx->seen |= SHMEM_SEEN_NOSWAP;
> > > >  		break;
> > > > +	case Opt_quota:
> > > > +		ctx->seen |= SHMEM_SEEN_QUOTA;
> > > > +		ctx->quota_types |= (QTYPE_MASK_USR | QTYPE_MASK_GRP);
> > > > +		break;
> > > > +	case Opt_usrquota:
> > > > +		ctx->seen |= SHMEM_SEEN_QUOTA;
> > > > +		ctx->quota_types |= QTYPE_MASK_USR;
> > > > +		break;
> > > > +	case Opt_grpquota:
> > > > +		ctx->seen |= SHMEM_SEEN_QUOTA;
> > > > +		ctx->quota_types |= QTYPE_MASK_GRP;
> > > > +		break;
> > > >  	}
> > > >  	return 0;
> > >
> > > I mentioned this in an earlier review; following the sequence:
> > >
> > > if (ctx->seen & SHMEM_SEEN_QUOTA)
> > > -> shmem_enable_quotas()
> > >    -> dquot_load_quota_sb()
> > >
> > > to then figure out that in dquot_load_quota_sb() we fail if
> > > sb->s_user_ns != &init_user_ns is too subtle for a filesystem that's
> > > mountable by unprivileged users. Every few months someone will end up
> > > stumbling upon this code and wonder where it's blocked. There isn't even
> > > a comment in the code.
> >
> > I was just going to rebase these updated changes on top of linux-next, and I
> > realized the patches are already there. Wouldn't it be better if I send a
> > follow-up patch on top of linux-next, applying these changes, as a Fixes: tag?
> 
> I would just resend and fold the fix into the patch. There's no good
> reason to make this a separate patch imho.

sounds good. Thanks
diff mbox series

Patch

diff --git a/Documentation/filesystems/tmpfs.rst b/Documentation/filesystems/tmpfs.rst
index f18f46be5c0c..0c7d8bd052f1 100644
--- a/Documentation/filesystems/tmpfs.rst
+++ b/Documentation/filesystems/tmpfs.rst
@@ -130,6 +130,21 @@  for emergency or testing purposes. The values you can set for shmem_enabled are:
     option, for testing
 ==  ============================================================
 
+tmpfs also supports quota with the following mount options
+
+========  =============================================================
+quota     User and group quota accounting and enforcement is enabled on
+          the mount. Tmpfs is using hidden system quota files that are
+          initialized on mount.
+usrquota  User quota accounting and enforcement is enabled on the
+          mount.
+grpquota  Group quota accounting and enforcement is enabled on the
+          mount.
+========  =============================================================
+
+Note that tmpfs quotas do not support user namespaces so no uid/gid
+translation is done if quotas are enabled inside user namespaces.
+
 tmpfs has a mount option to set the NUMA memory allocation policy for
 all files in that instance (if CONFIG_NUMA is enabled) - which can be
 adjusted on the fly via 'mount -o remount ...'
diff --git a/include/linux/shmem_fs.h b/include/linux/shmem_fs.h
index 7abfaf70b58a..1a568a0f542f 100644
--- a/include/linux/shmem_fs.h
+++ b/include/linux/shmem_fs.h
@@ -31,6 +31,9 @@  struct shmem_inode_info {
 	atomic_t		stop_eviction;	/* hold when working on inode */
 	struct timespec64	i_crtime;	/* file creation time */
 	unsigned int		fsflags;	/* flags for FS_IOC_[SG]ETFLAGS */
+#ifdef CONFIG_TMPFS_QUOTA
+	struct dquot		*i_dquot[MAXQUOTAS];
+#endif
 	struct inode		vfs_inode;
 };
 
@@ -184,4 +187,9 @@  extern int shmem_mfill_atomic_pte(pmd_t *dst_pmd,
 #define SHMEM_QUOTA_MAX_SPC_LIMIT 0x7fffffffffffffffLL /* 2^63-1 */
 #define SHMEM_QUOTA_MAX_INO_LIMIT 0x7fffffffffffffffLL
 
+#ifdef CONFIG_TMPFS_QUOTA
+extern const struct dquot_operations shmem_quota_operations;
+extern struct quota_format_type shmem_quota_format;
+#endif /* CONFIG_TMPFS_QUOTA */
+
 #endif
diff --git a/mm/shmem.c b/mm/shmem.c
index 2a7b8060b6f4..5022238dd68d 100644
--- a/mm/shmem.c
+++ b/mm/shmem.c
@@ -78,6 +78,7 @@  static struct vfsmount *shm_mnt;
 #include <uapi/linux/memfd.h>
 #include <linux/rmap.h>
 #include <linux/uuid.h>
+#include <linux/quotaops.h>
 
 #include <linux/uaccess.h>
 
@@ -116,11 +117,13 @@  struct shmem_options {
 	int huge;
 	int seen;
 	bool noswap;
+	unsigned short quota_types;
 #define SHMEM_SEEN_BLOCKS 1
 #define SHMEM_SEEN_INODES 2
 #define SHMEM_SEEN_HUGE 4
 #define SHMEM_SEEN_INUMS 8
 #define SHMEM_SEEN_NOSWAP 16
+#define SHMEM_SEEN_QUOTA 32
 };
 
 #ifdef CONFIG_TMPFS
@@ -212,7 +215,16 @@  static inline int shmem_inode_acct_block(struct inode *inode, long pages)
 		if (percpu_counter_compare(&sbinfo->used_blocks,
 					   sbinfo->max_blocks - pages) > 0)
 			goto unacct;
+
+		err = dquot_alloc_block_nodirty(inode, pages);
+		if (err)
+			goto unacct;
+
 		percpu_counter_add(&sbinfo->used_blocks, pages);
+	} else {
+		err = dquot_alloc_block_nodirty(inode, pages);
+		if (err)
+			goto unacct;
 	}
 
 	return 0;
@@ -227,6 +239,8 @@  static inline void shmem_inode_unacct_blocks(struct inode *inode, long pages)
 	struct shmem_inode_info *info = SHMEM_I(inode);
 	struct shmem_sb_info *sbinfo = SHMEM_SB(inode->i_sb);
 
+	dquot_free_block_nodirty(inode, pages);
+
 	if (sbinfo->max_blocks)
 		percpu_counter_sub(&sbinfo->used_blocks, pages);
 	shmem_unacct_blocks(info->flags, pages);
@@ -255,6 +269,47 @@  bool vma_is_shmem(struct vm_area_struct *vma)
 static LIST_HEAD(shmem_swaplist);
 static DEFINE_MUTEX(shmem_swaplist_mutex);
 
+#ifdef CONFIG_TMPFS_QUOTA
+
+static int shmem_enable_quotas(struct super_block *sb,
+			       unsigned short quota_types)
+{
+	int type, err = 0;
+
+	sb_dqopt(sb)->flags |= DQUOT_QUOTA_SYS_FILE | DQUOT_NOLIST_DIRTY;
+	for (type = 0; type < SHMEM_MAXQUOTAS; type++) {
+		if (!(quota_types & (1 << type)))
+			continue;
+		err = dquot_load_quota_sb(sb, type, QFMT_SHMEM,
+					  DQUOT_USAGE_ENABLED |
+					  DQUOT_LIMITS_ENABLED);
+		if (err)
+			goto out_err;
+	}
+	return 0;
+
+out_err:
+	pr_warn("tmpfs: failed to enable quota tracking (type=%d, err=%d)\n",
+		type, err);
+	for (type--; type >= 0; type--)
+		dquot_quota_off(sb, type);
+	return err;
+}
+
+static void shmem_disable_quotas(struct super_block *sb)
+{
+	int type;
+
+	for (type = 0; type < SHMEM_MAXQUOTAS; type++)
+		dquot_quota_off(sb, type);
+}
+
+static struct dquot **shmem_get_dquots(struct inode *inode)
+{
+	return SHMEM_I(inode)->i_dquot;
+}
+#endif /* CONFIG_TMPFS_QUOTA */
+
 /*
  * shmem_reserve_inode() performs bookkeeping to reserve a shmem inode, and
  * produces a novel ino for the newly allocated inode.
@@ -361,7 +416,6 @@  static void shmem_recalc_inode(struct inode *inode)
 	freed = info->alloced - info->swapped - inode->i_mapping->nrpages;
 	if (freed > 0) {
 		info->alloced -= freed;
-		inode->i_blocks -= freed * BLOCKS_PER_PAGE;
 		shmem_inode_unacct_blocks(inode, freed);
 	}
 }
@@ -379,7 +433,6 @@  bool shmem_charge(struct inode *inode, long pages)
 
 	spin_lock_irqsave(&info->lock, flags);
 	info->alloced += pages;
-	inode->i_blocks += pages * BLOCKS_PER_PAGE;
 	shmem_recalc_inode(inode);
 	spin_unlock_irqrestore(&info->lock, flags);
 
@@ -395,7 +448,6 @@  void shmem_uncharge(struct inode *inode, long pages)
 
 	spin_lock_irqsave(&info->lock, flags);
 	info->alloced -= pages;
-	inode->i_blocks -= pages * BLOCKS_PER_PAGE;
 	shmem_recalc_inode(inode);
 	spin_unlock_irqrestore(&info->lock, flags);
 
@@ -1141,6 +1193,21 @@  static int shmem_setattr(struct mnt_idmap *idmap,
 		}
 	}
 
+	if (is_quota_modification(idmap, inode, attr)) {
+		error = dquot_initialize(inode);
+		if (error)
+			return error;
+	}
+
+	/* Transfer quota accounting */
+	if (i_uid_needs_update(idmap, attr, inode) ||
+	    i_gid_needs_update(idmap, attr, inode)) {
+		error = dquot_transfer(idmap, inode, attr);
+
+		if (error)
+			return error;
+	}
+
 	setattr_copy(idmap, inode, attr);
 	if (attr->ia_valid & ATTR_MODE)
 		error = posix_acl_chmod(idmap, dentry, inode->i_mode);
@@ -1187,6 +1254,10 @@  static void shmem_evict_inode(struct inode *inode)
 	WARN_ON(inode->i_blocks);
 	shmem_free_inode(inode->i_sb);
 	clear_inode(inode);
+#ifdef CONFIG_TMPFS_QUOTA
+	dquot_free_inode(inode);
+	dquot_drop(inode);
+#endif
 }
 
 static int shmem_find_swap_entries(struct address_space *mapping,
@@ -1986,7 +2057,6 @@  static int shmem_get_folio_gfp(struct inode *inode, pgoff_t index,
 
 	spin_lock_irq(&info->lock);
 	info->alloced += folio_nr_pages(folio);
-	inode->i_blocks += (blkcnt_t)BLOCKS_PER_PAGE << folio_order(folio);
 	shmem_recalc_inode(inode);
 	spin_unlock_irq(&info->lock);
 	alloced = true;
@@ -2357,9 +2427,10 @@  static void shmem_set_inode_flags(struct inode *inode, unsigned int fsflags)
 #define shmem_initxattrs NULL
 #endif
 
-static struct inode *shmem_get_inode(struct mnt_idmap *idmap, struct super_block *sb,
-				     struct inode *dir, umode_t mode, dev_t dev,
-				     unsigned long flags)
+static struct inode *__shmem_get_inode(struct mnt_idmap *idmap,
+					     struct super_block *sb,
+					     struct inode *dir, umode_t mode,
+					     dev_t dev, unsigned long flags)
 {
 	struct inode *inode;
 	struct shmem_inode_info *info;
@@ -2436,6 +2507,43 @@  static struct inode *shmem_get_inode(struct mnt_idmap *idmap, struct super_block
 	return inode;
 }
 
+#ifdef CONFIG_TMPFS_QUOTA
+static struct inode *shmem_get_inode(struct mnt_idmap *idmap,
+				     struct super_block *sb, struct inode *dir,
+				     umode_t mode, dev_t dev, unsigned long flags)
+{
+	int err;
+	struct inode *inode;
+
+	inode = __shmem_get_inode(idmap, sb, dir, mode, dev, flags);
+	if (IS_ERR(inode))
+		return inode;
+
+	err = dquot_initialize(inode);
+	if (err)
+		goto errout;
+
+	err = dquot_alloc_inode(inode);
+	if (err) {
+		dquot_drop(inode);
+		goto errout;
+	}
+	return inode;
+
+errout:
+	inode->i_flags |= S_NOQUOTA;
+	iput(inode);
+	return ERR_PTR(err);
+}
+#else
+static inline struct inode *shmem_get_inode(struct mnt_idmap *idmap,
+				     struct super_block *sb, struct inode *dir,
+				     umode_t mode, dev_t dev, unsigned long flags)
+{
+	return __shmem_get_inode(idmap, sb, dir, mode, dev, flags);
+}
+#endif /* CONFIG_TMPFS_QUOTA */
+
 #ifdef CONFIG_USERFAULTFD
 int shmem_mfill_atomic_pte(pmd_t *dst_pmd,
 			   struct vm_area_struct *dst_vma,
@@ -2538,7 +2646,6 @@  int shmem_mfill_atomic_pte(pmd_t *dst_pmd,
 
 	spin_lock_irq(&info->lock);
 	info->alloced++;
-	inode->i_blocks += BLOCKS_PER_PAGE;
 	shmem_recalc_inode(inode);
 	spin_unlock_irq(&info->lock);
 
@@ -3516,6 +3623,7 @@  static ssize_t shmem_listxattr(struct dentry *dentry, char *buffer, size_t size)
 
 static const struct inode_operations shmem_short_symlink_operations = {
 	.getattr	= shmem_getattr,
+	.setattr	= shmem_setattr,
 	.get_link	= simple_get_link,
 #ifdef CONFIG_TMPFS_XATTR
 	.listxattr	= shmem_listxattr,
@@ -3524,6 +3632,7 @@  static const struct inode_operations shmem_short_symlink_operations = {
 
 static const struct inode_operations shmem_symlink_inode_operations = {
 	.getattr	= shmem_getattr,
+	.setattr	= shmem_setattr,
 	.get_link	= shmem_get_link,
 #ifdef CONFIG_TMPFS_XATTR
 	.listxattr	= shmem_listxattr,
@@ -3623,6 +3732,9 @@  enum shmem_param {
 	Opt_inode32,
 	Opt_inode64,
 	Opt_noswap,
+	Opt_quota,
+	Opt_usrquota,
+	Opt_grpquota,
 };
 
 static const struct constant_table shmem_param_enums_huge[] = {
@@ -3645,6 +3757,11 @@  const struct fs_parameter_spec shmem_fs_parameters[] = {
 	fsparam_flag  ("inode32",	Opt_inode32),
 	fsparam_flag  ("inode64",	Opt_inode64),
 	fsparam_flag  ("noswap",	Opt_noswap),
+#ifdef CONFIG_TMPFS_QUOTA
+	fsparam_flag  ("quota",		Opt_quota),
+	fsparam_flag  ("usrquota",	Opt_usrquota),
+	fsparam_flag  ("grpquota",	Opt_grpquota),
+#endif
 	{}
 };
 
@@ -3736,6 +3853,18 @@  static int shmem_parse_one(struct fs_context *fc, struct fs_parameter *param)
 		ctx->noswap = true;
 		ctx->seen |= SHMEM_SEEN_NOSWAP;
 		break;
+	case Opt_quota:
+		ctx->seen |= SHMEM_SEEN_QUOTA;
+		ctx->quota_types |= (QTYPE_MASK_USR | QTYPE_MASK_GRP);
+		break;
+	case Opt_usrquota:
+		ctx->seen |= SHMEM_SEEN_QUOTA;
+		ctx->quota_types |= QTYPE_MASK_USR;
+		break;
+	case Opt_grpquota:
+		ctx->seen |= SHMEM_SEEN_QUOTA;
+		ctx->quota_types |= QTYPE_MASK_GRP;
+		break;
 	}
 	return 0;
 
@@ -3843,6 +3972,12 @@  static int shmem_reconfigure(struct fs_context *fc)
 		goto out;
 	}
 
+	if (ctx->seen & SHMEM_SEEN_QUOTA &&
+	    !sb_any_quota_loaded(fc->root->d_sb)) {
+		err = "Cannot enable quota on remount";
+		goto out;
+	}
+
 	if (ctx->seen & SHMEM_SEEN_HUGE)
 		sbinfo->huge = ctx->huge;
 	if (ctx->seen & SHMEM_SEEN_INUMS)
@@ -3934,6 +4069,9 @@  static void shmem_put_super(struct super_block *sb)
 {
 	struct shmem_sb_info *sbinfo = SHMEM_SB(sb);
 
+#ifdef CONFIG_TMPFS_QUOTA
+	shmem_disable_quotas(sb);
+#endif
 	free_percpu(sbinfo->ino_batch);
 	percpu_counter_destroy(&sbinfo->used_blocks);
 	mpol_put(sbinfo->mpol);
@@ -4013,6 +4151,17 @@  static int shmem_fill_super(struct super_block *sb, struct fs_context *fc)
 #endif
 	uuid_gen(&sb->s_uuid);
 
+#ifdef CONFIG_TMPFS_QUOTA
+	if (ctx->seen & SHMEM_SEEN_QUOTA) {
+		sb->dq_op = &shmem_quota_operations;
+		sb->s_qcop = &dquot_quotactl_sysfile_ops;
+		sb->s_quota_types = QTYPE_MASK_USR | QTYPE_MASK_GRP;
+
+		if (shmem_enable_quotas(sb, ctx->quota_types))
+			goto failed;
+	}
+#endif /* CONFIG_TMPFS_QUOTA */
+
 	inode = shmem_get_inode(&nop_mnt_idmap, sb, NULL, S_IFDIR | sbinfo->mode, 0,
 				VM_NORESERVE);
 	if (IS_ERR(inode)) {
@@ -4188,6 +4337,9 @@  static const struct super_operations shmem_ops = {
 #ifdef CONFIG_TMPFS
 	.statfs		= shmem_statfs,
 	.show_options	= shmem_show_options,
+#endif
+#ifdef CONFIG_TMPFS_QUOTA
+	.get_dquots	= shmem_get_dquots,
 #endif
 	.evict_inode	= shmem_evict_inode,
 	.drop_inode	= generic_delete_inode,
@@ -4254,6 +4406,14 @@  void __init shmem_init(void)
 
 	shmem_init_inodecache();
 
+#ifdef CONFIG_TMPFS_QUOTA
+	error = register_quota_format(&shmem_quota_format);
+	if (error < 0) {
+		pr_err("Could not register quota format\n");
+		goto out3;
+	}
+#endif
+
 	error = register_filesystem(&shmem_fs_type);
 	if (error) {
 		pr_err("Could not register tmpfs\n");
@@ -4278,6 +4438,10 @@  void __init shmem_init(void)
 out1:
 	unregister_filesystem(&shmem_fs_type);
 out2:
+#ifdef CONFIG_TMPFS_QUOTA
+	unregister_quota_format(&shmem_quota_format);
+out3:
+#endif
 	shmem_destroy_inodecache();
 	shm_mnt = ERR_PTR(error);
 }