diff mbox series

shmem: Use raw_spinlock_t for ->stat_lock

Message ID 20210806142916.jdwkb5bx62q5fwfo@linutronix.de (mailing list archive)
State New
Headers show
Series shmem: Use raw_spinlock_t for ->stat_lock | expand

Commit Message

Sebastian Andrzej Siewior Aug. 6, 2021, 2:29 p.m. UTC
Each CPU has SHMEM_INO_BATCH inodes available in `->ino_batch' which is
per-CPU. Access here is serialized by disabling preemption. If the pool is
empty, it gets reloaded from `->next_ino'. Access here is serialized by
->stat_lock which is a spinlock_t and cannot be acquired with disabled
preemption.
One way around it would be to make the per-CPU `ino_batch' struct, which
contains the inode number, use a local_lock_t.
Another solution is to promote ->stat_lock to a raw_spinlock_t. The critical
sections are short. The mpol_put() must be moved outside of the critical
section to avoid invoking the destructor with disabled preemption.

Signed-off-by: Sebastian Andrzej Siewior <bigeasy@linutronix.de>
---
 include/linux/shmem_fs.h |  2 +-
 mm/shmem.c               | 31 +++++++++++++++++--------------
 2 files changed, 18 insertions(+), 15 deletions(-)

Comments

Hugh Dickins Aug. 11, 2021, 3:06 a.m. UTC | #1
On Fri, 6 Aug 2021, Sebastian Andrzej Siewior wrote:

> Each CPU has SHMEM_INO_BATCH inodes available in `->ino_batch' which is
> per-CPU. Access here is serialized by disabling preemption. If the pool is
> empty, it gets reloaded from `->next_ino'. Access here is serialized by
> ->stat_lock which is a spinlock_t and can not be acquired with disabled
> preemption.
> One way around it would make per-CPU ino_batch struct containing the inode
> number a local_lock_t.
> Another solution is to promote ->stat_lock to a raw_spinlock_t. The critical
> sections are short. The mpol_put() must be moved outside of the critical
> section to avoid invoking the destructor with disabled preemption.
> 
> Signed-off-by: Sebastian Andrzej Siewior <bigeasy@linutronix.de>

Acked-by: Hugh Dickins <hughd@google.com>

I was a little worried at first by the use in shmem_reconfigure(),
potentially around a percpu_counter_compare(), which in some cases does a
sum across all cpus - not as short a critical section as the rest of them.

But I see that __percpu_counter_sum() uses a raw_spinlock_t itself, and
has done so for twelve years: if that one is good, then this use here
must be good too.

Thanks.

> ---
>  include/linux/shmem_fs.h |  2 +-
>  mm/shmem.c               | 31 +++++++++++++++++--------------
>  2 files changed, 18 insertions(+), 15 deletions(-)
> 
> diff --git a/include/linux/shmem_fs.h b/include/linux/shmem_fs.h
> index 8e775ce517bb3..0a8499fb9c3cb 100644
> --- a/include/linux/shmem_fs.h
> +++ b/include/linux/shmem_fs.h
> @@ -31,7 +31,7 @@ struct shmem_sb_info {
>  	struct percpu_counter used_blocks;  /* How many are allocated */
>  	unsigned long max_inodes;   /* How many inodes are allowed */
>  	unsigned long free_inodes;  /* How many are left for allocation */
> -	spinlock_t stat_lock;	    /* Serialize shmem_sb_info changes */
> +	raw_spinlock_t stat_lock;   /* Serialize shmem_sb_info changes */
>  	umode_t mode;		    /* Mount mode for root directory */
>  	unsigned char huge;	    /* Whether to try for hugepages */
>  	kuid_t uid;		    /* Mount uid for root directory */
> diff --git a/mm/shmem.c b/mm/shmem.c
> index 70d9ce294bb49..1b95afc4483c1 100644
> --- a/mm/shmem.c
> +++ b/mm/shmem.c
> @@ -278,10 +278,10 @@ static int shmem_reserve_inode(struct super_block *sb, ino_t *inop)
>  	ino_t ino;
>  
>  	if (!(sb->s_flags & SB_KERNMOUNT)) {
> -		spin_lock(&sbinfo->stat_lock);
> +		raw_spin_lock(&sbinfo->stat_lock);
>  		if (sbinfo->max_inodes) {
>  			if (!sbinfo->free_inodes) {
> -				spin_unlock(&sbinfo->stat_lock);
> +				raw_spin_unlock(&sbinfo->stat_lock);
>  				return -ENOSPC;
>  			}
>  			sbinfo->free_inodes--;
> @@ -304,7 +304,7 @@ static int shmem_reserve_inode(struct super_block *sb, ino_t *inop)
>  			}
>  			*inop = ino;
>  		}
> -		spin_unlock(&sbinfo->stat_lock);
> +		raw_spin_unlock(&sbinfo->stat_lock);
>  	} else if (inop) {
>  		/*
>  		 * __shmem_file_setup, one of our callers, is lock-free: it
> @@ -319,13 +319,14 @@ static int shmem_reserve_inode(struct super_block *sb, ino_t *inop)
>  		 * to worry about things like glibc compatibility.
>  		 */
>  		ino_t *next_ino;
> +
>  		next_ino = per_cpu_ptr(sbinfo->ino_batch, get_cpu());
>  		ino = *next_ino;
>  		if (unlikely(ino % SHMEM_INO_BATCH == 0)) {
> -			spin_lock(&sbinfo->stat_lock);
> +			raw_spin_lock(&sbinfo->stat_lock);
>  			ino = sbinfo->next_ino;
>  			sbinfo->next_ino += SHMEM_INO_BATCH;
> -			spin_unlock(&sbinfo->stat_lock);
> +			raw_spin_unlock(&sbinfo->stat_lock);
>  			if (unlikely(is_zero_ino(ino)))
>  				ino++;
>  		}
> @@ -341,9 +342,9 @@ static void shmem_free_inode(struct super_block *sb)
>  {
>  	struct shmem_sb_info *sbinfo = SHMEM_SB(sb);
>  	if (sbinfo->max_inodes) {
> -		spin_lock(&sbinfo->stat_lock);
> +		raw_spin_lock(&sbinfo->stat_lock);
>  		sbinfo->free_inodes++;
> -		spin_unlock(&sbinfo->stat_lock);
> +		raw_spin_unlock(&sbinfo->stat_lock);
>  	}
>  }
>  
> @@ -1453,10 +1454,10 @@ static struct mempolicy *shmem_get_sbmpol(struct shmem_sb_info *sbinfo)
>  {
>  	struct mempolicy *mpol = NULL;
>  	if (sbinfo->mpol) {
> -		spin_lock(&sbinfo->stat_lock);	/* prevent replace/use races */
> +		raw_spin_lock(&sbinfo->stat_lock);	/* prevent replace/use races */
>  		mpol = sbinfo->mpol;
>  		mpol_get(mpol);
> -		spin_unlock(&sbinfo->stat_lock);
> +		raw_spin_unlock(&sbinfo->stat_lock);
>  	}
>  	return mpol;
>  }
> @@ -3500,9 +3501,10 @@ static int shmem_reconfigure(struct fs_context *fc)
>  	struct shmem_options *ctx = fc->fs_private;
>  	struct shmem_sb_info *sbinfo = SHMEM_SB(fc->root->d_sb);
>  	unsigned long inodes;
> +	struct mempolicy *mpol = NULL;
>  	const char *err;
>  
> -	spin_lock(&sbinfo->stat_lock);
> +	raw_spin_lock(&sbinfo->stat_lock);
>  	inodes = sbinfo->max_inodes - sbinfo->free_inodes;
>  	if ((ctx->seen & SHMEM_SEEN_BLOCKS) && ctx->blocks) {
>  		if (!sbinfo->max_blocks) {
> @@ -3547,14 +3549,15 @@ static int shmem_reconfigure(struct fs_context *fc)
>  	 * Preserve previous mempolicy unless mpol remount option was specified.
>  	 */
>  	if (ctx->mpol) {
> -		mpol_put(sbinfo->mpol);
> +		mpol = sbinfo->mpol;
>  		sbinfo->mpol = ctx->mpol;	/* transfers initial ref */
>  		ctx->mpol = NULL;
>  	}
> -	spin_unlock(&sbinfo->stat_lock);
> +	raw_spin_unlock(&sbinfo->stat_lock);
> +	mpol_put(mpol);
>  	return 0;
>  out:
> -	spin_unlock(&sbinfo->stat_lock);
> +	raw_spin_unlock(&sbinfo->stat_lock);
>  	return invalfc(fc, "%s", err);
>  }
>  
> @@ -3671,7 +3674,7 @@ static int shmem_fill_super(struct super_block *sb, struct fs_context *fc)
>  	sbinfo->mpol = ctx->mpol;
>  	ctx->mpol = NULL;
>  
> -	spin_lock_init(&sbinfo->stat_lock);
> +	raw_spin_lock_init(&sbinfo->stat_lock);
>  	if (percpu_counter_init(&sbinfo->used_blocks, 0, GFP_KERNEL))
>  		goto failed;
>  	spin_lock_init(&sbinfo->shrinklist_lock);
> -- 
> 2.32.0
> 
>
diff mbox series

Patch

diff --git a/include/linux/shmem_fs.h b/include/linux/shmem_fs.h
index 8e775ce517bb3..0a8499fb9c3cb 100644
--- a/include/linux/shmem_fs.h
+++ b/include/linux/shmem_fs.h
@@ -31,7 +31,7 @@  struct shmem_sb_info {
 	struct percpu_counter used_blocks;  /* How many are allocated */
 	unsigned long max_inodes;   /* How many inodes are allowed */
 	unsigned long free_inodes;  /* How many are left for allocation */
-	spinlock_t stat_lock;	    /* Serialize shmem_sb_info changes */
+	raw_spinlock_t stat_lock;   /* Serialize shmem_sb_info changes */
 	umode_t mode;		    /* Mount mode for root directory */
 	unsigned char huge;	    /* Whether to try for hugepages */
 	kuid_t uid;		    /* Mount uid for root directory */
diff --git a/mm/shmem.c b/mm/shmem.c
index 70d9ce294bb49..1b95afc4483c1 100644
--- a/mm/shmem.c
+++ b/mm/shmem.c
@@ -278,10 +278,10 @@  static int shmem_reserve_inode(struct super_block *sb, ino_t *inop)
 	ino_t ino;
 
 	if (!(sb->s_flags & SB_KERNMOUNT)) {
-		spin_lock(&sbinfo->stat_lock);
+		raw_spin_lock(&sbinfo->stat_lock);
 		if (sbinfo->max_inodes) {
 			if (!sbinfo->free_inodes) {
-				spin_unlock(&sbinfo->stat_lock);
+				raw_spin_unlock(&sbinfo->stat_lock);
 				return -ENOSPC;
 			}
 			sbinfo->free_inodes--;
@@ -304,7 +304,7 @@  static int shmem_reserve_inode(struct super_block *sb, ino_t *inop)
 			}
 			*inop = ino;
 		}
-		spin_unlock(&sbinfo->stat_lock);
+		raw_spin_unlock(&sbinfo->stat_lock);
 	} else if (inop) {
 		/*
 		 * __shmem_file_setup, one of our callers, is lock-free: it
@@ -319,13 +319,14 @@  static int shmem_reserve_inode(struct super_block *sb, ino_t *inop)
 		 * to worry about things like glibc compatibility.
 		 */
 		ino_t *next_ino;
+
 		next_ino = per_cpu_ptr(sbinfo->ino_batch, get_cpu());
 		ino = *next_ino;
 		if (unlikely(ino % SHMEM_INO_BATCH == 0)) {
-			spin_lock(&sbinfo->stat_lock);
+			raw_spin_lock(&sbinfo->stat_lock);
 			ino = sbinfo->next_ino;
 			sbinfo->next_ino += SHMEM_INO_BATCH;
-			spin_unlock(&sbinfo->stat_lock);
+			raw_spin_unlock(&sbinfo->stat_lock);
 			if (unlikely(is_zero_ino(ino)))
 				ino++;
 		}
@@ -341,9 +342,9 @@  static void shmem_free_inode(struct super_block *sb)
 {
 	struct shmem_sb_info *sbinfo = SHMEM_SB(sb);
 	if (sbinfo->max_inodes) {
-		spin_lock(&sbinfo->stat_lock);
+		raw_spin_lock(&sbinfo->stat_lock);
 		sbinfo->free_inodes++;
-		spin_unlock(&sbinfo->stat_lock);
+		raw_spin_unlock(&sbinfo->stat_lock);
 	}
 }
 
@@ -1453,10 +1454,10 @@  static struct mempolicy *shmem_get_sbmpol(struct shmem_sb_info *sbinfo)
 {
 	struct mempolicy *mpol = NULL;
 	if (sbinfo->mpol) {
-		spin_lock(&sbinfo->stat_lock);	/* prevent replace/use races */
+		raw_spin_lock(&sbinfo->stat_lock);	/* prevent replace/use races */
 		mpol = sbinfo->mpol;
 		mpol_get(mpol);
-		spin_unlock(&sbinfo->stat_lock);
+		raw_spin_unlock(&sbinfo->stat_lock);
 	}
 	return mpol;
 }
@@ -3500,9 +3501,10 @@  static int shmem_reconfigure(struct fs_context *fc)
 	struct shmem_options *ctx = fc->fs_private;
 	struct shmem_sb_info *sbinfo = SHMEM_SB(fc->root->d_sb);
 	unsigned long inodes;
+	struct mempolicy *mpol = NULL;
 	const char *err;
 
-	spin_lock(&sbinfo->stat_lock);
+	raw_spin_lock(&sbinfo->stat_lock);
 	inodes = sbinfo->max_inodes - sbinfo->free_inodes;
 	if ((ctx->seen & SHMEM_SEEN_BLOCKS) && ctx->blocks) {
 		if (!sbinfo->max_blocks) {
@@ -3547,14 +3549,15 @@  static int shmem_reconfigure(struct fs_context *fc)
 	 * Preserve previous mempolicy unless mpol remount option was specified.
 	 */
 	if (ctx->mpol) {
-		mpol_put(sbinfo->mpol);
+		mpol = sbinfo->mpol;
 		sbinfo->mpol = ctx->mpol;	/* transfers initial ref */
 		ctx->mpol = NULL;
 	}
-	spin_unlock(&sbinfo->stat_lock);
+	raw_spin_unlock(&sbinfo->stat_lock);
+	mpol_put(mpol);
 	return 0;
 out:
-	spin_unlock(&sbinfo->stat_lock);
+	raw_spin_unlock(&sbinfo->stat_lock);
 	return invalfc(fc, "%s", err);
 }
 
@@ -3671,7 +3674,7 @@  static int shmem_fill_super(struct super_block *sb, struct fs_context *fc)
 	sbinfo->mpol = ctx->mpol;
 	ctx->mpol = NULL;
 
-	spin_lock_init(&sbinfo->stat_lock);
+	raw_spin_lock_init(&sbinfo->stat_lock);
 	if (percpu_counter_init(&sbinfo->used_blocks, 0, GFP_KERNEL))
 		goto failed;
 	spin_lock_init(&sbinfo->shrinklist_lock);