| Message ID | 20210806142916.jdwkb5bx62q5fwfo@linutronix.de (mailing list archive) |
|---|---|
| State | New |
| Series | shmem: Use raw_spinlock_t for ->stat_lock |
On Fri, 6 Aug 2021, Sebastian Andrzej Siewior wrote:

> Each CPU has SHMEM_INO_BATCH inodes available in `->ino_batch', which is
> per-CPU. Access to it is serialized by disabling preemption. If the pool
> is empty, it is refilled from `->next_ino'. Access to that is serialized
> by ->stat_lock, which is a spinlock_t and therefore cannot be acquired
> with preemption disabled.
>
> One way around this would be to make the per-CPU ino_batch a struct
> containing the inode number and a local_lock_t.
> Another solution is to promote ->stat_lock to a raw_spinlock_t. Its
> critical sections are short. The mpol_put() must be moved outside of the
> critical section to avoid invoking the destructor with preemption
> disabled.
>
> Signed-off-by: Sebastian Andrzej Siewior <bigeasy@linutronix.de>

Acked-by: Hugh Dickins <hughd@google.com>

I was a little worried at first by the use in shmem_reconfigure(),
potentially around a percpu_counter_compare(), which in some cases does a
sum across all cpus - not as short a critical section as the rest of them.
But I see that __percpu_counter_sum() uses a raw_spinlock_t itself, and
has done so for twelve years: if that one is good, then this use here must
be good too.

Thanks.

> ---
>  include/linux/shmem_fs.h |  2 +-
>  mm/shmem.c               | 31 +++++++++++++++++--------------
>  2 files changed, 18 insertions(+), 15 deletions(-)
>
> diff --git a/include/linux/shmem_fs.h b/include/linux/shmem_fs.h
> index 8e775ce517bb3..0a8499fb9c3cb 100644
> --- a/include/linux/shmem_fs.h
> +++ b/include/linux/shmem_fs.h
> @@ -31,7 +31,7 @@ struct shmem_sb_info {
>  	struct percpu_counter used_blocks;  /* How many are allocated */
>  	unsigned long max_inodes;   /* How many inodes are allowed */
>  	unsigned long free_inodes;  /* How many are left for allocation */
> -	spinlock_t stat_lock;	    /* Serialize shmem_sb_info changes */
> +	raw_spinlock_t stat_lock;   /* Serialize shmem_sb_info changes */
>  	umode_t mode;		    /* Mount mode for root directory */
>  	unsigned char huge;	    /* Whether to try for hugepages */
>  	kuid_t uid;		    /* Mount uid for root directory */
> diff --git a/mm/shmem.c b/mm/shmem.c
> index 70d9ce294bb49..1b95afc4483c1 100644
> --- a/mm/shmem.c
> +++ b/mm/shmem.c
> @@ -278,10 +278,10 @@ static int shmem_reserve_inode(struct super_block *sb, ino_t *inop)
>  	ino_t ino;
>
>  	if (!(sb->s_flags & SB_KERNMOUNT)) {
> -		spin_lock(&sbinfo->stat_lock);
> +		raw_spin_lock(&sbinfo->stat_lock);
>  		if (sbinfo->max_inodes) {
>  			if (!sbinfo->free_inodes) {
> -				spin_unlock(&sbinfo->stat_lock);
> +				raw_spin_unlock(&sbinfo->stat_lock);
>  				return -ENOSPC;
>  			}
>  			sbinfo->free_inodes--;
> @@ -304,7 +304,7 @@ static int shmem_reserve_inode(struct super_block *sb, ino_t *inop)
>  			}
>  			*inop = ino;
>  		}
> -		spin_unlock(&sbinfo->stat_lock);
> +		raw_spin_unlock(&sbinfo->stat_lock);
>  	} else if (inop) {
>  		/*
>  		 * __shmem_file_setup, one of our callers, is lock-free: it
> @@ -319,13 +319,14 @@ static int shmem_reserve_inode(struct super_block *sb, ino_t *inop)
>  		 * to worry about things like glibc compatibility.
>  		 */
>  		ino_t *next_ino;
> +
>  		next_ino = per_cpu_ptr(sbinfo->ino_batch, get_cpu());
>  		ino = *next_ino;
>  		if (unlikely(ino % SHMEM_INO_BATCH == 0)) {
> -			spin_lock(&sbinfo->stat_lock);
> +			raw_spin_lock(&sbinfo->stat_lock);
>  			ino = sbinfo->next_ino;
>  			sbinfo->next_ino += SHMEM_INO_BATCH;
> -			spin_unlock(&sbinfo->stat_lock);
> +			raw_spin_unlock(&sbinfo->stat_lock);
>  			if (unlikely(is_zero_ino(ino)))
>  				ino++;
>  		}
> @@ -341,9 +342,9 @@ static void shmem_free_inode(struct super_block *sb)
>  {
>  	struct shmem_sb_info *sbinfo = SHMEM_SB(sb);
>  	if (sbinfo->max_inodes) {
> -		spin_lock(&sbinfo->stat_lock);
> +		raw_spin_lock(&sbinfo->stat_lock);
>  		sbinfo->free_inodes++;
> -		spin_unlock(&sbinfo->stat_lock);
> +		raw_spin_unlock(&sbinfo->stat_lock);
>  	}
>  }
>
> @@ -1453,10 +1454,10 @@ static struct mempolicy *shmem_get_sbmpol(struct shmem_sb_info *sbinfo)
>  {
>  	struct mempolicy *mpol = NULL;
>  	if (sbinfo->mpol) {
> -		spin_lock(&sbinfo->stat_lock);	/* prevent replace/use races */
> +		raw_spin_lock(&sbinfo->stat_lock);	/* prevent replace/use races */
>  		mpol = sbinfo->mpol;
>  		mpol_get(mpol);
> -		spin_unlock(&sbinfo->stat_lock);
> +		raw_spin_unlock(&sbinfo->stat_lock);
>  	}
>  	return mpol;
>  }
> @@ -3500,9 +3501,10 @@ static int shmem_reconfigure(struct fs_context *fc)
>  	struct shmem_options *ctx = fc->fs_private;
>  	struct shmem_sb_info *sbinfo = SHMEM_SB(fc->root->d_sb);
>  	unsigned long inodes;
> +	struct mempolicy *mpol = NULL;
>  	const char *err;
>
> -	spin_lock(&sbinfo->stat_lock);
> +	raw_spin_lock(&sbinfo->stat_lock);
>  	inodes = sbinfo->max_inodes - sbinfo->free_inodes;
>  	if ((ctx->seen & SHMEM_SEEN_BLOCKS) && ctx->blocks) {
>  		if (!sbinfo->max_blocks) {
> @@ -3547,14 +3549,15 @@ static int shmem_reconfigure(struct fs_context *fc)
>  	 * Preserve previous mempolicy unless mpol remount option was specified.
>  	 */
>  	if (ctx->mpol) {
> -		mpol_put(sbinfo->mpol);
> +		mpol = sbinfo->mpol;
>  		sbinfo->mpol = ctx->mpol;	/* transfers initial ref */
>  		ctx->mpol = NULL;
>  	}
> -	spin_unlock(&sbinfo->stat_lock);
> +	raw_spin_unlock(&sbinfo->stat_lock);
> +	mpol_put(mpol);
>  	return 0;
> out:
> -	spin_unlock(&sbinfo->stat_lock);
> +	raw_spin_unlock(&sbinfo->stat_lock);
>  	return invalfc(fc, "%s", err);
>  }
>
> @@ -3671,7 +3674,7 @@ static int shmem_fill_super(struct super_block *sb, struct fs_context *fc)
>  	sbinfo->mpol = ctx->mpol;
>  	ctx->mpol = NULL;
>
> -	spin_lock_init(&sbinfo->stat_lock);
> +	raw_spin_lock_init(&sbinfo->stat_lock);
>  	if (percpu_counter_init(&sbinfo->used_blocks, 0, GFP_KERNEL))
>  		goto failed;
>  	spin_lock_init(&sbinfo->shrinklist_lock);
> --
> 2.32.0
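For context on Hugh's point above: __percpu_counter_sum() walks the per-CPU deltas while holding the counter's own lock, which is a raw_spinlock_t. The following is a simplified sketch of its shape, not the verbatim lib/percpu_counter.c source; the point is only the locking structure, which is why a sum nested under shmem's raw ->stat_lock is no worse than what the counter already does internally.

```c
/*
 * Simplified sketch of __percpu_counter_sum(); details may differ from
 * lib/percpu_counter.c, but the full sum is itself done under a
 * raw_spinlock_t (fbc->lock).
 */
s64 __percpu_counter_sum(struct percpu_counter *fbc)
{
	s64 ret;
	int cpu;
	unsigned long flags;

	raw_spin_lock_irqsave(&fbc->lock, flags);	/* fbc->lock is a raw_spinlock_t */
	ret = fbc->count;
	for_each_online_cpu(cpu)
		ret += *per_cpu_ptr(fbc->counters, cpu);
	raw_spin_unlock_irqrestore(&fbc->lock, flags);
	return ret;
}
```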
diff --git a/include/linux/shmem_fs.h b/include/linux/shmem_fs.h
index 8e775ce517bb3..0a8499fb9c3cb 100644
--- a/include/linux/shmem_fs.h
+++ b/include/linux/shmem_fs.h
@@ -31,7 +31,7 @@ struct shmem_sb_info {
 	struct percpu_counter used_blocks;  /* How many are allocated */
 	unsigned long max_inodes;   /* How many inodes are allowed */
 	unsigned long free_inodes;  /* How many are left for allocation */
-	spinlock_t stat_lock;	    /* Serialize shmem_sb_info changes */
+	raw_spinlock_t stat_lock;   /* Serialize shmem_sb_info changes */
 	umode_t mode;		    /* Mount mode for root directory */
 	unsigned char huge;	    /* Whether to try for hugepages */
 	kuid_t uid;		    /* Mount uid for root directory */
diff --git a/mm/shmem.c b/mm/shmem.c
index 70d9ce294bb49..1b95afc4483c1 100644
--- a/mm/shmem.c
+++ b/mm/shmem.c
@@ -278,10 +278,10 @@ static int shmem_reserve_inode(struct super_block *sb, ino_t *inop)
 	ino_t ino;

 	if (!(sb->s_flags & SB_KERNMOUNT)) {
-		spin_lock(&sbinfo->stat_lock);
+		raw_spin_lock(&sbinfo->stat_lock);
 		if (sbinfo->max_inodes) {
 			if (!sbinfo->free_inodes) {
-				spin_unlock(&sbinfo->stat_lock);
+				raw_spin_unlock(&sbinfo->stat_lock);
 				return -ENOSPC;
 			}
 			sbinfo->free_inodes--;
@@ -304,7 +304,7 @@ static int shmem_reserve_inode(struct super_block *sb, ino_t *inop)
 			}
 			*inop = ino;
 		}
-		spin_unlock(&sbinfo->stat_lock);
+		raw_spin_unlock(&sbinfo->stat_lock);
 	} else if (inop) {
 		/*
 		 * __shmem_file_setup, one of our callers, is lock-free: it
@@ -319,13 +319,14 @@ static int shmem_reserve_inode(struct super_block *sb, ino_t *inop)
 		 * to worry about things like glibc compatibility.
 		 */
 		ino_t *next_ino;
+
 		next_ino = per_cpu_ptr(sbinfo->ino_batch, get_cpu());
 		ino = *next_ino;
 		if (unlikely(ino % SHMEM_INO_BATCH == 0)) {
-			spin_lock(&sbinfo->stat_lock);
+			raw_spin_lock(&sbinfo->stat_lock);
 			ino = sbinfo->next_ino;
 			sbinfo->next_ino += SHMEM_INO_BATCH;
-			spin_unlock(&sbinfo->stat_lock);
+			raw_spin_unlock(&sbinfo->stat_lock);
 			if (unlikely(is_zero_ino(ino)))
 				ino++;
 		}
@@ -341,9 +342,9 @@ static void shmem_free_inode(struct super_block *sb)
 {
 	struct shmem_sb_info *sbinfo = SHMEM_SB(sb);
 	if (sbinfo->max_inodes) {
-		spin_lock(&sbinfo->stat_lock);
+		raw_spin_lock(&sbinfo->stat_lock);
 		sbinfo->free_inodes++;
-		spin_unlock(&sbinfo->stat_lock);
+		raw_spin_unlock(&sbinfo->stat_lock);
 	}
 }

@@ -1453,10 +1454,10 @@ static struct mempolicy *shmem_get_sbmpol(struct shmem_sb_info *sbinfo)
 {
 	struct mempolicy *mpol = NULL;
 	if (sbinfo->mpol) {
-		spin_lock(&sbinfo->stat_lock);	/* prevent replace/use races */
+		raw_spin_lock(&sbinfo->stat_lock);	/* prevent replace/use races */
 		mpol = sbinfo->mpol;
 		mpol_get(mpol);
-		spin_unlock(&sbinfo->stat_lock);
+		raw_spin_unlock(&sbinfo->stat_lock);
 	}
 	return mpol;
 }
@@ -3500,9 +3501,10 @@ static int shmem_reconfigure(struct fs_context *fc)
 	struct shmem_options *ctx = fc->fs_private;
 	struct shmem_sb_info *sbinfo = SHMEM_SB(fc->root->d_sb);
 	unsigned long inodes;
+	struct mempolicy *mpol = NULL;
 	const char *err;

-	spin_lock(&sbinfo->stat_lock);
+	raw_spin_lock(&sbinfo->stat_lock);
 	inodes = sbinfo->max_inodes - sbinfo->free_inodes;
 	if ((ctx->seen & SHMEM_SEEN_BLOCKS) && ctx->blocks) {
 		if (!sbinfo->max_blocks) {
@@ -3547,14 +3549,15 @@ static int shmem_reconfigure(struct fs_context *fc)
 	 * Preserve previous mempolicy unless mpol remount option was specified.
 	 */
 	if (ctx->mpol) {
-		mpol_put(sbinfo->mpol);
+		mpol = sbinfo->mpol;
 		sbinfo->mpol = ctx->mpol;	/* transfers initial ref */
 		ctx->mpol = NULL;
 	}
-	spin_unlock(&sbinfo->stat_lock);
+	raw_spin_unlock(&sbinfo->stat_lock);
+	mpol_put(mpol);
 	return 0;
 out:
-	spin_unlock(&sbinfo->stat_lock);
+	raw_spin_unlock(&sbinfo->stat_lock);
 	return invalfc(fc, "%s", err);
 }

@@ -3671,7 +3674,7 @@ static int shmem_fill_super(struct super_block *sb, struct fs_context *fc)
 	sbinfo->mpol = ctx->mpol;
 	ctx->mpol = NULL;

-	spin_lock_init(&sbinfo->stat_lock);
+	raw_spin_lock_init(&sbinfo->stat_lock);
 	if (percpu_counter_init(&sbinfo->used_blocks, 0, GFP_KERNEL))
 		goto failed;
 	spin_lock_init(&sbinfo->shrinklist_lock);
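The one hunk that is more than a mechanical spin_lock to raw_spin_lock conversion is the shmem_reconfigure() change: the old mempolicy pointer is stashed while the raw lock is held and only released after unlocking, so the destructor never runs with preemption disabled. A minimal sketch of that pattern follows; replace_mpol() is a hypothetical helper used only for illustration, not a function in the patch.

```c
/*
 * Sketch of the "swap under the lock, free after the unlock" pattern from
 * the shmem_reconfigure() hunk above.  replace_mpol() is hypothetical.
 */
static void replace_mpol(struct shmem_sb_info *sbinfo, struct mempolicy *new)
{
	struct mempolicy *old;

	raw_spin_lock(&sbinfo->stat_lock);
	old = sbinfo->mpol;		/* stash the old policy while locked */
	sbinfo->mpol = new;		/* transfers the caller's reference */
	raw_spin_unlock(&sbinfo->stat_lock);

	mpol_put(old);			/* destructor now runs preemptible */
}
```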
Each CPU has SHMEM_INO_BATCH inodes available in `->ino_batch', which is
per-CPU. Access to it is serialized by disabling preemption. If the pool is
empty, it is refilled from `->next_ino'. Access to that is serialized by
->stat_lock, which is a spinlock_t and therefore cannot be acquired with
preemption disabled.

One way around this would be to make the per-CPU ino_batch a struct
containing the inode number and a local_lock_t.
Another solution is to promote ->stat_lock to a raw_spinlock_t. Its critical
sections are short. The mpol_put() must be moved outside of the critical
section to avoid invoking the destructor with preemption disabled.

Signed-off-by: Sebastian Andrzej Siewior <bigeasy@linutronix.de>
---
 include/linux/shmem_fs.h |  2 +-
 mm/shmem.c               | 31 +++++++++++++++++--------------
 2 files changed, 18 insertions(+), 15 deletions(-)
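The alternative mentioned in the message above, and not taken by the patch, would keep ->stat_lock a spinlock_t and instead serialize the per-CPU batch with a local_lock_t, which stays preemptible on PREEMPT_RT. Below is a very rough sketch of that idea only: the struct, the names, and the use of a single global per-CPU batch (instead of the real per-superblock ->ino_batch) are illustrative assumptions, not code from this series.

```c
#include <linux/local_lock.h>
#include <linux/percpu.h>

/* Hypothetical per-CPU batch guarded by a local_lock_t (illustrative only). */
struct ino_batch_pcpu {
	local_lock_t	lock;
	ino_t		ino;
};

static DEFINE_PER_CPU(struct ino_batch_pcpu, ino_batch_pcpu) = {
	.lock = INIT_LOCAL_LOCK(lock),
};

static ino_t shmem_reserve_ino_sketch(struct shmem_sb_info *sbinfo)
{
	struct ino_batch_pcpu *batch;
	ino_t ino;

	/* Serializes per-CPU access; remains preemptible on PREEMPT_RT. */
	local_lock(&ino_batch_pcpu.lock);
	batch = this_cpu_ptr(&ino_batch_pcpu);
	ino = batch->ino;
	if (unlikely(ino % SHMEM_INO_BATCH == 0)) {
		/* In this variant ->stat_lock could stay a spinlock_t. */
		spin_lock(&sbinfo->stat_lock);
		ino = sbinfo->next_ino;
		sbinfo->next_ino += SHMEM_INO_BATCH;
		spin_unlock(&sbinfo->stat_lock);
		if (unlikely(is_zero_ino(ino)))
			ino++;
	}
	batch->ino = ino + 1;
	local_unlock(&ino_batch_pcpu.lock);
	return ino;
}
```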