Message ID | 20220901221720.1105021-3-joel@joelfernandes.org (mailing list archive) |
---|---|
State | New, archived |
Series | Implement call_rcu_lazy() and miscellaneous fixes |
On 9/2/22 00:17, Joel Fernandes (Google) wrote:
> From: Vlastimil Babka <vbabka@suse.cz>
>
> Joel reports [1] that increasing the rcu_head size for debugging
> purposes used to work before struct slab was split from struct page, but
> now runs into the various SLAB_MATCH() sanity checks of the layout.
>
> This is because the rcu_head in struct page is in union with large
> sub-structures and has space to grow without exceeding their size, while
> in struct slab (for SLAB and SLUB) it's in union only with a list_head.
>
> On closer inspection (and after the previous patch) we can put all
> fields except slab_cache to a union with rcu_head, as slab_cache is
> sufficient for the rcu freeing callbacks to work and the rest can be
> overwritten by rcu_head without causing issues.
>
> This is only somewhat complicated by the need to keep SLUB's
> freelist+counters aligned for cmpxchg_double. As a result the fields
> need to be reordered so that slab_cache is first (after page flags) and
> the union with rcu_head follows. For consistency, do that for SLAB as
> well, although not necessary there.
>
> As a result, the rcu_head field in struct page and struct slab is no
> longer at the same offset, but that doesn't matter as there is no
> casting that would rely on that in the slab freeing callbacks, so we can
> just drop the respective SLAB_MATCH() check.
>
> Also we need to update the SLAB_MATCH() for compound_head to reflect the
> new ordering.
>
> While at it, also add a static_assert to check the alignment needed for
> cmpxchg_double so mistakes are found sooner than a runtime GPF.
>
> [1] https://lore.kernel.org/all/85afd876-d8bb-0804-b2c5-48ed3055e702@joelfernandes.org/
>
> Reported-by: Joel Fernandes <joel@joelfernandes.org>
> Signed-off-by: Vlastimil Babka <vbabka@suse.cz>

I've added patches 01 and 02 to the slab tree for -next exposure before
Joel's full series posting, but it should also be OK if the rcu tree
carries them with the whole patchset. I can then drop them from the slab
tree (there are no dependencies on other stuff there) so we don't
introduce duplicate commits needlessly, just give me a heads up.
> ---
>  mm/slab.h | 54 ++++++++++++++++++++++++++++++++----------------------
>  1 file changed, 32 insertions(+), 22 deletions(-)
>
> diff --git a/mm/slab.h b/mm/slab.h
> index 4ec82bec15ec..2c248864ea91 100644
> --- a/mm/slab.h
> +++ b/mm/slab.h
> @@ -11,37 +11,43 @@ struct slab {
>  
>  #if defined(CONFIG_SLAB)
>  
> +	struct kmem_cache *slab_cache;
>  	union {
> -		struct list_head slab_list;
> +		struct {
> +			struct list_head slab_list;
> +			void *freelist;	/* array of free object indexes */
> +			void *s_mem;	/* first object */
> +		};
>  		struct rcu_head rcu_head;
>  	};
> -	struct kmem_cache *slab_cache;
> -	void *freelist;	/* array of free object indexes */
> -	void *s_mem;	/* first object */
>  	unsigned int active;
>  
>  #elif defined(CONFIG_SLUB)
>  
> -	union {
> -		struct list_head slab_list;
> -		struct rcu_head rcu_head;
> -#ifdef CONFIG_SLUB_CPU_PARTIAL
> -		struct {
> -			struct slab *next;
> -			int slabs;	/* Nr of slabs left */
> -		};
> -#endif
> -	};
>  	struct kmem_cache *slab_cache;
> -	/* Double-word boundary */
> -	void *freelist;		/* first free object */
>  	union {
> -		unsigned long counters;
>  		struct {
> -			unsigned inuse:16;
> -			unsigned objects:15;
> -			unsigned frozen:1;
> +			union {
> +				struct list_head slab_list;
> +#ifdef CONFIG_SLUB_CPU_PARTIAL
> +				struct {
> +					struct slab *next;
> +					int slabs;	/* Nr of slabs left */
> +				};
> +#endif
> +			};
> +			/* Double-word boundary */
> +			void *freelist;		/* first free object */
> +			union {
> +				unsigned long counters;
> +				struct {
> +					unsigned inuse:16;
> +					unsigned objects:15;
> +					unsigned frozen:1;
> +				};
> +			};
>  		};
> +		struct rcu_head rcu_head;
>  	};
>  	unsigned int __unused;
>  
> @@ -66,9 +72,10 @@ struct slab {
>  #define SLAB_MATCH(pg, sl)						\
>  	static_assert(offsetof(struct page, pg) == offsetof(struct slab, sl))
>  SLAB_MATCH(flags, __page_flags);
> -SLAB_MATCH(compound_head, slab_list);	/* Ensure bit 0 is clear */
>  #ifndef CONFIG_SLOB
> -SLAB_MATCH(rcu_head, rcu_head);
> +SLAB_MATCH(compound_head, slab_cache);	/* Ensure bit 0 is clear */
> +#else
> +SLAB_MATCH(compound_head, slab_list);	/* Ensure bit 0 is clear */
>  #endif
>  SLAB_MATCH(_refcount, __page_refcount);
>  #ifdef CONFIG_MEMCG
> @@ -76,6 +83,9 @@ SLAB_MATCH(memcg_data, memcg_data);
>  #endif
>  #undef SLAB_MATCH
>  static_assert(sizeof(struct slab) <= sizeof(struct page));
> +#if defined(CONFIG_HAVE_CMPXCHG_DOUBLE) && defined(CONFIG_SLUB)
> +static_assert(IS_ALIGNED(offsetof(struct slab, freelist), 16));
> +#endif
>  
>  /**
>   * folio_slab - Converts from folio to slab.
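The key claim in the changelog above, that slab_cache is the only field which must stay outside the union with rcu_head, is easiest to see from the freeing path. Below is a minimal, self-contained user-space sketch (not the kernel's actual code; the identifiers fake_slab, fake_cache and free_slab_rcu are invented for illustration) of how a SLUB-style RCU freeing callback recovers the slab from the embedded rcu_head via container_of() and then reads only slab_cache, so every other field sharing the union may be clobbered while the grace period is pending:

```c
/*
 * Minimal sketch, not the kernel's implementation: why only slab_cache
 * has to live outside the union with rcu_head.  The callback recovers
 * the slab from the embedded rcu_head and only reads slab_cache; the
 * fields that share the union are dead by then and may be overwritten.
 */
#include <stddef.h>
#include <stdio.h>

struct fake_rcu_head {
	struct fake_rcu_head *next;
	void (*func)(struct fake_rcu_head *);
};

struct fake_cache { const char *name; };

struct fake_slab {
	struct fake_cache *slab_cache;	/* must stay valid across the grace period */
	union {
		struct {
			void *freelist;		/* may be clobbered by rcu_head */
			unsigned long counters;	/* likewise */
		};
		struct fake_rcu_head rcu_head;
	};
};

#define container_of(ptr, type, member) \
	((type *)((char *)(ptr) - offsetof(type, member)))

/* What a SLUB-style RCU freeing callback fundamentally does. */
static void free_slab_rcu(struct fake_rcu_head *h)
{
	struct fake_slab *slab = container_of(h, struct fake_slab, rcu_head);

	/* Only slab_cache is read here; freelist/counters are not used. */
	printf("freeing slab of cache %s\n", slab->slab_cache->name);
}

int main(void)
{
	struct fake_cache cache = { .name = "kmalloc-64" };
	struct fake_slab slab = { .slab_cache = &cache };

	/* "call_rcu": from here on the union is owned by rcu_head... */
	slab.rcu_head.func = free_slab_rcu;
	/* ...and after the grace period the callback runs. */
	slab.rcu_head.func(&slab.rcu_head);
	return 0;
}
```

Once call_rcu() has been invoked on a slab, freelist, counters and slab_list are no longer consulted, which is why the patch can fold them into the union without changing behaviour.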
On 9/2/22 11:26, Vlastimil Babka wrote:
> On 9/2/22 00:17, Joel Fernandes (Google) wrote:
>> [...]
>
> I've added patches 01 and 02 to the slab tree for -next exposure before
> Joel's full series posting, but it should also be OK if the rcu tree
> carries them with the whole patchset. I can then drop them from the slab
> tree (there are no dependencies on other stuff there) so we don't
> introduce duplicate commits needlessly, just give me a heads up.

Ah, but in that case please apply the reviews from my posting [1]:

patch 1:
Reviewed-by: Hyeonggon Yoo <42.hyeyoo@gmail.com>

patch 2:
Acked-by: Hyeonggon Yoo <42.hyeyoo@gmail.com>

[1] https://lore.kernel.org/all/20220826090912.11292-1-vbabka@suse.cz/
On 9/2/2022 5:30 AM, Vlastimil Babka wrote:
> On 9/2/22 11:26, Vlastimil Babka wrote:
>> On 9/2/22 00:17, Joel Fernandes (Google) wrote:
>>> [...]
>>
>> I've added patches 01 and 02 to the slab tree for -next exposure before
>> Joel's full series posting, but it should also be OK if the rcu tree
>> carries them with the whole patchset. I can then drop them from the slab
>> tree (there are no dependencies on other stuff there) so we don't
>> introduce duplicate commits needlessly, just give me a heads up.
>
> Ah, but in that case please apply the reviews from my posting [1]:
>
> patch 1:
> Reviewed-by: Hyeonggon Yoo <42.hyeyoo@gmail.com>
>
> patch 2:
> Acked-by: Hyeonggon Yoo <42.hyeyoo@gmail.com>
>
> [1] https://lore.kernel.org/all/20220826090912.11292-1-vbabka@suse.cz/

Sorry for injecting confusion - my main intent in including the mm patches
in this series was to make it easier for other reviewers/testers to
backport the series to their kernels in one shot. Some reviewers expressed
interest in trying out the series.

I think it is best to let the -mm patches in the series go through the
slab tree, as you also have the Acks/Reviews there and will make sure
those dependencies are out of the way.

My lesson here is to be clearer: I could have added some notes for context
below the "---" of those mm patches.

Thanks again for your help,

- Joel
On Fri, Sep 02, 2022 at 11:09:08AM -0400, Joel Fernandes wrote:
> On 9/2/2022 5:30 AM, Vlastimil Babka wrote:
> > On 9/2/22 11:26, Vlastimil Babka wrote:
> >> On 9/2/22 00:17, Joel Fernandes (Google) wrote:
> >>> [...]
> >>
> >> I've added patches 01 and 02 to the slab tree for -next exposure before
> >> Joel's full series posting, but it should also be OK if the rcu tree
> >> carries them with the whole patchset. I can then drop them from the slab
> >> tree (there are no dependencies on other stuff there) so we don't
> >> introduce duplicate commits needlessly, just give me a heads up.
> >
> > Ah, but in that case please apply the reviews from my posting [1]:
> >
> > patch 1:
> > Reviewed-by: Hyeonggon Yoo <42.hyeyoo@gmail.com>
> >
> > patch 2:
> > Acked-by: Hyeonggon Yoo <42.hyeyoo@gmail.com>
> >
> > [1] https://lore.kernel.org/all/20220826090912.11292-1-vbabka@suse.cz/
>
> Sorry for injecting confusion - my main intent in including the mm patches
> in this series was to make it easier for other reviewers/testers to
> backport the series to their kernels in one shot. Some reviewers expressed
> interest in trying out the series.
>
> I think it is best to let the -mm patches in the series go through the
> slab tree, as you also have the Acks/Reviews there and will make sure
> those dependencies are out of the way.
>
> My lesson here is to be clearer: I could have added some notes for context
> below the "---" of those mm patches.
>
> Thanks again for your help,

Hello, Vlastimil, and thank you for putting these together! I believe that
your two patches should go in via the slab tree. I am queueing them in -rcu
only temporarily and just for convenience in testing. I expect that I will
rebase them so that I can let your versions cover things in -next.

							Thanx, Paul
diff --git a/mm/slab.h b/mm/slab.h
index 4ec82bec15ec..2c248864ea91 100644
--- a/mm/slab.h
+++ b/mm/slab.h
@@ -11,37 +11,43 @@ struct slab {
 
 #if defined(CONFIG_SLAB)
 
+	struct kmem_cache *slab_cache;
 	union {
-		struct list_head slab_list;
+		struct {
+			struct list_head slab_list;
+			void *freelist;	/* array of free object indexes */
+			void *s_mem;	/* first object */
+		};
 		struct rcu_head rcu_head;
 	};
-	struct kmem_cache *slab_cache;
-	void *freelist;	/* array of free object indexes */
-	void *s_mem;	/* first object */
 	unsigned int active;
 
 #elif defined(CONFIG_SLUB)
 
-	union {
-		struct list_head slab_list;
-		struct rcu_head rcu_head;
-#ifdef CONFIG_SLUB_CPU_PARTIAL
-		struct {
-			struct slab *next;
-			int slabs;	/* Nr of slabs left */
-		};
-#endif
-	};
 	struct kmem_cache *slab_cache;
-	/* Double-word boundary */
-	void *freelist;		/* first free object */
 	union {
-		unsigned long counters;
 		struct {
-			unsigned inuse:16;
-			unsigned objects:15;
-			unsigned frozen:1;
+			union {
+				struct list_head slab_list;
+#ifdef CONFIG_SLUB_CPU_PARTIAL
+				struct {
+					struct slab *next;
+					int slabs;	/* Nr of slabs left */
+				};
+#endif
+			};
+			/* Double-word boundary */
+			void *freelist;		/* first free object */
+			union {
+				unsigned long counters;
+				struct {
+					unsigned inuse:16;
+					unsigned objects:15;
+					unsigned frozen:1;
+				};
+			};
 		};
+		struct rcu_head rcu_head;
 	};
 	unsigned int __unused;
 
@@ -66,9 +72,10 @@ struct slab {
 #define SLAB_MATCH(pg, sl)						\
 	static_assert(offsetof(struct page, pg) == offsetof(struct slab, sl))
 SLAB_MATCH(flags, __page_flags);
-SLAB_MATCH(compound_head, slab_list);	/* Ensure bit 0 is clear */
 #ifndef CONFIG_SLOB
-SLAB_MATCH(rcu_head, rcu_head);
+SLAB_MATCH(compound_head, slab_cache);	/* Ensure bit 0 is clear */
+#else
+SLAB_MATCH(compound_head, slab_list);	/* Ensure bit 0 is clear */
 #endif
 SLAB_MATCH(_refcount, __page_refcount);
 #ifdef CONFIG_MEMCG
@@ -76,6 +83,9 @@ SLAB_MATCH(memcg_data, memcg_data);
 #endif
 #undef SLAB_MATCH
 static_assert(sizeof(struct slab) <= sizeof(struct page));
+#if defined(CONFIG_HAVE_CMPXCHG_DOUBLE) && defined(CONFIG_SLUB)
+static_assert(IS_ALIGNED(offsetof(struct slab, freelist), 16));
+#endif
 
 /**
  * folio_slab - Converts from folio to slab.
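As a footnote to the two build-time checks in the hunks above: SLAB_MATCH() asserts that fields reused across struct page and struct slab sit at identical offsets, and the new static_assert catches a misaligned freelist/counters pair at compile time instead of as a runtime GPF from cmpxchg_double. The stand-alone sketch below (assuming a 64-bit target; mini_page and mini_slab are trimmed stand-ins rather than the real kernel structures) shows both kinds of check in plain C11:

```c
/*
 * Simplified illustration of the compile-time layout checks, not the
 * kernel's headers: offsetof() equality between two overlaid structs,
 * and 16-byte alignment of the freelist/counters pair that a
 * double-word cmpxchg requires.  Assumes 8-byte longs and pointers.
 */
#include <assert.h>
#include <stddef.h>

struct mini_page {
	unsigned long flags;
	unsigned long compound_head;	/* overlaps slab_cache in mini_slab */
	/* ...rest of struct page omitted... */
};

struct mini_slab {
	unsigned long __page_flags;
	void *slab_cache;
	/* Double-word boundary required by cmpxchg_double */
	void *freelist;
	unsigned long counters;
};

/* The SLAB_MATCH() idea: fields reused via type punning must line up. */
#define SLAB_MATCH(pg, sl)						\
	static_assert(offsetof(struct mini_page, pg) ==			\
		      offsetof(struct mini_slab, sl),			\
		      "layout mismatch: " #pg " vs " #sl)

SLAB_MATCH(flags, __page_flags);
SLAB_MATCH(compound_head, slab_cache);	/* bit 0 must stay clear */

/* The new assert: catch misalignment at build time, not as a runtime GPF. */
static_assert(offsetof(struct mini_slab, freelist) % 16 == 0,
	      "freelist+counters not 16-byte aligned for cmpxchg_double");

int main(void) { return 0; }
```

The kernel expresses the alignment check with its own IS_ALIGNED() helper; the modulo form above is only used to keep the sketch free of kernel headers.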