
[2/4] slab: Convert __kmalloc_large_node() and free_large_kmalloc() to use folios

Message ID 20231222202807.2135717-3-willy@infradead.org (mailing list archive)
State New
Series Remove some lruvec page accounting functions

Commit Message

Matthew Wilcox Dec. 22, 2023, 8:28 p.m. UTC
Add folio_alloc_node() to replace alloc_pages_node() and then use
folio APIs throughout instead of converting back to pages.

Signed-off-by: Matthew Wilcox (Oracle) <willy@infradead.org>
---
 include/linux/gfp.h |  9 +++++++++
 mm/slub.c           | 15 +++++++--------
 2 files changed, 16 insertions(+), 8 deletions(-)
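
For quick reference, the core of __kmalloc_large_node() after this patch reads roughly as follows (condensed from the diff below). The explicit __GFP_COMP goes away because the folio allocation path adds that flag itself:

static void *__kmalloc_large_node(size_t size, gfp_t flags, int node)
{
	struct folio *folio;
	void *ptr = NULL;
	unsigned int order = get_order(size);

	if (unlikely(flags & GFP_SLAB_BUG_MASK))
		flags = kmalloc_fix_flags(flags);

	/* no explicit __GFP_COMP here: the folio allocator sets it */
	folio = folio_alloc_node(flags, order, node);
	if (folio) {
		ptr = folio_address(folio);
		lruvec_stat_mod_folio(folio, NR_SLAB_UNRECLAIMABLE_B,
				      PAGE_SIZE << order);
	}
	/* the rest of the function is unchanged by this patch */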

Comments

Hyeonggon Yoo Dec. 22, 2023, 11:11 p.m. UTC | #1
On Sat, Dec 23, 2023 at 5:28 AM Matthew Wilcox (Oracle)
<willy@infradead.org> wrote:
>
> Add folio_alloc_node() to replace alloc_pages_node() and then use
> folio APIs throughout instead of converting back to pages.
>
> Signed-off-by: Matthew Wilcox (Oracle) <willy@infradead.org>
> ---
[...]
> diff --git a/mm/slub.c b/mm/slub.c
> index 35aa706dc318..261f01915d9b 100644
> --- a/mm/slub.c
> +++ b/mm/slub.c
> @@ -3919,18 +3919,17 @@ EXPORT_SYMBOL(kmem_cache_alloc_node);
>   */
>  static void *__kmalloc_large_node(size_t size, gfp_t flags, int node)
>  {
> -       struct page *page;
> +       struct folio *folio;
>         void *ptr = NULL;
>         unsigned int order = get_order(size);
>
>         if (unlikely(flags & GFP_SLAB_BUG_MASK))
>                 flags = kmalloc_fix_flags(flags);
>
> -       flags |= __GFP_COMP;
> -       page = alloc_pages_node(node, flags, order);
> -       if (page) {
> -               ptr = page_address(page);
> -               mod_lruvec_page_state(page, NR_SLAB_UNRECLAIMABLE_B,
> +       folio = folio_alloc_node(flags, order, node);

folio_alloc_node()
->__folio_alloc_node()
->__folio_alloc()
->page_rmappable_folio()
->folio_prep_large_rmappable()

I don't think calling this is intentional?

> +       if (folio) {
> +               ptr = folio_address(folio);
> +               lruvec_stat_mod_folio(folio, NR_SLAB_UNRECLAIMABLE_B,
>                                       PAGE_SIZE << order);
>         }

Thanks,
Hyeonggon
Matthew Wilcox Dec. 22, 2023, 11:13 p.m. UTC | #2
On Sat, Dec 23, 2023 at 08:11:11AM +0900, Hyeonggon Yoo wrote:
> > -       flags |= __GFP_COMP;
> > -       page = alloc_pages_node(node, flags, order);
> > -       if (page) {
> > -               ptr = page_address(page);
> > -               mod_lruvec_page_state(page, NR_SLAB_UNRECLAIMABLE_B,
> > +       folio = folio_alloc_node(flags, order, node);
> 
> folio_alloc_node()
> ->__folio_alloc_node()
> ->__folio_alloc()
> ->page_rmappable_folio()
> ->folio_prep_large_rmappable()
> 
> I don't think calling this is intentional?

Oh, hm, you're right.

I withdraw this patch.  I need to think about this a little more.
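
For context, one way to get the folio-based accounting without going through the rmappable-folio prep path Hyeonggon points out would be to keep the page-based allocation and only treat the result as a folio for the stat update. This is purely an illustrative sketch, not what the author chose or what was eventually merged:

	flags |= __GFP_COMP;
	page = alloc_pages_node(node, flags, order);
	if (page) {
		/* sketch only: convert to folio for accounting instead of
		 * allocating through the folio path */
		struct folio *folio = page_folio(page);

		ptr = folio_address(folio);
		lruvec_stat_mod_folio(folio, NR_SLAB_UNRECLAIMABLE_B,
				      PAGE_SIZE << order);
	}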
Matthew Wilcox Dec. 23, 2023, 6:09 a.m. UTC | #3
On Sat, Dec 23, 2023 at 08:11:11AM +0900, Hyeonggon Yoo wrote:
> > +       folio = folio_alloc_node(flags, order, node);
> 
> folio_alloc_node()
> ->__folio_alloc_node()
> ->__folio_alloc()
> ->page_rmappable_folio()
> ->folio_prep_large_rmappable()
> 
> I don't think calling this is intentional?

I've been thinking about this, and obviously I got bitten by two of
the meanings of folio (both "not a tail page" and "mmapable memory").
And that leads me to thinking about how this will look when we allocate
memdescs separately from pages.

I don't think we should try to keep memcg_data at the same offset in
struct slab and struct folio (once we're out of our current transitional
period).  So we need to stop casting from folio to slab and vice versa.
Which means that slab can't call __lruvec_stat_mod_folio().

I'll try and get something together to support this, both for the
current layout and once memdescs are separated from struct page.
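
To spell out the constraint: struct slab and struct folio both overlay struct page today, and the cast-based stat helpers are only safe while memcg_data sits at the same offset in each. As an illustrative sketch (not the kernel's actual assertions), the assumption amounts to something like:

	/* illustrative only: casting between folio and slab relies on both
	 * overlaying struct page with memcg_data at the same offset */
	static_assert(offsetof(struct slab, memcg_data) ==
		      offsetof(struct folio, memcg_data));

Once memdescs are allocated separately from struct page, that layout guarantee goes away, which is why slab would need its own accounting entry point rather than calling __lruvec_stat_mod_folio().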
Andrew Morton Dec. 27, 2023, 10:01 p.m. UTC | #4
On Fri, 22 Dec 2023 20:28:05 +0000 "Matthew Wilcox (Oracle)" <willy@infradead.org> wrote:

> Add folio_alloc_node() to replace alloc_pages_node() and then use
> folio APIs throughout instead of converting back to pages.
> 
> Signed-off-by: Matthew Wilcox (Oracle) <willy@infradead.org>
> ---
>  include/linux/gfp.h |  9 +++++++++
>  mm/slub.c           | 15 +++++++--------

This depends on changes which are in Vlastimil's tree and linux-next. 
So I reworked it to not do that, which means there will be a resolution
for Linus to do, which Stephen will tell us about.  It's simple, just
from code motion.

Maybe mm.git should include the slab tree; I haven't really considered
what the implications of that would be.


 include/linux/gfp.h |    9 +++++++++
 mm/slab_common.c    |   15 +++++++--------
 2 files changed, 16 insertions(+), 8 deletions(-)

--- a/include/linux/gfp.h~slab-convert-__kmalloc_large_node-and-free_large_kmalloc-to-use-folios
+++ a/include/linux/gfp.h
@@ -247,6 +247,15 @@ struct folio *__folio_alloc_node(gfp_t g
 	return __folio_alloc(gfp, order, nid, NULL);
 }
 
+static inline
+struct folio *folio_alloc_node(gfp_t gfp, unsigned int order, int nid)
+{
+	if (nid == NUMA_NO_NODE)
+		nid = numa_mem_id();
+
+	return __folio_alloc_node(gfp, order, nid);
+}
+
 /*
  * Allocate pages, preferring the node given as nid. When nid == NUMA_NO_NODE,
  * prefer the current CPU's closest node. Otherwise node must be valid and
--- a/mm/slab_common.c~slab-convert-__kmalloc_large_node-and-free_large_kmalloc-to-use-folios
+++ a/mm/slab_common.c
@@ -979,9 +979,9 @@ void free_large_kmalloc(struct folio *fo
 	kasan_kfree_large(object);
 	kmsan_kfree_large(object);
 
-	mod_lruvec_page_state(folio_page(folio, 0), NR_SLAB_UNRECLAIMABLE_B,
+	lruvec_stat_mod_folio(folio, NR_SLAB_UNRECLAIMABLE_B,
 			      -(PAGE_SIZE << order));
-	__free_pages(folio_page(folio, 0), order);
+	folio_put(folio);
 }
 
 static void *__kmalloc_large_node(size_t size, gfp_t flags, int node);
@@ -1137,18 +1137,17 @@ gfp_t kmalloc_fix_flags(gfp_t flags)
 
 static void *__kmalloc_large_node(size_t size, gfp_t flags, int node)
 {
-	struct page *page;
+	struct folio *folio;
 	void *ptr = NULL;
 	unsigned int order = get_order(size);
 
 	if (unlikely(flags & GFP_SLAB_BUG_MASK))
 		flags = kmalloc_fix_flags(flags);
 
-	flags |= __GFP_COMP;
-	page = alloc_pages_node(node, flags, order);
-	if (page) {
-		ptr = page_address(page);
-		mod_lruvec_page_state(page, NR_SLAB_UNRECLAIMABLE_B,
+	folio = folio_alloc_node(flags, order, node);
+	if (folio) {
+		ptr = folio_address(folio);
+		lruvec_stat_mod_folio(folio, NR_SLAB_UNRECLAIMABLE_B,
 				      PAGE_SIZE << order);
 	}
Hyeonggon Yoo Dec. 28, 2023, 4:15 a.m. UTC | #5
On Thu, Dec 28, 2023 at 8:04 AM Andrew Morton <akpm@linux-foundation.org> wrote:
>
> On Fri, 22 Dec 2023 20:28:05 +0000 "Matthew Wilcox (Oracle)" <willy@infradead.org> wrote:
>
> > Add folio_alloc_node() to replace alloc_pages_node() and then use
> > folio APIs throughout instead of converting back to pages.
> >
> > Signed-off-by: Matthew Wilcox (Oracle) <willy@infradead.org>
> > ---
> >  include/linux/gfp.h |  9 +++++++++
> >  mm/slub.c           | 15 +++++++--------
>
> This depends on changes which are in Vlastimil's tree and linux-next.
> So I reworked it to not do that, which means there will be a resolution
> for Linus to do, which Stephen will tell us about.  It's simple, just
> from code motion.

I think you're missing that Matthew withdrew this patch?
Vlastimil Babka Jan. 2, 2024, 3:58 p.m. UTC | #6
On 12/27/23 23:01, Andrew Morton wrote:
> On Fri, 22 Dec 2023 20:28:05 +0000 "Matthew Wilcox (Oracle)" <willy@infradead.org> wrote:
> 
>> Add folio_alloc_node() to replace alloc_pages_node() and then use
>> folio APIs throughout instead of converting back to pages.
>> 
>> Signed-off-by: Matthew Wilcox (Oracle) <willy@infradead.org>
>> ---
>>  include/linux/gfp.h |  9 +++++++++
>>  mm/slub.c           | 15 +++++++--------
> 
> This depends on changes which are in Vlastimil's tree and linux-next. 
> So I reworked it to not do that, which means there will be a resolution
> for Linus to do, which Stephen will tell us about.  It's simple, just
> from code motion.

Basing the series on a specific tree (mm in this case) rather than the whole of
linux-next would be the way to go. But Matthew also said in v2 that he didn't
expect the series to be picked up for 6.8 at this point, so it was fair to use
linux-next for review, and a final posting for 6.9 could simply have 6.8-rc1 as a base.

> Maybe mm.git should include the slab tree, I haven't really considered
> what would be the implications of that.

I think this cycle is exceptional in that the SLAB removal is unusually large;
normally there should be few if any conflicts. We can revisit if this
becomes more common?

Patch

diff --git a/include/linux/gfp.h b/include/linux/gfp.h
index de292a007138..d56c1d7b5c5a 100644
--- a/include/linux/gfp.h
+++ b/include/linux/gfp.h
@@ -247,6 +247,15 @@  struct folio *__folio_alloc_node(gfp_t gfp, unsigned int order, int nid)
 	return __folio_alloc(gfp, order, nid, NULL);
 }
 
+static inline
+struct folio *folio_alloc_node(gfp_t gfp, unsigned int order, int nid)
+{
+	if (nid == NUMA_NO_NODE)
+		nid = numa_mem_id();
+
+	return __folio_alloc_node(gfp, order, nid);
+}
+
 /*
  * Allocate pages, preferring the node given as nid. When nid == NUMA_NO_NODE,
  * prefer the current CPU's closest node. Otherwise node must be valid and
diff --git a/mm/slub.c b/mm/slub.c
index 35aa706dc318..261f01915d9b 100644
--- a/mm/slub.c
+++ b/mm/slub.c
@@ -3919,18 +3919,17 @@  EXPORT_SYMBOL(kmem_cache_alloc_node);
  */
 static void *__kmalloc_large_node(size_t size, gfp_t flags, int node)
 {
-	struct page *page;
+	struct folio *folio;
 	void *ptr = NULL;
 	unsigned int order = get_order(size);
 
 	if (unlikely(flags & GFP_SLAB_BUG_MASK))
 		flags = kmalloc_fix_flags(flags);
 
-	flags |= __GFP_COMP;
-	page = alloc_pages_node(node, flags, order);
-	if (page) {
-		ptr = page_address(page);
-		mod_lruvec_page_state(page, NR_SLAB_UNRECLAIMABLE_B,
+	folio = folio_alloc_node(flags, order, node);
+	if (folio) {
+		ptr = folio_address(folio);
+		lruvec_stat_mod_folio(folio, NR_SLAB_UNRECLAIMABLE_B,
 				      PAGE_SIZE << order);
 	}
 
@@ -4379,9 +4378,9 @@  static void free_large_kmalloc(struct folio *folio, void *object)
 	kasan_kfree_large(object);
 	kmsan_kfree_large(object);
 
-	mod_lruvec_page_state(folio_page(folio, 0), NR_SLAB_UNRECLAIMABLE_B,
+	lruvec_stat_mod_folio(folio, NR_SLAB_UNRECLAIMABLE_B,
 			      -(PAGE_SIZE << order));
-	__free_pages(folio_page(folio, 0), order);
+	folio_put(folio);
 }
 
 /**