diff mbox series

mm/compaction: add check mechanism to avoid cma alloc fail

Message ID 20240122022317.30091-1-Haiqiang.Gong@mediatek.com (mailing list archive)
State New, archived
Headers show
Series mm/compaction: add check mechanism to avoid cma alloc fail | expand

Commit Message

Haiqiang Gong (龚海强) Jan. 22, 2024, 2:23 a.m. UTC
CMA allocation may fail when running CMA alloc/free tests on kernel 5.10/5.15.

We found that the memory cannot be migrated because it was allocated by
the filesystem, as shown in the following backtrace:
__alloc_pages_nodemask
pagecache_get_page
grow_dev_page
__getblk_gfp
ext4_sb_breadahead_unmovable
__ext4_get_inode_loc
__ext4_iget
ext4_lookup
__lookup_slow
walk_component
path_lookupat
filename_lookup
vfs_statx
This kind of unmovable memory is not placed in the CMA buffer by the kernel
memory allocator, but is migrated into it by kcompactd during kernel migration.
As a result, the memory cannot be migrated when a CMA allocation is attempted.

Add a check mechanism in compaction_alloc(), where kcompactd allocates
memory. It returns NULL and gives up this memory migration if the
allocated memory is in the CMA buffer and the memory is unmovable.

Signed-off-by: Haiqiang Gong <Haiqiang.Gong@mediatek.com>
---
 mm/compaction.c | 38 ++++++++++++++++++++++++++++++++++++++
 1 file changed, 38 insertions(+)

Comments

Matthew Wilcox Jan. 22, 2024, 3:31 a.m. UTC | #1
On Mon, Jan 22, 2024 at 10:23:17AM +0800, Haiqiang Gong wrote:
> cma alloc may fail when we doing cma alloc/free test on kernel 5.10/5.15.
> 
> We found that the next memory cannot be migrated because of the alloc of
> fs as next backtrace:
> __alloc_pages_nodemask
> pagecache_get_page
> grow_dev_page
> __getblk_gfp
> ext4_sb_breadahead_unmovable
> __ext4_get_inode_loc
> __ext4_iget
> ext4_lookup
> __lookup_slow
> walk_component
> path_lookupat
> filename_lookup
> vfs_statx
> This kind of unmovable memory is not placed in the cma buffer when kernel
> memory alloc but is migrated in by kcompactd when the kernel migration.
> It will cause memory can't be migrate when cma alloc.

I don't understand.  You say that the memory isn't movable, but then you
say that it's migrated in.  So it was movable, but it's no longer
movable after being moved once?  

> Add check mechanism in the compaction_alloc() where kcompaced alloc for
> memory. Will return NULL and give up this memory migration if the
> allocated memory is in the cma buffer and the memory is unmovable.

> +static bool forbid_move_to_cma_range(struct folio *src, struct folio *dst)
> +{
> +	if (folio_mapping(src) && is_in_cma_range(dst))
> +		return true;
> +
> +	return false;
> +}

Why would folio_mapping() be the right way to determine if memory is
unmovable?  The vast majority of filesystem data is movable.
Baolin Wang Jan. 22, 2024, 6:59 a.m. UTC | #2
On 1/22/2024 10:23 AM, Haiqiang Gong wrote:
> cma alloc may fail when we doing cma alloc/free test on kernel 5.10/5.15.

Do you have a real use case for the cma alloc issue? And have you tried 
it on the new kernel?

> We found that the next memory cannot be migrated because of the alloc of
> fs as next backtrace:
> __alloc_pages_nodemask
> pagecache_get_page
> grow_dev_page
> __getblk_gfp
> ext4_sb_breadahead_unmovable
> __ext4_get_inode_loc
> __ext4_iget
> ext4_lookup
> __lookup_slow
> walk_component
> path_lookupat
> filename_lookup
> vfs_statx
> This kind of unmovable memory is not placed in the cma buffer when kernel
> memory alloc but is migrated in by kcompactd when the kernel migration.
> It will cause memory can't be migrate when cma alloc.

Why the CMA memory can not be migrated? Could you describe in more 
detail the reasons for the CMA memory migration failure?

> Add check mechanism in the compaction_alloc() where kcompaced alloc for
> memory. Will return NULL and give up this memory migration if the
> allocated memory is in the cma buffer and the memory is unmovable.
> 
> Signed-off-by: Haiqiang Gong <Haiqiang.Gong@mediatek.com>
> ---
>   mm/compaction.c | 38 ++++++++++++++++++++++++++++++++++++++
>   1 file changed, 38 insertions(+)
> 
> diff --git a/mm/compaction.c b/mm/compaction.c
> index 27ada42924d5..29c0661adc22 100644
> --- a/mm/compaction.c
> +++ b/mm/compaction.c
> @@ -25,6 +25,11 @@
>   #include <linux/psi.h>
>   #include "internal.h"
>   
> +#ifdef CONFIG_CMA
> +#include <linux/cma.h>
> +#include "cma.h"
> +#endif
> +
>   #ifdef CONFIG_COMPACTION
>   /*
>    * Fragmentation score check interval for proactive compaction purposes.
> @@ -1758,6 +1763,33 @@ static void isolate_freepages(struct compact_control *cc)
>   	split_map_pages(freelist);
>   }
>   
> +#ifdef CONFIG_CMA
> +static bool is_in_cma_range(struct folio *folio)
> +{
> +	int i;
> +	unsigned long pfn = 0;
> +	struct page *page = folio_page(folio, 0);
> +
> +	pfn = page_to_pfn(page);
> +	for (i = 0; i < cma_area_count; i++) {
> +		struct cma *cma = &cma_areas[i];
> +
> +		if (cma->base_pfn <= pfn && (cma->base_pfn + cma->count) > pfn)
> +			return true;
> +	}
> +
> +	return false;
> +}
> +
> +static bool forbid_move_to_cma_range(struct folio *src, struct folio *dst)
> +{
> +	if (folio_mapping(src) && is_in_cma_range(dst))
> +		return true;
> +
> +	return false;
> +}
> +#endif
> +
>   /*
>    * This is a migrate-callback that "allocates" freepages by taking pages
>    * from the isolated freelists in the block we are migrating to.
> @@ -1775,6 +1807,12 @@ static struct folio *compaction_alloc(struct folio *src, unsigned long data)
>   	}
>   
>   	dst = list_entry(cc->freepages.next, struct folio, lru);
> +#ifdef CONFIG_CMA
> +	if (forbid_move_to_cma_range(src, dst)) {
> +		pr_notice("kcompactd: could not move non-cma memory to cma buffer\n");
> +		return NULL;
> +	}
> +#endif
>   	list_del(&dst->lru);
>   	cc->nr_freepages--;
>
Matthew Wilcox Jan. 24, 2024, 6:40 p.m. UTC | #3
On Wed, Jan 24, 2024 at 07:20:53AM +0000, Haiqiang Gong (龚海强) wrote:
> > I don't understand.  You say that the memory isn't movable, but then you 
> > say that it's migrated in.  So it was movable, but it's no longer
> > movable after being moved once?  
> Sorry for not expressing clearly
> When doing memory migration, the kernel will determine whether the current 
> page can be moved based on the refcount and mapcount of the current page.
> This memory can be moved during kernel compaction. At this time, refcount 
> is less than or equal to mapcount.
> After this memory is kcompacted and placed in the cma buffer, under 
> certain special conditions, the refcount may be greater than the mapcount
> (ex:the current page is being used by fs), and then migrate will fail.

But that's always true.  Any page that is currently in use might have
its refcount temporarily incremented.  There's nothing special about
pages that belong to a file.  You've basically just prevented all
filesystem memory from being migrated to the CMA area, and that's wrong.

What's special about this page?  Or were you just unlucky?
Matthew Wilcox Jan. 27, 2024, 4:23 p.m. UTC | #4
On Sat, Jan 27, 2024 at 11:03:48AM +0000, Haiqiang Gong (龚海强) wrote:
> On Wed, 2024-01-24 at 18:40 +0000, Matthew Wilcox wrote:
> >  On Wed, Jan 24, 2024 at 07:20:53AM +0000, Haiqiang Gong (龚海强) wrote:
> > > > I don't understand.  You say that the memory isn't movable, but
> > then you 
> > > > say that it's migrated in.  So it was movable, but it's no longer
> > > > movable after being moved once?  
> > > Sorry for not expressing clearly
> > > When doing memory migration, the kernel will determine whether the
> > current 
> > > page can be moved based on the refcount and mapcount of the current
> > page.
> > > This memory can be moved during kernel compaction. At this time,
> > refcount 
> > > is less than or equal to mapcount.
> > > After this memory is kcompacted and placed in the cma buffer,
> > under 
> > > certain special conditions, the refcount may be greater than the
> > mapcount
> > > (ex:the current page is being used by fs), and then migrate will
> > fail.
> > 
> > But that's always true.  Any page that is currently in use might have
> > its refcount temporarily incremented.  There's nothing special about
> > pages that belong to a file.  You've basically just prevented all
> > filesystem memory from being migrated to the CMA area, and that's
> > wrong.
> > 
> Yes, we agree with you that refcount may temporarily incremented.
> Issues we have reproduced:
> The current page is migrated to the cma area by kcompactd, rather than
> allocated by kernel memory allocater.
> Our opinion is that if a page cannot be allocated to the CMA area, then
> we should not put it in the CMA area when doing kernel migration. This
> seems more reasonable. Do you agree with this view or do you have any
> other suggestions?

That does seem reasonable.  But I don't know if it helps you at all;
is there a type of allocation which is migratable but not allocatable
from the CMA area?

> > What's special about this page?  Or were you just unlucky?
> We didn't find anything special about this page. During our debugging,
> we found that once a similar problem occurs in the current page, it can
> no longer be migrated (retrying after an hour will not work).

Perhaps you can find out more information about this particular page; who
allocated it, why was it migratable initially but not the second time?
Perhaps something happens to this page to keep the refcount high, and
if we can find out that will happen, we can migrate it out of the CMA
area before incrementing the refcount.
diff mbox series

Patch

diff --git a/mm/compaction.c b/mm/compaction.c
index 27ada42924d5..29c0661adc22 100644
--- a/mm/compaction.c
+++ b/mm/compaction.c
@@ -25,6 +25,11 @@ 
 #include <linux/psi.h>
 #include "internal.h"
 
+#ifdef CONFIG_CMA
+#include <linux/cma.h>
+#include "cma.h"
+#endif
+
 #ifdef CONFIG_COMPACTION
 /*
  * Fragmentation score check interval for proactive compaction purposes.
@@ -1758,6 +1763,33 @@  static void isolate_freepages(struct compact_control *cc)
 	split_map_pages(freelist);
 }
 
+#ifdef CONFIG_CMA
+static bool is_in_cma_range(struct folio *folio)
+{
+	int i;
+	unsigned long pfn = 0;
+	struct page *page = folio_page(folio, 0);
+
+	pfn = page_to_pfn(page);
+	for (i = 0; i < cma_area_count; i++) {
+		struct cma *cma = &cma_areas[i];
+
+		if (cma->base_pfn <= pfn && (cma->base_pfn + cma->count) > pfn)
+			return true;
+	}
+
+	return false;
+}
+
+static bool forbid_move_to_cma_range(struct folio *src, struct folio *dst)
+{
+	if (folio_mapping(src) && is_in_cma_range(dst))
+		return true;
+
+	return false;
+}
+#endif
+
 /*
  * This is a migrate-callback that "allocates" freepages by taking pages
  * from the isolated freelists in the block we are migrating to.
@@ -1775,6 +1807,12 @@  static struct folio *compaction_alloc(struct folio *src, unsigned long data)
 	}
 
 	dst = list_entry(cc->freepages.next, struct folio, lru);
+#ifdef CONFIG_CMA
+	if (forbid_move_to_cma_range(src, dst)) {
+		pr_notice("kcompactd: could not move non-cma memory to cma buffer\n");
+		return NULL;
+	}
+#endif
 	list_del(&dst->lru);
 	cc->nr_freepages--;