diff mbox series

[v5,3/5] mm,memory_hotplug: Add kernel boot option to enable memmap_on_memory

Message ID 20210319092635.6214-4-osalvador@suse.de (mailing list archive)
State New, archived
Headers show
Series Allocate memmap from hotadded memory (per device) | expand

Commit Message

Oscar Salvador March 19, 2021, 9:26 a.m. UTC
Self-stored memmap leads to a sparse memory situation which is unsuitable
for workloads that require large contiguous memory chunks, so make this
an opt-in which needs to be explicitly enabled.

To control this, let memory_hotplug have its own module-parameter namespace,
as suggested by David, so we can add the memory_hotplug.memmap_on_memory
parameter.

Signed-off-by: Oscar Salvador <osalvador@suse.de>
Reviewed-by: David Hildenbrand <david@redhat.com>
---
 Documentation/admin-guide/kernel-parameters.txt | 16 ++++++++++++++++
 mm/Makefile                                     |  5 ++++-
 mm/memory_hotplug.c                             | 10 +++++++++-
 3 files changed, 29 insertions(+), 2 deletions(-)

Comments

Michal Hocko March 23, 2021, 10:47 a.m. UTC | #1
On Fri 19-03-21 10:26:33, Oscar Salvador wrote:
> Self stored memmap leads to a sparse memory situation which is unsuitable
> for workloads that requires large contiguous memory chunks, so make this
> an opt-in which needs to be explicitly enabled.
> 
> To control this, let memory_hotplug have its own memory space, as suggested
> by David, so we can add memory_hotplug.memmap_on_memory parameter.
> 
> Signed-off-by: Oscar Salvador <osalvador@suse.de>
> Reviewed-by: David Hildenbrand <david@redhat.com>

Acked-by: Michal Hocko <mhocko@suse.com>

I would just rephrase the help text to be less low-level:
> ---
>  Documentation/admin-guide/kernel-parameters.txt | 16 ++++++++++++++++
>  mm/Makefile                                     |  5 ++++-
>  mm/memory_hotplug.c                             | 10 +++++++++-
>  3 files changed, 29 insertions(+), 2 deletions(-)
> 
> diff --git a/Documentation/admin-guide/kernel-parameters.txt b/Documentation/admin-guide/kernel-parameters.txt
> index 04545725f187..d29b96e50c5c 100644
> --- a/Documentation/admin-guide/kernel-parameters.txt
> +++ b/Documentation/admin-guide/kernel-parameters.txt
> @@ -2794,6 +2794,22 @@
>  			seconds.  Use this parameter to check at some
>  			other rate.  0 disables periodic checking.
>  
> +	memory_hotplug.memmap_on_memory
> +			[KNL,X86,ARM] Boolean flag to enable this feature.
> +			Format: {on | off (default)}
> +			When enabled, memory to build the pages tables for the
> +			memmap array describing the hot-added range will be taken
> +			from the range itself, so the memmap page tables will be
> +			self-hosted.
> +			Since only single memory device ranges are supported at
> +			the moment, this option is disabled by default because
> +			it might have an impact on workloads that needs large
> +			contiguous memory chunks.
> +			The state of the flag can be read in
> +			/sys/module/memory_hotplug/parameters/memmap_on_memory.
> +			Note that even when enabled, there are a few cases where
> +			the feature is not effective.
> +

			When enabled, runtime hotplugged memory will
			allocate its internal metadata (struct pages)
			from the hotadded memory itself, which allows
			hot-adding a lot of memory without requiring
			additional memory to do so.
			This feature is disabled by default because it
			has some implications for large (e.g. GB)
			allocations in some configurations (e.g. small
			memory blocks).

>  	memtest=	[KNL,X86,ARM,PPC] Enable memtest
>  			Format: <integer>
>  			default : 0 <disable>
> diff --git a/mm/Makefile b/mm/Makefile
> index 72227b24a616..82ae9482f5e3 100644
> --- a/mm/Makefile
> +++ b/mm/Makefile
> @@ -58,9 +58,13 @@ obj-y			:= filemap.o mempool.o oom_kill.o fadvise.o \
>  page-alloc-y := page_alloc.o
>  page-alloc-$(CONFIG_SHUFFLE_PAGE_ALLOCATOR) += shuffle.o
>  
> +# Give 'memory_hotplug' its own module-parameter namespace
> +memory-hotplug-$(CONFIG_MEMORY_HOTPLUG) += memory_hotplug.o
> +
>  obj-y += page-alloc.o
>  obj-y += init-mm.o
>  obj-y += memblock.o
> +obj-y += $(memory-hotplug-y)
>  
>  ifdef CONFIG_MMU
>  	obj-$(CONFIG_ADVISE_SYSCALLS)	+= madvise.o
> @@ -83,7 +87,6 @@ obj-$(CONFIG_SLUB) += slub.o
>  obj-$(CONFIG_KASAN)	+= kasan/
>  obj-$(CONFIG_KFENCE) += kfence/
>  obj-$(CONFIG_FAILSLAB) += failslab.o
> -obj-$(CONFIG_MEMORY_HOTPLUG) += memory_hotplug.o
>  obj-$(CONFIG_MEMTEST)		+= memtest.o
>  obj-$(CONFIG_MIGRATION) += migrate.o
>  obj-$(CONFIG_TRANSPARENT_HUGEPAGE) += huge_memory.o khugepaged.o
> diff --git a/mm/memory_hotplug.c b/mm/memory_hotplug.c
> index 350cde69a97d..c525e5dab859 100644
> --- a/mm/memory_hotplug.c
> +++ b/mm/memory_hotplug.c
> @@ -42,7 +42,15 @@
>  #include "internal.h"
>  #include "shuffle.h"
>  
> -static bool memmap_on_memory;

The memmap_on_memory variable can be dropped from the 1st patch IIUC and
only be introduced now.

> +
> +/*
> + * memory_hotplug.memmap_on_memory parameter
> + */
> +static bool memmap_on_memory __ro_after_init;
> +#ifdef CONFIG_MHP_MEMMAP_ON_MEMORY
> +module_param(memmap_on_memory, bool, 0444);
> +MODULE_PARM_DESC(memmap_on_memory, "Enable memmap on memory for memory hotplug");
> +#endif

I am not very familiar with the machinery. Does this expose the
state to userspace?
Oscar Salvador March 24, 2021, 8:45 a.m. UTC | #2
On Tue, Mar 23, 2021 at 11:47:53AM +0100, Michal Hocko wrote:
> On Fri 19-03-21 10:26:33, Oscar Salvador wrote:
> > Self stored memmap leads to a sparse memory situation which is unsuitable
> > for workloads that requires large contiguous memory chunks, so make this
> > an opt-in which needs to be explicitly enabled.
> > 
> > To control this, let memory_hotplug have its own memory space, as suggested
> > by David, so we can add memory_hotplug.memmap_on_memory parameter.
> > 
> > Signed-off-by: Oscar Salvador <osalvador@suse.de>
> > Reviewed-by: David Hildenbrand <david@redhat.com>
> 
> Acked-by: Michal Hocko <mhocko@suse.com>
> 
> I would just rephrased the help text to be less low level
...
> 			When enabled, runtime hotplugged memory will
> 			allocate its internal metadata (struct pages)
> 			from the hotadded memory which will allow to
> 			hotadd a lot of memory without requiring
> 			additional memory to do so.
> 			This feature is disabled by default because it
> 			has some implication on large (e.g. GB)
> 			allocations in some configurations (e.g. small
> 			memory blocks).

Ok, this sounds good as well, and I guess it might suit best for what admin-guide
is about.

> The memmap_on_memory can be dropped from the 1st patch IIUC and only
> introduce it now.

It could be done, and I __think__ in some previous version it was that way, but
I am leaning towards not doing it.
In the 1st patch, memmap_on_memory is false by default, so I see it as a preparatory
step for later (this patchset) till it might be enabled.

Moreover, the big comment in mhp_support_memmap_on_memory() would have to be
changed to not mention it, and then changed again here to reflect it.

All in all, I think it can stay, but maybe place a comment in the 1st patch above
the variable saying something like "This is a noop now, it will be enabled later on"



> > +
> > +/*
> > + * memory_hotplug.memmap_on_memory parameter
> > + */
> > +static bool memmap_on_memory __ro_after_init;
> > +#ifdef CONFIG_MHP_MEMMAP_ON_MEMORY
> > +module_param(memmap_on_memory, bool, 0444);
> > +MODULE_PARM_DESC(memmap_on_memory, "Enable memmap on memory for memory hotplug");
> > +#endif
> 
> I am not very much familiar with the machinery. Does this expose the
> state to the userspace?

Kind of:

# ls /sys/module/memory_hotplug/parameters
memmap_on_memory
# cat /sys/module/memory_hotplug/parameters/memmap_on_memory 
Y

But that is not really the state; rather, it shows whether the user
opted in to the feature by passing "memory_hotplug.memmap_on_memory=yes".
It might be that the user opted in to the feature, but it cannot be used
at runtime (e.g. mhp_support_memmap_on_memory() returns false due to size !=
memory_block_size()).
Michal Hocko March 24, 2021, 9:02 a.m. UTC | #3
On Wed 24-03-21 09:45:01, Oscar Salvador wrote:
> On Tue, Mar 23, 2021 at 11:47:53AM +0100, Michal Hocko wrote:
> > On Fri 19-03-21 10:26:33, Oscar Salvador wrote:
> > > Self stored memmap leads to a sparse memory situation which is unsuitable
> > > for workloads that requires large contiguous memory chunks, so make this
> > > an opt-in which needs to be explicitly enabled.
> > > 
> > > To control this, let memory_hotplug have its own memory space, as suggested
> > > by David, so we can add memory_hotplug.memmap_on_memory parameter.
> > > 
> > > Signed-off-by: Oscar Salvador <osalvador@suse.de>
> > > Reviewed-by: David Hildenbrand <david@redhat.com>
> > 
> > Acked-by: Michal Hocko <mhocko@suse.com>
> > 
> > I would just rephrased the help text to be less low level
> ...
> > 			When enabled, runtime hotplugged memory will
> > 			allocate its internal metadata (struct pages)
> > 			from the hotadded memory which will allow to
> > 			hotadd a lot of memory without requiring
> > 			additional memory to do so.
> > 			This feature is disabled by default because it
> > 			has some implication on large (e.g. GB)
> > 			allocations in some configurations (e.g. small
> > 			memory blocks).
> 
> Ok, this sounds good as well, and I guess it might suit best for what admin-guide
> is about.
> 
> > The memmap_on_memory can be dropped from the 1st patch IIUC and only
> > introduce it now.
> 
> It could be done, and I __think__ in some previous persion it was that way, but
> I am leaning to not do it.
> In the 1st patch, memmap_on_memory is false by default, so I see it as a preparatory
> step for later (this patchset) till it might be enabled.
> 
> Moreover, the big comment from mhp_support_memmap_on_memory() should change to not
> mention it, and change here again to reflect it.
> 
> All in all, I think it can stay, but maybe place a comment in the 1st patch above
> the variable saying something like "This is a noop now, it will be enabled later on"

I will leave that up to you. This is likely not worth a larger
discussion, but it seems quite pointless to add a variable which never
changes. The resulting code might look different than you expect because
the compiler is allowed to simply drop the whole condition.
 
> > > +
> > > +/*
> > > + * memory_hotplug.memmap_on_memory parameter
> > > + */
> > > +static bool memmap_on_memory __ro_after_init;
> > > +#ifdef CONFIG_MHP_MEMMAP_ON_MEMORY
> > > +module_param(memmap_on_memory, bool, 0444);
> > > +MODULE_PARM_DESC(memmap_on_memory, "Enable memmap on memory for memory hotplug");
> > > +#endif
> > 
> > I am not very much familiar with the machinery. Does this expose the
> > state to the userspace?
> 
> Kind of:
> 
> # ls /sys/module/memory_hotplug/parameters
> memmap_on_memory
> # cat /sys/module/memory_hotplug/parameters/memmap_on_memory 
> Y
> 
> But that is not really the state, but rather it shows whether the user
> opted-in the feature by passing "memory_hotplug.memmap_on_memory=yes".
> It might be that the user opted-in the feature, but it cannot be used at
> at runtime (e.g: mhp_support_memmap_on_memory() return false due to size !=
> memory_block_size())

Thanks for the clarification.
diff mbox series

Patch

diff --git a/Documentation/admin-guide/kernel-parameters.txt b/Documentation/admin-guide/kernel-parameters.txt
index 04545725f187..d29b96e50c5c 100644
--- a/Documentation/admin-guide/kernel-parameters.txt
+++ b/Documentation/admin-guide/kernel-parameters.txt
@@ -2794,6 +2794,22 @@ 
 			seconds.  Use this parameter to check at some
 			other rate.  0 disables periodic checking.
 
+	memory_hotplug.memmap_on_memory
+			[KNL,X86,ARM] Boolean flag to enable this feature.
+			Format: {on | off (default)}
+			When enabled, memory to build the page tables for the
+			memmap array describing the hot-added range will be taken
+			from the range itself, so the memmap page tables will be
+			self-hosted.
+			Since only single memory device ranges are supported at
+			the moment, this option is disabled by default because
+			it might have an impact on workloads that need large
+			contiguous memory chunks.
+			The state of the flag can be read in
+			/sys/module/memory_hotplug/parameters/memmap_on_memory.
+			Note that even when enabled, there are a few cases where
+			the feature is not effective.
+
 	memtest=	[KNL,X86,ARM,PPC] Enable memtest
 			Format: <integer>
 			default : 0 <disable>
diff --git a/mm/Makefile b/mm/Makefile
index 72227b24a616..82ae9482f5e3 100644
--- a/mm/Makefile
+++ b/mm/Makefile
@@ -58,9 +58,13 @@  obj-y			:= filemap.o mempool.o oom_kill.o fadvise.o \
 page-alloc-y := page_alloc.o
 page-alloc-$(CONFIG_SHUFFLE_PAGE_ALLOCATOR) += shuffle.o
 
+# Give 'memory_hotplug' its own module-parameter namespace
+memory-hotplug-$(CONFIG_MEMORY_HOTPLUG) += memory_hotplug.o
+
 obj-y += page-alloc.o
 obj-y += init-mm.o
 obj-y += memblock.o
+obj-y += $(memory-hotplug-y)
 
 ifdef CONFIG_MMU
 	obj-$(CONFIG_ADVISE_SYSCALLS)	+= madvise.o
@@ -83,7 +87,6 @@  obj-$(CONFIG_SLUB) += slub.o
 obj-$(CONFIG_KASAN)	+= kasan/
 obj-$(CONFIG_KFENCE) += kfence/
 obj-$(CONFIG_FAILSLAB) += failslab.o
-obj-$(CONFIG_MEMORY_HOTPLUG) += memory_hotplug.o
 obj-$(CONFIG_MEMTEST)		+= memtest.o
 obj-$(CONFIG_MIGRATION) += migrate.o
 obj-$(CONFIG_TRANSPARENT_HUGEPAGE) += huge_memory.o khugepaged.o
diff --git a/mm/memory_hotplug.c b/mm/memory_hotplug.c
index 350cde69a97d..c525e5dab859 100644
--- a/mm/memory_hotplug.c
+++ b/mm/memory_hotplug.c
@@ -42,7 +42,15 @@ 
 #include "internal.h"
 #include "shuffle.h"
 
-static bool memmap_on_memory;
+
+/*
+ * memory_hotplug.memmap_on_memory parameter
+ */
+static bool memmap_on_memory __ro_after_init;
+#ifdef CONFIG_MHP_MEMMAP_ON_MEMORY
+module_param(memmap_on_memory, bool, 0444);
+MODULE_PARM_DESC(memmap_on_memory, "Enable memmap on memory for memory hotplug");
+#endif
 
 /*
  * online_page_callback contains pointer to current page onlining function.