diff mbox series

[RFC,1/4] mm/memory_hotplug: Add interface for runtime (de)configuration of memory

Message ID 20241202082732.3959803-2-sumanthk@linux.ibm.com (mailing list archive)
State New
Headers show
Series Support dynamic (de)configuration of memory | expand

Commit Message

Sumanth Korikkar Dec. 2, 2024, 8:27 a.m. UTC
Provide a new interface for dynamic configuration and deconfiguration of
hotplug memory, allowing for mixed altmap and non-altmap support.  It is
a follow-up on the discussion with David:

https://lore.kernel.org/all/ee492da8-74b4-4a97-8b24-73e07257f01d@redhat.com/

As mentioned in the discussion, advantages of the new interface are:

* Users can dynamically specify which memory ranges should have altmap
  support, rather than having it statically enabled or disabled for all
  hot-plugged memory.

* In the long term, users could specify a memory range, including
  multiple blocks, and whether they want altmap support for that range.
  This could allow for altmap block grouping, or even variable-sized
  blocks, in the future, i.e. "grouping" memory blocks that share the
  same altmap located on the first memory blocks in the group, which
  reduces fragmentation due to altmap.

To leverage these advantages:
Create a sysfs interface /sys/bus/memory/devices/configure_memory, which
performs runtime (de)configuration of memory with altmap or non-altmap
support. The interface validates the memory ranges against architecture
specific memory configuration and performs add_memory()/remove_memory().
Dynamic (de)configuration of memory is made configurable via config
CONFIG_RUNTIME_MEMORY_CONFIGURATION.

Usage format for the new interface:
echo config_mode,memoryrange,altmap_mode >
/sys/bus/memory/devices/configure_memory

E.g. to configure a range with altmap:
echo 1,0x200000000-0x20fffffff,1 > /sys/bus/memory/devices/configure_memory

This interface could not only help to make s390 more flexible and
similar to others (wrt adding hotplug memory in advance). It might also
be possible to provide the dynamically configured altmap support for
others. E.g.  instead of directly doing an add_memory() in the ACPI
handler, with the static altmap setting, one could instead defer that to
the new interface which allows dynamic altmap configuration.

Reviewed-by: Gerald Schaefer <gerald.schaefer@linux.ibm.com>
Signed-off-by: Sumanth Korikkar <sumanthk@linux.ibm.com>
---
 drivers/base/memory.c  | 124 +++++++++++++++++++++++++++++++++++++++++
 include/linux/memory.h |   6 ++
 mm/Kconfig             |  16 ++++++
 3 files changed, 146 insertions(+)
diff mbox series

Patch

diff --git a/drivers/base/memory.c b/drivers/base/memory.c
index 67858eeb92ed..f024444b3301 100644
--- a/drivers/base/memory.c
+++ b/drivers/base/memory.c
@@ -631,6 +631,127 @@  int __weak arch_get_memory_phys_device(unsigned long start_pfn)
 	return 0;
 }
 
+#ifdef CONFIG_RUNTIME_MEMORY_CONFIGURATION
+enum {
+	REMOVE_MEMORY = 0,	/* config_mode 0: remove the memory range */
+	ADD_MEMORY,		/* config_mode 1: add the memory range */
+	MAX_CONFIGURE_MODE	/* first invalid config_mode value */
+};
+
+enum {
+	NOALTMAP = 0,		/* altmap_mode 0: no altmap support */
+	ALTMAP,			/* altmap_mode 1: altmap support */
+	MAX_ALTMAP_MODE		/* first invalid altmap_mode value */
+};
+
+/*
+ * Return true when the memory range [start, end] is valid.
+ *
+ * This weak default returns false for every range. Architecture specific code
+ * can override it and validate the range against its possible configurations.
+ */
+bool __weak arch_validate_memory_range(unsigned long long start,
+				       unsigned long long end)
+{
+	return false;
+}
+
+/*
+ * Format:
+ * echo config_mode,memoryrange,altmap_mode >
+ * /sys/bus/memory/devices/configure_memory
+ *
+ * config_mode:
+ *	value: 1 - add_memory, 0 - remove_memory
+ *
+ * range:
+ * 0x<start address>-0x<end address>
+ * Where start address is aligned to memory block size and end address
+ * represents last byte in the range.
+ * example: 0x200000000-0x20fffffff
+ *
+ * altmap_mode:
+ *	value: 1 - altmap support, 0 - no altmap support
+ *
+ * Returns count on success or a negative errno. The range is processed one
+ * memory block at a time and stops at the first failure without rollback.
+ */
+static ssize_t configure_memory_store(struct device *dev,
+				      struct device_attribute *attr,
+				      const char *buf, size_t count)
+{
+	u64 start, end, addr, block_size;
+	u32 config_mode, altmap_mode;
+	mhp_t mhp_flags = MHP_NONE;
+	struct memory_block *mem;
+	int nid, ret = -EINVAL;
+	bool offline;
+
+	/* %llx stores through unsigned long long pointers, hence u64 here */
+	if (sscanf(buf, "%u,0x%llx-0x%llx,%u", &config_mode, &start, &end,
+		   &altmap_mode) != 4)
+		goto out;
+
+	if (config_mode >= MAX_CONFIGURE_MODE || altmap_mode >= MAX_ALTMAP_MODE)
+		goto out;
+
+	if (altmap_mode == ALTMAP)
+		mhp_flags = MHP_MEMMAP_ON_MEMORY | MHP_OFFLINE_INACCESSIBLE;
+
+	/* Reject empty or reversed ranges and an end for which end + 1 wraps */
+	if (start >= end || end + 1 == 0)
+		goto out;
+
+	block_size = memory_block_size_bytes();
+	if (!IS_ALIGNED(start, block_size) || !IS_ALIGNED(end + 1, block_size))
+		goto out;
+
+	if (!arch_validate_memory_range(start, end))
+		goto out;
+
+	ret = lock_device_hotplug_sysfs();
+	if (ret)
+		goto out;
+
+	for (addr = start; addr <= end; addr += block_size) {
+		mem = find_memory_block(pfn_to_section_nr(PFN_DOWN(addr)));
+		if (config_mode == ADD_MEMORY) {
+			if (mem) {
+				pr_info("Memory already configured - (start:0x%llx)\n", addr);
+				put_device(&mem->dev);
+				ret = -EEXIST;
+				break;
+			}
+			nid = memory_add_physaddr_to_nid(addr);
+			ret = __add_memory(nid, addr, block_size, mhp_flags);
+			if (ret) {
+				pr_info("Memory addition failed - (start:0x%llx)\n", addr);
+				break;
+			}
+			continue;
+		}
+		if (!mem) {
+			pr_info("Memory not configured - (start:0x%llx)\n", addr);
+			ret = -EINVAL;
+			break;
+		}
+		offline = mem->state == MEM_OFFLINE;
+		/* drop the ref just got via find_memory_block() */
+		put_device(&mem->dev);
+		if (!offline) {
+			pr_info("Memory removal failed - (start:0x%llx) not offline\n", addr);
+			ret = -EBUSY;
+			break;
+		}
+		__remove_memory(addr, block_size);
+	}
+	unlock_device_hotplug();
+out:
+	return ret ? ret : count;
+}
+static DEVICE_ATTR_WO(configure_memory);
+#endif /* CONFIG_RUNTIME_MEMORY_CONFIGURATION */
+
 /*
  * A reference for the returned memory block device is acquired.
  *
@@ -941,6 +1062,9 @@  static struct attribute *memory_root_attrs[] = {
 	&dev_attr_auto_online_blocks.attr,
 #ifdef CONFIG_CRASH_HOTPLUG
 	&dev_attr_crash_hotplug.attr,
+#endif
+#ifdef CONFIG_RUNTIME_MEMORY_CONFIGURATION
+	&dev_attr_configure_memory.attr,
 #endif
 	NULL
 };
diff --git a/include/linux/memory.h b/include/linux/memory.h
index c0afee5d126e..88b2b374bc44 100644
--- a/include/linux/memory.h
+++ b/include/linux/memory.h
@@ -85,6 +85,12 @@  struct memory_block {
 #endif
 };
 
+#ifdef CONFIG_RUNTIME_MEMORY_CONFIGURATION
+bool arch_validate_memory_range(unsigned long long start, unsigned long long end);
+ssize_t arch_get_memory_max_configurable(void); /* NOTE(review): not defined in this patch? */
+int memory_create_sysfs_max_configurable(void); /* NOTE(review): not defined in this patch? */
+#endif /* CONFIG_RUNTIME_MEMORY_CONFIGURATION */
+
 int arch_get_memory_phys_device(unsigned long start_pfn);
 unsigned long memory_block_size_bytes(void);
 int set_memory_block_size_order(unsigned int order);
diff --git a/mm/Kconfig b/mm/Kconfig
index 84000b016808..2aec2fc3fb25 100644
--- a/mm/Kconfig
+++ b/mm/Kconfig
@@ -576,6 +576,22 @@  config MHP_MEMMAP_ON_MEMORY
 	depends on MEMORY_HOTPLUG && SPARSEMEM_VMEMMAP
 	depends on ARCH_MHP_MEMMAP_ON_MEMORY_ENABLE
 
+config RUNTIME_MEMORY_CONFIGURATION
+	bool "Dynamic configuration and deconfiguration of memory"
+	def_bool n	# NOTE(review): looks redundant with the "bool" prompt above - confirm
+	depends on MEMORY_HOTPLUG && SPARSEMEM_VMEMMAP
+	help
+	  This option provides support to perform dynamic configuration and
+	  deconfiguration of memory with altmap or non-altmap support
+	  (/sys/bus/memory/devices/configure_memory). The interface validates
+	  the configuration and deconfiguration of memory ranges against
+	  architecture specific configuration and performs add_memory() with
+	  altmap or non-altmap support and remove_memory() respectively.
+
+	  Say Y here if the architecture supports validating dynamically
+	  (de)configured memory against architecture specific memory
+	  configurations.
+
 endif # MEMORY_HOTPLUG
 
 config ARCH_MHP_MEMMAP_ON_MEMORY_ENABLE