
[v2,1/2] mm: memory_hotplug: enumerate all supported section flags

Message ID: 20220520025538.21144-2-songmuchun@bytedance.com
State: New
Series: make hugetlb_optimize_vmemmap compatible with memmap_on_memory

Commit Message

Muchun Song May 20, 2022, 2:55 a.m. UTC
We are almost running out of section flags: only one bit is available in
the worst case (powerpc with 256k pages).  However, there are still some
free bits (in ->section_mem_map) on other architectures (e.g. x86_64 has
10 bits available, arm64 has 8 bits available in the worst case of 64K
pages).  We have hard-coded those numbers in the code, which makes it
inconvenient to use those bits on architectures other than powerpc.  So
convert those section flags to an enumeration to make it easy to add new
section flags in the future.  Also, move SECTION_TAINT_ZONE_DEVICE into
the scope of CONFIG_ZONE_DEVICE to save a bit in the non-zone-device case.
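
For reference, the counts above correspond to PFN_SECTION_SHIFT =
SECTION_SIZE_BITS - PAGE_SHIFT on each architecture (see the comment in
mmzone.h updated below), minus the 5 flags already in use.  A minimal
sketch of that arithmetic; the SECTION_SIZE_BITS values shown are only
illustrative, implied by the counts quoted above:

/* Not part of the patch; a worked example of the available-bit counts. */
#define SPARE_LOW_BITS(section_size_bits, page_shift)	\
	((section_size_bits) - (page_shift))

/* x86_64:               27 - 12 = 15 spare bits; 5 flags in use -> 10 free */
/* arm64, 64K pages:     29 - 16 = 13 spare bits; 5 flags in use ->  8 free */
/* powerpc, 256K pages:  24 - 18 =  6 spare bits; 5 flags in use ->  1 free */
_Static_assert(SPARE_LOW_BITS(24, 18) == 6,
	       "worst case (powerpc with 256K pages) leaves only 6 low bits");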

Signed-off-by: Muchun Song <songmuchun@bytedance.com>
---
 include/linux/kconfig.h |  1 +
 include/linux/mmzone.h  | 54 +++++++++++++++++++++++++++++++++++++++++--------
 mm/memory_hotplug.c     |  6 ++++++
 3 files changed, 53 insertions(+), 8 deletions(-)

Comments

David Hildenbrand June 15, 2022, 9:35 a.m. UTC | #1
On 20.05.22 04:55, Muchun Song wrote:
> We are almost running out of section flags: only one bit is available in
> the worst case (powerpc with 256k pages).  However, there are still some
> free bits (in ->section_mem_map) on other architectures (e.g. x86_64 has
> 10 bits available, arm64 has 8 bits available in the worst case of 64K
> pages).  We have hard-coded those numbers in the code, which makes it
> inconvenient to use those bits on architectures other than powerpc.  So
> convert those section flags to an enumeration to make it easy to add new
> section flags in the future.  Also, move SECTION_TAINT_ZONE_DEVICE into
> the scope of CONFIG_ZONE_DEVICE to save a bit in the non-zone-device case.
> 
> Signed-off-by: Muchun Song <songmuchun@bytedance.com>

Sorry for the late reply. This looks overly complicated to me.

IOW, staring at that patch I don't quite like what I am seeing.


Something like the following is *a lot* easier to read than some
MAPPER macro magic. What speaks against it?

/*
 * Section bits use the lower unused bits in the ->section_mem_map
 */
enum {
	SECTION_MARKED_PRESENT_BIT = 0,
	SECTION_HAS_MEM_MAP_BIT,
	...
#ifdef CONFIG_ZONE_DEVICE
	SECTION_TAINT_ZONE_DEVICE_BIT,
#endif
};

#define SECTION_MARKED_PRESENT	   (1ULL << SECTION_MARKED_PRESENT_BIT)
...
#ifdef CONFIG_ZONE_DEVICE
#define SECTION_TAINT_ZONE_DEVICE  (1ULL << SECTION_TAINT_ZONE_DEVICE_BIT)
#endif /* CONFIG_ZONE_DEVICE */



> ---
>  include/linux/kconfig.h |  1 +
>  include/linux/mmzone.h  | 54 +++++++++++++++++++++++++++++++++++++++++--------
>  mm/memory_hotplug.c     |  6 ++++++
>  3 files changed, 53 insertions(+), 8 deletions(-)
> 
> diff --git a/include/linux/kconfig.h b/include/linux/kconfig.h
> index 20d1079e92b4..7044032b9f42 100644
> --- a/include/linux/kconfig.h
> +++ b/include/linux/kconfig.h
> @@ -10,6 +10,7 @@
>  #define __LITTLE_ENDIAN 1234
>  #endif
>  
> +#define __ARG_PLACEHOLDER_ 0,
>  #define __ARG_PLACEHOLDER_1 0,
>  #define __take_second_arg(__ignored, val, ...) val
>  
> diff --git a/include/linux/mmzone.h b/include/linux/mmzone.h
> index 299259cfe462..2cf2a76535ab 100644
> --- a/include/linux/mmzone.h
> +++ b/include/linux/mmzone.h
> @@ -1422,16 +1422,47 @@ extern size_t mem_section_usage_size(void);
>   *      (equal SECTION_SIZE_BITS - PAGE_SHIFT), and the
>   *      worst combination is powerpc with 256k pages,
>   *      which results in PFN_SECTION_SHIFT equal 6.
> - * To sum it up, at least 6 bits are available.
> + * To sum it up, at least 6 bits are available on all architectures.
> + * However, we can exceed 6 bits on some other architectures except
> + * powerpc (e.g. 15 bits are available on x86_64, 13 bits are available
> + * with the worst case of 64K pages on arm64) if we make sure the
> + * exceeded bit is not applicable to powerpc.
>   */
> -#define SECTION_MARKED_PRESENT		(1UL<<0)
> -#define SECTION_HAS_MEM_MAP		(1UL<<1)
> -#define SECTION_IS_ONLINE		(1UL<<2)
> -#define SECTION_IS_EARLY		(1UL<<3)
> -#define SECTION_TAINT_ZONE_DEVICE	(1UL<<4)
> -#define SECTION_MAP_LAST_BIT		(1UL<<5)
> +#define ENUM_SECTION_FLAG(MAPPER)						\
> +	MAPPER(MARKED_PRESENT)							\
> +	MAPPER(HAS_MEM_MAP)							\
> +	MAPPER(IS_ONLINE)							\
> +	MAPPER(IS_EARLY)							\
> +	MAPPER(TAINT_ZONE_DEVICE, CONFIG_ZONE_DEVICE)				\
> +	MAPPER(MAP_LAST_BIT)
> +
> +#define __SECTION_SHIFT_FLAG_MAPPER_0(x)
> +#define __SECTION_SHIFT_FLAG_MAPPER_1(x)	SECTION_##x##_SHIFT,
> +#define __SECTION_SHIFT_FLAG_MAPPER(x, ...)	\
> +	__PASTE(__SECTION_SHIFT_FLAG_MAPPER_, IS_ENABLED(__VA_ARGS__))(x)
> +
> +#define __SECTION_FLAG_MAPPER_0(x)
> +#define __SECTION_FLAG_MAPPER_1(x)		SECTION_##x = BIT(SECTION_##x##_SHIFT),
> +#define __SECTION_FLAG_MAPPER(x, ...)		\
> +	__PASTE(__SECTION_FLAG_MAPPER_, IS_ENABLED(__VA_ARGS__))(x)
> +
> +enum {
> +	/*
> +	 * Generate a series of enumeration flags like SECTION_$name_SHIFT.
> +	 * Each entry in ENUM_SECTION_FLAG() macro will be generated to one
> +	 * enumeration iff the 2nd parameter of MAPPER() is defined or absent.
> +	 * The $name comes from the 1st parameter of MAPPER() macro.
> +	 */
> +	ENUM_SECTION_FLAG(__SECTION_SHIFT_FLAG_MAPPER)
> +	/*
> +	 * Generate a series of enumeration flags like:
> +	 *   SECTION_$name = BIT(SECTION_$name_SHIFT)
> +	 */
> +	ENUM_SECTION_FLAG(__SECTION_FLAG_MAPPER)
> +};
> +
>  #define SECTION_MAP_MASK		(~(SECTION_MAP_LAST_BIT-1))
> -#define SECTION_NID_SHIFT		6
> +#define SECTION_NID_SHIFT		SECTION_MAP_LAST_BIT_SHIFT
>  
>  static inline struct page *__section_mem_map_addr(struct mem_section *section)
>  {
> @@ -1470,12 +1501,19 @@ static inline int online_section(struct mem_section *section)
>  	return (section && (section->section_mem_map & SECTION_IS_ONLINE));
>  }
>  
> +#ifdef CONFIG_ZONE_DEVICE
>  static inline int online_device_section(struct mem_section *section)
>  {
>  	unsigned long flags = SECTION_IS_ONLINE | SECTION_TAINT_ZONE_DEVICE;
>  
>  	return section && ((section->section_mem_map & flags) == flags);
>  }
> +#else
> +static inline int online_device_section(struct mem_section *section)
> +{
> +	return 0;
> +}
> +#endif
>  
>  static inline int online_section_nr(unsigned long nr)
>  {
> diff --git a/mm/memory_hotplug.c b/mm/memory_hotplug.c
> index 1213d0c67a53..3b360eda933f 100644
> --- a/mm/memory_hotplug.c
> +++ b/mm/memory_hotplug.c
> @@ -672,12 +672,18 @@ static void __meminit resize_pgdat_range(struct pglist_data *pgdat, unsigned lon
>  
>  }
>  
> +#ifdef CONFIG_ZONE_DEVICE
>  static void section_taint_zone_device(unsigned long pfn)
>  {
>  	struct mem_section *ms = __pfn_to_section(pfn);
>  
>  	ms->section_mem_map |= SECTION_TAINT_ZONE_DEVICE;
>  }
> +#else
> +static inline void section_taint_zone_device(unsigned long pfn)
> +{
> +}
> +#endif
>  
>  /*
>   * Associate the pfn range with the given zone, initializing the memmaps
Muchun Song June 15, 2022, 1:02 p.m. UTC | #2
On Wed, Jun 15, 2022 at 11:35:09AM +0200, David Hildenbrand wrote:
> On 20.05.22 04:55, Muchun Song wrote:
> > We are almost running out of section flags: only one bit is available in
> > the worst case (powerpc with 256k pages).  However, there are still some
> > free bits (in ->section_mem_map) on other architectures (e.g. x86_64 has
> > 10 bits available, arm64 has 8 bits available in the worst case of 64K
> > pages).  We have hard-coded those numbers in the code, which makes it
> > inconvenient to use those bits on architectures other than powerpc.  So
> > convert those section flags to an enumeration to make it easy to add new
> > section flags in the future.  Also, move SECTION_TAINT_ZONE_DEVICE into
> > the scope of CONFIG_ZONE_DEVICE to save a bit in the non-zone-device case.
> > 
> > Signed-off-by: Muchun Song <songmuchun@bytedance.com>
> 
> Sorry for the late reply. This looks overly complicated to me.
> 
> IOW, staring at that patch I don't quite like what I am seeing.
> 
> 
> Something like the following is *a lot* easier to read than some
> MAPPER macro magic. What speaks against it?
>

Thanks for taking a look.

Yeah, it is more readable. This question was also raised by Oscar,
so I have pasted my reply to him here.

"
Yeah, it's a little complicated. All the magic aims to generate two
enumerators from one MAPPER(xxx, config): one is SECTION_xxx_SHIFT, the
other is SECTION_xxx = BIT(SECTION_xxx_SHIFT), both emitted only when
'config' is enabled (or when the config argument is omitted). If we want
to add a new flag, as the following patch does, just one line can do that.

  MAPPER(CANNOT_OPTIMIZE_VMEMMAP, CONFIG_HUGETLB_PAGE_OPTIMIZE_VMEMMAP)

Without that magic, we would have to add 4 lines like the following to do
the same thing.

  #ifdef CONFIG_HUGETLB_PAGE_OPTIMIZE_VMEMMAP
        SECTION_CANNOT_OPTIMIZE_VMEMMAP_SHIFT,
  #define SECTION_CANNOT_OPTIMIZE_VMEMMAP BIT(SECTION_CANNOT_OPTIMIZE_VMEMMAP_SHIFT)
  #endif

I admit it is clearer, but not as concise as the approach above.
"

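To make the generated names concrete: with CONFIG_ZONE_DEVICE=y, the two
ENUM_SECTION_FLAG() passes in the patch expand to roughly the following
(a hand-expanded sketch, not compiler output):

enum {
	/* first pass: __SECTION_SHIFT_FLAG_MAPPER */
	SECTION_MARKED_PRESENT_SHIFT,		/* 0 */
	SECTION_HAS_MEM_MAP_SHIFT,		/* 1 */
	SECTION_IS_ONLINE_SHIFT,		/* 2 */
	SECTION_IS_EARLY_SHIFT,			/* 3 */
	SECTION_TAINT_ZONE_DEVICE_SHIFT,	/* 4 */
	SECTION_MAP_LAST_BIT_SHIFT,		/* 5 */
	/* second pass: __SECTION_FLAG_MAPPER */
	SECTION_MARKED_PRESENT		= BIT(SECTION_MARKED_PRESENT_SHIFT),
	SECTION_HAS_MEM_MAP		= BIT(SECTION_HAS_MEM_MAP_SHIFT),
	SECTION_IS_ONLINE		= BIT(SECTION_IS_ONLINE_SHIFT),
	SECTION_IS_EARLY		= BIT(SECTION_IS_EARLY_SHIFT),
	SECTION_TAINT_ZONE_DEVICE	= BIT(SECTION_TAINT_ZONE_DEVICE_SHIFT),
	SECTION_MAP_LAST_BIT		= BIT(SECTION_MAP_LAST_BIT_SHIFT),
};

With CONFIG_ZONE_DEVICE=n, both TAINT_ZONE_DEVICE entries disappear and
SECTION_MAP_LAST_BIT_SHIFT drops to 4, which is the bit the commit message
saves.  The new "#define __ARG_PLACEHOLDER_ 0," in kconfig.h is what makes
IS_ENABLED() with an omitted argument evaluate to 1, so the entries without
a config option are always emitted.
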
Both approaches are fine with me. I can switch to the following approach
since you think it is better.

Thanks.

> /*
>  * Section bits use the lower unused bits in the ->section_mem_map
>  */
> enum {
> 	SECTION_MARKED_PRESENT_BIT = 0,
> 	SECTION_HAS_MEM_MAP_BIT,
> 	...
> #ifdef CONFIG_ZONE_DEVICE
> 	SECTION_TAINT_ZONE_DEVICE_BIT,
> #endif
> };
> 
> #define SECTION_MARKED_PRESENT	   (1ULL << SECTION_MARKED_PRESENT_BIT)
> ...
> #ifdef CONFIG_ZONE_DEVICE
> #define SECTION_TAINT_ZONE_DEVICE  (1ULL << SECTION_TAINT_ZONE_DEVICE_BIT)
> #endif /* CONFIG_ZONE_DEVICE */
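
For concreteness, the extra flag from patch 2/2 mentioned in the pasted
reply above would then look something like this under the open-coded
scheme (a hypothetical sketch of a possible next version, not code from
this series):

enum {
	SECTION_MARKED_PRESENT_BIT,
	SECTION_HAS_MEM_MAP_BIT,
	SECTION_IS_ONLINE_BIT,
	SECTION_IS_EARLY_BIT,
#ifdef CONFIG_ZONE_DEVICE
	SECTION_TAINT_ZONE_DEVICE_BIT,
#endif
#ifdef CONFIG_HUGETLB_PAGE_OPTIMIZE_VMEMMAP
	SECTION_CANNOT_OPTIMIZE_VMEMMAP_BIT,
#endif
	SECTION_MAP_LAST_BIT,
};

#define SECTION_MARKED_PRESENT		BIT(SECTION_MARKED_PRESENT_BIT)
...
#ifdef CONFIG_HUGETLB_PAGE_OPTIMIZE_VMEMMAP
#define SECTION_CANNOT_OPTIMIZE_VMEMMAP	BIT(SECTION_CANNOT_OPTIMIZE_VMEMMAP_BIT)
#endif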

Patch

diff --git a/include/linux/kconfig.h b/include/linux/kconfig.h
index 20d1079e92b4..7044032b9f42 100644
--- a/include/linux/kconfig.h
+++ b/include/linux/kconfig.h
@@ -10,6 +10,7 @@ 
 #define __LITTLE_ENDIAN 1234
 #endif
 
+#define __ARG_PLACEHOLDER_ 0,
 #define __ARG_PLACEHOLDER_1 0,
 #define __take_second_arg(__ignored, val, ...) val
 
diff --git a/include/linux/mmzone.h b/include/linux/mmzone.h
index 299259cfe462..2cf2a76535ab 100644
--- a/include/linux/mmzone.h
+++ b/include/linux/mmzone.h
@@ -1422,16 +1422,47 @@  extern size_t mem_section_usage_size(void);
  *      (equal SECTION_SIZE_BITS - PAGE_SHIFT), and the
  *      worst combination is powerpc with 256k pages,
  *      which results in PFN_SECTION_SHIFT equal 6.
- * To sum it up, at least 6 bits are available.
+ * To sum it up, at least 6 bits are available on all architectures.
+ * However, we can exceed 6 bits on some other architectures except
+ * powerpc (e.g. 15 bits are available on x86_64, 13 bits are available
+ * with the worst case of 64K pages on arm64) if we make sure the
+ * exceeded bit is not applicable to powerpc.
  */
-#define SECTION_MARKED_PRESENT		(1UL<<0)
-#define SECTION_HAS_MEM_MAP		(1UL<<1)
-#define SECTION_IS_ONLINE		(1UL<<2)
-#define SECTION_IS_EARLY		(1UL<<3)
-#define SECTION_TAINT_ZONE_DEVICE	(1UL<<4)
-#define SECTION_MAP_LAST_BIT		(1UL<<5)
+#define ENUM_SECTION_FLAG(MAPPER)						\
+	MAPPER(MARKED_PRESENT)							\
+	MAPPER(HAS_MEM_MAP)							\
+	MAPPER(IS_ONLINE)							\
+	MAPPER(IS_EARLY)							\
+	MAPPER(TAINT_ZONE_DEVICE, CONFIG_ZONE_DEVICE)				\
+	MAPPER(MAP_LAST_BIT)
+
+#define __SECTION_SHIFT_FLAG_MAPPER_0(x)
+#define __SECTION_SHIFT_FLAG_MAPPER_1(x)	SECTION_##x##_SHIFT,
+#define __SECTION_SHIFT_FLAG_MAPPER(x, ...)	\
+	__PASTE(__SECTION_SHIFT_FLAG_MAPPER_, IS_ENABLED(__VA_ARGS__))(x)
+
+#define __SECTION_FLAG_MAPPER_0(x)
+#define __SECTION_FLAG_MAPPER_1(x)		SECTION_##x = BIT(SECTION_##x##_SHIFT),
+#define __SECTION_FLAG_MAPPER(x, ...)		\
+	__PASTE(__SECTION_FLAG_MAPPER_, IS_ENABLED(__VA_ARGS__))(x)
+
+enum {
+	/*
+	 * Generate a series of enumeration flags like SECTION_$name_SHIFT.
+	 * Each entry in ENUM_SECTION_FLAG() macro will be generated to one
+	 * enumeration iff the 2nd parameter of MAPPER() is defined or absent.
+	 * The $name comes from the 1st parameter of MAPPER() macro.
+	 */
+	ENUM_SECTION_FLAG(__SECTION_SHIFT_FLAG_MAPPER)
+	/*
+	 * Generate a series of enumeration flags like:
+	 *   SECTION_$name = BIT(SECTION_$name_SHIFT)
+	 */
+	ENUM_SECTION_FLAG(__SECTION_FLAG_MAPPER)
+};
+
 #define SECTION_MAP_MASK		(~(SECTION_MAP_LAST_BIT-1))
-#define SECTION_NID_SHIFT		6
+#define SECTION_NID_SHIFT		SECTION_MAP_LAST_BIT_SHIFT
 
 static inline struct page *__section_mem_map_addr(struct mem_section *section)
 {
@@ -1470,12 +1501,19 @@  static inline int online_section(struct mem_section *section)
 	return (section && (section->section_mem_map & SECTION_IS_ONLINE));
 }
 
+#ifdef CONFIG_ZONE_DEVICE
 static inline int online_device_section(struct mem_section *section)
 {
 	unsigned long flags = SECTION_IS_ONLINE | SECTION_TAINT_ZONE_DEVICE;
 
 	return section && ((section->section_mem_map & flags) == flags);
 }
+#else
+static inline int online_device_section(struct mem_section *section)
+{
+	return 0;
+}
+#endif
 
 static inline int online_section_nr(unsigned long nr)
 {
diff --git a/mm/memory_hotplug.c b/mm/memory_hotplug.c
index 1213d0c67a53..3b360eda933f 100644
--- a/mm/memory_hotplug.c
+++ b/mm/memory_hotplug.c
@@ -672,12 +672,18 @@  static void __meminit resize_pgdat_range(struct pglist_data *pgdat, unsigned lon
 
 }
 
+#ifdef CONFIG_ZONE_DEVICE
 static void section_taint_zone_device(unsigned long pfn)
 {
 	struct mem_section *ms = __pfn_to_section(pfn);
 
 	ms->section_mem_map |= SECTION_TAINT_ZONE_DEVICE;
 }
+#else
+static inline void section_taint_zone_device(unsigned long pfn)
+{
+}
+#endif
 
 /*
  * Associate the pfn range with the given zone, initializing the memmaps