diff mbox series

[v3,07/10] xen/nodemask: Drop nodes_{setall, clear}() and improve the initialisers

Message ID 20190729121204.13559-8-andrew.cooper3@citrix.com (mailing list archive)
State New, archived
Headers show
Series xen/nodemask: API cleanup and fixes | expand

Commit Message

Andrew Cooper July 29, 2019, 12:12 p.m. UTC
There is no need to use runtime variable-length clearing when MAX_NUMNODES is
known to the compiler.  Drop these functions and use the initialisers instead.

numa_initmem_init() opencodes nodemask_of_node(), but its implemention is
still poor, so replace it with NODEMASK_OF() which is calculated at compile
time, and without the use of a locked set_bit() operation.

Signed-off-by: Andrew Cooper <andrew.cooper3@citrix.com>
---
CC: Jan Beulich <JBeulich@suse.com>
CC: Wei Liu <wl@xen.org>
CC: Roger Pau Monné <roger.pau@citrix.com>
CC: Stefano Stabellini <sstabellini@kernel.org>
CC: Julien Grall <julien.grall@arm.com>
CC: Volodymyr Babchuk <Volodymyr_Babchuk@epam.com>

v3:
 * New
---
 xen/arch/x86/dom0_build.c  |  2 +-
 xen/arch/x86/numa.c        |  3 +-
 xen/common/domain.c        |  4 +--
 xen/include/xen/nodemask.h | 85 +++++++++++++++++-----------------------------
 4 files changed, 35 insertions(+), 59 deletions(-)

Comments

Jan Beulich July 30, 2019, 9:44 a.m. UTC | #1
On 29.07.2019 14:12, Andrew Cooper wrote:
> There is no need to use runtime variable-length clearing when MAX_NUMNODES is
> known to the compiler.  Drop these functions and use the initialisers instead.

The only slight concern I have with this is that it further locks
down the maximum remaining to be a compile time constant. But this
is not an objection, just a remark.

> @@ -67,7 +65,34 @@ typedef struct { DECLARE_BITMAP(bits, MAX_NUMNODES); } nodemask_t;
>   
>   #define nodemask_bits(src) ((src)->bits)
>   
> -extern nodemask_t _unused_nodemask_arg_;
> +#define NODEMASK_LAST_WORD BITMAP_LAST_WORD_MASK(MAX_NUMNODES)
> +
> +#define NODEMASK_NONE                                                   \
> +((nodemask_t) {{                                                        \
> +        [0 ... BITS_TO_LONGS(MAX_NUMNODES) - 1] = 0                     \
> +}})
> +
> +#if MAX_NUMNODES <= BITS_PER_LONG
> +
> +#define NODEMASK_ALL      ((nodemask_t) {{ NODEMASK_LAST_WORD }})
> +#define NODEMASK_OF(node) ((nodemask_t) {{ 1UL << (node) }})
> +
> +#else /* MAX_NUMNODES > BITS_PER_LONG */
> +
> +#define NODEMASK_ALL                                                    \
> +((nodemask_t) {{                                                        \
> +        [0 ... BITS_TO_LONGS(MAX_NUMNODES) - 2] = ~0UL,                 \
> +        [BITS_TO_LONGS(MAX_NUMNODES) - 1] = NODEMASK_LAST_WORD          \
> +}})
> +
> +#define NODEMASK_OF(node)                                               \
> +({                                                                      \
> +    nodemask_t m = NODES_NONE;                                          \
> +    m.bits[(node) / BITS_PER_LONG] = 1UL << ((node) % BITS_PER_LONG);   \

I think you will want to avoid the double evaluation of "node"
here. With this taken care of
Reviewed-by: Jan Beulich <jbeulich@suse.com>

Jan
Andrew Cooper July 31, 2019, 12:49 p.m. UTC | #2
On 30/07/2019 10:44, Jan Beulich wrote:

> On 29.07.2019 14:12, Andrew Cooper wrote:
>> There is no need to use runtime variable-length clearing when MAX_NUMNODES is
>> known to the compiler.  Drop these functions and use the initialisers instead.
> The only slight concern I have with this is that it further locks
> down the maximum remaining to be a compile time constant. But this
> is not an objection, just a remark.

The maximum number of nodes I'm aware of at all is 10, and we currently default to 64.

I don't think it is likely that we'll get to a point where a runtime nodesize is a realistic consideration that we would want to take.

>
>> @@ -67,7 +65,34 @@ typedef struct { DECLARE_BITMAP(bits, MAX_NUMNODES); } nodemask_t;
>>   
>>   #define nodemask_bits(src) ((src)->bits)
>>   
>> -extern nodemask_t _unused_nodemask_arg_;
>> +#define NODEMASK_LAST_WORD BITMAP_LAST_WORD_MASK(MAX_NUMNODES)
>> +
>> +#define NODEMASK_NONE                                                   \
>> +((nodemask_t) {{                                                        \
>> +        [0 ... BITS_TO_LONGS(MAX_NUMNODES) - 1] = 0                     \
>> +}})
>> +
>> +#if MAX_NUMNODES <= BITS_PER_LONG
>> +
>> +#define NODEMASK_ALL      ((nodemask_t) {{ NODEMASK_LAST_WORD }})
>> +#define NODEMASK_OF(node) ((nodemask_t) {{ 1UL << (node) }})
>> +
>> +#else /* MAX_NUMNODES > BITS_PER_LONG */
>> +
>> +#define NODEMASK_ALL                                                    \
>> +((nodemask_t) {{                                                        \
>> +        [0 ... BITS_TO_LONGS(MAX_NUMNODES) - 2] = ~0UL,                 \
>> +        [BITS_TO_LONGS(MAX_NUMNODES) - 1] = NODEMASK_LAST_WORD          \
>> +}})
>> +
>> +#define NODEMASK_OF(node)                                               \
>> +({                                                                      \
>> +    nodemask_t m = NODES_NONE;                                          \
>> +    m.bits[(node) / BITS_PER_LONG] = 1UL << ((node) % BITS_PER_LONG);   \
> I think you will want to avoid the double evaluation of "node"
> here. With this taken care of
> Reviewed-by: Jan Beulich <jbeulich@suse.com>

I'm afraid this is a bit more complicated after I spotted another opencoding of NODEMASK_OF().

diff --git a/xen/arch/arm/smpboot.c b/xen/arch/arm/smpboot.c

index 00b64c3322..24af9bc471 100644

--- a/xen/arch/arm/smpboot.c

+++ b/xen/arch/arm/smpboot.c

@@ -46,7 +46,7 @@ struct cpuinfo_arm cpu_data[NR_CPUS];

 register_t __cpu_logical_map[NR_CPUS] = { [0 ... NR_CPUS-1] = MPIDR_INVALID };

 

 /* Fake one node for now. See also include/asm-arm/numa.h */

-nodemask_t __read_mostly node_online_map = { { [0] = 1UL } };

+nodemask_t __read_mostly node_online_map = NODEMASK_OF(0);

 

 /* Xen stack for bringing up the first CPU. */

 static unsigned char __initdata cpu0_boot_stack[STACK_SIZE]

diff --git a/xen/arch/x86/numa.c b/xen/arch/x86/numa.c

index 7473f83b7b..9a55c013e5 100644

--- a/xen/arch/x86/numa.c

+++ b/xen/arch/x86/numa.c

@@ -47,7 +47,7 @@ nodeid_t apicid_to_node[MAX_LOCAL_APIC] = {

 };

 cpumask_t node_to_cpumask[MAX_NUMNODES] __read_mostly;

 

-nodemask_t __read_mostly node_online_map = { { [0] = 1UL } };

+nodemask_t __read_mostly node_online_map = NODEMASK_OF(0);

 

 bool numa_off;

 s8 acpi_numa = 0;

diff --git a/xen/include/xen/nodemask.h b/xen/include/xen/nodemask.h

index 9933fec5c4..c474dca3f0 100644

--- a/xen/include/xen/nodemask.h

+++ b/xen/include/xen/nodemask.h

@@ -86,11 +86,9 @@ typedef struct { DECLARE_BITMAP(bits, MAX_NUMNODES); } nodemask_t;

 }})

 

 #define NODEMASK_OF(node)                                               \

-({                                                                      \

-    nodemask_t m = NODES_NONE;                                          \

-    m.bits[(node) / BITS_PER_LONG] = 1UL << ((node) % BITS_PER_LONG);   \

-    m;                                                                  \

-})

+((nodemask_t) {{                                                        \

+        [(node) / BITS_PER_LONG] = 1UL << ((node) % BITS_PER_LONG)      \

+}})

 

 #endif /* MAX_NUMNODES */

 

and to be used as a static initialiser, NODEMASK_OF() needs to be an ICE and can't use ({}) .

I don't see a way to avoid expanding node twice, but given that its wrapper is in ALL_CAPS and obviously a macro.

Furthermore, experimenting with a deliberate attempt to provoke this, I got 

numa.c: In function ‘numa_initmem_init’:

/local/xen.git/xen/include/xen/nodemask.h:90:10: error: nonconstant array index in initializer

         [(node) / BITS_PER_LONG] = 1UL << ((node) % BITS_PER_LONG)      \

          ^

numa.c:274:23: note: in expansion of macro ‘NODEMASK_OF’

     node_online_map = NODEMASK_OF(foo++);

                       ^~~~~~~~~~~

/local/xen.git/xen/include/xen/nodemask.h:90:10: note: (near initialization for ‘(anonymous).bits’)

         [(node) / BITS_PER_LONG] = 1UL << ((node) % BITS_PER_LONG)      \

          ^

numa.c:274:23: note: in expansion of macro ‘NODEMASK_OF’

     node_online_map = NODEMASK_OF(foo++);

                       ^~~~~~~~~~~

from GCC 6.3, which I think covers everything we need, and will prevent side effects from double expansion in practice.

~Andrew
Jan Beulich July 31, 2019, 1:12 p.m. UTC | #3
On 31.07.2019 14:49, Andrew Cooper wrote:
> I don't see a way to avoid expanding node twice, but given that its wrapper is in ALL_CAPS and obviously a macro.
> 
> Furthermore, experimenting with a deliberate attempt to provoke this, I got
> 
> numa.c: In function ‘numa_initmem_init’:
> 
> /local/xen.git/xen/include/xen/nodemask.h:90:10: error: nonconstant array index in initializer
> 
>           [(node) / BITS_PER_LONG] = 1UL << ((node) % BITS_PER_LONG)      \
> 
>            ^
> 
> numa.c:274:23: note: in expansion of macro ‘NODEMASK_OF’
> 
>       node_online_map = NODEMASK_OF(foo++);
> 
>                         ^~~~~~~~~~~
> 
> /local/xen.git/xen/include/xen/nodemask.h:90:10: note: (near initialization for ‘(anonymous).bits’)
> 
>           [(node) / BITS_PER_LONG] = 1UL << ((node) % BITS_PER_LONG)      \
> 
>            ^
> 
> numa.c:274:23: note: in expansion of macro ‘NODEMASK_OF’
> 
>       node_online_map = NODEMASK_OF(foo++);
> 
>                         ^~~~~~~~~~~
> 
> from GCC 6.3, which I think covers everything we need, and will prevent side effects from double expansion in practice.

Ah, right. With this my R-b applies to the change as is (with the
additional adjustments folded in that you've pointed out).

Jan
Andrew Cooper July 31, 2019, 1:32 p.m. UTC | #4
On 31/07/2019 14:12, Jan Beulich wrote:
> On 31.07.2019 14:49, Andrew Cooper wrote:
>> I don't see a way to avoid expanding node twice, but given that its wrapper is in ALL_CAPS and obviously a macro.
>>
>> Furthermore, experimenting with a deliberate attempt to provoke this, I got
>>
>> numa.c: In function ‘numa_initmem_init’:
>>
>> /local/xen.git/xen/include/xen/nodemask.h:90:10: error: nonconstant array index in initializer
>>
>>           [(node) / BITS_PER_LONG] = 1UL << ((node) % BITS_PER_LONG)      \
>>
>>            ^
>>
>> numa.c:274:23: note: in expansion of macro ‘NODEMASK_OF’
>>
>>       node_online_map = NODEMASK_OF(foo++);
>>
>>                         ^~~~~~~~~~~
>>
>> /local/xen.git/xen/include/xen/nodemask.h:90:10: note: (near initialization for ‘(anonymous).bits’)
>>
>>           [(node) / BITS_PER_LONG] = 1UL << ((node) % BITS_PER_LONG)      \
>>
>>            ^
>>
>> numa.c:274:23: note: in expansion of macro ‘NODEMASK_OF’
>>
>>       node_online_map = NODEMASK_OF(foo++);
>>
>>                         ^~~~~~~~~~~
>>
>> from GCC 6.3, which I think covers everything we need, and will prevent side effects from double expansion in practice.
> Ah, right. With this my R-b applies to the change as is (with the
> additional adjustments folded in that you've pointed out).

I've actually tweaked it a fraction more to not bifurcate NODEMASK_OF()
in an ifdef, which means we'll get diagnostics like that out of the
compiler even when I haven't hacked NODES_SHIFT to 7 locally.

However, it now touches ARM code so I'll post a full v4 series with this
and later issues in the series addressed.

~Andrew
diff mbox series

Patch

diff --git a/xen/arch/x86/dom0_build.c b/xen/arch/x86/dom0_build.c
index c69570920c..06500c87c6 100644
--- a/xen/arch/x86/dom0_build.c
+++ b/xen/arch/x86/dom0_build.c
@@ -231,7 +231,7 @@  unsigned int __init dom0_max_vcpus(void)
 
     if ( pv_shim )
     {
-        nodes_setall(dom0_nodes);
+        dom0_nodes = NODEMASK_ALL;
 
         /*
          * When booting in shim mode APs are not started until the guest brings
diff --git a/xen/arch/x86/numa.c b/xen/arch/x86/numa.c
index 7e1f563012..7473f83b7b 100644
--- a/xen/arch/x86/numa.c
+++ b/xen/arch/x86/numa.c
@@ -269,8 +269,7 @@  void __init numa_initmem_init(unsigned long start_pfn, unsigned long end_pfn)
     /* setup dummy node covering all memory */
     memnode_shift = BITS_PER_LONG - 1;
     memnodemap = _memnodemap;
-    nodes_clear(node_online_map);
-    node_set_online(0);
+    node_online_map = NODEMASK_OF(0);
     for ( i = 0; i < nr_cpu_ids; i++ )
         numa_set_node(i, 0);
     cpumask_copy(&node_to_cpumask[0], cpumask_of(0));
diff --git a/xen/common/domain.c b/xen/common/domain.c
index e8e850796e..11565a64b3 100644
--- a/xen/common/domain.c
+++ b/xen/common/domain.c
@@ -384,7 +384,7 @@  struct domain *domain_create(domid_t domid,
     INIT_PAGE_LIST_HEAD(&d->xenpage_list);
 
     spin_lock_init(&d->node_affinity_lock);
-    d->node_affinity = NODE_MASK_ALL;
+    d->node_affinity = NODEMASK_ALL;
     d->auto_node_affinity = 1;
 
     spin_lock_init(&d->shutdown_lock);
@@ -615,7 +615,7 @@  void domain_update_node_affinity(struct domain *d)
         dom_affinity = cpumask_empty(dom_cpumask_soft) ?
                            dom_cpumask : dom_cpumask_soft;
 
-        nodes_clear(d->node_affinity);
+        d->node_affinity = NODEMASK_NONE;
         for_each_cpu ( cpu, dom_affinity )
             node_set(cpu_to_node(cpu), d->node_affinity);
     }
diff --git a/xen/include/xen/nodemask.h b/xen/include/xen/nodemask.h
index 1dd6c7458e..9933fec5c4 100644
--- a/xen/include/xen/nodemask.h
+++ b/xen/include/xen/nodemask.h
@@ -12,8 +12,6 @@ 
  *
  * void node_set(node, mask)		turn on bit 'node' in mask
  * void node_clear(node, mask)		turn off bit 'node' in mask
- * void nodes_setall(mask)		set all bits
- * void nodes_clear(mask)		clear all bits
  * bool nodemask_test(node, mask)	true iff bit 'node' set in mask
  * int node_test_and_set(node, mask)	test and set bit 'node' in mask
  *
@@ -36,9 +34,9 @@ 
  * int cycle_node(node, mask)		Next node cycling from 'node', or
  *					MAX_NUMNODES
  *
- * nodemask_t nodemask_of_node(node)	Return nodemask with bit 'node' set
- * NODE_MASK_ALL			Initializer - all bits set
- * NODE_MASK_NONE			Initializer - no bits set
+ * nodemask_t NODEMASK_OF(node)		Initializer - bit 'node' set
+ * NODEMASK_ALL				Initializer - all bits set
+ * NODEMASK_NONE			Initializer - no bits set
  * unsigned long *nodemask_bits(mask)	Array of unsigned long's in mask
  *
  * for_each_node_mask(node, mask)	for-loop node over mask
@@ -67,7 +65,34 @@  typedef struct { DECLARE_BITMAP(bits, MAX_NUMNODES); } nodemask_t;
 
 #define nodemask_bits(src) ((src)->bits)
 
-extern nodemask_t _unused_nodemask_arg_;
+#define NODEMASK_LAST_WORD BITMAP_LAST_WORD_MASK(MAX_NUMNODES)
+
+#define NODEMASK_NONE                                                   \
+((nodemask_t) {{                                                        \
+        [0 ... BITS_TO_LONGS(MAX_NUMNODES) - 1] = 0                     \
+}})
+
+#if MAX_NUMNODES <= BITS_PER_LONG
+
+#define NODEMASK_ALL      ((nodemask_t) {{ NODEMASK_LAST_WORD }})
+#define NODEMASK_OF(node) ((nodemask_t) {{ 1UL << (node) }})
+
+#else /* MAX_NUMNODES > BITS_PER_LONG */
+
+#define NODEMASK_ALL                                                    \
+((nodemask_t) {{                                                        \
+        [0 ... BITS_TO_LONGS(MAX_NUMNODES) - 2] = ~0UL,                 \
+        [BITS_TO_LONGS(MAX_NUMNODES) - 1] = NODEMASK_LAST_WORD          \
+}})
+
+#define NODEMASK_OF(node)                                               \
+({                                                                      \
+    nodemask_t m = NODES_NONE;                                          \
+    m.bits[(node) / BITS_PER_LONG] = 1UL << ((node) % BITS_PER_LONG);   \
+    m;                                                                  \
+})
+
+#endif /* MAX_NUMNODES */
 
 #define node_set(node, dst) __node_set((node), &(dst))
 static inline void __node_set(int node, volatile nodemask_t *dstp)
@@ -81,18 +106,6 @@  static inline void __node_clear(int node, volatile nodemask_t *dstp)
 	clear_bit(node, dstp->bits);
 }
 
-#define nodes_setall(dst) __nodes_setall(&(dst), MAX_NUMNODES)
-static inline void __nodes_setall(nodemask_t *dstp, int nbits)
-{
-	bitmap_fill(dstp->bits, nbits);
-}
-
-#define nodes_clear(dst) __nodes_clear(&(dst), MAX_NUMNODES)
-static inline void __nodes_clear(nodemask_t *dstp, int nbits)
-{
-	bitmap_zero(dstp->bits, nbits);
-}
-
 static inline bool nodemask_test(unsigned int node, const nodemask_t *dst)
 {
     return test_bit(node, dst->bits);
@@ -213,18 +226,6 @@  static inline int __last_node(const nodemask_t *srcp, int nbits)
 	return pnode;
 }
 
-#define nodemask_of_node(node)						\
-({									\
-	typeof(_unused_nodemask_arg_) m;				\
-	if (sizeof(m) == sizeof(unsigned long)) {			\
-		m.bits[0] = 1UL<<(node);				\
-	} else {							\
-		nodes_clear(m);						\
-		node_set((node), m);					\
-	}								\
-	m;								\
-})
-
 #define cycle_node(n, src) __cycle_node((n), &(src), MAX_NUMNODES)
 static inline int __cycle_node(int n, const nodemask_t *maskp, int nbits)
 {
@@ -235,30 +236,6 @@  static inline int __cycle_node(int n, const nodemask_t *maskp, int nbits)
     return nxt;
 }
 
-#define NODE_MASK_LAST_WORD BITMAP_LAST_WORD_MASK(MAX_NUMNODES)
-
-#if MAX_NUMNODES <= BITS_PER_LONG
-
-#define NODE_MASK_ALL							\
-((nodemask_t) { {							\
-	[BITS_TO_LONGS(MAX_NUMNODES)-1] = NODE_MASK_LAST_WORD		\
-} })
-
-#else
-
-#define NODE_MASK_ALL							\
-((nodemask_t) { {							\
-	[0 ... BITS_TO_LONGS(MAX_NUMNODES)-2] = ~0UL,			\
-	[BITS_TO_LONGS(MAX_NUMNODES)-1] = NODE_MASK_LAST_WORD		\
-} })
-
-#endif
-
-#define NODE_MASK_NONE							\
-((nodemask_t) { {							\
-	[0 ... BITS_TO_LONGS(MAX_NUMNODES)-1] =  0UL			\
-} })
-
 #if MAX_NUMNODES > 1
 #define for_each_node_mask(node, mask)			\
 	for ((node) = first_node(mask);			\