Message ID | 20190729121204.13559-8-andrew.cooper3@citrix.com (mailing list archive) |
---|---|
State | New, archived |
Headers | show |
Series | xen/nodemask: API cleanup and fixes | expand |
On 29.07.2019 14:12, Andrew Cooper wrote: > There is no need to use runtime variable-length clearing when MAX_NUMNODES is > known to the compiler. Drop these functions and use the initialisers instead. The only slight concern I have with this is that it further locks down the maximum remaining to be a compile time constant. But this is not an objection, just a remark. > @@ -67,7 +65,34 @@ typedef struct { DECLARE_BITMAP(bits, MAX_NUMNODES); } nodemask_t; > > #define nodemask_bits(src) ((src)->bits) > > -extern nodemask_t _unused_nodemask_arg_; > +#define NODEMASK_LAST_WORD BITMAP_LAST_WORD_MASK(MAX_NUMNODES) > + > +#define NODEMASK_NONE \ > +((nodemask_t) {{ \ > + [0 ... BITS_TO_LONGS(MAX_NUMNODES) - 1] = 0 \ > +}}) > + > +#if MAX_NUMNODES <= BITS_PER_LONG > + > +#define NODEMASK_ALL ((nodemask_t) {{ NODEMASK_LAST_WORD }}) > +#define NODEMASK_OF(node) ((nodemask_t) {{ 1UL << (node) }}) > + > +#else /* MAX_NUMNODES > BITS_PER_LONG */ > + > +#define NODEMASK_ALL \ > +((nodemask_t) {{ \ > + [0 ... BITS_TO_LONGS(MAX_NUMNODES) - 2] = ~0UL, \ > + [BITS_TO_LONGS(MAX_NUMNODES) - 1] = NODEMASK_LAST_WORD \ > +}}) > + > +#define NODEMASK_OF(node) \ > +({ \ > + nodemask_t m = NODES_NONE; \ > + m.bits[(node) / BITS_PER_LONG] = 1UL << ((node) % BITS_PER_LONG); \ I think you will want to avoid the double evaluation of "node" here. With this taken care of Reviewed-by: Jan Beulich <jbeulich@suse.com> Jan
On 30/07/2019 10:44, Jan Beulich wrote: > On 29.07.2019 14:12, Andrew Cooper wrote: >> There is no need to use runtime variable-length clearing when MAX_NUMNODES is >> known to the compiler. Drop these functions and use the initialisers instead. > The only slight concern I have with this is that it further locks > down the maximum remaining to be a compile time constant. But this > is not an objection, just a remark. The maximum number of nodes I'm aware of at all is 10, and we currently default to 64. I don't think it is likely that we'll get to a point where a runtime nodesize is a realistic consideration that we would want to take. > >> @@ -67,7 +65,34 @@ typedef struct { DECLARE_BITMAP(bits, MAX_NUMNODES); } nodemask_t; >> >> #define nodemask_bits(src) ((src)->bits) >> >> -extern nodemask_t _unused_nodemask_arg_; >> +#define NODEMASK_LAST_WORD BITMAP_LAST_WORD_MASK(MAX_NUMNODES) >> + >> +#define NODEMASK_NONE \ >> +((nodemask_t) {{ \ >> + [0 ... BITS_TO_LONGS(MAX_NUMNODES) - 1] = 0 \ >> +}}) >> + >> +#if MAX_NUMNODES <= BITS_PER_LONG >> + >> +#define NODEMASK_ALL ((nodemask_t) {{ NODEMASK_LAST_WORD }}) >> +#define NODEMASK_OF(node) ((nodemask_t) {{ 1UL << (node) }}) >> + >> +#else /* MAX_NUMNODES > BITS_PER_LONG */ >> + >> +#define NODEMASK_ALL \ >> +((nodemask_t) {{ \ >> + [0 ... BITS_TO_LONGS(MAX_NUMNODES) - 2] = ~0UL, \ >> + [BITS_TO_LONGS(MAX_NUMNODES) - 1] = NODEMASK_LAST_WORD \ >> +}}) >> + >> +#define NODEMASK_OF(node) \ >> +({ \ >> + nodemask_t m = NODES_NONE; \ >> + m.bits[(node) / BITS_PER_LONG] = 1UL << ((node) % BITS_PER_LONG); \ > I think you will want to avoid the double evaluation of "node" > here. With this taken care of > Reviewed-by: Jan Beulich <jbeulich@suse.com> I'm afraid this is a bit more complicated after I spotted another opencoding of NODEMASK_OF(). diff --git a/xen/arch/arm/smpboot.c b/xen/arch/arm/smpboot.c index 00b64c3322..24af9bc471 100644 --- a/xen/arch/arm/smpboot.c +++ b/xen/arch/arm/smpboot.c @@ -46,7 +46,7 @@ struct cpuinfo_arm cpu_data[NR_CPUS]; register_t __cpu_logical_map[NR_CPUS] = { [0 ... NR_CPUS-1] = MPIDR_INVALID }; /* Fake one node for now. See also include/asm-arm/numa.h */ -nodemask_t __read_mostly node_online_map = { { [0] = 1UL } }; +nodemask_t __read_mostly node_online_map = NODEMASK_OF(0); /* Xen stack for bringing up the first CPU. */ static unsigned char __initdata cpu0_boot_stack[STACK_SIZE] diff --git a/xen/arch/x86/numa.c b/xen/arch/x86/numa.c index 7473f83b7b..9a55c013e5 100644 --- a/xen/arch/x86/numa.c +++ b/xen/arch/x86/numa.c @@ -47,7 +47,7 @@ nodeid_t apicid_to_node[MAX_LOCAL_APIC] = { }; cpumask_t node_to_cpumask[MAX_NUMNODES] __read_mostly; -nodemask_t __read_mostly node_online_map = { { [0] = 1UL } }; +nodemask_t __read_mostly node_online_map = NODEMASK_OF(0); bool numa_off; s8 acpi_numa = 0; diff --git a/xen/include/xen/nodemask.h b/xen/include/xen/nodemask.h index 9933fec5c4..c474dca3f0 100644 --- a/xen/include/xen/nodemask.h +++ b/xen/include/xen/nodemask.h @@ -86,11 +86,9 @@ typedef struct { DECLARE_BITMAP(bits, MAX_NUMNODES); } nodemask_t; }}) #define NODEMASK_OF(node) \ -({ \ - nodemask_t m = NODES_NONE; \ - m.bits[(node) / BITS_PER_LONG] = 1UL << ((node) % BITS_PER_LONG); \ - m; \ -}) +((nodemask_t) {{ \ + [(node) / BITS_PER_LONG] = 1UL << ((node) % BITS_PER_LONG) \ +}}) #endif /* MAX_NUMNODES */ and to be used as a static initialiser, NODEMASK_OF() needs to be an ICE and can't use ({}) . I don't see a way to avoid expanding node twice, but given that its wrapper is in ALL_CAPS and obviously a macro. Furthermore, experimenting with a deliberate attempt to provoke this, I got numa.c: In function ‘numa_initmem_init’: /local/xen.git/xen/include/xen/nodemask.h:90:10: error: nonconstant array index in initializer [(node) / BITS_PER_LONG] = 1UL << ((node) % BITS_PER_LONG) \ ^ numa.c:274:23: note: in expansion of macro ‘NODEMASK_OF’ node_online_map = NODEMASK_OF(foo++); ^~~~~~~~~~~ /local/xen.git/xen/include/xen/nodemask.h:90:10: note: (near initialization for ‘(anonymous).bits’) [(node) / BITS_PER_LONG] = 1UL << ((node) % BITS_PER_LONG) \ ^ numa.c:274:23: note: in expansion of macro ‘NODEMASK_OF’ node_online_map = NODEMASK_OF(foo++); ^~~~~~~~~~~ from GCC 6.3, which I think covers everything we need, and will prevent side effects from double expansion in practice. ~Andrew
On 31.07.2019 14:49, Andrew Cooper wrote: > I don't see a way to avoid expanding node twice, but given that its wrapper is in ALL_CAPS and obviously a macro. > > Furthermore, experimenting with a deliberate attempt to provoke this, I got > > numa.c: In function ‘numa_initmem_init’: > > /local/xen.git/xen/include/xen/nodemask.h:90:10: error: nonconstant array index in initializer > > [(node) / BITS_PER_LONG] = 1UL << ((node) % BITS_PER_LONG) \ > > ^ > > numa.c:274:23: note: in expansion of macro ‘NODEMASK_OF’ > > node_online_map = NODEMASK_OF(foo++); > > ^~~~~~~~~~~ > > /local/xen.git/xen/include/xen/nodemask.h:90:10: note: (near initialization for ‘(anonymous).bits’) > > [(node) / BITS_PER_LONG] = 1UL << ((node) % BITS_PER_LONG) \ > > ^ > > numa.c:274:23: note: in expansion of macro ‘NODEMASK_OF’ > > node_online_map = NODEMASK_OF(foo++); > > ^~~~~~~~~~~ > > from GCC 6.3, which I think covers everything we need, and will prevent side effects from double expansion in practice. Ah, right. With this my R-b applies to the change as is (with the additional adjustments folded in that you've pointed out). Jan
On 31/07/2019 14:12, Jan Beulich wrote: > On 31.07.2019 14:49, Andrew Cooper wrote: >> I don't see a way to avoid expanding node twice, but given that its wrapper is in ALL_CAPS and obviously a macro. >> >> Furthermore, experimenting with a deliberate attempt to provoke this, I got >> >> numa.c: In function ‘numa_initmem_init’: >> >> /local/xen.git/xen/include/xen/nodemask.h:90:10: error: nonconstant array index in initializer >> >> [(node) / BITS_PER_LONG] = 1UL << ((node) % BITS_PER_LONG) \ >> >> ^ >> >> numa.c:274:23: note: in expansion of macro ‘NODEMASK_OF’ >> >> node_online_map = NODEMASK_OF(foo++); >> >> ^~~~~~~~~~~ >> >> /local/xen.git/xen/include/xen/nodemask.h:90:10: note: (near initialization for ‘(anonymous).bits’) >> >> [(node) / BITS_PER_LONG] = 1UL << ((node) % BITS_PER_LONG) \ >> >> ^ >> >> numa.c:274:23: note: in expansion of macro ‘NODEMASK_OF’ >> >> node_online_map = NODEMASK_OF(foo++); >> >> ^~~~~~~~~~~ >> >> from GCC 6.3, which I think covers everything we need, and will prevent side effects from double expansion in practice. > Ah, right. With this my R-b applies to the change as is (with the > additional adjustments folded in that you've pointed out). I've actually tweaked it a fraction more to not bifurcate NODEMASK_OF() in an ifdef, which means we'll get diagnostics like that out of the compiler even when I haven't hacked NODES_SHIFT to 7 locally. However, it now touches ARM code so I'll post a full v4 series with this and later issues in the series addressed. ~Andrew
diff --git a/xen/arch/x86/dom0_build.c b/xen/arch/x86/dom0_build.c index c69570920c..06500c87c6 100644 --- a/xen/arch/x86/dom0_build.c +++ b/xen/arch/x86/dom0_build.c @@ -231,7 +231,7 @@ unsigned int __init dom0_max_vcpus(void) if ( pv_shim ) { - nodes_setall(dom0_nodes); + dom0_nodes = NODEMASK_ALL; /* * When booting in shim mode APs are not started until the guest brings diff --git a/xen/arch/x86/numa.c b/xen/arch/x86/numa.c index 7e1f563012..7473f83b7b 100644 --- a/xen/arch/x86/numa.c +++ b/xen/arch/x86/numa.c @@ -269,8 +269,7 @@ void __init numa_initmem_init(unsigned long start_pfn, unsigned long end_pfn) /* setup dummy node covering all memory */ memnode_shift = BITS_PER_LONG - 1; memnodemap = _memnodemap; - nodes_clear(node_online_map); - node_set_online(0); + node_online_map = NODEMASK_OF(0); for ( i = 0; i < nr_cpu_ids; i++ ) numa_set_node(i, 0); cpumask_copy(&node_to_cpumask[0], cpumask_of(0)); diff --git a/xen/common/domain.c b/xen/common/domain.c index e8e850796e..11565a64b3 100644 --- a/xen/common/domain.c +++ b/xen/common/domain.c @@ -384,7 +384,7 @@ struct domain *domain_create(domid_t domid, INIT_PAGE_LIST_HEAD(&d->xenpage_list); spin_lock_init(&d->node_affinity_lock); - d->node_affinity = NODE_MASK_ALL; + d->node_affinity = NODEMASK_ALL; d->auto_node_affinity = 1; spin_lock_init(&d->shutdown_lock); @@ -615,7 +615,7 @@ void domain_update_node_affinity(struct domain *d) dom_affinity = cpumask_empty(dom_cpumask_soft) ? dom_cpumask : dom_cpumask_soft; - nodes_clear(d->node_affinity); + d->node_affinity = NODEMASK_NONE; for_each_cpu ( cpu, dom_affinity ) node_set(cpu_to_node(cpu), d->node_affinity); } diff --git a/xen/include/xen/nodemask.h b/xen/include/xen/nodemask.h index 1dd6c7458e..9933fec5c4 100644 --- a/xen/include/xen/nodemask.h +++ b/xen/include/xen/nodemask.h @@ -12,8 +12,6 @@ * * void node_set(node, mask) turn on bit 'node' in mask * void node_clear(node, mask) turn off bit 'node' in mask - * void nodes_setall(mask) set all bits - * void nodes_clear(mask) clear all bits * bool nodemask_test(node, mask) true iff bit 'node' set in mask * int node_test_and_set(node, mask) test and set bit 'node' in mask * @@ -36,9 +34,9 @@ * int cycle_node(node, mask) Next node cycling from 'node', or * MAX_NUMNODES * - * nodemask_t nodemask_of_node(node) Return nodemask with bit 'node' set - * NODE_MASK_ALL Initializer - all bits set - * NODE_MASK_NONE Initializer - no bits set + * nodemask_t NODEMASK_OF(node) Initializer - bit 'node' set + * NODEMASK_ALL Initializer - all bits set + * NODEMASK_NONE Initializer - no bits set * unsigned long *nodemask_bits(mask) Array of unsigned long's in mask * * for_each_node_mask(node, mask) for-loop node over mask @@ -67,7 +65,34 @@ typedef struct { DECLARE_BITMAP(bits, MAX_NUMNODES); } nodemask_t; #define nodemask_bits(src) ((src)->bits) -extern nodemask_t _unused_nodemask_arg_; +#define NODEMASK_LAST_WORD BITMAP_LAST_WORD_MASK(MAX_NUMNODES) + +#define NODEMASK_NONE \ +((nodemask_t) {{ \ + [0 ... BITS_TO_LONGS(MAX_NUMNODES) - 1] = 0 \ +}}) + +#if MAX_NUMNODES <= BITS_PER_LONG + +#define NODEMASK_ALL ((nodemask_t) {{ NODEMASK_LAST_WORD }}) +#define NODEMASK_OF(node) ((nodemask_t) {{ 1UL << (node) }}) + +#else /* MAX_NUMNODES > BITS_PER_LONG */ + +#define NODEMASK_ALL \ +((nodemask_t) {{ \ + [0 ... BITS_TO_LONGS(MAX_NUMNODES) - 2] = ~0UL, \ + [BITS_TO_LONGS(MAX_NUMNODES) - 1] = NODEMASK_LAST_WORD \ +}}) + +#define NODEMASK_OF(node) \ +({ \ + nodemask_t m = NODES_NONE; \ + m.bits[(node) / BITS_PER_LONG] = 1UL << ((node) % BITS_PER_LONG); \ + m; \ +}) + +#endif /* MAX_NUMNODES */ #define node_set(node, dst) __node_set((node), &(dst)) static inline void __node_set(int node, volatile nodemask_t *dstp) @@ -81,18 +106,6 @@ static inline void __node_clear(int node, volatile nodemask_t *dstp) clear_bit(node, dstp->bits); } -#define nodes_setall(dst) __nodes_setall(&(dst), MAX_NUMNODES) -static inline void __nodes_setall(nodemask_t *dstp, int nbits) -{ - bitmap_fill(dstp->bits, nbits); -} - -#define nodes_clear(dst) __nodes_clear(&(dst), MAX_NUMNODES) -static inline void __nodes_clear(nodemask_t *dstp, int nbits) -{ - bitmap_zero(dstp->bits, nbits); -} - static inline bool nodemask_test(unsigned int node, const nodemask_t *dst) { return test_bit(node, dst->bits); @@ -213,18 +226,6 @@ static inline int __last_node(const nodemask_t *srcp, int nbits) return pnode; } -#define nodemask_of_node(node) \ -({ \ - typeof(_unused_nodemask_arg_) m; \ - if (sizeof(m) == sizeof(unsigned long)) { \ - m.bits[0] = 1UL<<(node); \ - } else { \ - nodes_clear(m); \ - node_set((node), m); \ - } \ - m; \ -}) - #define cycle_node(n, src) __cycle_node((n), &(src), MAX_NUMNODES) static inline int __cycle_node(int n, const nodemask_t *maskp, int nbits) { @@ -235,30 +236,6 @@ static inline int __cycle_node(int n, const nodemask_t *maskp, int nbits) return nxt; } -#define NODE_MASK_LAST_WORD BITMAP_LAST_WORD_MASK(MAX_NUMNODES) - -#if MAX_NUMNODES <= BITS_PER_LONG - -#define NODE_MASK_ALL \ -((nodemask_t) { { \ - [BITS_TO_LONGS(MAX_NUMNODES)-1] = NODE_MASK_LAST_WORD \ -} }) - -#else - -#define NODE_MASK_ALL \ -((nodemask_t) { { \ - [0 ... BITS_TO_LONGS(MAX_NUMNODES)-2] = ~0UL, \ - [BITS_TO_LONGS(MAX_NUMNODES)-1] = NODE_MASK_LAST_WORD \ -} }) - -#endif - -#define NODE_MASK_NONE \ -((nodemask_t) { { \ - [0 ... BITS_TO_LONGS(MAX_NUMNODES)-1] = 0UL \ -} }) - #if MAX_NUMNODES > 1 #define for_each_node_mask(node, mask) \ for ((node) = first_node(mask); \
There is no need to use runtime variable-length clearing when MAX_NUMNODES is known to the compiler. Drop these functions and use the initialisers instead. numa_initmem_init() opencodes nodemask_of_node(), but its implemention is still poor, so replace it with NODEMASK_OF() which is calculated at compile time, and without the use of a locked set_bit() operation. Signed-off-by: Andrew Cooper <andrew.cooper3@citrix.com> --- CC: Jan Beulich <JBeulich@suse.com> CC: Wei Liu <wl@xen.org> CC: Roger Pau Monné <roger.pau@citrix.com> CC: Stefano Stabellini <sstabellini@kernel.org> CC: Julien Grall <julien.grall@arm.com> CC: Volodymyr Babchuk <Volodymyr_Babchuk@epam.com> v3: * New --- xen/arch/x86/dom0_build.c | 2 +- xen/arch/x86/numa.c | 3 +- xen/common/domain.c | 4 +-- xen/include/xen/nodemask.h | 85 +++++++++++++++++----------------------------- 4 files changed, 35 insertions(+), 59 deletions(-)