Message ID | 20220714045351.434957-7-aneesh.kumar@linux.ibm.com (mailing list archive) |
---|---|
State | New |
Headers | show |
Series | mm/demotion: Memory tiers and demotion | expand |
"Aneesh Kumar K.V" <aneesh.kumar@linux.ibm.com> writes: > Also update different helpes to use NODE_DATA()->memtier. Since > node specific memtier can change based on the reassignment of > NUMA node to a different memory tiers, accessing NODE_DATA()->memtier > needs to happen under an rcu read lock or memory_tier_lock. > > Signed-off-by: Aneesh Kumar K.V <aneesh.kumar@linux.ibm.com> > --- > include/linux/mmzone.h | 3 ++ > mm/memory-tiers.c | 64 +++++++++++++++++++++++++++++++----------- > 2 files changed, 50 insertions(+), 17 deletions(-) > > diff --git a/include/linux/mmzone.h b/include/linux/mmzone.h > index aab70355d64f..353812495a70 100644 > --- a/include/linux/mmzone.h > +++ b/include/linux/mmzone.h > @@ -928,6 +928,9 @@ typedef struct pglist_data { > /* Per-node vmstats */ > struct per_cpu_nodestat __percpu *per_cpu_nodestats; > atomic_long_t vm_stat[NR_VM_NODE_STAT_ITEMS]; > +#ifdef CONFIG_NUMA > + struct memory_tier __rcu *memtier; > +#endif > } pg_data_t; > > #define node_present_pages(nid) (NODE_DATA(nid)->node_present_pages) > diff --git a/mm/memory-tiers.c b/mm/memory-tiers.c > index e951f54ce56c..bab4700bf58d 100644 > --- a/mm/memory-tiers.c > +++ b/mm/memory-tiers.c > @@ -7,6 +7,7 @@ > #include <linux/moduleparam.h> > #include <linux/memory.h> > #include <linux/random.h> > +#include <linux/rcupdate.h> > #include <linux/memory-tiers.h> > > #include "internal.h" > @@ -124,18 +125,23 @@ static struct memory_tier *register_memory_tier(unsigned int tier) > static void unregister_memory_tier(struct memory_tier *memtier) > { > list_del(&memtier->list); > - kfree(memtier); > + kfree_rcu(memtier); > } > > static struct memory_tier *__node_get_memory_tier(int node) > { > - struct memory_tier *memtier; > + pg_data_t *pgdat; > > - list_for_each_entry(memtier, &memory_tiers, list) { > - if (node_isset(node, memtier->nodelist)) > - return memtier; > - } > - return NULL; > + pgdat = NODE_DATA(node); > + if (!pgdat) > + return NULL; > + /* > + * Since we hold memory_tier_lock, we can avoid > + * RCU read locks when accessing the details. No > + * parallel updates are possible here. > + */ > + return rcu_dereference_check(pgdat->memtier, > + lockdep_is_held(&memory_tier_lock)); > } > > static struct memory_tier *__get_memory_tier_from_id(int id) > @@ -149,6 +155,33 @@ static struct memory_tier *__get_memory_tier_from_id(int id) > return NULL; > } > > +/* > + * Called with memory_tier_lock. Hence the device references cannot > + * be dropped during this function. > + */ > +static void memtier_node_set(int node, struct memory_tier *memtier) > +{ > + pg_data_t *pgdat; > + struct memory_tier *current_memtier; > + > + pgdat = NODE_DATA(node); > + if (!pgdat) > + return; > + /* > + * Make sure we mark the memtier NULL before we assign the new memory tier > + * to the NUMA node. This make sure that anybody looking at NODE_DATA > + * finds a NULL memtier or the one which is still valid. > + */ > + current_memtier = rcu_dereference_check(pgdat->memtier, > + lockdep_is_held(&memory_tier_lock)); > + rcu_assign_pointer(pgdat->memtier, NULL); > + if (current_memtier) > + node_clear(node, current_memtier->nodelist); It seems odd to me that you would update the current memtier prior to the synchronize_rcu(). I suppose it's really memory_tier_lock that protects the details like ->nodelist, but is there any reason not do the update after anyway? > + synchronize_rcu(); > + node_set(node, memtier->nodelist); > + rcu_assign_pointer(pgdat->memtier, memtier); > +} > + > static int __node_create_and_set_memory_tier(int node, int tier) > { > int ret = 0; > @@ -162,7 +195,7 @@ static int __node_create_and_set_memory_tier(int node, int tier) > goto out; > } > } > - node_set(node, memtier->nodelist); > + memtier_node_set(node, memtier); > out: > return ret; > } > @@ -184,14 +217,7 @@ int node_create_and_set_memory_tier(int node, int tier) > if (current_tier->id == tier) > goto out; > > - node_clear(node, current_tier->nodelist); > - > ret = __node_create_and_set_memory_tier(node, tier); > - if (ret) { > - /* reset it back to older tier */ > - node_set(node, current_tier->nodelist); > - goto out; > - } > if (nodes_empty(current_tier->nodelist)) > unregister_memory_tier(current_tier); > > @@ -213,7 +239,7 @@ static int __node_set_memory_tier(int node, int tier) > ret = -EINVAL; > goto out; > } > - node_set(node, memtier->nodelist); > + memtier_node_set(node, memtier); > out: > return ret; > } > @@ -428,6 +454,7 @@ static void __init migrate_on_reclaim_init(void) > > static int __init memory_tier_init(void) > { > + int node; > struct memory_tier *memtier; > > /* > @@ -444,7 +471,10 @@ static int __init memory_tier_init(void) > __func__, PTR_ERR(memtier)); > > /* CPU only nodes are not part of memory tiers. */ > - memtier->nodelist = node_states[N_MEMORY]; > + for_each_node_state(node, N_MEMORY) { > + rcu_assign_pointer(NODE_DATA(node)->memtier, memtier); > + node_set(node, memtier->nodelist); Similar comment here - the order seems opposite to what I'd expect. Shouldn't memtier->nodelist be fully initialised prior to making it visible with rcu_assign_pointer()? > + } > mutex_unlock(&memory_tier_lock); > > migrate_on_reclaim_init();
Alistair Popple <apopple@nvidia.com> writes: > "Aneesh Kumar K.V" <aneesh.kumar@linux.ibm.com> writes: > >> Also update different helpes to use NODE_DATA()->memtier. Since >> node specific memtier can change based on the reassignment of >> NUMA node to a different memory tiers, accessing NODE_DATA()->memtier >> needs to happen under an rcu read lock or memory_tier_lock. >> >> Signed-off-by: Aneesh Kumar K.V <aneesh.kumar@linux.ibm.com> >> --- >> include/linux/mmzone.h | 3 ++ >> mm/memory-tiers.c | 64 +++++++++++++++++++++++++++++++----------- >> 2 files changed, 50 insertions(+), 17 deletions(-) >> >> diff --git a/include/linux/mmzone.h b/include/linux/mmzone.h >> index aab70355d64f..353812495a70 100644 >> --- a/include/linux/mmzone.h >> +++ b/include/linux/mmzone.h >> @@ -928,6 +928,9 @@ typedef struct pglist_data { >> /* Per-node vmstats */ >> struct per_cpu_nodestat __percpu *per_cpu_nodestats; >> atomic_long_t vm_stat[NR_VM_NODE_STAT_ITEMS]; >> +#ifdef CONFIG_NUMA >> + struct memory_tier __rcu *memtier; >> +#endif >> } pg_data_t; >> >> #define node_present_pages(nid) (NODE_DATA(nid)->node_present_pages) >> diff --git a/mm/memory-tiers.c b/mm/memory-tiers.c >> index e951f54ce56c..bab4700bf58d 100644 >> --- a/mm/memory-tiers.c >> +++ b/mm/memory-tiers.c >> @@ -7,6 +7,7 @@ >> #include <linux/moduleparam.h> >> #include <linux/memory.h> >> #include <linux/random.h> >> +#include <linux/rcupdate.h> >> #include <linux/memory-tiers.h> >> >> #include "internal.h" >> @@ -124,18 +125,23 @@ static struct memory_tier *register_memory_tier(unsigned int tier) >> static void unregister_memory_tier(struct memory_tier *memtier) >> { >> list_del(&memtier->list); >> - kfree(memtier); >> + kfree_rcu(memtier); >> } >> >> static struct memory_tier *__node_get_memory_tier(int node) >> { >> - struct memory_tier *memtier; >> + pg_data_t *pgdat; >> >> - list_for_each_entry(memtier, &memory_tiers, list) { >> - if (node_isset(node, memtier->nodelist)) >> - return memtier; >> - } >> - return NULL; >> + pgdat = NODE_DATA(node); >> + if (!pgdat) >> + return NULL; >> + /* >> + * Since we hold memory_tier_lock, we can avoid >> + * RCU read locks when accessing the details. No >> + * parallel updates are possible here. >> + */ >> + return rcu_dereference_check(pgdat->memtier, >> + lockdep_is_held(&memory_tier_lock)); >> } >> >> static struct memory_tier *__get_memory_tier_from_id(int id) >> @@ -149,6 +155,33 @@ static struct memory_tier *__get_memory_tier_from_id(int id) >> return NULL; >> } >> >> +/* >> + * Called with memory_tier_lock. Hence the device references cannot >> + * be dropped during this function. >> + */ >> +static void memtier_node_set(int node, struct memory_tier *memtier) >> +{ >> + pg_data_t *pgdat; >> + struct memory_tier *current_memtier; >> + >> + pgdat = NODE_DATA(node); >> + if (!pgdat) >> + return; >> + /* >> + * Make sure we mark the memtier NULL before we assign the new memory tier >> + * to the NUMA node. This make sure that anybody looking at NODE_DATA >> + * finds a NULL memtier or the one which is still valid. >> + */ >> + current_memtier = rcu_dereference_check(pgdat->memtier, >> + lockdep_is_held(&memory_tier_lock)); >> + rcu_assign_pointer(pgdat->memtier, NULL); >> + if (current_memtier) >> + node_clear(node, current_memtier->nodelist); > > It seems odd to me that you would update the current memtier prior to > the synchronize_rcu(). I suppose it's really memory_tier_lock that > protects the details like ->nodelist, but is there any reason not do the > update after anyway? The synchronize_rcu ensures that the lockless read of pgdat->memtier either see value NULL or a stable memtier which got current numa node in its nodelist. IIUC what you are suggesting is we should move the node_clear after synchronize_rcu?. I am also wondering whether I need a smp_wmb()? pgdat->memtier = NULL; synchronize_rcu remove node from memtier; set node in new memtier smp_wmb(); pgdat->memtier = new memtier; > >> + synchronize_rcu(); >> + node_set(node, memtier->nodelist); >> + rcu_assign_pointer(pgdat->memtier, memtier); >> +} >> + >> static int __node_create_and_set_memory_tier(int node, int tier) >> { >> int ret = 0; >> @@ -162,7 +195,7 @@ static int __node_create_and_set_memory_tier(int node, int tier) >> goto out; >> } >> } >> - node_set(node, memtier->nodelist); >> + memtier_node_set(node, memtier); >> out: >> return ret; >> } >> @@ -184,14 +217,7 @@ int node_create_and_set_memory_tier(int node, int tier) >> if (current_tier->id == tier) >> goto out; >> >> - node_clear(node, current_tier->nodelist); >> - >> ret = __node_create_and_set_memory_tier(node, tier); >> - if (ret) { >> - /* reset it back to older tier */ >> - node_set(node, current_tier->nodelist); >> - goto out; >> - } >> if (nodes_empty(current_tier->nodelist)) >> unregister_memory_tier(current_tier); >> >> @@ -213,7 +239,7 @@ static int __node_set_memory_tier(int node, int tier) >> ret = -EINVAL; >> goto out; >> } >> - node_set(node, memtier->nodelist); >> + memtier_node_set(node, memtier); >> out: >> return ret; >> } >> @@ -428,6 +454,7 @@ static void __init migrate_on_reclaim_init(void) >> >> static int __init memory_tier_init(void) >> { >> + int node; >> struct memory_tier *memtier; >> >> /* >> @@ -444,7 +471,10 @@ static int __init memory_tier_init(void) >> __func__, PTR_ERR(memtier)); >> >> /* CPU only nodes are not part of memory tiers. */ >> - memtier->nodelist = node_states[N_MEMORY]; >> + for_each_node_state(node, N_MEMORY) { >> + rcu_assign_pointer(NODE_DATA(node)->memtier, memtier); >> + node_set(node, memtier->nodelist); > > Similar comment here - the order seems opposite to what I'd expect. > Shouldn't memtier->nodelist be fully initialised prior to making it > visible with rcu_assign_pointer()? Will fix this. This is early during boot. So the ordering won't impact correctness. Hence i can skip the smp_wmb()? > >> + } >> mutex_unlock(&memory_tier_lock); >> >> migrate_on_reclaim_init();
"Aneesh Kumar K.V" <aneesh.kumar@linux.ibm.com> writes: > Alistair Popple <apopple@nvidia.com> writes: > >> "Aneesh Kumar K.V" <aneesh.kumar@linux.ibm.com> writes: >> >>> Also update different helpes to use NODE_DATA()->memtier. Since >>> node specific memtier can change based on the reassignment of >>> NUMA node to a different memory tiers, accessing NODE_DATA()->memtier >>> needs to happen under an rcu read lock or memory_tier_lock. >>> >>> Signed-off-by: Aneesh Kumar K.V <aneesh.kumar@linux.ibm.com> >>> --- >>> include/linux/mmzone.h | 3 ++ >>> mm/memory-tiers.c | 64 +++++++++++++++++++++++++++++++----------- >>> 2 files changed, 50 insertions(+), 17 deletions(-) >>> >>> diff --git a/include/linux/mmzone.h b/include/linux/mmzone.h >>> index aab70355d64f..353812495a70 100644 >>> --- a/include/linux/mmzone.h >>> +++ b/include/linux/mmzone.h >>> @@ -928,6 +928,9 @@ typedef struct pglist_data { >>> /* Per-node vmstats */ >>> struct per_cpu_nodestat __percpu *per_cpu_nodestats; >>> atomic_long_t vm_stat[NR_VM_NODE_STAT_ITEMS]; >>> +#ifdef CONFIG_NUMA >>> + struct memory_tier __rcu *memtier; >>> +#endif >>> } pg_data_t; >>> >>> #define node_present_pages(nid) (NODE_DATA(nid)->node_present_pages) >>> diff --git a/mm/memory-tiers.c b/mm/memory-tiers.c >>> index e951f54ce56c..bab4700bf58d 100644 >>> --- a/mm/memory-tiers.c >>> +++ b/mm/memory-tiers.c >>> @@ -7,6 +7,7 @@ >>> #include <linux/moduleparam.h> >>> #include <linux/memory.h> >>> #include <linux/random.h> >>> +#include <linux/rcupdate.h> >>> #include <linux/memory-tiers.h> >>> >>> #include "internal.h" >>> @@ -124,18 +125,23 @@ static struct memory_tier *register_memory_tier(unsigned int tier) >>> static void unregister_memory_tier(struct memory_tier *memtier) >>> { >>> list_del(&memtier->list); >>> - kfree(memtier); >>> + kfree_rcu(memtier); >>> } >>> >>> static struct memory_tier *__node_get_memory_tier(int node) >>> { >>> - struct memory_tier *memtier; >>> + pg_data_t *pgdat; >>> >>> - list_for_each_entry(memtier, &memory_tiers, list) { >>> - if (node_isset(node, memtier->nodelist)) >>> - return memtier; >>> - } >>> - return NULL; >>> + pgdat = NODE_DATA(node); >>> + if (!pgdat) >>> + return NULL; >>> + /* >>> + * Since we hold memory_tier_lock, we can avoid >>> + * RCU read locks when accessing the details. No >>> + * parallel updates are possible here. >>> + */ >>> + return rcu_dereference_check(pgdat->memtier, >>> + lockdep_is_held(&memory_tier_lock)); >>> } >>> >>> static struct memory_tier *__get_memory_tier_from_id(int id) >>> @@ -149,6 +155,33 @@ static struct memory_tier *__get_memory_tier_from_id(int id) >>> return NULL; >>> } >>> >>> +/* >>> + * Called with memory_tier_lock. Hence the device references cannot >>> + * be dropped during this function. >>> + */ >>> +static void memtier_node_set(int node, struct memory_tier *memtier) >>> +{ >>> + pg_data_t *pgdat; >>> + struct memory_tier *current_memtier; >>> + >>> + pgdat = NODE_DATA(node); >>> + if (!pgdat) >>> + return; >>> + /* >>> + * Make sure we mark the memtier NULL before we assign the new memory tier >>> + * to the NUMA node. This make sure that anybody looking at NODE_DATA >>> + * finds a NULL memtier or the one which is still valid. >>> + */ >>> + current_memtier = rcu_dereference_check(pgdat->memtier, >>> + lockdep_is_held(&memory_tier_lock)); >>> + rcu_assign_pointer(pgdat->memtier, NULL); >>> + if (current_memtier) >>> + node_clear(node, current_memtier->nodelist); >> >> It seems odd to me that you would update the current memtier prior to >> the synchronize_rcu(). I suppose it's really memory_tier_lock that >> protects the details like ->nodelist, but is there any reason not do the >> update after anyway? > > The synchronize_rcu ensures that the lockless read of pgdat->memtier > either see value NULL or a stable memtier which got current numa node in > its nodelist. IIUC what you are suggesting is we should move the > node_clear after synchronize_rcu?. I am also wondering whether I need > a smp_wmb()? rcu_assign_pointer() includes the appropriate barriers to ensure any initialisation will be visible, so I don't believe you need smp_wmb(). > pgdat->memtier = NULL; > synchronize_rcu > remove node from memtier; > set node in new memtier > smp_wmb(); > pgdat->memtier = new memtier; Yeah, that's what I was suggesting. Although to be clear I don't think there was actually a correctness issue with what you had, because memtier->nodelist is protected by memory_tier_lock anyway and not accessed under the rcu_read_lock(). It just looked a little odd IMHO to be updating something that was still potentially being used prior to synchronize_rcu() completing. >> >>> + synchronize_rcu(); >>> + node_set(node, memtier->nodelist); >>> + rcu_assign_pointer(pgdat->memtier, memtier); >>> +} >>> + >>> static int __node_create_and_set_memory_tier(int node, int tier) >>> { >>> int ret = 0; >>> @@ -162,7 +195,7 @@ static int __node_create_and_set_memory_tier(int node, int tier) >>> goto out; >>> } >>> } >>> - node_set(node, memtier->nodelist); >>> + memtier_node_set(node, memtier); >>> out: >>> return ret; >>> } >>> @@ -184,14 +217,7 @@ int node_create_and_set_memory_tier(int node, int tier) >>> if (current_tier->id == tier) >>> goto out; >>> >>> - node_clear(node, current_tier->nodelist); >>> - >>> ret = __node_create_and_set_memory_tier(node, tier); >>> - if (ret) { >>> - /* reset it back to older tier */ >>> - node_set(node, current_tier->nodelist); >>> - goto out; >>> - } >>> if (nodes_empty(current_tier->nodelist)) >>> unregister_memory_tier(current_tier); >>> >>> @@ -213,7 +239,7 @@ static int __node_set_memory_tier(int node, int tier) >>> ret = -EINVAL; >>> goto out; >>> } >>> - node_set(node, memtier->nodelist); >>> + memtier_node_set(node, memtier); >>> out: >>> return ret; >>> } >>> @@ -428,6 +454,7 @@ static void __init migrate_on_reclaim_init(void) >>> >>> static int __init memory_tier_init(void) >>> { >>> + int node; >>> struct memory_tier *memtier; >>> >>> /* >>> @@ -444,7 +471,10 @@ static int __init memory_tier_init(void) >>> __func__, PTR_ERR(memtier)); >>> >>> /* CPU only nodes are not part of memory tiers. */ >>> - memtier->nodelist = node_states[N_MEMORY]; >>> + for_each_node_state(node, N_MEMORY) { >>> + rcu_assign_pointer(NODE_DATA(node)->memtier, memtier); >>> + node_set(node, memtier->nodelist); >> >> Similar comment here - the order seems opposite to what I'd expect. >> Shouldn't memtier->nodelist be fully initialised prior to making it >> visible with rcu_assign_pointer()? > > Will fix this. This is early during boot. So the ordering won't impact > correctness. Hence i can skip the smp_wmb()? Yeah, rcu_assign_pointer() should include appropriate barriers anyway. >> >>> + } >>> mutex_unlock(&memory_tier_lock); >>> >>> migrate_on_reclaim_init();
diff --git a/include/linux/mmzone.h b/include/linux/mmzone.h index aab70355d64f..353812495a70 100644 --- a/include/linux/mmzone.h +++ b/include/linux/mmzone.h @@ -928,6 +928,9 @@ typedef struct pglist_data { /* Per-node vmstats */ struct per_cpu_nodestat __percpu *per_cpu_nodestats; atomic_long_t vm_stat[NR_VM_NODE_STAT_ITEMS]; +#ifdef CONFIG_NUMA + struct memory_tier __rcu *memtier; +#endif } pg_data_t; #define node_present_pages(nid) (NODE_DATA(nid)->node_present_pages) diff --git a/mm/memory-tiers.c b/mm/memory-tiers.c index e951f54ce56c..bab4700bf58d 100644 --- a/mm/memory-tiers.c +++ b/mm/memory-tiers.c @@ -7,6 +7,7 @@ #include <linux/moduleparam.h> #include <linux/memory.h> #include <linux/random.h> +#include <linux/rcupdate.h> #include <linux/memory-tiers.h> #include "internal.h" @@ -124,18 +125,23 @@ static struct memory_tier *register_memory_tier(unsigned int tier) static void unregister_memory_tier(struct memory_tier *memtier) { list_del(&memtier->list); - kfree(memtier); + kfree_rcu(memtier); } static struct memory_tier *__node_get_memory_tier(int node) { - struct memory_tier *memtier; + pg_data_t *pgdat; - list_for_each_entry(memtier, &memory_tiers, list) { - if (node_isset(node, memtier->nodelist)) - return memtier; - } - return NULL; + pgdat = NODE_DATA(node); + if (!pgdat) + return NULL; + /* + * Since we hold memory_tier_lock, we can avoid + * RCU read locks when accessing the details. No + * parallel updates are possible here. + */ + return rcu_dereference_check(pgdat->memtier, + lockdep_is_held(&memory_tier_lock)); } static struct memory_tier *__get_memory_tier_from_id(int id) @@ -149,6 +155,33 @@ static struct memory_tier *__get_memory_tier_from_id(int id) return NULL; } +/* + * Called with memory_tier_lock. Hence the device references cannot + * be dropped during this function. + */ +static void memtier_node_set(int node, struct memory_tier *memtier) +{ + pg_data_t *pgdat; + struct memory_tier *current_memtier; + + pgdat = NODE_DATA(node); + if (!pgdat) + return; + /* + * Make sure we mark the memtier NULL before we assign the new memory tier + * to the NUMA node. This make sure that anybody looking at NODE_DATA + * finds a NULL memtier or the one which is still valid. + */ + current_memtier = rcu_dereference_check(pgdat->memtier, + lockdep_is_held(&memory_tier_lock)); + rcu_assign_pointer(pgdat->memtier, NULL); + if (current_memtier) + node_clear(node, current_memtier->nodelist); + synchronize_rcu(); + node_set(node, memtier->nodelist); + rcu_assign_pointer(pgdat->memtier, memtier); +} + static int __node_create_and_set_memory_tier(int node, int tier) { int ret = 0; @@ -162,7 +195,7 @@ static int __node_create_and_set_memory_tier(int node, int tier) goto out; } } - node_set(node, memtier->nodelist); + memtier_node_set(node, memtier); out: return ret; } @@ -184,14 +217,7 @@ int node_create_and_set_memory_tier(int node, int tier) if (current_tier->id == tier) goto out; - node_clear(node, current_tier->nodelist); - ret = __node_create_and_set_memory_tier(node, tier); - if (ret) { - /* reset it back to older tier */ - node_set(node, current_tier->nodelist); - goto out; - } if (nodes_empty(current_tier->nodelist)) unregister_memory_tier(current_tier); @@ -213,7 +239,7 @@ static int __node_set_memory_tier(int node, int tier) ret = -EINVAL; goto out; } - node_set(node, memtier->nodelist); + memtier_node_set(node, memtier); out: return ret; } @@ -428,6 +454,7 @@ static void __init migrate_on_reclaim_init(void) static int __init memory_tier_init(void) { + int node; struct memory_tier *memtier; /* @@ -444,7 +471,10 @@ static int __init memory_tier_init(void) __func__, PTR_ERR(memtier)); /* CPU only nodes are not part of memory tiers. */ - memtier->nodelist = node_states[N_MEMORY]; + for_each_node_state(node, N_MEMORY) { + rcu_assign_pointer(NODE_DATA(node)->memtier, memtier); + node_set(node, memtier->nodelist); + } mutex_unlock(&memory_tier_lock); migrate_on_reclaim_init();
Also update different helpes to use NODE_DATA()->memtier. Since node specific memtier can change based on the reassignment of NUMA node to a different memory tiers, accessing NODE_DATA()->memtier needs to happen under an rcu read lock or memory_tier_lock. Signed-off-by: Aneesh Kumar K.V <aneesh.kumar@linux.ibm.com> --- include/linux/mmzone.h | 3 ++ mm/memory-tiers.c | 64 +++++++++++++++++++++++++++++++----------- 2 files changed, 50 insertions(+), 17 deletions(-)