Message ID | 20250408073243.488-4-rakie.kim@sk.com (mailing list archive) |
---|---|
State | New |
Headers | show |
Series | Enhance sysfs handling for memory hotplug in weighted interleave | expand |
On Tue, 8 Apr 2025 16:32:42 +0900 Rakie Kim <rakie.kim@sk.com> wrote: Hi Rakie, Looks good to me as well : -) Thank you for working on this! Reviewed-by: Joshua Hahn <joshua.hahnjy@gmail.com> > The weighted interleave policy distributes page allocations across multiple > NUMA nodes based on their performance weight, thereby improving memory > bandwidth utilization. The weight values for each node are configured > through sysfs. > > Previously, sysfs entries for configuring weighted interleave were created > for all possible nodes (N_POSSIBLE) at initialization, including nodes that > might not have memory. However, not all nodes in N_POSSIBLE are usable at > runtime, as some may remain memoryless or offline. > This led to sysfs entries being created for unusable nodes, causing > potential misconfiguration issues. > > To address this issue, this patch modifies the sysfs creation logic to: > 1) Limit sysfs entries to nodes that are online and have memory, avoiding > the creation of sysfs entries for nodes that cannot be used. > 2) Support memory hotplug by dynamically adding and removing sysfs entries > based on whether a node transitions into or out of the N_MEMORY state. > > Additionally, the patch ensures that sysfs attributes are properly managed > when nodes go offline, preventing stale or redundant entries from persisting > in the system. > > By making these changes, the weighted interleave policy now manages its > sysfs entries more efficiently, ensuring that only relevant nodes are > considered for interleaving, and dynamically adapting to memory hotplug > events. 
> > Signed-off-by: Rakie Kim <rakie.kim@sk.com> > Signed-off-by: Honggyu Kim <honggyu.kim@sk.com> > Signed-off-by: Yunjeong Mun <yunjeong.mun@sk.com> > Reviewed-by: Oscar Salvador <osalvador@suse.de> > --- > mm/mempolicy.c | 106 ++++++++++++++++++++++++++++++++++++++----------- > 1 file changed, 83 insertions(+), 23 deletions(-) > > diff --git a/mm/mempolicy.c b/mm/mempolicy.c > index 988575f29c53..9aa884107f4c 100644 > --- a/mm/mempolicy.c > +++ b/mm/mempolicy.c > @@ -113,6 +113,7 @@ > #include <asm/tlbflush.h> > #include <asm/tlb.h> > #include <linux/uaccess.h> > +#include <linux/memory.h> > > #include "internal.h" > > @@ -3421,6 +3422,7 @@ struct iw_node_attr { > > struct sysfs_wi_group { > struct kobject wi_kobj; > + struct mutex kobj_lock; > struct iw_node_attr *nattrs[]; > }; > > @@ -3470,13 +3472,24 @@ static ssize_t node_store(struct kobject *kobj, struct kobj_attribute *attr, > > static void sysfs_wi_node_delete(int nid) > { > - if (!wi_group->nattrs[nid]) > + struct iw_node_attr *attr; > + > + if (nid < 0 || nid >= nr_node_ids) > + return; > + > + mutex_lock(&wi_group->kobj_lock); > + attr = wi_group->nattrs[nid]; > + if (!attr) { > + mutex_unlock(&wi_group->kobj_lock); > return; > + } > + > + wi_group->nattrs[nid] = NULL; > + mutex_unlock(&wi_group->kobj_lock); > > - sysfs_remove_file(&wi_group->wi_kobj, > - &wi_group->nattrs[nid]->kobj_attr.attr); > - kfree(wi_group->nattrs[nid]->kobj_attr.attr.name); > - kfree(wi_group->nattrs[nid]); > + sysfs_remove_file(&wi_group->wi_kobj, &attr->kobj_attr.attr); > + kfree(attr->kobj_attr.attr.name); > + kfree(attr); > } > > static void sysfs_wi_release(struct kobject *wi_kobj) > @@ -3495,35 +3508,77 @@ static const struct kobj_type wi_ktype = { > > static int sysfs_wi_node_add(int nid) > { > - struct iw_node_attr *node_attr; > + int ret = 0; > char *name; > + struct iw_node_attr *new_attr = NULL; > > - node_attr = kzalloc(sizeof(*node_attr), GFP_KERNEL); > - if (!node_attr) > + if (nid < 0 || nid >= nr_node_ids) { > 
+ pr_err("Invalid node id: %d\n", nid); > + return -EINVAL; > + } > + > + new_attr = kzalloc(sizeof(struct iw_node_attr), GFP_KERNEL); > + if (!new_attr) > return -ENOMEM; > > name = kasprintf(GFP_KERNEL, "node%d", nid); > if (!name) { > - kfree(node_attr); > + kfree(new_attr); > return -ENOMEM; > } > > - sysfs_attr_init(&node_attr->kobj_attr.attr); > - node_attr->kobj_attr.attr.name = name; > - node_attr->kobj_attr.attr.mode = 0644; > - node_attr->kobj_attr.show = node_show; > - node_attr->kobj_attr.store = node_store; > - node_attr->nid = nid; > + mutex_lock(&wi_group->kobj_lock); > + if (wi_group->nattrs[nid]) { > + mutex_unlock(&wi_group->kobj_lock); > + pr_info("Node [%d] already exists\n", nid); > + kfree(new_attr); > + kfree(name); > + return 0; > + } > + wi_group->nattrs[nid] = new_attr; > > - if (sysfs_create_file(&wi_group->wi_kobj, &node_attr->kobj_attr.attr)) { > - kfree(node_attr->kobj_attr.attr.name); > - kfree(node_attr); > - pr_err("failed to add attribute to weighted_interleave\n"); > - return -ENOMEM; > + sysfs_attr_init(&wi_group->nattrs[nid]->kobj_attr.attr); > + wi_group->nattrs[nid]->kobj_attr.attr.name = name; > + wi_group->nattrs[nid]->kobj_attr.attr.mode = 0644; > + wi_group->nattrs[nid]->kobj_attr.show = node_show; > + wi_group->nattrs[nid]->kobj_attr.store = node_store; > + wi_group->nattrs[nid]->nid = nid; > + > + ret = sysfs_create_file(&wi_group->wi_kobj, > + &wi_group->nattrs[nid]->kobj_attr.attr); > + if (ret) { > + kfree(wi_group->nattrs[nid]->kobj_attr.attr.name); > + kfree(wi_group->nattrs[nid]); > + wi_group->nattrs[nid] = NULL; > + pr_err("Failed to add attribute to weighted_interleave: %d\n", ret); > } > + mutex_unlock(&wi_group->kobj_lock); > > - wi_group->nattrs[nid] = node_attr; > - return 0; > + return ret; > +} > + > +static int wi_node_notifier(struct notifier_block *nb, > + unsigned long action, void *data) > +{ > + int err; > + struct memory_notify *arg = data; > + int nid = arg->status_change_nid; > + > + if (nid < 0) 
> + return NOTIFY_OK; > + > + switch(action) { > + case MEM_ONLINE: > + err = sysfs_wi_node_add(nid); > + if (err) > + pr_err("failed to add sysfs [node%d]\n", nid); > + break; > + case MEM_OFFLINE: > + sysfs_wi_node_delete(nid); > + break; > + } > + > + return NOTIFY_OK; > } > > static int __init add_weighted_interleave_group(struct kobject *mempolicy_kobj) > @@ -3534,13 +3589,17 @@ static int __init add_weighted_interleave_group(struct kobject *mempolicy_kobj) > GFP_KERNEL); > if (!wi_group) > return -ENOMEM; > + mutex_init(&wi_group->kobj_lock); > > err = kobject_init_and_add(&wi_group->wi_kobj, &wi_ktype, mempolicy_kobj, > "weighted_interleave"); > if (err) > goto err_put_kobj; > > - for_each_node_state(nid, N_POSSIBLE) { > + for_each_online_node(nid) { > + if (!node_state(nid, N_MEMORY)) > + continue; > + > err = sysfs_wi_node_add(nid); > if (err) { > pr_err("failed to add sysfs [node%d]\n", nid); > @@ -3548,6 +3607,7 @@ static int __init add_weighted_interleave_group(struct kobject *mempolicy_kobj) > } > } > > + hotplug_memory_notifier(wi_node_notifier, DEFAULT_CALLBACK_PRI); > return 0; > > err_del_kobj: > -- > 2.34.1 Sent using hkml (https://github.com/sjp38/hackermail)
On Tue, Apr 08, 2025 at 04:32:42PM +0900, Rakie Kim wrote: > The weighted interleave policy distributes page allocations across multiple > NUMA nodes based on their performance weight, thereby improving memory > bandwidth utilization. The weight values for each node are configured > through sysfs. > > Previously, sysfs entries for configuring weighted interleave were created > for all possible nodes (N_POSSIBLE) at initialization, including nodes that > might not have memory. However, not all nodes in N_POSSIBLE are usable at > runtime, as some may remain memoryless or offline. > This led to sysfs entries being created for unusable nodes, causing > potential misconfiguration issues. > > To address this issue, this patch modifies the sysfs creation logic to: > 1) Limit sysfs entries to nodes that are online and have memory, avoiding > the creation of sysfs entries for nodes that cannot be used. > 2) Support memory hotplug by dynamically adding and removing sysfs entries > based on whether a node transitions into or out of the N_MEMORY state. > > Additionally, the patch ensures that sysfs attributes are properly managed > when nodes go offline, preventing stale or redundant entries from persisting > in the system. > > By making these changes, the weighted interleave policy now manages its > sysfs entries more efficiently, ensuring that only relevant nodes are > considered for interleaving, and dynamically adapting to memory hotplug > events. > > Signed-off-by: Rakie Kim <rakie.kim@sk.com> > Signed-off-by: Honggyu Kim <honggyu.kim@sk.com> > Signed-off-by: Yunjeong Mun <yunjeong.mun@sk.com> > Reviewed-by: Oscar Salvador <osalvador@suse.de> Reviewed-by: Gregory Price <gourry@gourry.net>
On 08.04.25 09:32, Rakie Kim wrote: > The weighted interleave policy distributes page allocations across multiple > NUMA nodes based on their performance weight, thereby improving memory > bandwidth utilization. The weight values for each node are configured > through sysfs. > > Previously, sysfs entries for configuring weighted interleave were created > for all possible nodes (N_POSSIBLE) at initialization, including nodes that > might not have memory. However, not all nodes in N_POSSIBLE are usable at > runtime, as some may remain memoryless or offline. > This led to sysfs entries being created for unusable nodes, causing > potential misconfiguration issues. > > To address this issue, this patch modifies the sysfs creation logic to: > 1) Limit sysfs entries to nodes that are online and have memory, avoiding > the creation of sysfs entries for nodes that cannot be used. > 2) Support memory hotplug by dynamically adding and removing sysfs entries > based on whether a node transitions into or out of the N_MEMORY state. > > Additionally, the patch ensures that sysfs attributes are properly managed > when nodes go offline, preventing stale or redundant entries from persisting > in the system. > > By making these changes, the weighted interleave policy now manages its > sysfs entries more efficiently, ensuring that only relevant nodes are > considered for interleaving, and dynamically adapting to memory hotplug > events. > > Signed-off-by: Rakie Kim <rakie.kim@sk.com> > Signed-off-by: Honggyu Kim <honggyu.kim@sk.com> > Signed-off-by: Yunjeong Mun <yunjeong.mun@sk.com> Why are the other SOF in there? Are there Co-developed-by missing? Acked-by: David Hildenbrand <david@redhat.com>
Hi David, On 4/9/2025 6:05 PM, David Hildenbrand wrote: > On 08.04.25 09:32, Rakie Kim wrote: >> The weighted interleave policy distributes page allocations across multiple >> NUMA nodes based on their performance weight, thereby improving memory >> bandwidth utilization. The weight values for each node are configured >> through sysfs. >> >> Previously, sysfs entries for configuring weighted interleave were created >> for all possible nodes (N_POSSIBLE) at initialization, including nodes that >> might not have memory. However, not all nodes in N_POSSIBLE are usable at >> runtime, as some may remain memoryless or offline. >> This led to sysfs entries being created for unusable nodes, causing >> potential misconfiguration issues. >> >> To address this issue, this patch modifies the sysfs creation logic to: >> 1) Limit sysfs entries to nodes that are online and have memory, avoiding >> the creation of sysfs entries for nodes that cannot be used. >> 2) Support memory hotplug by dynamically adding and removing sysfs entries >> based on whether a node transitions into or out of the N_MEMORY state. >> >> Additionally, the patch ensures that sysfs attributes are properly managed >> when nodes go offline, preventing stale or redundant entries from persisting >> in the system. >> >> By making these changes, the weighted interleave policy now manages its >> sysfs entries more efficiently, ensuring that only relevant nodes are >> considered for interleaving, and dynamically adapting to memory hotplug >> events. >> >> Signed-off-by: Rakie Kim <rakie.kim@sk.com> >> Signed-off-by: Honggyu Kim <honggyu.kim@sk.com> >> Signed-off-by: Yunjeong Mun <yunjeong.mun@sk.com> > > > Why are the other SOF in there? Are there Co-developed-by missing? I initially found the problem and fixed it with my internal implementation but Rakie also had his idea so he started working on it. His initial implementation has almost been similar to mine. 
I thought Signed-off-by is a way to express the patch series contains our contribution, but if you think it's unusual, then I can add this. Co-developed-by: Honggyu Kim <honggyu.kim@sk.com> Signed-off-by: Honggyu Kim <honggyu.kim@sk.com> For Yunjeong, the following can be added. Tested-by: Yunjeong Mun <yunjeong.mun@sk.com> However, this patch series is already in Andrew's mm-new so I don't want to bother him again unless we need to update these patches for other reasons. Is this okay? Thanks, Honggyu > > > Acked-by: David Hildenbrand <david@redhat.com> >
On 09.04.25 13:39, Honggyu Kim wrote: > Hi David, > > On 4/9/2025 6:05 PM, David Hildenbrand wrote: >> On 08.04.25 09:32, Rakie Kim wrote: >>> The weighted interleave policy distributes page allocations across multiple >>> NUMA nodes based on their performance weight, thereby improving memory >>> bandwidth utilization. The weight values for each node are configured >>> through sysfs. >>> >>> Previously, sysfs entries for configuring weighted interleave were created >>> for all possible nodes (N_POSSIBLE) at initialization, including nodes that >>> might not have memory. However, not all nodes in N_POSSIBLE are usable at >>> runtime, as some may remain memoryless or offline. >>> This led to sysfs entries being created for unusable nodes, causing >>> potential misconfiguration issues. >>> >>> To address this issue, this patch modifies the sysfs creation logic to: >>> 1) Limit sysfs entries to nodes that are online and have memory, avoiding >>> the creation of sysfs entries for nodes that cannot be used. >>> 2) Support memory hotplug by dynamically adding and removing sysfs entries >>> based on whether a node transitions into or out of the N_MEMORY state. >>> >>> Additionally, the patch ensures that sysfs attributes are properly managed >>> when nodes go offline, preventing stale or redundant entries from persisting >>> in the system. >>> >>> By making these changes, the weighted interleave policy now manages its >>> sysfs entries more efficiently, ensuring that only relevant nodes are >>> considered for interleaving, and dynamically adapting to memory hotplug >>> events. >>> >>> Signed-off-by: Rakie Kim <rakie.kim@sk.com> >>> Signed-off-by: Honggyu Kim <honggyu.kim@sk.com> >>> Signed-off-by: Yunjeong Mun <yunjeong.mun@sk.com> >> >> >> Why are the other SOF in there? Are there Co-developed-by missing? > > I initially found the problem and fixed it with my internal implementation but > Rakie also had his idea so he started working on it. 
His initial implementation > has almost been similar to mine. > > I thought Signed-off-by is a way to express the patch series contains our > contribution, but if you think it's unusual, then I can add this. Please see Documentation/process/submitting-patches.rst, and note that these are not "patch delivery" SOB. " The Signed-off-by: tag indicates that the signer was involved in the development of the patch, or that he/she was in the patch's delivery path. " and " Co-developed-by: states that the patch was co-created by multiple developers; it is used to give attribution to co-authors (in addition to the author attributed by the From: tag) when several people work on a single patch. Since Co-developed-by: denotes authorship, every Co-developed-by: must be immediately followed by a Signed-off-by: of the associated co-author. Standard sign-off procedure applies, i.e. the ordering of Signed-off-by: tags should reflect the chronological history of the patch insofar as possible, regardless of whether the author is attributed via From: or Co-developed-by:. Notably, the last Signed-off-by: must always be that of the developer submitting the patch. " The SOB order here is also not correct. > > Co-developed-by: Honggyu Kim <honggyu.kim@sk.com> > Signed-off-by: Honggyu Kim <honggyu.kim@sk.com> > > For Yunjeong, the following can be added. > > Tested-by: Yunjeong Mun <yunjeong.mun@sk.com> That is probably the right thing to do if contribution was focused on testing. > > However, this patch series is already in Andrew's mm-new so I don't want to > bother him again unless we need to update this patches for other reasons. mm-new is exactly for these kind of things. We can ask Andrew to fix it up.
On Wed, 9 Apr 2025 13:52:28 +0200 David Hildenbrand <david@redhat.com> wrote: > On 09.04.25 13:39, Honggyu Kim wrote: > > Hi David, > > > > On 4/9/2025 6:05 PM, David Hildenbrand wrote: > >> On 08.04.25 09:32, Rakie Kim wrote: > >>> The weighted interleave policy distributes page allocations across multiple > >>> NUMA nodes based on their performance weight, thereby improving memory > >>> bandwidth utilization. The weight values for each node are configured > >>> through sysfs. > >>> > >>> Previously, sysfs entries for configuring weighted interleave were created > >>> for all possible nodes (N_POSSIBLE) at initialization, including nodes that > >>> might not have memory. However, not all nodes in N_POSSIBLE are usable at > >>> runtime, as some may remain memoryless or offline. > >>> This led to sysfs entries being created for unusable nodes, causing > >>> potential misconfiguration issues. > >>> > >>> To address this issue, this patch modifies the sysfs creation logic to: > >>> 1) Limit sysfs entries to nodes that are online and have memory, avoiding > >>> the creation of sysfs entries for nodes that cannot be used. > >>> 2) Support memory hotplug by dynamically adding and removing sysfs entries > >>> based on whether a node transitions into or out of the N_MEMORY state. > >>> > >>> Additionally, the patch ensures that sysfs attributes are properly managed > >>> when nodes go offline, preventing stale or redundant entries from persisting > >>> in the system. > >>> > >>> By making these changes, the weighted interleave policy now manages its > >>> sysfs entries more efficiently, ensuring that only relevant nodes are > >>> considered for interleaving, and dynamically adapting to memory hotplug > >>> events. > >>> > >>> Signed-off-by: Rakie Kim <rakie.kim@sk.com> > >>> Signed-off-by: Honggyu Kim <honggyu.kim@sk.com> > >>> Signed-off-by: Yunjeong Mun <yunjeong.mun@sk.com> > >> > >> > >> Why are the other SOF in there? Are there Co-developed-by missing? 
> > > > I initially found the problem and fixed it with my internal implementation but > > Rakie also had his idea so he started working on it. His initial implementation > > has almost been similar to mine. > > > > I thought Signed-off-by is a way to express the patch series contains our > > contribution, but if you think it's unusual, then I can add this. > > Please see Documentation/process/submitting-patches.rst, and note that these > are not "patch delivery" SOB. > > " > The Signed-off-by: tag indicates that the signer was involved in the > development of the patch, or that he/she was in the patch's delivery path. > " > > and > > " > Co-developed-by: states that the patch was co-created by multiple developers; > it is used to give attribution to co-authors (in addition to the author > attributed by the From: tag) when several people work on a single patch. Since > Co-developed-by: denotes authorship, every Co-developed-by: must be immediately > followed by a Signed-off-by: of the associated co-author. Standard sign-off > procedure applies, i.e. the ordering of Signed-off-by: tags should reflect the > chronological history of the patch insofar as possible, regardless of whether > the author is attributed via From: or Co-developed-by:. Notably, the last > Signed-off-by: must always be that of the developer submitting the patch. > " > > The SOB order here is also not correct. > > > > > Co-developed-by: Honggyu Kim <honggyu.kim@sk.com> > > Signed-off-by: Honggyu Kim <honggyu.kim@sk.com> > > > > For Yunjeong, the following can be added. > > > > Tested-by: Yunjeong Mun <yunjeong.mun@sk.com> > > That is probably the right thing to do if contribution was focused on testing. > > > > > However, this patch series is already in Andrew's mm-new so I don't want to > > bother him again unless we need to update this patches for other reasons. > > mm-new is exactly for these kind of things. We can ask Andrew to fix it up. 
> > -- > Cheers, > > David / dhildenb > Hi David, Thank you for reviewing this patch series and providing your Acked-by tag. As you pointed out, I agree that the Signed-off-by tags in this patch series are not clearly aligned with the actual contributions. Coincidentally, Dan Williams has requested an additional fix for Patch 1 in this series. Therefore, I am planning to prepare a new version, v8. In that version, I will reorganize the Signed-off-by tags as you suggested to accurately reflect the authorship and contributions. Thank you again for your guidance. Rakie
Hi David, On 4/9/2025 8:52 PM, David Hildenbrand wrote: > On 09.04.25 13:39, Honggyu Kim wrote: >> Hi David, >> >> On 4/9/2025 6:05 PM, David Hildenbrand wrote: >>> On 08.04.25 09:32, Rakie Kim wrote: [...snip...] >>>> Signed-off-by: Rakie Kim <rakie.kim@sk.com> >>>> Signed-off-by: Honggyu Kim <honggyu.kim@sk.com> >>>> Signed-off-by: Yunjeong Mun <yunjeong.mun@sk.com> >>> >>> Why are the other SOF in there? Are there Co-developed-by missing? >> >> I initially found the problem and fixed it with my internal implementation but >> Rakie also had his idea so he started working on it. His initial implementation >> has almost been similar to mine. >> >> I thought Signed-off-by is a way to express the patch series contains our >> contribution, but if you think it's unusual, then I can add this. > > Please see Documentation/process/submitting-patches.rst, Thanks for the info. > and note that these are not "patch delivery" SOB. > > " > The Signed-off-by: tag indicates that the signer was involved in the > development of the patch, or that he/she was in the patch's delivery path. Yunjeong and I have been involved in finding the problem and also concluded this issue is related to hotplug together with our initial implementations before this patch. So I guess it is the former case. > " > > and > > " > Co-developed-by: states that the patch was co-created by multiple developers; > it is used to give attribution to co-authors (in addition to the author > attributed by the From: tag) when several people work on a single patch. Since > Co-developed-by: denotes authorship, every Co-developed-by: must be immediately > followed by a Signed-off-by: of the associated co-author. Standard sign-off So the Co-developed-by comes before Signed-off-by. > procedure applies, i.e. the ordering of Signed-off-by: tags should reflect the > chronological history of the patch insofar as possible, regardless of whether > the author is attributed via From: or Co-developed-by:. 
Notably, the last > Signed-off-by: must always be that of the developer submitting the patch. > " > > The SOB order here is also not correct. It looks the below order is correct. I saw this order in the official document example as well. https://git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git/tree/Documentation/process/submitting-patches.rst?h=v6.15-rc1#n516 >> Co-developed-by: Honggyu Kim <honggyu.kim@sk.com> >> Signed-off-by: Honggyu Kim <honggyu.kim@sk.com> >> >> For Yunjeong, the following can be added. >> >> Tested-by: Yunjeong Mun <yunjeong.mun@sk.com> > > That is probably the right thing to do if contribution was focused on testing. > >> >> However, this patch series is already in Andrew's mm-new so I don't want to >> bother him again unless we need to update this patches for other reasons. > > mm-new is exactly for these kind of things. We can ask Andrew to fix it up. Rakie already asked him and he will update signing tags at the next spin. Thanks very much for your help! Thanks, Honggyu
On 10.04.25 15:25, Honggyu Kim wrote: > Hi David, > > On 4/9/2025 8:52 PM, David Hildenbrand wrote: >> On 09.04.25 13:39, Honggyu Kim wrote: >>> Hi David, >>> >>> On 4/9/2025 6:05 PM, David Hildenbrand wrote: >>>> On 08.04.25 09:32, Rakie Kim wrote: > [...snip...] >>>>> Signed-off-by: Rakie Kim <rakie.kim@sk.com> >>>>> Signed-off-by: Honggyu Kim <honggyu.kim@sk.com> >>>>> Signed-off-by: Yunjeong Mun <yunjeong.mun@sk.com> >>>> >>>> Why are the other SOF in there? Are there Co-developed-by missing? >>> >>> I initially found the problem and fixed it with my internal implementation but >>> Rakie also had his idea so he started working on it. His initial implementation >>> has almost been similar to mine. >>> >>> I thought Signed-off-by is a way to express the patch series contains our >>> contribution, but if you think it's unusual, then I can add this. >> >> Please see Documentation/process/submitting-patches.rst, > > Thanks for the info. > >> and note that these are not "patch delivery" SOB. >> >> " >> The Signed-off-by: tag indicates that the signer was involved in the >> development of the patch, or that he/she was in the patch's delivery path. > > Yunjeong and I have been involved in finding the problem and also concluded this > issue is related to hotplug together with our initial implementations before > this patch. So I guess it is the former case. IIRC, usually we use Co-developed-by + SOB only if there are actual code contributions: when you would consider someone a "co-author". "Co-developed-by: denotes authorship" For suggestions we use Suggested-by, and for things that popped up during a review, it's usually a good idea that reviewers supply a Reviewed-by at the end. So I guess Co-developed-by + SOB is appropriate if people consider themselves co-authors, in addition to the main author. 
> >> " >> >> and >> >> " >> Co-developed-by: states that the patch was co-created by multiple developers; >> it is used to give attribution to co-authors (in addition to the author >> attributed by the From: tag) when several people work on a single patch. Since >> Co-developed-by: denotes authorship, every Co-developed-by: must be immediately >> followed by a Signed-off-by: of the associated co-author. Standard sign-off > > So the Co-developed-by comes before Signed-off-by. Yes.
On Tue, 8 Apr 2025 16:32:42 +0900 Rakie Kim <rakie.kim@sk.com> wrote: > The weighted interleave policy distributes page allocations across multiple > NUMA nodes based on their performance weight, thereby improving memory > bandwidth utilization. The weight values for each node are configured > through sysfs. > > Previously, sysfs entries for configuring weighted interleave were created > for all possible nodes (N_POSSIBLE) at initialization, including nodes that > might not have memory. However, not all nodes in N_POSSIBLE are usable at > runtime, as some may remain memoryless or offline. > This led to sysfs entries being created for unusable nodes, causing > potential misconfiguration issues. > > To address this issue, this patch modifies the sysfs creation logic to: > 1) Limit sysfs entries to nodes that are online and have memory, avoiding > the creation of sysfs entries for nodes that cannot be used. > 2) Support memory hotplug by dynamically adding and removing sysfs entries > based on whether a node transitions into or out of the N_MEMORY state. > > Additionally, the patch ensures that sysfs attributes are properly managed > when nodes go offline, preventing stale or redundant entries from persisting > in the system. > > By making these changes, the weighted interleave policy now manages its > sysfs entries more efficiently, ensuring that only relevant nodes are > considered for interleaving, and dynamically adapting to memory hotplug > events. 
> > Signed-off-by: Rakie Kim <rakie.kim@sk.com> > Signed-off-by: Honggyu Kim <honggyu.kim@sk.com> > Signed-off-by: Yunjeong Mun <yunjeong.mun@sk.com> > Reviewed-by: Oscar Salvador <osalvador@suse.de> > --- > mm/mempolicy.c | 106 ++++++++++++++++++++++++++++++++++++++----------- > 1 file changed, 83 insertions(+), 23 deletions(-) > > diff --git a/mm/mempolicy.c b/mm/mempolicy.c > index 988575f29c53..9aa884107f4c 100644 > --- a/mm/mempolicy.c > +++ b/mm/mempolicy.c > @@ -113,6 +113,7 @@ > #include <asm/tlbflush.h> > #include <asm/tlb.h> > #include <linux/uaccess.h> > +#include <linux/memory.h> > > #include "internal.h" > > @@ -3421,6 +3422,7 @@ struct iw_node_attr { > > struct sysfs_wi_group { > struct kobject wi_kobj; > + struct mutex kobj_lock; > struct iw_node_attr *nattrs[]; > }; > > @@ -3470,13 +3472,24 @@ static ssize_t node_store(struct kobject *kobj, struct kobj_attribute *attr, > > static void sysfs_wi_node_delete(int nid) > { > - if (!wi_group->nattrs[nid]) > + struct iw_node_attr *attr; > + > + if (nid < 0 || nid >= nr_node_ids) > + return; > + > + mutex_lock(&wi_group->kobj_lock); > + attr = wi_group->nattrs[nid]; > + if (!attr) { > + mutex_unlock(&wi_group->kobj_lock); > return; > + } > + > + wi_group->nattrs[nid] = NULL; > + mutex_unlock(&wi_group->kobj_lock); > > - sysfs_remove_file(&wi_group->wi_kobj, > - &wi_group->nattrs[nid]->kobj_attr.attr); > - kfree(wi_group->nattrs[nid]->kobj_attr.attr.name); > - kfree(wi_group->nattrs[nid]); > + sysfs_remove_file(&wi_group->wi_kobj, &attr->kobj_attr.attr); > + kfree(attr->kobj_attr.attr.name); > + kfree(attr); Here you go through a careful dance to not touch wi_group->nattrs[nid] except under the lock, but later you are happy to do so in the error handling paths. Maybe better to do similar to here and set it to NULL under the lock but do the freeing on a copy taken under that lock. . 
> } > > static void sysfs_wi_release(struct kobject *wi_kobj) > @@ -3495,35 +3508,77 @@ static const struct kobj_type wi_ktype = { > > static int sysfs_wi_node_add(int nid) > { > - struct iw_node_attr *node_attr; > + int ret = 0; Trivial but isn't ret always set when it is used? So no need to initialize here. > char *name; > + struct iw_node_attr *new_attr = NULL; This is also always set before use so I'm not seeing a reason to initialize it to NULL. > > - node_attr = kzalloc(sizeof(*node_attr), GFP_KERNEL); > - if (!node_attr) > + if (nid < 0 || nid >= nr_node_ids) { > + pr_err("Invalid node id: %d\n", nid); > + return -EINVAL; > + } > + > + new_attr = kzalloc(sizeof(struct iw_node_attr), GFP_KERNEL); I'd prefer sizeof(*new_attr) because I'm lazy and don't like checking types for allocation sizes :) Local style seems to be a bit of a mix though. > + if (!new_attr) > return -ENOMEM; > > name = kasprintf(GFP_KERNEL, "node%d", nid); > if (!name) { > - kfree(node_attr); > + kfree(new_attr); > return -ENOMEM; > } > > - sysfs_attr_init(&node_attr->kobj_attr.attr); > - node_attr->kobj_attr.attr.name = name; > - node_attr->kobj_attr.attr.mode = 0644; > - node_attr->kobj_attr.show = node_show; > - node_attr->kobj_attr.store = node_store; > - node_attr->nid = nid; > + mutex_lock(&wi_group->kobj_lock); > + if (wi_group->nattrs[nid]) { > + mutex_unlock(&wi_group->kobj_lock); > + pr_info("Node [%d] already exists\n", nid); > + kfree(new_attr); > + kfree(name); > + return 0; > + } > + wi_group->nattrs[nid] = new_attr; > > - if (sysfs_create_file(&wi_group->wi_kobj, &node_attr->kobj_attr.attr)) { > - kfree(node_attr->kobj_attr.attr.name); > - kfree(node_attr); > - pr_err("failed to add attribute to weighted_interleave\n"); > - return -ENOMEM; > + sysfs_attr_init(&wi_group->nattrs[nid]->kobj_attr.attr); I'd have been tempted to use the new_attr pointer but perhaps this brings some documentation like advantages. 
> + wi_group->nattrs[nid]->kobj_attr.attr.name = name; > + wi_group->nattrs[nid]->kobj_attr.attr.mode = 0644; > + wi_group->nattrs[nid]->kobj_attr.show = node_show; > + wi_group->nattrs[nid]->kobj_attr.store = node_store; > + wi_group->nattrs[nid]->nid = nid; > + > + ret = sysfs_create_file(&wi_group->wi_kobj, > + &wi_group->nattrs[nid]->kobj_attr.attr); > + if (ret) { > + kfree(wi_group->nattrs[nid]->kobj_attr.attr.name); See comment above on the rather different handling here to in sysfs_wi_node_delete() where you set it to NULL first, release the lock and tidy up. new_attr and name are still set so you could even combine the handling with the if (wi_group->nattrs[nid]) above via appropriate gotos. > + kfree(wi_group->nattrs[nid]); > + wi_group->nattrs[nid] = NULL; > + pr_err("Failed to add attribute to weighted_interleave: %d\n", ret); > } > + mutex_unlock(&wi_group->kobj_lock); > > - wi_group->nattrs[nid] = node_attr; > - return 0; > + return ret; > +}
Hi Jonathan, Thanks for reviewing our patches. I have a few comments and the rest will be addressed by Rakie. On 4/16/2025 1:00 AM, Jonathan Cameron wrote: > On Tue, 8 Apr 2025 16:32:42 +0900 > Rakie Kim <rakie.kim@sk.com> wrote: > >> The weighted interleave policy distributes page allocations across multiple >> NUMA nodes based on their performance weight, thereby improving memory >> bandwidth utilization. The weight values for each node are configured >> through sysfs. >> >> Previously, sysfs entries for configuring weighted interleave were created >> for all possible nodes (N_POSSIBLE) at initialization, including nodes that >> might not have memory. However, not all nodes in N_POSSIBLE are usable at >> runtime, as some may remain memoryless or offline. >> This led to sysfs entries being created for unusable nodes, causing >> potential misconfiguration issues. >> >> To address this issue, this patch modifies the sysfs creation logic to: >> 1) Limit sysfs entries to nodes that are online and have memory, avoiding >> the creation of sysfs entries for nodes that cannot be used. >> 2) Support memory hotplug by dynamically adding and removing sysfs entries >> based on whether a node transitions into or out of the N_MEMORY state. >> >> Additionally, the patch ensures that sysfs attributes are properly managed >> when nodes go offline, preventing stale or redundant entries from persisting >> in the system. >> >> By making these changes, the weighted interleave policy now manages its >> sysfs entries more efficiently, ensuring that only relevant nodes are >> considered for interleaving, and dynamically adapting to memory hotplug >> events. 
>> >> Signed-off-by: Rakie Kim <rakie.kim@sk.com> >> Signed-off-by: Honggyu Kim <honggyu.kim@sk.com> >> Signed-off-by: Yunjeong Mun <yunjeong.mun@sk.com> >> Reviewed-by: Oscar Salvador <osalvador@suse.de> >> --- >> mm/mempolicy.c | 106 ++++++++++++++++++++++++++++++++++++++----------- >> 1 file changed, 83 insertions(+), 23 deletions(-) >> >> diff --git a/mm/mempolicy.c b/mm/mempolicy.c >> index 988575f29c53..9aa884107f4c 100644 >> --- a/mm/mempolicy.c >> +++ b/mm/mempolicy.c >> @@ -113,6 +113,7 @@ >> #include <asm/tlbflush.h> >> #include <asm/tlb.h> >> #include <linux/uaccess.h> >> +#include <linux/memory.h> >> >> #include "internal.h" >> >> @@ -3421,6 +3422,7 @@ struct iw_node_attr { >> >> struct sysfs_wi_group { >> struct kobject wi_kobj; >> + struct mutex kobj_lock; >> struct iw_node_attr *nattrs[]; >> }; >> >> @@ -3470,13 +3472,24 @@ static ssize_t node_store(struct kobject *kobj, struct kobj_attribute *attr, >> >> static void sysfs_wi_node_delete(int nid) >> { >> - if (!wi_group->nattrs[nid]) >> + struct iw_node_attr *attr; >> + >> + if (nid < 0 || nid >= nr_node_ids) >> + return; >> + >> + mutex_lock(&wi_group->kobj_lock); >> + attr = wi_group->nattrs[nid]; >> + if (!attr) { >> + mutex_unlock(&wi_group->kobj_lock); >> return; >> + } >> + >> + wi_group->nattrs[nid] = NULL; >> + mutex_unlock(&wi_group->kobj_lock); >> >> - sysfs_remove_file(&wi_group->wi_kobj, >> - &wi_group->nattrs[nid]->kobj_attr.attr); >> - kfree(wi_group->nattrs[nid]->kobj_attr.attr.name); >> - kfree(wi_group->nattrs[nid]); >> + sysfs_remove_file(&wi_group->wi_kobj, &attr->kobj_attr.attr); >> + kfree(attr->kobj_attr.attr.name); >> + kfree(attr); > Here you go through a careful dance to not touch wi_group->nattrs[nid] > except under the lock, but later you are happy to do so in the > error handling paths. Maybe better to do similar to here and > set it to NULL under the lock but do the freeing on a copy taken > under that lock. > . 
>> } >> >> static void sysfs_wi_release(struct kobject *wi_kobj) >> @@ -3495,35 +3508,77 @@ static const struct kobj_type wi_ktype = { >> >> static int sysfs_wi_node_add(int nid) >> { >> - struct iw_node_attr *node_attr; >> + int ret = 0; > > Trivial but isn't ret always set when it is used? So no need to initialize > here. If we don't initialize it, then this kind of trivial fixup might be needed later so I think there is no reason not to initialize it. https://lore.kernel.org/mm-commits/20240705010631.46743C4AF07@smtp.kernel.org > >> char *name; >> + struct iw_node_attr *new_attr = NULL; > > This is also always set before use so I'm not seeing a > reason to initialize it to NULL. Ditto. > > >> >> - node_attr = kzalloc(sizeof(*node_attr), GFP_KERNEL); >> - if (!node_attr) >> + if (nid < 0 || nid >= nr_node_ids) { >> + pr_err("Invalid node id: %d\n", nid); >> + return -EINVAL; >> + } >> + >> + new_attr = kzalloc(sizeof(struct iw_node_attr), GFP_KERNEL); > > I'd prefer sizeof(*new_attr) because I'm lazy and don't like checking > types for allocation sizes :) Local style seems to be a bit > of a mix though. Agreed. > >> + if (!new_attr) >> return -ENOMEM; >> >> name = kasprintf(GFP_KERNEL, "node%d", nid); >> if (!name) { >> - kfree(node_attr); >> + kfree(new_attr); >> return -ENOMEM; >> } >> >> - sysfs_attr_init(&node_attr->kobj_attr.attr); >> - node_attr->kobj_attr.attr.name = name; >> - node_attr->kobj_attr.attr.mode = 0644; >> - node_attr->kobj_attr.show = node_show; >> - node_attr->kobj_attr.store = node_store; >> - node_attr->nid = nid; >> + mutex_lock(&wi_group->kobj_lock); >> + if (wi_group->nattrs[nid]) { >> + mutex_unlock(&wi_group->kobj_lock); >> + pr_info("Node [%d] already exists\n", nid); >> + kfree(new_attr); >> + kfree(name); >> + return 0; >> + } >> + wi_group->nattrs[nid] = new_attr; This set can be done after all the "wi_group->nattrs[nid]" related set is done. 
>> >> - if (sysfs_create_file(&wi_group->wi_kobj, &node_attr->kobj_attr.attr)) { >> - kfree(node_attr->kobj_attr.attr.name); >> - kfree(node_attr); >> - pr_err("failed to add attribute to weighted_interleave\n"); >> - return -ENOMEM; >> + sysfs_attr_init(&wi_group->nattrs[nid]->kobj_attr.attr); > > I'd have been tempted to use the new_attr pointer but perhaps > this brings some documentation like advantages. +1 > >> + wi_group->nattrs[nid]->kobj_attr.attr.name = name; >> + wi_group->nattrs[nid]->kobj_attr.attr.mode = 0644; >> + wi_group->nattrs[nid]->kobj_attr.show = node_show; >> + wi_group->nattrs[nid]->kobj_attr.store = node_store; >> + wi_group->nattrs[nid]->nid = nid; As Jonathan mentioned, all the "wi_group->nattrs[nid]" here is better to be "new_attr" for simplicity. Thanks, Honggyu >> + >> + ret = sysfs_create_file(&wi_group->wi_kobj, >> + &wi_group->nattrs[nid]->kobj_attr.attr); >> + if (ret) { >> + kfree(wi_group->nattrs[nid]->kobj_attr.attr.name); > > See comment above on the rather different handling here to in > sysfs_wi_node_delete() where you set it to NULL first, release the lock and tidy up. > new_attrand name are still set so you could even combine the handling with the > if (wi_group->nattrs[nid]) above via appropriate gotos. > >> + kfree(wi_group->nattrs[nid]); >> + wi_group->nattrs[nid] = NULL; >> + pr_err("Failed to add attribute to weighted_interleave: %d\n", ret); >> } >> + mutex_unlock(&wi_group->kobj_lock); >> >> - wi_group->nattrs[nid] = node_attr; >> - return 0; >> + return ret; >> +} > >
On 4/16/2025 1:04 PM, Honggyu Kim wrote: > Hi Jonathan, > > Thanks for reviewing our patches. > > I have a few comments and the rest will be addressed by Rakie. > > On 4/16/2025 1:00 AM, Jonathan Cameron wrote: >> On Tue, 8 Apr 2025 16:32:42 +0900 >> Rakie Kim <rakie.kim@sk.com> wrote: [...snip...] >>> @@ -3495,35 +3508,77 @@ static const struct kobj_type wi_ktype = { >>> static int sysfs_wi_node_add(int nid) >>> { >>> - struct iw_node_attr *node_attr; >>> + int ret = 0; >> >> Trivial but isn't ret always set when it is used? So no need to initialize >> here. > > If we don't initialize it, then this kind of trivial fixup might be needed later > so I think there is no reason not to initialize it. > https://lore.kernel.org/mm-commits/20240705010631.46743C4AF07@smtp.kernel.org Ah. This is a different case. Please ignore this. > >> >>> char *name; >>> + struct iw_node_attr *new_attr = NULL; >> >> This is also always set before use so I'm not seeing a >> reason to initialize it to NULL. > > Ditto. Please ignore this too. Thanks, Honggyu
On Wed, 16 Apr 2025 13:04:32 +0900 Honggyu Kim <honggyu.kim@sk.com> wrote: Hi Jonathan and Honggyu, Thank you for reviewing this patch and for offering valuable ideas to address the issues. I have accepted all of your suggestions and am currently preparing a new patch series, version v8. > Hi Jonathan, > > Thanks for reviewing our patches. > > I have a few comments and the rest will be addressed by Rakie. > > On 4/16/2025 1:00 AM, Jonathan Cameron wrote: > > On Tue, 8 Apr 2025 16:32:42 +0900 > > Rakie Kim <rakie.kim@sk.com> wrote: > > > >> @@ -3470,13 +3472,24 @@ static ssize_t node_store(struct kobject *kobj, struct kobj_attribute *attr, > >> > >> static void sysfs_wi_node_delete(int nid) > >> { > >> - if (!wi_group->nattrs[nid]) > >> + struct iw_node_attr *attr; > >> + > >> + if (nid < 0 || nid >= nr_node_ids) > >> + return; > >> + > >> + mutex_lock(&wi_group->kobj_lock); > >> + attr = wi_group->nattrs[nid]; > >> + if (!attr) { > >> + mutex_unlock(&wi_group->kobj_lock); > >> return; > >> + } > >> + > >> + wi_group->nattrs[nid] = NULL; > >> + mutex_unlock(&wi_group->kobj_lock); > >> > >> - sysfs_remove_file(&wi_group->wi_kobj, > >> - &wi_group->nattrs[nid]->kobj_attr.attr); > >> - kfree(wi_group->nattrs[nid]->kobj_attr.attr.name); > >> - kfree(wi_group->nattrs[nid]); > >> + sysfs_remove_file(&wi_group->wi_kobj, &attr->kobj_attr.attr); > >> + kfree(attr->kobj_attr.attr.name); > >> + kfree(attr); > > Here you go through a careful dance to not touch wi_group->nattrs[nid] > > except under the lock, but later you are happy to do so in the > > error handling paths. Maybe better to do similar to here and > > set it to NULL under the lock but do the freeing on a copy taken > > under that lock. I have updated the error handling path in sysfs_wi_node_add() as you suggested. > > . 
> >> } > >> > >> static void sysfs_wi_release(struct kobject *wi_kobj) > >> @@ -3495,35 +3508,77 @@ static const struct kobj_type wi_ktype = { > >> > >> static int sysfs_wi_node_add(int nid) > >> { > >> - struct iw_node_attr *node_attr; > >> + int ret = 0; > > > > Trivial but isn't ret always set when it is used? So no need to initialize > > here. In the updated code for v8, I retained the initialization of `ret = 0` because it is required for proper cleanup handling in the current version. > > If we don't initialize it, then this kind of trivial fixup might be needed later > so I think there is no reason not to initialize it. > https://lore.kernel.org/mm-commits/20240705010631.46743C4AF07@smtp.kernel.org > > > > >> char *name; > >> + struct iw_node_attr *new_attr = NULL; > > > > This is also always set before use so I'm not seeing a > > reason to initialize it to NULL. > > Ditto. I also removed the unnecessary `= NULL` initializer for `new_attr`, as it is always assigned before use. > > > > > > >> > >> - node_attr = kzalloc(sizeof(*node_attr), GFP_KERNEL); > >> - if (!node_attr) > >> + if (nid < 0 || nid >= nr_node_ids) { > >> + pr_err("Invalid node id: %d\n", nid); > >> + return -EINVAL; > >> + } > >> + > >> + new_attr = kzalloc(sizeof(struct iw_node_attr), GFP_KERNEL); > > > > I'd prefer sizeof(*new_attr) because I'm lazy and don't like checking > > types for allocation sizes :) Local style seems to be a bit > > of a mix though. > > Agreed. As you recommended, I changed the allocation from `sizeof(struct iw_node_attr)` to `sizeof(*new_attr)` for better readability and consistency. 
> > > > >> + if (!new_attr) > >> return -ENOMEM; > >> > >> name = kasprintf(GFP_KERNEL, "node%d", nid); > >> if (!name) { > >> - kfree(node_attr); > >> + kfree(new_attr); > >> return -ENOMEM; > >> } > >> > >> - sysfs_attr_init(&node_attr->kobj_attr.attr); > >> - node_attr->kobj_attr.attr.name = name; > >> - node_attr->kobj_attr.attr.mode = 0644; > >> - node_attr->kobj_attr.show = node_show; > >> - node_attr->kobj_attr.store = node_store; > >> - node_attr->nid = nid; > >> + mutex_lock(&wi_group->kobj_lock); > >> + if (wi_group->nattrs[nid]) { > >> + mutex_unlock(&wi_group->kobj_lock); > >> + pr_info("Node [%d] already exists\n", nid); > >> + kfree(new_attr); > >> + kfree(name); > >> + return 0; > >> + } > >> + wi_group->nattrs[nid] = new_attr; > > This set can be done after all the "wi_group->nattrs[nid]" related set is done. > > >> > >> - if (sysfs_create_file(&wi_group->wi_kobj, &node_attr->kobj_attr.attr)) { > >> - kfree(node_attr->kobj_attr.attr.name); > >> - kfree(node_attr); > >> - pr_err("failed to add attribute to weighted_interleave\n"); > >> - return -ENOMEM; > >> + sysfs_attr_init(&wi_group->nattrs[nid]->kobj_attr.attr); > > > > I'd have been tempted to use the new_attr pointer but perhaps > > this brings some documentation like advantages. > > +1 Additionally, I replaced all usage of `wi_group->nattrs[nid]` in sysfs_wi_node_add() with the `new_attr` pointer to simplify the logic and improve clarity. This also aligns with your suggestion to treat `new_attr` consistently throughout the function. > > > > >> + wi_group->nattrs[nid]->kobj_attr.attr.name = name; > >> + wi_group->nattrs[nid]->kobj_attr.attr.mode = 0644; > >> + wi_group->nattrs[nid]->kobj_attr.show = node_show; > >> + wi_group->nattrs[nid]->kobj_attr.store = node_store; > >> + wi_group->nattrs[nid]->nid = nid; > > As Jonathan mentioned, all the "wi_group->nattrs[nid]" here is better to be > "new_attr" for simplicity. 
> > Thanks, > Honggyu > > >> + > >> + ret = sysfs_create_file(&wi_group->wi_kobj, > >> + &wi_group->nattrs[nid]->kobj_attr.attr); > >> + if (ret) { > >> + kfree(wi_group->nattrs[nid]->kobj_attr.attr.name); > > > > See comment above on the rather different handling here to in > > sysfs_wi_node_delete() where you set it to NULL first, release the lock and tidy up. > > new_attrand name are still set so you could even combine the handling with the > > if (wi_group->nattrs[nid]) above via appropriate gotos. I agree with your observation regarding the difference in error handling between sysfs_wi_node_add() and sysfs_wi_node_delete(), so I refactored sysfs_wi_node_add() to follow the same structure. I will apply all of these updates in the new v8 series. Thank you again for your thoughtful and detailed feedback. Below is the revised code after incorporating your feedback. Rakie @@ -3532,14 +3532,14 @@ static int sysfs_wi_node_add(int nid) { int ret = 0; char *name; - struct iw_node_attr *new_attr = NULL; + struct iw_node_attr *new_attr; if (nid < 0 || nid >= nr_node_ids) { - pr_err("Invalid node id: %d\n", nid); + pr_err("invalid node id: %d\n", nid); return -EINVAL; } - new_attr = kzalloc(sizeof(struct iw_node_attr), GFP_KERNEL); + new_attr = kzalloc(sizeof(*new_attr), GFP_KERNEL); if (!new_attr) return -ENOMEM; @@ -3549,33 +3549,32 @@ static int sysfs_wi_node_add(int nid) return -ENOMEM; } + sysfs_attr_init(&new_attr->kobj_attr.attr); + new_attr->kobj_attr.attr.name = name; + new_attr->kobj_attr.attr.mode = 0644; + new_attr->kobj_attr.show = node_show; + new_attr->kobj_attr.store = node_store; + new_attr->nid = nid; + mutex_lock(&wi_group->kobj_lock); if (wi_group->nattrs[nid]) { mutex_unlock(&wi_group->kobj_lock); - pr_info("Node [%d] already exists\n", nid); - kfree(new_attr); - kfree(name); - return 0; + pr_info("node%d already exists\n", nid); + goto out; } - wi_group->nattrs[nid] = new_attr; - - sysfs_attr_init(&wi_group->nattrs[nid]->kobj_attr.attr); - 
pr_info("Node [%d] already exists\n", nid); - kfree(new_attr); - kfree(name); - return 0; + pr_info("node%d already exists\n", nid); + goto out; } - wi_group->nattrs[nid] = new_attr; - - sysfs_attr_init(&wi_group->nattrs[nid]->kobj_attr.attr); - wi_group->nattrs[nid]->kobj_attr.attr.name = name; - wi_group->nattrs[nid]->kobj_attr.attr.mode = 0644; - wi_group->nattrs[nid]->kobj_attr.show = node_show; - wi_group->nattrs[nid]->kobj_attr.store = node_store; - wi_group->nattrs[nid]->nid = nid; - ret = sysfs_create_file(&wi_group->wi_kobj, - &wi_group->nattrs[nid]->kobj_attr.attr); + ret = sysfs_create_file(&wi_group->wi_kobj, &new_attr->kobj_attr.attr); if (ret) { - kfree(wi_group->nattrs[nid]->kobj_attr.attr.name); - kfree(wi_group->nattrs[nid]); - wi_group->nattrs[nid] = NULL; - pr_err("Failed to add attribute to weighted_interleave: %d\n", ret); + mutex_unlock(&wi_group->kobj_lock); + goto out; } + wi_group->nattrs[nid] = new_attr; mutex_unlock(&wi_group->kobj_lock); + return 0; +out: + kfree(new_attr->kobj_attr.attr.name); + kfree(new_attr); return ret; } > > > >> + kfree(wi_group->nattrs[nid]); > >> + wi_group->nattrs[nid] = NULL; > >> + pr_err("Failed to add attribute to weighted_interleave: %d\n", ret); > >> } > >> + mutex_unlock(&wi_group->kobj_lock); > >> > >> - wi_group->nattrs[nid] = node_attr; > >> - return 0; > >> + return ret; > >> +} > > > >
diff --git a/mm/mempolicy.c b/mm/mempolicy.c index 988575f29c53..9aa884107f4c 100644 --- a/mm/mempolicy.c +++ b/mm/mempolicy.c @@ -113,6 +113,7 @@ #include <asm/tlbflush.h> #include <asm/tlb.h> #include <linux/uaccess.h> +#include <linux/memory.h> #include "internal.h" @@ -3421,6 +3422,7 @@ struct iw_node_attr { struct sysfs_wi_group { struct kobject wi_kobj; + struct mutex kobj_lock; struct iw_node_attr *nattrs[]; }; @@ -3470,13 +3472,24 @@ static ssize_t node_store(struct kobject *kobj, struct kobj_attribute *attr, static void sysfs_wi_node_delete(int nid) { - if (!wi_group->nattrs[nid]) + struct iw_node_attr *attr; + + if (nid < 0 || nid >= nr_node_ids) + return; + + mutex_lock(&wi_group->kobj_lock); + attr = wi_group->nattrs[nid]; + if (!attr) { + mutex_unlock(&wi_group->kobj_lock); return; + } + + wi_group->nattrs[nid] = NULL; + mutex_unlock(&wi_group->kobj_lock); - sysfs_remove_file(&wi_group->wi_kobj, - &wi_group->nattrs[nid]->kobj_attr.attr); - kfree(wi_group->nattrs[nid]->kobj_attr.attr.name); - kfree(wi_group->nattrs[nid]); + sysfs_remove_file(&wi_group->wi_kobj, &attr->kobj_attr.attr); + kfree(attr->kobj_attr.attr.name); + kfree(attr); } static void sysfs_wi_release(struct kobject *wi_kobj) @@ -3495,35 +3508,77 @@ static const struct kobj_type wi_ktype = { static int sysfs_wi_node_add(int nid) { - struct iw_node_attr *node_attr; + int ret = 0; char *name; + struct iw_node_attr *new_attr = NULL; - node_attr = kzalloc(sizeof(*node_attr), GFP_KERNEL); - if (!node_attr) + if (nid < 0 || nid >= nr_node_ids) { + pr_err("Invalid node id: %d\n", nid); + return -EINVAL; + } + + new_attr = kzalloc(sizeof(struct iw_node_attr), GFP_KERNEL); + if (!new_attr) return -ENOMEM; name = kasprintf(GFP_KERNEL, "node%d", nid); if (!name) { - kfree(node_attr); + kfree(new_attr); return -ENOMEM; } - sysfs_attr_init(&node_attr->kobj_attr.attr); - node_attr->kobj_attr.attr.name = name; - node_attr->kobj_attr.attr.mode = 0644; - node_attr->kobj_attr.show = node_show; - 
node_attr->kobj_attr.store = node_store; - node_attr->nid = nid; + mutex_lock(&wi_group->kobj_lock); + if (wi_group->nattrs[nid]) { + mutex_unlock(&wi_group->kobj_lock); + pr_info("Node [%d] already exists\n", nid); + kfree(new_attr); + kfree(name); + return 0; + } + wi_group->nattrs[nid] = new_attr; - if (sysfs_create_file(&wi_group->wi_kobj, &node_attr->kobj_attr.attr)) { - kfree(node_attr->kobj_attr.attr.name); - kfree(node_attr); - pr_err("failed to add attribute to weighted_interleave\n"); - return -ENOMEM; + sysfs_attr_init(&wi_group->nattrs[nid]->kobj_attr.attr); + wi_group->nattrs[nid]->kobj_attr.attr.name = name; + wi_group->nattrs[nid]->kobj_attr.attr.mode = 0644; + wi_group->nattrs[nid]->kobj_attr.show = node_show; + wi_group->nattrs[nid]->kobj_attr.store = node_store; + wi_group->nattrs[nid]->nid = nid; + + ret = sysfs_create_file(&wi_group->wi_kobj, + &wi_group->nattrs[nid]->kobj_attr.attr); + if (ret) { + kfree(wi_group->nattrs[nid]->kobj_attr.attr.name); + kfree(wi_group->nattrs[nid]); + wi_group->nattrs[nid] = NULL; + pr_err("Failed to add attribute to weighted_interleave: %d\n", ret); } + mutex_unlock(&wi_group->kobj_lock); - wi_group->nattrs[nid] = node_attr; - return 0; + return ret; +} + +static int wi_node_notifier(struct notifier_block *nb, + unsigned long action, void *data) +{ + int err; + struct memory_notify *arg = data; + int nid = arg->status_change_nid; + + if (nid < 0) + return NOTIFY_OK; + + switch(action) { + case MEM_ONLINE: + err = sysfs_wi_node_add(nid); + if (err) + pr_err("failed to add sysfs [node%d]\n", nid); + break; + case MEM_OFFLINE: + sysfs_wi_node_delete(nid); + break; + } + + return NOTIFY_OK; } static int __init add_weighted_interleave_group(struct kobject *mempolicy_kobj) @@ -3534,13 +3589,17 @@ static int __init add_weighted_interleave_group(struct kobject *mempolicy_kobj) GFP_KERNEL); if (!wi_group) return -ENOMEM; + mutex_init(&wi_group->kobj_lock); err = kobject_init_and_add(&wi_group->wi_kobj, &wi_ktype, 
mempolicy_kobj, "weighted_interleave"); if (err) goto err_put_kobj; - for_each_node_state(nid, N_POSSIBLE) { + for_each_online_node(nid) { + if (!node_state(nid, N_MEMORY)) + continue; + err = sysfs_wi_node_add(nid); if (err) { pr_err("failed to add sysfs [node%d]\n", nid); @@ -3548,6 +3607,7 @@ static int __init add_weighted_interleave_group(struct kobject *mempolicy_kobj) } } + hotplug_memory_notifier(wi_node_notifier, DEFAULT_CALLBACK_PRI); return 0; err_del_kobj: