Message ID | 20181015153034.32203-3-osalvador@techadventures.net (mailing list archive) |
---|---|
State | New, archived |
Headers | show |
Series | Do not touch pages/zones during hot-remove path | expand |
> index 42d79bcc8aab..d3e52ae71bd9 100644 > --- a/mm/hmm.c > +++ b/mm/hmm.c > @@ -996,6 +996,7 @@ static void hmm_devmem_release(struct device *dev, void *data) > struct zone *zone; > struct page *page; > int nid; > + bool mapping; > > if (percpu_ref_tryget_live(&devmem->ref)) { > dev_WARN(dev, "%s: page mapping is still live!\n", __func__); > @@ -1010,12 +1011,15 @@ static void hmm_devmem_release(struct device *dev, void *data) > zone = page_zone(page); > nid = zone->zone_pgdat->node_id; > > - mem_hotplug_begin(); > if (resource->desc == IORES_DESC_DEVICE_PRIVATE_MEMORY) > - __remove_pages(zone, start_pfn, npages, NULL); > + mapping = false; > else > - arch_remove_memory(nid, start_pfn << PAGE_SHIFT, > - npages << PAGE_SHIFT, NULL); > + mapping = true; > + > + mem_hotplug_begin(); > + del_device_memory(nid, start_pfn << PAGE_SHIFT, npages << PAGE_SHIFT, > + NULL, > + mapping); > mem_hotplug_done(); > > hmm_devmem_radix_release(resource); > @@ -1026,6 +1030,7 @@ static int hmm_devmem_pages_create(struct hmm_devmem *devmem) > resource_size_t key, align_start, align_size, align_end; > struct device *device = devmem->device; > int ret, nid, is_ram; > + bool mapping; > > align_start = devmem->resource->start & ~(PA_SECTION_SIZE - 1); > align_size = ALIGN(devmem->resource->start + > @@ -1084,7 +1089,6 @@ static int hmm_devmem_pages_create(struct hmm_devmem *devmem) > if (nid < 0) > nid = numa_mem_id(); > > - mem_hotplug_begin(); > /* > * For device private memory we call add_pages() as we only need to > * allocate and initialize struct page for the device memory. More- > @@ -1096,20 +1100,17 @@ static int hmm_devmem_pages_create(struct hmm_devmem *devmem) > * want the linear mapping and thus use arch_add_memory(). > */ Some parts of this comment should be moved into add_device_memory now. (e.g. we call add_pages() ...) > if (devmem->pagemap.type == MEMORY_DEVICE_PUBLIC) > - ret = arch_add_memory(nid, align_start, align_size, NULL, > - false); > + mapping = true; > else > - ret = add_pages(nid, align_start >> PAGE_SHIFT, > - align_size >> PAGE_SHIFT, NULL, false); > - if (ret) { > - mem_hotplug_done(); > - goto error_add_memory; > - } > - move_pfn_range_to_zone(&NODE_DATA(nid)->node_zones[ZONE_DEVICE], > - align_start >> PAGE_SHIFT, > - align_size >> PAGE_SHIFT, NULL); > + mapping = false; > + > + mem_hotplug_begin(); > + ret = add_device_memory(nid, align_start, align_size, NULL, mapping); > mem_hotplug_done(); > > + if (ret) > + goto error_add_memory; > + > /* > * Initialization of the pages has been deferred until now in order > * to allow us to do the work while not holding the hotplug lock. > diff --git a/mm/memory_hotplug.c b/mm/memory_hotplug.c > index 33d448314b3f..5874aceb81ac 100644 > --- a/mm/memory_hotplug.c > +++ b/mm/memory_hotplug.c > @@ -1889,4 +1889,45 @@ void remove_memory(int nid, u64 start, u64 size) > unlock_device_hotplug(); > } > EXPORT_SYMBOL_GPL(remove_memory); > + > +#ifdef CONFIG_ZONE_DEVICE > +int del_device_memory(int nid, unsigned long start, unsigned long size, > + struct vmem_altmap *altmap, bool mapping) > +{ > + int ret; nit: personally I prefer short parameters last in the list. > + unsigned long start_pfn = PHYS_PFN(start); > + unsigned long nr_pages = size >> PAGE_SHIFT; > + struct zone *zone = page_zone(pfn_to_page(pfn)); > + > + if (mapping) > + ret = arch_remove_memory(nid, start, size, altmap); > + else > + ret = __remove_pages(zone, start_pfn, nr_pages, altmap); > + > + return ret; > +} > +#endif > #endif /* CONFIG_MEMORY_HOTREMOVE */ > + > +#ifdef CONFIG_ZONE_DEVICE > +int add_device_memory(int nid, unsigned long start, unsigned long size, > + struct vmem_altmap *altmap, bool mapping) > +{ > + int ret; dito > + unsigned long start_pfn = PHYS_PFN(start); > + unsigned long nr_pages = size >> PAGE_SHIFT; > + > + if (mapping) > + ret = arch_add_memory(nid, start, size, altmap, false); > + else > + ret = add_pages(nid, start_pfn, nr_pages, altmap, false); > + > + if (!ret) { > + struct zone *zone = &NODE_DATA(nid)->node_zones[ZONE_DEVICE]; > + > + move_pfn_range_to_zone(zone, start_pfn, nr_pages, altmap); > + } > + > + return ret; > +} > +#endif > Can you document for both functions that they should be called with the memory hotplug lock in write? Apart from that looks good to me.
> > /* > > * For device private memory we call add_pages() as we only need to > > * allocate and initialize struct page for the device memory. More- > > @@ -1096,20 +1100,17 @@ static int hmm_devmem_pages_create(struct hmm_devmem *devmem) > > * want the linear mapping and thus use arch_add_memory(). > > */ > > Some parts of this comment should be moved into add_device_memory now. > (e.g. we call add_pages() ...) I agree. > > +#ifdef CONFIG_ZONE_DEVICE > > +int del_device_memory(int nid, unsigned long start, unsigned long size, > > + struct vmem_altmap *altmap, bool mapping) > > +{ > > + int ret; > > nit: personally I prefer short parameters last in the list. I do not have a strong opinion here. If people think that long parameters should be placed at the end because it improves readability, I am ok with moving them there. > Can you document for both functions that they should be called with the > memory hotplug lock in write? Sure, I will do that in the next version, once I get some more feedback. > Apart from that looks good to me. Thanks for reviewing it David ;-)! May I assume your Reviewed-by here (if the above comments are addressed)? Thanks
On 17/10/2018 11:33, Oscar Salvador wrote: >>> /* >>> * For device private memory we call add_pages() as we only need to >>> * allocate and initialize struct page for the device memory. More- >>> @@ -1096,20 +1100,17 @@ static int hmm_devmem_pages_create(struct hmm_devmem *devmem) >>> * want the linear mapping and thus use arch_add_memory(). >>> */ >> >> Some parts of this comment should be moved into add_device_memory now. >> (e.g. we call add_pages() ...) > > I agree. > >>> +#ifdef CONFIG_ZONE_DEVICE >>> +int del_device_memory(int nid, unsigned long start, unsigned long size, >>> + struct vmem_altmap *altmap, bool mapping) >>> +{ >>> + int ret; >> >> nit: personally I prefer short parameters last in the list. > > I do not have a strong opinion here. > If people think that long parameters should be placed at the end because > it improves readability, I am ok with moving them there. > >> Can you document for both functions that they should be called with the >> memory hotplug lock in write? > > Sure, I will do that in the next version, once I get some more feedback. > >> Apart from that looks good to me. > > Thanks for reviewing it David ;-)! > May I assume your Reviewed-by here (if the above comments are addressed)? Here you go ;) Reviewed-by: David Hildenbrand <david@redhat.com> I'm planning to look into the other patches as well, but I'll be busy with traveling and KVM forum the next 1.5 weeks. Cheers!
On Wed, Oct 17, 2018 at 11:45:50AM +0200, David Hildenbrand wrote: > Here you go ;) > > Reviewed-by: David Hildenbrand <david@redhat.com> thanks! > I'm planning to look into the other patches as well, but I'll be busy > with traveling and KVM forum the next 1.5 weeks. No need to hurry, this can wait.
On 18-10-15 17:30:31, Oscar Salvador wrote: > From: Oscar Salvador <osalvador@suse.de> > > HMM/devm have a particular handling of memory-hotplug. > They do not go through the common path, and so, they do not > call either offline_pages() or online_pages(). > > The operations they perform are the following ones: > > 1) Create the linear mapping in case the memory is not private > 2) Initialize the pages and add the sections > 3) Move the pages to ZONE_DEVICE > > Due to this particular handling of hot-add/remove memory from HMM/devm, > I think it would be nice to provide a helper function in order to > make this cleaner, and not populate other regions with code > that should belong to memory-hotplug. > > The helpers are named: > > del_device_memory > add_device_memory > > The idea is that add_device_memory will be in charge of: > > a) call either arch_add_memory() or add_pages(), depending on whether > we want a linear mapping > b) online the memory sections that correspond to the pfn range > c) call move_pfn_range_to_zone() being zone ZONE_DEVICE to > expand zone/pgdat spanned pages and initialize its pages > > del_device_memory, on the other hand, will be in charge of: > > a) offline the memory sections that correspond to the pfn range > b) call shrink_zone_pgdat_pages(), which shrinks node/zone spanned pages. > c) call either arch_remove_memory() or __remove_pages(), depending on > whether we need to tear down the linear mapping or not > > The reason behind step b) from add_device_memory() and step a) > from del_device_memory is that now find_smallest/biggest_section_pfn > will have to check for online sections, and not for valid sections as > they used to do, because we call offline_mem_sections() in > offline_pages(). > > In order to split up better the patches and ease the review, > this patch will only make a) case work for add_device_memory(), > and case c) for del_device_memory. > > The other cases will be added in the next patch. > > These two functions have to be called from devm/HMM code: > > dd_device_memory: > - devm_memremap_pages() > - hmm_devmem_pages_create() > > del_device_memory: > - hmm_devmem_release > - devm_memremap_pages_release > > One thing I do not know is whether we can move kasan calls out of the > hotplug lock or not. > If we can, we could move the hotplug lock within add/del_device_memory(). > > Signed-off-by: Oscar Salvador <osalvador@suse.de> Looks good to me, thank you. Reviewed-by: Pavel Tatashin <pasha.tatashin@soleen.com> Pasha
On Mon, Oct 15, 2018 at 8:31 AM Oscar Salvador <osalvador@techadventures.net> wrote: > > From: Oscar Salvador <osalvador@suse.de> > > HMM/devm have a particular handling of memory-hotplug. > They do not go through the common path, and so, they do not > call either offline_pages() or online_pages(). > > The operations they perform are the following ones: > > 1) Create the linear mapping in case the memory is not private > 2) Initialize the pages and add the sections > 3) Move the pages to ZONE_DEVICE > > Due to this particular handling of hot-add/remove memory from HMM/devm, > I think it would be nice to provide a helper function in order to > make this cleaner, and not populate other regions with code > that should belong to memory-hotplug. > > The helpers are named: > > del_device_memory > add_device_memory > > The idea is that add_device_memory will be in charge of: > > a) call either arch_add_memory() or add_pages(), depending on whether > we want a linear mapping > b) online the memory sections that correspond to the pfn range > c) call move_pfn_range_to_zone() being zone ZONE_DEVICE to > expand zone/pgdat spanned pages and initialize its pages > > del_device_memory, on the other hand, will be in charge of: > > a) offline the memory sections that correspond to the pfn range > b) call shrink_zone_pgdat_pages(), which shrinks node/zone spanned pages. > c) call either arch_remove_memory() or __remove_pages(), depending on > whether we need to tear down the linear mapping or not > > The reason behind step b) from add_device_memory() and step a) > from del_device_memory is that now find_smallest/biggest_section_pfn > will have to check for online sections, and not for valid sections as > they used to do, because we call offline_mem_sections() in > offline_pages(). > > In order to split up better the patches and ease the review, > this patch will only make a) case work for add_device_memory(), > and case c) for del_device_memory. > > The other cases will be added in the next patch. > > These two functions have to be called from devm/HMM code: > > dd_device_memory: > - devm_memremap_pages() > - hmm_devmem_pages_create() > > del_device_memory: > - hmm_devmem_release > - devm_memremap_pages_release > > One thing I do not know is whether we can move kasan calls out of the > hotplug lock or not. > If we can, we could move the hotplug lock within add/del_device_memory(). > > Signed-off-by: Oscar Salvador <osalvador@suse.de> > --- > include/linux/memory_hotplug.h | 11 +++++++++++ > kernel/memremap.c | 11 ++++------- > mm/hmm.c | 33 +++++++++++++++++---------------- > mm/memory_hotplug.c | 41 +++++++++++++++++++++++++++++++++++++++++ > 4 files changed, 73 insertions(+), 23 deletions(-) This collides with the refactoring of hmm, to be done in terms of devm_memremap_pages(). I'd rather not introduce another common function *beneath* hmm and devm_memremap_pages() and rather make devm_memremap_pages() the common function. I plan to resubmit that cleanup after Plumbers. So, unless I'm misunderstanding some other benefit a nak from me on this patch as it stands currently.
> > This collides with the refactoring of hmm, to be done in terms of > devm_memremap_pages(). I'd rather not introduce another common > function *beneath* hmm and devm_memremap_pages() and rather make > devm_memremap_pages() the common function. > > I plan to resubmit that cleanup after Plumbers. So, unless I'm > misunderstanding some other benefit a nak from me on this patch as it > stands currently. > Ok, Dan, I will wait for your new refactoring series before continuing reviewing this series. Thank you, Pasha
> This collides with the refactoring of hmm, to be done in terms of > devm_memremap_pages(). I'd rather not introduce another common > function *beneath* hmm and devm_memremap_pages() and rather make > devm_memremap_pages() the common function. Hi Dan, That is true. Previous version of this patchet was based on yours (hmm-refactor), but then I decided to not base it here due to lack of feedback. But if devm_memremap_pages() is going to be the main/common function, I am totally fine to put the memory-hotplug specific there. > I plan to resubmit that cleanup after Plumbers. So, unless I'm > misunderstanding some other benefit a nak from me on this patch as it > stands currently. Great, then I will wait for your new patchset, and then I will base this one on that. Thanks Oscar Salvador
On Mon, 2018-11-12 at 21:28 +0000, Pavel Tatashin wrote: > > > > This collides with the refactoring of hmm, to be done in terms of > > devm_memremap_pages(). I'd rather not introduce another common > > function *beneath* hmm and devm_memremap_pages() and rather make > > devm_memremap_pages() the common function. > > > > I plan to resubmit that cleanup after Plumbers. So, unless I'm > > misunderstanding some other benefit a nak from me on this patch as > > it > > stands currently. > > > > Ok, Dan, I will wait for your new refactoring series before > continuing > reviewing this series. Hi Pavel, thanks for reviewing the other patches. You could still check patch4 and patch5, as they are not strictly related to this one. (Not asking for your Reviewed-by, but I would still like you to check them) I could use your eyes there if you have time ;-) Thanks Oscar Salvador
diff --git a/include/linux/memory_hotplug.h b/include/linux/memory_hotplug.h index 786cdfc9a974..cf014d5edbb2 100644 --- a/include/linux/memory_hotplug.h +++ b/include/linux/memory_hotplug.h @@ -111,8 +111,19 @@ extern int arch_remove_memory(int nid, u64 start, u64 size, struct vmem_altmap *altmap); extern int __remove_pages(struct zone *zone, unsigned long start_pfn, unsigned long nr_pages, struct vmem_altmap *altmap); + +#ifdef CONFIG_ZONE_DEVICE +extern int del_device_memory(int nid, unsigned long start, unsigned long size, + struct vmem_altmap *altmap, bool private_mem); +#endif + #endif /* CONFIG_MEMORY_HOTREMOVE */ +#ifdef CONFIG_ZONE_DEVICE +extern int add_device_memory(int nid, unsigned long start, unsigned long size, + struct vmem_altmap *altmap, bool private_mem); +#endif + /* reasonably generic interface to expand the physical pages */ extern int __add_pages(int nid, unsigned long start_pfn, unsigned long nr_pages, struct vmem_altmap *altmap, bool want_memblock); diff --git a/kernel/memremap.c b/kernel/memremap.c index c95df6ed2d4a..b86bba8713b9 100644 --- a/kernel/memremap.c +++ b/kernel/memremap.c @@ -86,6 +86,8 @@ static void devm_memremap_pages_release(void *data) struct device *dev = pgmap->dev; struct resource *res = &pgmap->res; resource_size_t align_start, align_size; + struct vmem_altmap *altmap = pgmap->altmap_valid ? + &pgmap->altmap : NULL; unsigned long pfn; int nid; @@ -104,8 +106,7 @@ static void devm_memremap_pages_release(void *data) nid = dev_to_node(dev); mem_hotplug_begin(); - arch_remove_memory(nid, align_start, align_size, pgmap->altmap_valid ? - &pgmap->altmap : NULL); + del_device_memory(nid, align_start, align_size, altmap, true); kasan_remove_zero_shadow(__va(align_start), align_size); mem_hotplug_done(); @@ -204,11 +205,7 @@ void *devm_memremap_pages(struct device *dev, struct dev_pagemap *pgmap) goto err_kasan; } - error = arch_add_memory(nid, align_start, align_size, altmap, false); - if (!error) - move_pfn_range_to_zone(&NODE_DATA(nid)->node_zones[ZONE_DEVICE], - align_start >> PAGE_SHIFT, - align_size >> PAGE_SHIFT, altmap); + error = add_device_memory(nid, align_start, align_size, altmap, true); mem_hotplug_done(); if (error) goto err_add_memory; diff --git a/mm/hmm.c b/mm/hmm.c index 42d79bcc8aab..d3e52ae71bd9 100644 --- a/mm/hmm.c +++ b/mm/hmm.c @@ -996,6 +996,7 @@ static void hmm_devmem_release(struct device *dev, void *data) struct zone *zone; struct page *page; int nid; + bool mapping; if (percpu_ref_tryget_live(&devmem->ref)) { dev_WARN(dev, "%s: page mapping is still live!\n", __func__); @@ -1010,12 +1011,15 @@ static void hmm_devmem_release(struct device *dev, void *data) zone = page_zone(page); nid = zone->zone_pgdat->node_id; - mem_hotplug_begin(); if (resource->desc == IORES_DESC_DEVICE_PRIVATE_MEMORY) - __remove_pages(zone, start_pfn, npages, NULL); + mapping = false; else - arch_remove_memory(nid, start_pfn << PAGE_SHIFT, - npages << PAGE_SHIFT, NULL); + mapping = true; + + mem_hotplug_begin(); + del_device_memory(nid, start_pfn << PAGE_SHIFT, npages << PAGE_SHIFT, + NULL, + mapping); mem_hotplug_done(); hmm_devmem_radix_release(resource); @@ -1026,6 +1030,7 @@ static int hmm_devmem_pages_create(struct hmm_devmem *devmem) resource_size_t key, align_start, align_size, align_end; struct device *device = devmem->device; int ret, nid, is_ram; + bool mapping; align_start = devmem->resource->start & ~(PA_SECTION_SIZE - 1); align_size = ALIGN(devmem->resource->start + @@ -1084,7 +1089,6 @@ static int hmm_devmem_pages_create(struct hmm_devmem *devmem) if (nid < 0) nid = numa_mem_id(); - mem_hotplug_begin(); /* * For device private memory we call add_pages() as we only need to * allocate and initialize struct page for the device memory. More- @@ -1096,20 +1100,17 @@ static int hmm_devmem_pages_create(struct hmm_devmem *devmem) * want the linear mapping and thus use arch_add_memory(). */ if (devmem->pagemap.type == MEMORY_DEVICE_PUBLIC) - ret = arch_add_memory(nid, align_start, align_size, NULL, - false); + mapping = true; else - ret = add_pages(nid, align_start >> PAGE_SHIFT, - align_size >> PAGE_SHIFT, NULL, false); - if (ret) { - mem_hotplug_done(); - goto error_add_memory; - } - move_pfn_range_to_zone(&NODE_DATA(nid)->node_zones[ZONE_DEVICE], - align_start >> PAGE_SHIFT, - align_size >> PAGE_SHIFT, NULL); + mapping = false; + + mem_hotplug_begin(); + ret = add_device_memory(nid, align_start, align_size, NULL, mapping); mem_hotplug_done(); + if (ret) + goto error_add_memory; + /* * Initialization of the pages has been deferred until now in order * to allow us to do the work while not holding the hotplug lock. diff --git a/mm/memory_hotplug.c b/mm/memory_hotplug.c index 33d448314b3f..5874aceb81ac 100644 --- a/mm/memory_hotplug.c +++ b/mm/memory_hotplug.c @@ -1889,4 +1889,45 @@ void remove_memory(int nid, u64 start, u64 size) unlock_device_hotplug(); } EXPORT_SYMBOL_GPL(remove_memory); + +#ifdef CONFIG_ZONE_DEVICE +int del_device_memory(int nid, unsigned long start, unsigned long size, + struct vmem_altmap *altmap, bool mapping) +{ + int ret; + unsigned long start_pfn = PHYS_PFN(start); + unsigned long nr_pages = size >> PAGE_SHIFT; + struct zone *zone = page_zone(pfn_to_page(pfn)); + + if (mapping) + ret = arch_remove_memory(nid, start, size, altmap); + else + ret = __remove_pages(zone, start_pfn, nr_pages, altmap); + + return ret; +} +#endif #endif /* CONFIG_MEMORY_HOTREMOVE */ + +#ifdef CONFIG_ZONE_DEVICE +int add_device_memory(int nid, unsigned long start, unsigned long size, + struct vmem_altmap *altmap, bool mapping) +{ + int ret; + unsigned long start_pfn = PHYS_PFN(start); + unsigned long nr_pages = size >> PAGE_SHIFT; + + if (mapping) + ret = arch_add_memory(nid, start, size, altmap, false); + else + ret = add_pages(nid, start_pfn, nr_pages, altmap, false); + + if (!ret) { + struct zone *zone = &NODE_DATA(nid)->node_zones[ZONE_DEVICE]; + + move_pfn_range_to_zone(zone, start_pfn, nr_pages, altmap); + } + + return ret; +} +#endif