Message ID | 20211124191005.20783-5-joao.m.martins@oracle.com (mailing list archive) |
---|---|
State | New |
Headers | show |
Series | mm, device-dax: Introduce compound pages in devmap | expand |
On Wed, Nov 24, 2021 at 07:09:59PM +0000, Joao Martins wrote: > Add a new @vmemmap_shift property for struct dev_pagemap which specifies that a > devmap is composed of a set of compound pages of order @vmemmap_shift, instead of > base pages. When a compound page devmap is requested, all but the first > page are initialised as tail pages instead of order-0 pages. Please wrap commit log lines after 73 characters. > #define for_each_device_pfn(pfn, map, i) \ > - for (pfn = pfn_first(map, i); pfn < pfn_end(map, i); pfn = pfn_next(pfn)) > + for (pfn = pfn_first(map, i); pfn < pfn_end(map, i); pfn = pfn_next(map, pfn)) It would be nice to fix up this long line while you're at it. > static void dev_pagemap_kill(struct dev_pagemap *pgmap) > { > @@ -315,8 +315,8 @@ static int pagemap_range(struct dev_pagemap *pgmap, struct mhp_params *params, > memmap_init_zone_device(&NODE_DATA(nid)->node_zones[ZONE_DEVICE], > PHYS_PFN(range->start), > PHYS_PFN(range_len(range)), pgmap); > - percpu_ref_get_many(pgmap->ref, pfn_end(pgmap, range_id) > - - pfn_first(pgmap, range_id)); > + percpu_ref_get_many(pgmap->ref, (pfn_end(pgmap, range_id) > + - pfn_first(pgmap, range_id)) >> pgmap->vmemmap_shift); In the Linux coding style the - goes ointo the first line. But it would be really nice to clean this up with a helper ala pfn_len anyway: percpu_ref_get_many(pgmap->ref, pfn_len(pgmap, range_id) >> pgmap->vmemmap_shift);
On 11/25/21 06:11, Christoph Hellwig wrote: > On Wed, Nov 24, 2021 at 07:09:59PM +0000, Joao Martins wrote: >> Add a new @vmemmap_shift property for struct dev_pagemap which specifies that a >> devmap is composed of a set of compound pages of order @vmemmap_shift, instead of >> base pages. When a compound page devmap is requested, all but the first >> page are initialised as tail pages instead of order-0 pages. > > Please wrap commit log lines after 73 characters. > Fixed. >> #define for_each_device_pfn(pfn, map, i) \ >> - for (pfn = pfn_first(map, i); pfn < pfn_end(map, i); pfn = pfn_next(pfn)) >> + for (pfn = pfn_first(map, i); pfn < pfn_end(map, i); pfn = pfn_next(map, pfn)) > > It would be nice to fix up this long line while you're at it. > OK -- I am gonna assume that it's enough to move pfn = pfn_next(...) clause into the next line. >> static void dev_pagemap_kill(struct dev_pagemap *pgmap) >> { >> @@ -315,8 +315,8 @@ static int pagemap_range(struct dev_pagemap *pgmap, struct mhp_params *params, >> memmap_init_zone_device(&NODE_DATA(nid)->node_zones[ZONE_DEVICE], >> PHYS_PFN(range->start), >> PHYS_PFN(range_len(range)), pgmap); >> - percpu_ref_get_many(pgmap->ref, pfn_end(pgmap, range_id) >> - - pfn_first(pgmap, range_id)); >> + percpu_ref_get_many(pgmap->ref, (pfn_end(pgmap, range_id) >> + - pfn_first(pgmap, range_id)) >> pgmap->vmemmap_shift); > > In the Linux coding style the - goes ointo the first line. > > But it would be really nice to clean this up with a helper ala pfn_len > anyway: > > percpu_ref_get_many(pgmap->ref, > pfn_len(pgmap, range_id) >> pgmap->vmemmap_shift); > OK, I moved the computation to an helper. I've staged your comments (see below diff for this patch), plus wrapping the commit message to 73 columns (I've also double-checked and this one seems to be the only one making that mistake). I'll wait a couple days to follow up v7 should you have further comments in other patches. diff --git a/mm/memremap.c b/mm/memremap.c index 3afa246eb1ab..d591f3aa8884 100644 --- a/mm/memremap.c +++ b/mm/memremap.c @@ -109,6 +109,12 @@ static unsigned long pfn_next(struct dev_pagemap *pgmap, unsigned long pfn) return pfn + pgmap_vmemmap_nr(pgmap); } +static unsigned long pfn_len(struct dev_pagemap *pgmap, unsigned long range_id) +{ + return (pfn_end(pgmap, range_id) - + pfn_first(pgmap, range_id)) >> pgmap->vmemmap_shift; +} + /* * This returns true if the page is reserved by ZONE_DEVICE driver. */ @@ -130,7 +136,8 @@ bool pfn_zone_device_reserved(unsigned long pfn) } #define for_each_device_pfn(pfn, map, i) \ - for (pfn = pfn_first(map, i); pfn < pfn_end(map, i); pfn = pfn_next(map, pfn)) + for (pfn = pfn_first(map, i); pfn < pfn_end(map, i); \ + pfn = pfn_next(map, pfn)) static void dev_pagemap_kill(struct dev_pagemap *pgmap) { @@ -315,8 +322,7 @@ static int pagemap_range(struct dev_pagemap *pgmap, struct mhp_params *params, memmap_init_zone_device(&NODE_DATA(nid)->node_zones[ZONE_DEVICE], PHYS_PFN(range->start), PHYS_PFN(range_len(range)), pgmap); - percpu_ref_get_many(pgmap->ref, (pfn_end(pgmap, range_id) - - pfn_first(pgmap, range_id)) >> pgmap->vmemmap_shift); + percpu_ref_get_many(pgmap->ref, pfn_len(pgmap, range_id)); return 0; err_add_memory:
diff --git a/include/linux/memremap.h b/include/linux/memremap.h index 119f130ef8f1..aaf85bda093b 100644 --- a/include/linux/memremap.h +++ b/include/linux/memremap.h @@ -99,6 +99,11 @@ struct dev_pagemap_ops { * @done: completion for @internal_ref * @type: memory type: see MEMORY_* in memory_hotplug.h * @flags: PGMAP_* flags to specify defailed behavior + * @vmemmap_shift: structural definition of how the vmemmap page metadata + * is populated, specifically the metadata page order. + * A zero value (default) uses base pages as the vmemmap metadata + * representation. A bigger value will set up compound struct pages + * of the requested order value. * @ops: method table * @owner: an opaque pointer identifying the entity that manages this * instance. Used by various helpers to make sure that no @@ -114,6 +119,7 @@ struct dev_pagemap { struct completion done; enum memory_type type; unsigned int flags; + unsigned long vmemmap_shift; const struct dev_pagemap_ops *ops; void *owner; int nr_range; @@ -130,6 +136,11 @@ static inline struct vmem_altmap *pgmap_altmap(struct dev_pagemap *pgmap) return NULL; } +static inline unsigned long pgmap_vmemmap_nr(struct dev_pagemap *pgmap) +{ + return 1 << pgmap->vmemmap_shift; +} + #ifdef CONFIG_ZONE_DEVICE bool pfn_zone_device_reserved(unsigned long pfn); void *memremap_pages(struct dev_pagemap *pgmap, int nid); diff --git a/mm/memremap.c b/mm/memremap.c index 84de22c14567..3afa246eb1ab 100644 --- a/mm/memremap.c +++ b/mm/memremap.c @@ -102,11 +102,11 @@ static unsigned long pfn_end(struct dev_pagemap *pgmap, int range_id) return (range->start + range_len(range)) >> PAGE_SHIFT; } -static unsigned long pfn_next(unsigned long pfn) +static unsigned long pfn_next(struct dev_pagemap *pgmap, unsigned long pfn) { - if (pfn % 1024 == 0) + if (pfn % (1024 << pgmap->vmemmap_shift)) cond_resched(); - return pfn + 1; + return pfn + pgmap_vmemmap_nr(pgmap); } /* @@ -130,7 +130,7 @@ bool pfn_zone_device_reserved(unsigned long pfn) } #define for_each_device_pfn(pfn, map, i) \ - for (pfn = pfn_first(map, i); pfn < pfn_end(map, i); pfn = pfn_next(pfn)) + for (pfn = pfn_first(map, i); pfn < pfn_end(map, i); pfn = pfn_next(map, pfn)) static void dev_pagemap_kill(struct dev_pagemap *pgmap) { @@ -315,8 +315,8 @@ static int pagemap_range(struct dev_pagemap *pgmap, struct mhp_params *params, memmap_init_zone_device(&NODE_DATA(nid)->node_zones[ZONE_DEVICE], PHYS_PFN(range->start), PHYS_PFN(range_len(range)), pgmap); - percpu_ref_get_many(pgmap->ref, pfn_end(pgmap, range_id) - - pfn_first(pgmap, range_id)); + percpu_ref_get_many(pgmap->ref, (pfn_end(pgmap, range_id) + - pfn_first(pgmap, range_id)) >> pgmap->vmemmap_shift); return 0; err_add_memory: diff --git a/mm/page_alloc.c b/mm/page_alloc.c index f7f33c83222f..ea537839816e 100644 --- a/mm/page_alloc.c +++ b/mm/page_alloc.c @@ -6605,6 +6605,35 @@ static void __ref __init_zone_device_page(struct page *page, unsigned long pfn, } } +static void __ref memmap_init_compound(struct page *head, + unsigned long head_pfn, + unsigned long zone_idx, int nid, + struct dev_pagemap *pgmap, + unsigned long nr_pages) +{ + unsigned long pfn, end_pfn = head_pfn + nr_pages; + unsigned int order = pgmap->vmemmap_shift; + + __SetPageHead(head); + for (pfn = head_pfn + 1; pfn < end_pfn; pfn++) { + struct page *page = pfn_to_page(pfn); + + __init_zone_device_page(page, pfn, zone_idx, nid, pgmap); + prep_compound_tail(head, pfn - head_pfn); + set_page_count(page, 0); + + /* + * The first tail page stores compound_mapcount_ptr() and + * compound_order() and the second tail page stores + * compound_pincount_ptr(). Call prep_compound_head() after + * the first and second tail pages have been initialized to + * not have the data overwritten. + */ + if (pfn == head_pfn + 2) + prep_compound_head(head, order); + } +} + void __ref memmap_init_zone_device(struct zone *zone, unsigned long start_pfn, unsigned long nr_pages, @@ -6613,6 +6642,7 @@ void __ref memmap_init_zone_device(struct zone *zone, unsigned long pfn, end_pfn = start_pfn + nr_pages; struct pglist_data *pgdat = zone->zone_pgdat; struct vmem_altmap *altmap = pgmap_altmap(pgmap); + unsigned int pfns_per_compound = pgmap_vmemmap_nr(pgmap); unsigned long zone_idx = zone_idx(zone); unsigned long start = jiffies; int nid = pgdat->node_id; @@ -6630,10 +6660,16 @@ void __ref memmap_init_zone_device(struct zone *zone, nr_pages = end_pfn - start_pfn; } - for (pfn = start_pfn; pfn < end_pfn; pfn++) { + for (pfn = start_pfn; pfn < end_pfn; pfn += pfns_per_compound) { struct page *page = pfn_to_page(pfn); __init_zone_device_page(page, pfn, zone_idx, nid, pgmap); + + if (pfns_per_compound == 1) + continue; + + memmap_init_compound(page, pfn, zone_idx, nid, pgmap, + pfns_per_compound); } pr_info("%s initialised %lu pages in %ums\n", __func__,