Message ID | 20190507040609.21746-3-willy@infradead.org (mailing list archive) |
---|---|
State | New, archived |
Headers | show |
Series | Remove 'order' argument from many mm functions | expand |
On Mon, May 06, 2019 at 09:06:00PM -0700, Matthew Wilcox wrote: > From: "Matthew Wilcox (Oracle)" <willy@infradead.org> > > Save marshalling an extra argument in all the callers at the expense of > using five bits of the GFP flags. We still have three GFP bits remaining > after doing this (and we can release one more by reallocating NORETRY, > RETRY_MAYFAIL and NOFAIL). > > Signed-off-by: Matthew Wilcox (Oracle) <willy@infradead.org> > --- > arch/x86/events/intel/ds.c | 4 +-- > arch/x86/kvm/vmx/vmx.c | 4 +-- > include/linux/gfp.h | 51 ++++++++++++++++++++++---------------- > include/linux/migrate.h | 2 +- > mm/filemap.c | 2 +- > mm/gup.c | 4 +-- > mm/hugetlb.c | 5 ++-- > mm/khugepaged.c | 2 +- > mm/mempolicy.c | 30 +++++++++++----------- > mm/migrate.c | 2 +- > mm/page_alloc.c | 4 +-- > mm/shmem.c | 5 ++-- > mm/slub.c | 2 +- > 13 files changed, 63 insertions(+), 54 deletions(-) > > diff --git a/arch/x86/events/intel/ds.c b/arch/x86/events/intel/ds.c > index 10c99ce1fead..82fee9845b87 100644 > --- a/arch/x86/events/intel/ds.c > +++ b/arch/x86/events/intel/ds.c > @@ -315,13 +315,13 @@ static void ds_clear_cea(void *cea, size_t size) > preempt_enable(); > } > > -static void *dsalloc_pages(size_t size, gfp_t flags, int cpu) > +static void *dsalloc_pages(size_t size, gfp_t gfp, int cpu) > { > unsigned int order = get_order(size); > int node = cpu_to_node(cpu); > struct page *page; > > - page = __alloc_pages_node(node, flags | __GFP_ZERO, order); > + page = __alloc_pages_node(node, gfp | __GFP_ZERO | __GFP_ORDER(order)); Order was derived from size in this function. Is this truely equal to the old function? At a minimum if I am wrong the get_order call above should be removed, no? Ira > return page ? 
page_address(page) : NULL; > } > > diff --git a/arch/x86/kvm/vmx/vmx.c b/arch/x86/kvm/vmx/vmx.c > index ab432a930ae8..323a0f6ffe13 100644 > --- a/arch/x86/kvm/vmx/vmx.c > +++ b/arch/x86/kvm/vmx/vmx.c > @@ -2380,13 +2380,13 @@ static __init int setup_vmcs_config(struct vmcs_config *vmcs_conf, > return 0; > } > > -struct vmcs *alloc_vmcs_cpu(bool shadow, int cpu, gfp_t flags) > +struct vmcs *alloc_vmcs_cpu(bool shadow, int cpu, gfp_t gfp) > { > int node = cpu_to_node(cpu); > struct page *pages; > struct vmcs *vmcs; > > - pages = __alloc_pages_node(node, flags, vmcs_config.order); > + pages = __alloc_pages_node(node, gfp | __GFP_ORDER(vmcs_config.order)); > if (!pages) > return NULL; > vmcs = page_address(pages); > diff --git a/include/linux/gfp.h b/include/linux/gfp.h > index fb07b503dc45..e7845c2510db 100644 > --- a/include/linux/gfp.h > +++ b/include/linux/gfp.h > @@ -219,6 +219,18 @@ struct vm_area_struct; > /* Room for N __GFP_FOO bits */ > #define __GFP_BITS_SHIFT (23 + IS_ENABLED(CONFIG_LOCKDEP)) > #define __GFP_BITS_MASK ((__force gfp_t)((1 << __GFP_BITS_SHIFT) - 1)) > +#define __GFP_ORDER(order) ((__force gfp_t)(order << __GFP_BITS_SHIFT)) > +#define __GFP_ORDER_PMD __GFP_ORDER(PMD_SHIFT - PAGE_SHIFT) > +#define __GFP_ORDER_PUD __GFP_ORDER(PUD_SHIFT - PAGE_SHIFT) > + > +/* > + * Extract the order from a GFP bitmask. > + * Must be the top bits to avoid an AND operation. Don't let > + * __GFP_BITS_SHIFT get over 27, or we won't be able to encode orders > + * above 15 (some architectures allow configuring MAX_ORDER up to 64, > + * but I doubt larger than 31 are ever used). 
> + */ > +#define gfp_order(gfp) (((__force unsigned int)gfp) >> __GFP_BITS_SHIFT) > > /** > * DOC: Useful GFP flag combinations > @@ -464,26 +476,23 @@ static inline void arch_alloc_page(struct page *page, int order) { } > #endif > > struct page * > -__alloc_pages_nodemask(gfp_t gfp_mask, unsigned int order, int preferred_nid, > - nodemask_t *nodemask); > +__alloc_pages_nodemask(gfp_t gfp_mask, int preferred_nid, nodemask_t *nodemask); > > -static inline struct page * > -__alloc_pages(gfp_t gfp_mask, unsigned int order, int preferred_nid) > +static inline struct page *__alloc_pages(gfp_t gfp_mask, int preferred_nid) > { > - return __alloc_pages_nodemask(gfp_mask, order, preferred_nid, NULL); > + return __alloc_pages_nodemask(gfp_mask, preferred_nid, NULL); > } > > /* > * Allocate pages, preferring the node given as nid. The node must be valid and > * online. For more general interface, see alloc_pages_node(). > */ > -static inline struct page * > -__alloc_pages_node(int nid, gfp_t gfp_mask, unsigned int order) > +static inline struct page *__alloc_pages_node(int nid, gfp_t gfp) > { > VM_BUG_ON(nid < 0 || nid >= MAX_NUMNODES); > - VM_WARN_ON((gfp_mask & __GFP_THISNODE) && !node_online(nid)); > + VM_WARN_ON((gfp & __GFP_THISNODE) && !node_online(nid)); > > - return __alloc_pages(gfp_mask, order, nid); > + return __alloc_pages(gfp, nid); > } > > /* > @@ -497,35 +506,35 @@ static inline struct page *alloc_pages_node(int nid, gfp_t gfp_mask, > if (nid == NUMA_NO_NODE) > nid = numa_mem_id(); > > - return __alloc_pages_node(nid, gfp_mask, order); > + return __alloc_pages_node(nid, gfp_mask | __GFP_ORDER(order)); > } > > #ifdef CONFIG_NUMA > -extern struct page *alloc_pages_current(gfp_t gfp_mask, unsigned order); > +extern struct page *alloc_pages_current(gfp_t gfp_mask); > > static inline struct page * > alloc_pages(gfp_t gfp_mask, unsigned int order) > { > - return alloc_pages_current(gfp_mask, order); > + return alloc_pages_current(gfp_mask | __GFP_ORDER(order)); > } 
> -extern struct page *alloc_pages_vma(gfp_t gfp_mask, int order, > - struct vm_area_struct *vma, unsigned long addr, > - int node, bool hugepage); > +extern struct page *alloc_pages_vma(gfp_t gfp_mask, struct vm_area_struct *vma, > + unsigned long addr, int node, bool hugepage); > #define alloc_hugepage_vma(gfp_mask, vma, addr, order) \ > - alloc_pages_vma(gfp_mask, order, vma, addr, numa_node_id(), true) > + alloc_pages_vma(gfp_mask | __GFP_ORDER(order), vma, addr, \ > + numa_node_id(), true) > #else > #define alloc_pages(gfp_mask, order) \ > - alloc_pages_node(numa_node_id(), gfp_mask, order) > -#define alloc_pages_vma(gfp_mask, order, vma, addr, node, false)\ > - alloc_pages(gfp_mask, order) > + alloc_pages_node(numa_node_id(), gfp_mask, order) > +#define alloc_pages_vma(gfp_mask, vma, addr, node, false) \ > + alloc_pages(gfp_mask, 0) > #define alloc_hugepage_vma(gfp_mask, vma, addr, order) \ > alloc_pages(gfp_mask, order) > #endif > #define alloc_page(gfp_mask) alloc_pages(gfp_mask, 0) > #define alloc_page_vma(gfp_mask, vma, addr) \ > - alloc_pages_vma(gfp_mask, 0, vma, addr, numa_node_id(), false) > + alloc_pages_vma(gfp_mask, vma, addr, numa_node_id(), false) > #define alloc_page_vma_node(gfp_mask, vma, addr, node) \ > - alloc_pages_vma(gfp_mask, 0, vma, addr, node, false) > + alloc_pages_vma(gfp_mask, vma, addr, node, false) > > extern unsigned long __get_free_pages(gfp_t gfp_mask, unsigned int order); > extern unsigned long get_zeroed_page(gfp_t gfp_mask); > diff --git a/include/linux/migrate.h b/include/linux/migrate.h > index e13d9bf2f9a5..ba4385144cc9 100644 > --- a/include/linux/migrate.h > +++ b/include/linux/migrate.h > @@ -50,7 +50,7 @@ static inline struct page *new_page_nodemask(struct page *page, > if (PageHighMem(page) || (zone_idx(page_zone(page)) == ZONE_MOVABLE)) > gfp_mask |= __GFP_HIGHMEM; > > - new_page = __alloc_pages_nodemask(gfp_mask, order, > + new_page = __alloc_pages_nodemask(gfp_mask | __GFP_ORDER(order), > preferred_nid, nodemask); 
> > if (new_page && PageTransHuge(new_page)) > diff --git a/mm/filemap.c b/mm/filemap.c > index 3ad18fa56057..b7b0841312c9 100644 > --- a/mm/filemap.c > +++ b/mm/filemap.c > @@ -945,7 +945,7 @@ struct page *__page_cache_alloc(gfp_t gfp) > do { > cpuset_mems_cookie = read_mems_allowed_begin(); > n = cpuset_mem_spread_node(); > - page = __alloc_pages_node(n, gfp, 0); > + page = __alloc_pages_node(n, gfp); > } while (!page && read_mems_allowed_retry(cpuset_mems_cookie)); > > return page; > diff --git a/mm/gup.c b/mm/gup.c > index 294e87ae5b9a..7b06962a4630 100644 > --- a/mm/gup.c > +++ b/mm/gup.c > @@ -1306,14 +1306,14 @@ static struct page *new_non_cma_page(struct page *page, unsigned long private) > * CMA area again. > */ > thp_gfpmask &= ~__GFP_MOVABLE; > - thp = __alloc_pages_node(nid, thp_gfpmask, HPAGE_PMD_ORDER); > + thp = __alloc_pages_node(nid, thp_gfpmask | __GFP_PMD_ORDER); > if (!thp) > return NULL; > prep_transhuge_page(thp); > return thp; > } > > - return __alloc_pages_node(nid, gfp_mask, 0); > + return __alloc_pages_node(nid, gfp_mask); > } > > static long check_and_migrate_cma_pages(struct task_struct *tsk, > diff --git a/mm/hugetlb.c b/mm/hugetlb.c > index 109f5de82910..f3f0f2902a52 100644 > --- a/mm/hugetlb.c > +++ b/mm/hugetlb.c > @@ -1401,10 +1401,11 @@ static struct page *alloc_buddy_huge_page(struct hstate *h, > int order = huge_page_order(h); > struct page *page; > > - gfp_mask |= __GFP_COMP|__GFP_RETRY_MAYFAIL|__GFP_NOWARN; > + gfp_mask |= __GFP_COMP | __GFP_RETRY_MAYFAIL | __GFP_NOWARN | > + __GFP_ORDER(order); > if (nid == NUMA_NO_NODE) > nid = numa_mem_id(); > - page = __alloc_pages_nodemask(gfp_mask, order, nid, nmask); > + page = __alloc_pages_nodemask(gfp_mask, nid, nmask); > if (page) > __count_vm_event(HTLB_BUDDY_PGALLOC); > else > diff --git a/mm/khugepaged.c b/mm/khugepaged.c > index a335f7c1fac4..3d9267394881 100644 > --- a/mm/khugepaged.c > +++ b/mm/khugepaged.c > @@ -770,7 +770,7 @@ khugepaged_alloc_page(struct page **hpage, gfp_t 
gfp, int node) > { > VM_BUG_ON_PAGE(*hpage, *hpage); > > - *hpage = __alloc_pages_node(node, gfp, HPAGE_PMD_ORDER); > + *hpage = __alloc_pages_node(node, gfp | __GFP_PMD_ORDER); > if (unlikely(!*hpage)) { > count_vm_event(THP_COLLAPSE_ALLOC_FAILED); > *hpage = ERR_PTR(-ENOMEM); > diff --git a/mm/mempolicy.c b/mm/mempolicy.c > index 2219e747df49..bad60476d5ad 100644 > --- a/mm/mempolicy.c > +++ b/mm/mempolicy.c > @@ -975,7 +975,7 @@ struct page *alloc_new_node_page(struct page *page, unsigned long node) > return thp; > } else > return __alloc_pages_node(node, GFP_HIGHUSER_MOVABLE | > - __GFP_THISNODE, 0); > + __GFP_THISNODE); > } > > /* > @@ -2006,12 +2006,11 @@ bool mempolicy_nodemask_intersects(struct task_struct *tsk, > > /* Allocate a page in interleaved policy. > Own path because it needs to do special accounting. */ > -static struct page *alloc_page_interleave(gfp_t gfp, unsigned order, > - unsigned nid) > +static struct page *alloc_page_interleave(gfp_t gfp, unsigned nid) > { > struct page *page; > > - page = __alloc_pages(gfp, order, nid); > + page = __alloc_pages(gfp, nid); > /* skip NUMA_INTERLEAVE_HIT counter update if numa stats is disabled */ > if (!static_branch_likely(&vm_numa_stat_key)) > return page; > @@ -2033,7 +2032,6 @@ static struct page *alloc_page_interleave(gfp_t gfp, unsigned order, > * %GFP_FS allocation should not call back into a file system. > * %GFP_ATOMIC don't sleep. > * > - * @order:Order of the GFP allocation. > * @vma: Pointer to VMA or NULL if not available. > * @addr: Virtual Address of the allocation. Must be inside the VMA. > * @node: Which node to prefer for allocation (modulo policy). > @@ -2047,8 +2045,8 @@ static struct page *alloc_page_interleave(gfp_t gfp, unsigned order, > * NULL when no page can be allocated. 
> */ > struct page * > -alloc_pages_vma(gfp_t gfp, int order, struct vm_area_struct *vma, > - unsigned long addr, int node, bool hugepage) > +alloc_pages_vma(gfp_t gfp, struct vm_area_struct *vma, unsigned long addr, > + int node, bool hugepage) > { > struct mempolicy *pol; > struct page *page; > @@ -2060,9 +2058,10 @@ alloc_pages_vma(gfp_t gfp, int order, struct vm_area_struct *vma, > if (pol->mode == MPOL_INTERLEAVE) { > unsigned nid; > > - nid = interleave_nid(pol, vma, addr, PAGE_SHIFT + order); > + nid = interleave_nid(pol, vma, addr, > + PAGE_SHIFT + gfp_order(gfp)); > mpol_cond_put(pol); > - page = alloc_page_interleave(gfp, order, nid); > + page = alloc_page_interleave(gfp, nid); > goto out; > } > > @@ -2086,14 +2085,14 @@ alloc_pages_vma(gfp_t gfp, int order, struct vm_area_struct *vma, > if (!nmask || node_isset(hpage_node, *nmask)) { > mpol_cond_put(pol); > page = __alloc_pages_node(hpage_node, > - gfp | __GFP_THISNODE, order); > + gfp | __GFP_THISNODE); > goto out; > } > } > > nmask = policy_nodemask(gfp, pol); > preferred_nid = policy_node(gfp, pol, node); > - page = __alloc_pages_nodemask(gfp, order, preferred_nid, nmask); > + page = __alloc_pages_nodemask(gfp, preferred_nid, nmask); > mpol_cond_put(pol); > out: > return page; > @@ -2108,13 +2107,12 @@ alloc_pages_vma(gfp_t gfp, int order, struct vm_area_struct *vma, > * %GFP_HIGHMEM highmem allocation, > * %GFP_FS don't call back into a file system. > * %GFP_ATOMIC don't sleep. > - * @order: Power of two of allocation size in pages. 0 is a single page. > * > * Allocate a page from the kernel page pool. When not in > - * interrupt context and apply the current process NUMA policy. > + * interrupt context apply the current process NUMA policy. > * Returns NULL when no page can be allocated. 
> */ > -struct page *alloc_pages_current(gfp_t gfp, unsigned order) > +struct page *alloc_pages_current(gfp_t gfp) > { > struct mempolicy *pol = &default_policy; > struct page *page; > @@ -2127,9 +2125,9 @@ struct page *alloc_pages_current(gfp_t gfp, unsigned order) > * nor system default_policy > */ > if (pol->mode == MPOL_INTERLEAVE) > - page = alloc_page_interleave(gfp, order, interleave_nodes(pol)); > + page = alloc_page_interleave(gfp, interleave_nodes(pol)); > else > - page = __alloc_pages_nodemask(gfp, order, > + page = __alloc_pages_nodemask(gfp, > policy_node(gfp, pol, numa_node_id()), > policy_nodemask(gfp, pol)); > > diff --git a/mm/migrate.c b/mm/migrate.c > index f2ecc2855a12..acb479132398 100644 > --- a/mm/migrate.c > +++ b/mm/migrate.c > @@ -1884,7 +1884,7 @@ static struct page *alloc_misplaced_dst_page(struct page *page, > (GFP_HIGHUSER_MOVABLE | > __GFP_THISNODE | __GFP_NOMEMALLOC | > __GFP_NORETRY | __GFP_NOWARN) & > - ~__GFP_RECLAIM, 0); > + ~__GFP_RECLAIM); > > return newpage; > } > diff --git a/mm/page_alloc.c b/mm/page_alloc.c > index deea16489e2b..13191fe2f19d 100644 > --- a/mm/page_alloc.c > +++ b/mm/page_alloc.c > @@ -4610,11 +4610,11 @@ static inline void finalise_ac(gfp_t gfp_mask, struct alloc_context *ac) > * This is the 'heart' of the zoned buddy allocator. 
> */ > struct page * > -__alloc_pages_nodemask(gfp_t gfp_mask, unsigned int order, int preferred_nid, > - nodemask_t *nodemask) > +__alloc_pages_nodemask(gfp_t gfp_mask, int preferred_nid, nodemask_t *nodemask) > { > struct page *page; > unsigned int alloc_flags = ALLOC_WMARK_LOW; > + int order = gfp_order(gfp_mask); > gfp_t alloc_mask; /* The gfp_t that was actually used for allocation */ > struct alloc_context ac = { }; > > diff --git a/mm/shmem.c b/mm/shmem.c > index a1e9f6194138..445e76e5c0c2 100644 > --- a/mm/shmem.c > +++ b/mm/shmem.c > @@ -1463,8 +1463,9 @@ static struct page *shmem_alloc_hugepage(gfp_t gfp, > return NULL; > > shmem_pseudo_vma_init(&pvma, info, hindex); > - page = alloc_pages_vma(gfp | __GFP_COMP | __GFP_NORETRY | __GFP_NOWARN, > - HPAGE_PMD_ORDER, &pvma, 0, numa_node_id(), true); > + page = alloc_pages_vma(gfp | __GFP_COMP | __GFP_NORETRY | > + __GFP_NOWARN | __GFP_PMD_ORDER, > + &pvma, 0, numa_node_id(), true); > shmem_pseudo_vma_destroy(&pvma); > if (page) > prep_transhuge_page(page); > diff --git a/mm/slub.c b/mm/slub.c > index a34fbe1f6ede..7504fa3f844b 100644 > --- a/mm/slub.c > +++ b/mm/slub.c > @@ -1497,7 +1497,7 @@ static inline struct page *alloc_slab_page(struct kmem_cache *s, > if (node == NUMA_NO_NODE) > page = alloc_pages(flags, order); > else > - page = __alloc_pages_node(node, flags, order); > + page = __alloc_pages_node(node, flags | __GFP_ORDER(order)); > > if (page && memcg_charge_slab(page, flags, order, s)) { > __free_pages(page, order); > -- > 2.20.1 >
On Mon, May 06, 2019 at 09:06:00PM -0700, Matthew Wilcox wrote: > +/* > + * Extract the order from a GFP bitmask. > + * Must be the top bits to avoid an AND operation. Don't let > + * __GFP_BITS_SHIFT get over 27 […] Should we have BUILD_BUG_ON() for this?
On Wed, May 08, 2019 at 06:50:16PM -0700, Ira Weiny wrote: > On Mon, May 06, 2019 at 09:06:00PM -0700, Matthew Wilcox wrote: > > Save marshalling an extra argument in all the callers at the expense of > > using five bits of the GFP flags. We still have three GFP bits remaining > > after doing this (and we can release one more by reallocating NORETRY, > > RETRY_MAYFAIL and NOFAIL). > > -static void *dsalloc_pages(size_t size, gfp_t flags, int cpu) > > +static void *dsalloc_pages(size_t size, gfp_t gfp, int cpu) > > { > > unsigned int order = get_order(size); > > int node = cpu_to_node(cpu); > > struct page *page; > > > > - page = __alloc_pages_node(node, flags | __GFP_ZERO, order); > > + page = __alloc_pages_node(node, gfp | __GFP_ZERO | __GFP_ORDER(order)); > > Order was derived from size in this function. Is this truely equal to the old > function? > > At a minimum if I am wrong the get_order call above should be removed, no? I think you have a misunderstanding, but I'm not sure what it is. Before this patch, we pass 'order' (a small integer generally less than 10) in the bottom few bits of a parameter called 'order'. After this patch, we pass the order in some of the high bits of the GFP flags. So we can't remove the call to get_order() because that's what calculates 'order' from 'size'. > > +#define __GFP_ORDER(order) ((__force gfp_t)(order << __GFP_BITS_SHIFT)) > > +#define __GFP_ORDER_PMD __GFP_ORDER(PMD_SHIFT - PAGE_SHIFT) > > +#define __GFP_ORDER_PUD __GFP_ORDER(PUD_SHIFT - PAGE_SHIFT) > > + > > +/* > > + * Extract the order from a GFP bitmask. > > + * Must be the top bits to avoid an AND operation. Don't let > > + * __GFP_BITS_SHIFT get over 27, or we won't be able to encode orders > > + * above 15 (some architectures allow configuring MAX_ORDER up to 64, > > + * but I doubt larger than 31 are ever used). > > + */ > > +#define gfp_order(gfp) (((__force unsigned int)gfp) >> __GFP_BITS_SHIFT)
> > On Wed, May 08, 2019 at 06:50:16PM -0700, Ira Weiny wrote: > > On Mon, May 06, 2019 at 09:06:00PM -0700, Matthew Wilcox wrote: > > > Save marshalling an extra argument in all the callers at the expense > > > of using five bits of the GFP flags. We still have three GFP bits > > > remaining after doing this (and we can release one more by > > > reallocating NORETRY, RETRY_MAYFAIL and NOFAIL). > > > > -static void *dsalloc_pages(size_t size, gfp_t flags, int cpu) > > > +static void *dsalloc_pages(size_t size, gfp_t gfp, int cpu) > > > { > > > unsigned int order = get_order(size); > > > int node = cpu_to_node(cpu); > > > struct page *page; > > > > > > - page = __alloc_pages_node(node, flags | __GFP_ZERO, order); > > > + page = __alloc_pages_node(node, gfp | __GFP_ZERO | > > > +__GFP_ORDER(order)); > > > > Order was derived from size in this function. Is this truely equal to > > the old function? > > > > At a minimum if I am wrong the get_order call above should be removed, > no? > > I think you have a misunderstanding, but I'm not sure what it is. > > Before this patch, we pass 'order' (a small integer generally less than 10) in > the bottom few bits of a parameter called 'order'. After this patch, we pass > the order in some of the high bits of the GFP flags. So we can't remove the > call to get_order() because that's what calculates 'order' from 'size'. Ah I see it now. Sorry was thinking the wrong thing when I saw that line. Yep you are correct, Ira > > > > +#define __GFP_ORDER(order) ((__force gfp_t)(order << > __GFP_BITS_SHIFT)) > > > +#define __GFP_ORDER_PMD __GFP_ORDER(PMD_SHIFT - > PAGE_SHIFT) > > > +#define __GFP_ORDER_PUD __GFP_ORDER(PUD_SHIFT - > PAGE_SHIFT) > > > + > > > +/* > > > + * Extract the order from a GFP bitmask. > > > + * Must be the top bits to avoid an AND operation. 
Don't let > > > + * __GFP_BITS_SHIFT get over 27, or we won't be able to encode > > > +orders > > > + * above 15 (some architectures allow configuring MAX_ORDER up to > > > +64, > > > + * but I doubt larger than 31 are ever used). > > > + */ > > > +#define gfp_order(gfp) (((__force unsigned int)gfp) >> > __GFP_BITS_SHIFT)
diff --git a/arch/x86/events/intel/ds.c b/arch/x86/events/intel/ds.c index 10c99ce1fead..82fee9845b87 100644 --- a/arch/x86/events/intel/ds.c +++ b/arch/x86/events/intel/ds.c @@ -315,13 +315,13 @@ static void ds_clear_cea(void *cea, size_t size) preempt_enable(); } -static void *dsalloc_pages(size_t size, gfp_t flags, int cpu) +static void *dsalloc_pages(size_t size, gfp_t gfp, int cpu) { unsigned int order = get_order(size); int node = cpu_to_node(cpu); struct page *page; - page = __alloc_pages_node(node, flags | __GFP_ZERO, order); + page = __alloc_pages_node(node, gfp | __GFP_ZERO | __GFP_ORDER(order)); return page ? page_address(page) : NULL; } diff --git a/arch/x86/kvm/vmx/vmx.c b/arch/x86/kvm/vmx/vmx.c index ab432a930ae8..323a0f6ffe13 100644 --- a/arch/x86/kvm/vmx/vmx.c +++ b/arch/x86/kvm/vmx/vmx.c @@ -2380,13 +2380,13 @@ static __init int setup_vmcs_config(struct vmcs_config *vmcs_conf, return 0; } -struct vmcs *alloc_vmcs_cpu(bool shadow, int cpu, gfp_t flags) +struct vmcs *alloc_vmcs_cpu(bool shadow, int cpu, gfp_t gfp) { int node = cpu_to_node(cpu); struct page *pages; struct vmcs *vmcs; - pages = __alloc_pages_node(node, flags, vmcs_config.order); + pages = __alloc_pages_node(node, gfp | __GFP_ORDER(vmcs_config.order)); if (!pages) return NULL; vmcs = page_address(pages); diff --git a/include/linux/gfp.h b/include/linux/gfp.h index fb07b503dc45..e7845c2510db 100644 --- a/include/linux/gfp.h +++ b/include/linux/gfp.h @@ -219,6 +219,18 @@ struct vm_area_struct; /* Room for N __GFP_FOO bits */ #define __GFP_BITS_SHIFT (23 + IS_ENABLED(CONFIG_LOCKDEP)) #define __GFP_BITS_MASK ((__force gfp_t)((1 << __GFP_BITS_SHIFT) - 1)) +#define __GFP_ORDER(order) ((__force gfp_t)(order << __GFP_BITS_SHIFT)) +#define __GFP_ORDER_PMD __GFP_ORDER(PMD_SHIFT - PAGE_SHIFT) +#define __GFP_ORDER_PUD __GFP_ORDER(PUD_SHIFT - PAGE_SHIFT) + +/* + * Extract the order from a GFP bitmask. + * Must be the top bits to avoid an AND operation. 
Don't let + * __GFP_BITS_SHIFT get over 27, or we won't be able to encode orders + * above 15 (some architectures allow configuring MAX_ORDER up to 64, + * but I doubt larger than 31 are ever used). + */ +#define gfp_order(gfp) (((__force unsigned int)gfp) >> __GFP_BITS_SHIFT) /** * DOC: Useful GFP flag combinations @@ -464,26 +476,23 @@ static inline void arch_alloc_page(struct page *page, int order) { } #endif struct page * -__alloc_pages_nodemask(gfp_t gfp_mask, unsigned int order, int preferred_nid, - nodemask_t *nodemask); +__alloc_pages_nodemask(gfp_t gfp_mask, int preferred_nid, nodemask_t *nodemask); -static inline struct page * -__alloc_pages(gfp_t gfp_mask, unsigned int order, int preferred_nid) +static inline struct page *__alloc_pages(gfp_t gfp_mask, int preferred_nid) { - return __alloc_pages_nodemask(gfp_mask, order, preferred_nid, NULL); + return __alloc_pages_nodemask(gfp_mask, preferred_nid, NULL); } /* * Allocate pages, preferring the node given as nid. The node must be valid and * online. For more general interface, see alloc_pages_node(). 
*/ -static inline struct page * -__alloc_pages_node(int nid, gfp_t gfp_mask, unsigned int order) +static inline struct page *__alloc_pages_node(int nid, gfp_t gfp) { VM_BUG_ON(nid < 0 || nid >= MAX_NUMNODES); - VM_WARN_ON((gfp_mask & __GFP_THISNODE) && !node_online(nid)); + VM_WARN_ON((gfp & __GFP_THISNODE) && !node_online(nid)); - return __alloc_pages(gfp_mask, order, nid); + return __alloc_pages(gfp, nid); } /* @@ -497,35 +506,35 @@ static inline struct page *alloc_pages_node(int nid, gfp_t gfp_mask, if (nid == NUMA_NO_NODE) nid = numa_mem_id(); - return __alloc_pages_node(nid, gfp_mask, order); + return __alloc_pages_node(nid, gfp_mask | __GFP_ORDER(order)); } #ifdef CONFIG_NUMA -extern struct page *alloc_pages_current(gfp_t gfp_mask, unsigned order); +extern struct page *alloc_pages_current(gfp_t gfp_mask); static inline struct page * alloc_pages(gfp_t gfp_mask, unsigned int order) { - return alloc_pages_current(gfp_mask, order); + return alloc_pages_current(gfp_mask | __GFP_ORDER(order)); } -extern struct page *alloc_pages_vma(gfp_t gfp_mask, int order, - struct vm_area_struct *vma, unsigned long addr, - int node, bool hugepage); +extern struct page *alloc_pages_vma(gfp_t gfp_mask, struct vm_area_struct *vma, + unsigned long addr, int node, bool hugepage); #define alloc_hugepage_vma(gfp_mask, vma, addr, order) \ - alloc_pages_vma(gfp_mask, order, vma, addr, numa_node_id(), true) + alloc_pages_vma(gfp_mask | __GFP_ORDER(order), vma, addr, \ + numa_node_id(), true) #else #define alloc_pages(gfp_mask, order) \ - alloc_pages_node(numa_node_id(), gfp_mask, order) -#define alloc_pages_vma(gfp_mask, order, vma, addr, node, false)\ - alloc_pages(gfp_mask, order) + alloc_pages_node(numa_node_id(), gfp_mask, order) +#define alloc_pages_vma(gfp_mask, vma, addr, node, false) \ + alloc_pages(gfp_mask, 0) #define alloc_hugepage_vma(gfp_mask, vma, addr, order) \ alloc_pages(gfp_mask, order) #endif #define alloc_page(gfp_mask) alloc_pages(gfp_mask, 0) #define 
alloc_page_vma(gfp_mask, vma, addr) \ - alloc_pages_vma(gfp_mask, 0, vma, addr, numa_node_id(), false) + alloc_pages_vma(gfp_mask, vma, addr, numa_node_id(), false) #define alloc_page_vma_node(gfp_mask, vma, addr, node) \ - alloc_pages_vma(gfp_mask, 0, vma, addr, node, false) + alloc_pages_vma(gfp_mask, vma, addr, node, false) extern unsigned long __get_free_pages(gfp_t gfp_mask, unsigned int order); extern unsigned long get_zeroed_page(gfp_t gfp_mask); diff --git a/include/linux/migrate.h b/include/linux/migrate.h index e13d9bf2f9a5..ba4385144cc9 100644 --- a/include/linux/migrate.h +++ b/include/linux/migrate.h @@ -50,7 +50,7 @@ static inline struct page *new_page_nodemask(struct page *page, if (PageHighMem(page) || (zone_idx(page_zone(page)) == ZONE_MOVABLE)) gfp_mask |= __GFP_HIGHMEM; - new_page = __alloc_pages_nodemask(gfp_mask, order, + new_page = __alloc_pages_nodemask(gfp_mask | __GFP_ORDER(order), preferred_nid, nodemask); if (new_page && PageTransHuge(new_page)) diff --git a/mm/filemap.c b/mm/filemap.c index 3ad18fa56057..b7b0841312c9 100644 --- a/mm/filemap.c +++ b/mm/filemap.c @@ -945,7 +945,7 @@ struct page *__page_cache_alloc(gfp_t gfp) do { cpuset_mems_cookie = read_mems_allowed_begin(); n = cpuset_mem_spread_node(); - page = __alloc_pages_node(n, gfp, 0); + page = __alloc_pages_node(n, gfp); } while (!page && read_mems_allowed_retry(cpuset_mems_cookie)); return page; diff --git a/mm/gup.c b/mm/gup.c index 294e87ae5b9a..7b06962a4630 100644 --- a/mm/gup.c +++ b/mm/gup.c @@ -1306,14 +1306,14 @@ static struct page *new_non_cma_page(struct page *page, unsigned long private) * CMA area again. 
*/ thp_gfpmask &= ~__GFP_MOVABLE; - thp = __alloc_pages_node(nid, thp_gfpmask, HPAGE_PMD_ORDER); + thp = __alloc_pages_node(nid, thp_gfpmask | __GFP_PMD_ORDER); if (!thp) return NULL; prep_transhuge_page(thp); return thp; } - return __alloc_pages_node(nid, gfp_mask, 0); + return __alloc_pages_node(nid, gfp_mask); } static long check_and_migrate_cma_pages(struct task_struct *tsk, diff --git a/mm/hugetlb.c b/mm/hugetlb.c index 109f5de82910..f3f0f2902a52 100644 --- a/mm/hugetlb.c +++ b/mm/hugetlb.c @@ -1401,10 +1401,11 @@ static struct page *alloc_buddy_huge_page(struct hstate *h, int order = huge_page_order(h); struct page *page; - gfp_mask |= __GFP_COMP|__GFP_RETRY_MAYFAIL|__GFP_NOWARN; + gfp_mask |= __GFP_COMP | __GFP_RETRY_MAYFAIL | __GFP_NOWARN | + __GFP_ORDER(order); if (nid == NUMA_NO_NODE) nid = numa_mem_id(); - page = __alloc_pages_nodemask(gfp_mask, order, nid, nmask); + page = __alloc_pages_nodemask(gfp_mask, nid, nmask); if (page) __count_vm_event(HTLB_BUDDY_PGALLOC); else diff --git a/mm/khugepaged.c b/mm/khugepaged.c index a335f7c1fac4..3d9267394881 100644 --- a/mm/khugepaged.c +++ b/mm/khugepaged.c @@ -770,7 +770,7 @@ khugepaged_alloc_page(struct page **hpage, gfp_t gfp, int node) { VM_BUG_ON_PAGE(*hpage, *hpage); - *hpage = __alloc_pages_node(node, gfp, HPAGE_PMD_ORDER); + *hpage = __alloc_pages_node(node, gfp | __GFP_PMD_ORDER); if (unlikely(!*hpage)) { count_vm_event(THP_COLLAPSE_ALLOC_FAILED); *hpage = ERR_PTR(-ENOMEM); diff --git a/mm/mempolicy.c b/mm/mempolicy.c index 2219e747df49..bad60476d5ad 100644 --- a/mm/mempolicy.c +++ b/mm/mempolicy.c @@ -975,7 +975,7 @@ struct page *alloc_new_node_page(struct page *page, unsigned long node) return thp; } else return __alloc_pages_node(node, GFP_HIGHUSER_MOVABLE | - __GFP_THISNODE, 0); + __GFP_THISNODE); } /* @@ -2006,12 +2006,11 @@ bool mempolicy_nodemask_intersects(struct task_struct *tsk, /* Allocate a page in interleaved policy. Own path because it needs to do special accounting. 
*/ -static struct page *alloc_page_interleave(gfp_t gfp, unsigned order, - unsigned nid) +static struct page *alloc_page_interleave(gfp_t gfp, unsigned nid) { struct page *page; - page = __alloc_pages(gfp, order, nid); + page = __alloc_pages(gfp, nid); /* skip NUMA_INTERLEAVE_HIT counter update if numa stats is disabled */ if (!static_branch_likely(&vm_numa_stat_key)) return page; @@ -2033,7 +2032,6 @@ static struct page *alloc_page_interleave(gfp_t gfp, unsigned order, * %GFP_FS allocation should not call back into a file system. * %GFP_ATOMIC don't sleep. * - * @order:Order of the GFP allocation. * @vma: Pointer to VMA or NULL if not available. * @addr: Virtual Address of the allocation. Must be inside the VMA. * @node: Which node to prefer for allocation (modulo policy). @@ -2047,8 +2045,8 @@ static struct page *alloc_page_interleave(gfp_t gfp, unsigned order, * NULL when no page can be allocated. */ struct page * -alloc_pages_vma(gfp_t gfp, int order, struct vm_area_struct *vma, - unsigned long addr, int node, bool hugepage) +alloc_pages_vma(gfp_t gfp, struct vm_area_struct *vma, unsigned long addr, + int node, bool hugepage) { struct mempolicy *pol; struct page *page; @@ -2060,9 +2058,10 @@ alloc_pages_vma(gfp_t gfp, int order, struct vm_area_struct *vma, if (pol->mode == MPOL_INTERLEAVE) { unsigned nid; - nid = interleave_nid(pol, vma, addr, PAGE_SHIFT + order); + nid = interleave_nid(pol, vma, addr, + PAGE_SHIFT + gfp_order(gfp)); mpol_cond_put(pol); - page = alloc_page_interleave(gfp, order, nid); + page = alloc_page_interleave(gfp, nid); goto out; } @@ -2086,14 +2085,14 @@ alloc_pages_vma(gfp_t gfp, int order, struct vm_area_struct *vma, if (!nmask || node_isset(hpage_node, *nmask)) { mpol_cond_put(pol); page = __alloc_pages_node(hpage_node, - gfp | __GFP_THISNODE, order); + gfp | __GFP_THISNODE); goto out; } } nmask = policy_nodemask(gfp, pol); preferred_nid = policy_node(gfp, pol, node); - page = __alloc_pages_nodemask(gfp, order, preferred_nid, nmask); 
+ page = __alloc_pages_nodemask(gfp, preferred_nid, nmask); mpol_cond_put(pol); out: return page; @@ -2108,13 +2107,12 @@ alloc_pages_vma(gfp_t gfp, int order, struct vm_area_struct *vma, * %GFP_HIGHMEM highmem allocation, * %GFP_FS don't call back into a file system. * %GFP_ATOMIC don't sleep. - * @order: Power of two of allocation size in pages. 0 is a single page. * * Allocate a page from the kernel page pool. When not in - * interrupt context and apply the current process NUMA policy. + * interrupt context apply the current process NUMA policy. * Returns NULL when no page can be allocated. */ -struct page *alloc_pages_current(gfp_t gfp, unsigned order) +struct page *alloc_pages_current(gfp_t gfp) { struct mempolicy *pol = &default_policy; struct page *page; @@ -2127,9 +2125,9 @@ struct page *alloc_pages_current(gfp_t gfp, unsigned order) * nor system default_policy */ if (pol->mode == MPOL_INTERLEAVE) - page = alloc_page_interleave(gfp, order, interleave_nodes(pol)); + page = alloc_page_interleave(gfp, interleave_nodes(pol)); else - page = __alloc_pages_nodemask(gfp, order, + page = __alloc_pages_nodemask(gfp, policy_node(gfp, pol, numa_node_id()), policy_nodemask(gfp, pol)); diff --git a/mm/migrate.c b/mm/migrate.c index f2ecc2855a12..acb479132398 100644 --- a/mm/migrate.c +++ b/mm/migrate.c @@ -1884,7 +1884,7 @@ static struct page *alloc_misplaced_dst_page(struct page *page, (GFP_HIGHUSER_MOVABLE | __GFP_THISNODE | __GFP_NOMEMALLOC | __GFP_NORETRY | __GFP_NOWARN) & - ~__GFP_RECLAIM, 0); + ~__GFP_RECLAIM); return newpage; } diff --git a/mm/page_alloc.c b/mm/page_alloc.c index deea16489e2b..13191fe2f19d 100644 --- a/mm/page_alloc.c +++ b/mm/page_alloc.c @@ -4610,11 +4610,11 @@ static inline void finalise_ac(gfp_t gfp_mask, struct alloc_context *ac) * This is the 'heart' of the zoned buddy allocator. 
*/ struct page * -__alloc_pages_nodemask(gfp_t gfp_mask, unsigned int order, int preferred_nid, - nodemask_t *nodemask) +__alloc_pages_nodemask(gfp_t gfp_mask, int preferred_nid, nodemask_t *nodemask) { struct page *page; unsigned int alloc_flags = ALLOC_WMARK_LOW; + int order = gfp_order(gfp_mask); gfp_t alloc_mask; /* The gfp_t that was actually used for allocation */ struct alloc_context ac = { }; diff --git a/mm/shmem.c b/mm/shmem.c index a1e9f6194138..445e76e5c0c2 100644 --- a/mm/shmem.c +++ b/mm/shmem.c @@ -1463,8 +1463,9 @@ static struct page *shmem_alloc_hugepage(gfp_t gfp, return NULL; shmem_pseudo_vma_init(&pvma, info, hindex); - page = alloc_pages_vma(gfp | __GFP_COMP | __GFP_NORETRY | __GFP_NOWARN, - HPAGE_PMD_ORDER, &pvma, 0, numa_node_id(), true); + page = alloc_pages_vma(gfp | __GFP_COMP | __GFP_NORETRY | + __GFP_NOWARN | __GFP_PMD_ORDER, + &pvma, 0, numa_node_id(), true); shmem_pseudo_vma_destroy(&pvma); if (page) prep_transhuge_page(page); diff --git a/mm/slub.c b/mm/slub.c index a34fbe1f6ede..7504fa3f844b 100644 --- a/mm/slub.c +++ b/mm/slub.c @@ -1497,7 +1497,7 @@ static inline struct page *alloc_slab_page(struct kmem_cache *s, if (node == NUMA_NO_NODE) page = alloc_pages(flags, order); else - page = __alloc_pages_node(node, flags, order); + page = __alloc_pages_node(node, flags | __GFP_ORDER(order)); if (page && memcg_charge_slab(page, flags, order, s)) { __free_pages(page, order);