
[V2,fix,5/6] mm: hugetlb: add a new function to allocate a new gigantic page

Message ID 1479279304-31379-1-git-send-email-shijie.huang@arm.com (mailing list archive)
State New, archived

Commit Message

Huang Shijie Nov. 16, 2016, 6:55 a.m. UTC
There are three ways we can allocate a new gigantic page:

1. When NUMA is not enabled, use alloc_gigantic_page() to get
   the gigantic page.

2. NUMA is enabled, but the vma is NULL.
   There is no memory policy we can refer to, so create a
   @nodes_allowed nodemask, initialize it with init_nodemask_of_mempolicy()
   or init_nodemask_of_node(), and then use alloc_fresh_gigantic_page()
   to get the gigantic page.

3. NUMA is enabled and the vma is valid.
   We can follow the memory policy of @vma:
   get @nodes_allowed from huge_nodemask(), and use alloc_fresh_gigantic_page()
   to get the gigantic page.

Signed-off-by: Huang Shijie <shijie.huang@arm.com>
---
Since huge_nodemask() has changed, we have to change this function a little.

---
 mm/hugetlb.c | 63 ++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
 1 file changed, 63 insertions(+)
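
The new helper has no caller within this patch itself (a point Michal Hocko
raises below). A hypothetical call site, with the wrapper name assumed for
illustration rather than taken from the series, could look like:

	/*
	 * Hypothetical caller (not part of this patch): allocate a gigantic
	 * page for @vma at @addr, following the VMA's memory policy. nid is
	 * only consulted on !NUMA builds or when vma is NULL, so
	 * NUMA_NO_NODE is a safe default here.
	 */
	static struct page *alloc_gigantic_page_vma(struct hstate *h,
			struct vm_area_struct *vma, unsigned long addr)
	{
		return __hugetlb_alloc_gigantic_page(h, vma, addr, NUMA_NO_NODE);
	}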

Comments

Vlastimil Babka Nov. 28, 2016, 2:17 p.m. UTC | #1
On 11/16/2016 07:55 AM, Huang Shijie wrote:
> There are three ways we can allocate a new gigantic page:
>
> 1. When NUMA is not enabled, use alloc_gigantic_page() to get
>    the gigantic page.
>
> 2. NUMA is enabled, but the vma is NULL.
>    There is no memory policy we can refer to, so create a
>    @nodes_allowed nodemask, initialize it with init_nodemask_of_mempolicy()
>    or init_nodemask_of_node(), and then use alloc_fresh_gigantic_page()
>    to get the gigantic page.
>
> 3. NUMA is enabled and the vma is valid.
>    We can follow the memory policy of @vma:
>    get @nodes_allowed from huge_nodemask(), and use alloc_fresh_gigantic_page()
>    to get the gigantic page.
>
> Signed-off-by: Huang Shijie <shijie.huang@arm.com>
> ---
> Since huge_nodemask() has changed, we have to change this function a little.
>
> ---
>  mm/hugetlb.c | 63 ++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
>  1 file changed, 63 insertions(+)
>
> diff --git a/mm/hugetlb.c b/mm/hugetlb.c
> index 6995087..c33bddc 100644
> --- a/mm/hugetlb.c
> +++ b/mm/hugetlb.c
> @@ -1502,6 +1502,69 @@ int dissolve_free_huge_pages(unsigned long start_pfn, unsigned long end_pfn)
>
>  /*
>   * There are 3 ways this can get called:
> + *
> + * 1. When NUMA is not enabled, use alloc_gigantic_page() to get
> + *    the gigantic page.
> + *
> + * 2. NUMA is enabled, but the vma is NULL.
> + *    Create a @nodes_allowed nodemask, and use alloc_fresh_gigantic_page()
> + *    to get the gigantic page.
> + *
> + * 3. NUMA is enabled and the vma is valid.
> + *    Use the @vma's memory policy:
> + *    get @nodes_allowed from huge_nodemask(), and use alloc_fresh_gigantic_page()
> + *    to get the gigantic page.
> + */
> +static struct page *__hugetlb_alloc_gigantic_page(struct hstate *h,
> +		struct vm_area_struct *vma, unsigned long addr, int nid)
> +{
> +	NODEMASK_ALLOC(nodemask_t, nodes_allowed, GFP_KERNEL | __GFP_NORETRY);

What if the allocation fails and nodes_allowed is NULL?
It might work fine now, but it's rather fragile, so I'd rather see an 
explicit check.

BTW same thing applies to __nr_hugepages_store_common().
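
One possible shape of the explicit check, hoisted to the top of the new
function (a sketch only, which trades the silent fallback for an outright
failure):

	NODEMASK_ALLOC(nodemask_t, nodes_allowed, GFP_KERNEL | __GFP_NORETRY);
	struct page *page = NULL;

	/*
	 * Explicit check: treat failure to allocate the nodemask as failure
	 * to allocate the gigantic page, instead of relying on every callee
	 * below quietly tolerating a NULL nodemask.
	 */
	if (!nodes_allowed)
		return NULL;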

> +	struct page *page = NULL;
> +
> +	/* Not NUMA */
> +	if (!IS_ENABLED(CONFIG_NUMA)) {
> +		if (nid == NUMA_NO_NODE)
> +			nid = numa_mem_id();
> +
> +		page = alloc_gigantic_page(nid, huge_page_order(h));
> +		if (page)
> +			prep_compound_gigantic_page(page, huge_page_order(h));
> +
> +		NODEMASK_FREE(nodes_allowed);
> +		return page;
> +	}
> +
> +	/* NUMA && !vma */
> +	if (!vma) {
> +		if (nid == NUMA_NO_NODE) {
> +			if (!init_nodemask_of_mempolicy(nodes_allowed)) {
> +				NODEMASK_FREE(nodes_allowed);
> +				nodes_allowed = &node_states[N_MEMORY];
> +			}
> +		} else if (nodes_allowed) {
> +			init_nodemask_of_node(nodes_allowed, nid);
> +		} else {
> +			nodes_allowed = &node_states[N_MEMORY];
> +		}
> +
> +		page = alloc_fresh_gigantic_page(h, nodes_allowed, true);
> +
> +		if (nodes_allowed != &node_states[N_MEMORY])
> +			NODEMASK_FREE(nodes_allowed);
> +
> +		return page;
> +	}
> +
> +	/* NUMA && vma */
> +	if (huge_nodemask(vma, addr, nodes_allowed))
> +		page = alloc_fresh_gigantic_page(h, nodes_allowed, true);
> +
> +	NODEMASK_FREE(nodes_allowed);
> +	return page;
> +}
> +
> +/*
> + * There are 3 ways this can get called:
>   * 1. With vma+addr: we use the VMA's memory policy
>   * 2. With !vma, but nid=NUMA_NO_NODE:  We try to allocate a huge
>   *    page from any node, and let the buddy allocator itself figure
>
Huang Shijie Nov. 29, 2016, 9:03 a.m. UTC | #2
On Mon, Nov 28, 2016 at 03:17:28PM +0100, Vlastimil Babka wrote:
> On 11/16/2016 07:55 AM, Huang Shijie wrote:
> > +static struct page *__hugetlb_alloc_gigantic_page(struct hstate *h,
> > +		struct vm_area_struct *vma, unsigned long addr, int nid)
> > +{
> > +	NODEMASK_ALLOC(nodemask_t, nodes_allowed, GFP_KERNEL | __GFP_NORETRY);
> 
> What if the allocation fails and nodes_allowed is NULL?
> It might work fine now, but it's rather fragile, so I'd rather see an
Yes.

> explicit check.
See the comment below.

> 
> BTW same thing applies to __nr_hugepages_store_common().
> 
> > +	struct page *page = NULL;
> > +
> > +	/* Not NUMA */
> > +	if (!IS_ENABLED(CONFIG_NUMA)) {
> > +		if (nid == NUMA_NO_NODE)
> > +			nid = numa_mem_id();
> > +
> > +		page = alloc_gigantic_page(nid, huge_page_order(h));
> > +		if (page)
> > +			prep_compound_gigantic_page(page, huge_page_order(h));
> > +
> > +		NODEMASK_FREE(nodes_allowed);
> > +		return page;
> > +	}
> > +
> > +	/* NUMA && !vma */
> > +	if (!vma) {
> > +		if (nid == NUMA_NO_NODE) {
> > +			if (!init_nodemask_of_mempolicy(nodes_allowed)) {
> > +				NODEMASK_FREE(nodes_allowed);
> > +				nodes_allowed = &node_states[N_MEMORY];
> > +			}
> > +		} else if (nodes_allowed) {
The check is here.

Do we really need to re-arrange the code here for the explicit check? :)


Thanks
Huang Shijie
> > +			init_nodemask_of_node(nodes_allowed, nid);
> > +		} else {
> > +			nodes_allowed = &node_states[N_MEMORY];
> > +		}
> > +
> > +		page = alloc_fresh_gigantic_page(h, nodes_allowed, true);
> > +
Vlastimil Babka Nov. 29, 2016, 10:50 a.m. UTC | #3
On 11/29/2016 10:03 AM, Huang Shijie wrote:
> On Mon, Nov 28, 2016 at 03:17:28PM +0100, Vlastimil Babka wrote:
>> On 11/16/2016 07:55 AM, Huang Shijie wrote:
>> > +static struct page *__hugetlb_alloc_gigantic_page(struct hstate *h,
>> > +		struct vm_area_struct *vma, unsigned long addr, int nid)
>> > +{
>> > +	NODEMASK_ALLOC(nodemask_t, nodes_allowed, GFP_KERNEL | __GFP_NORETRY);
>>
>> What if the allocation fails and nodes_allowed is NULL?
>> It might work fine now, but it's rather fragile, so I'd rather see an
> Yes.
>
>> explicit check.
> See the comment below.
>
>>
>> BTW same thing applies to __nr_hugepages_store_common().
>>
>> > +	struct page *page = NULL;
>> > +
>> > +	/* Not NUMA */
>> > +	if (!IS_ENABLED(CONFIG_NUMA)) {
>> > +		if (nid == NUMA_NO_NODE)
>> > +			nid = numa_mem_id();
>> > +
>> > +		page = alloc_gigantic_page(nid, huge_page_order(h));
>> > +		if (page)
>> > +			prep_compound_gigantic_page(page, huge_page_order(h));
>> > +
>> > +		NODEMASK_FREE(nodes_allowed);
>> > +		return page;
>> > +	}
>> > +
>> > +	/* NUMA && !vma */
>> > +	if (!vma) {
>> > +		if (nid == NUMA_NO_NODE) {
>> > +			if (!init_nodemask_of_mempolicy(nodes_allowed)) {
>> > +				NODEMASK_FREE(nodes_allowed);
>> > +				nodes_allowed = &node_states[N_MEMORY];
>> > +			}
>> > +		} else if (nodes_allowed) {
> The check is here.

It's below a possible usage of nodes_allowed as an argument of
init_nodemask_of_mempolicy(mask), which does

         if (!(mask && current->mempolicy))
                 return false;

which itself looks like an error at first sight :)

> Do we really need to re-arrange the code here for the explicit check? :)

We don't need it *now* to be correct, but I still find it fragile. Also it
mixes up the semantics of NULL as a conscious "default" value and NULL as
a side effect of memory allocation failure. Nothing good can come from that
in the long term :)
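
Spelled out, that check folds two very different conditions into a single
return value (an illustrative expansion, not a proposed change):

	/* this: */
	if (!(mask && current->mempolicy))
		return false;

	/* behaves like: */
	if (!mask)			/* caller's NODEMASK_ALLOC() failed */
		return false;
	if (!current->mempolicy)	/* task simply has no mempolicy set */
		return false;

so a caller cannot tell a memory allocation failure apart from the absence
of a task mempolicy.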

> Thanks
> Huang Shijie
>> > +			init_nodemask_of_node(nodes_allowed, nid);
>> > +		} else {
>> > +			nodes_allowed = &node_states[N_MEMORY];
>> > +		}
>> > +
>> > +		page = alloc_fresh_gigantic_page(h, nodes_allowed, true);
>> > +
>
Huang Shijie Nov. 30, 2016, 3:02 a.m. UTC | #4
On Tue, Nov 29, 2016 at 11:50:37AM +0100, Vlastimil Babka wrote:
> > > > +	if (!vma) {
> > > > +		if (nid == NUMA_NO_NODE) {
> > > > +			if (!init_nodemask_of_mempolicy(nodes_allowed)) {
> > > > +				NODEMASK_FREE(nodes_allowed);
> > > > +				nodes_allowed = &node_states[N_MEMORY];
> > > > +			}
> > > > +		} else if (nodes_allowed) {
> > The check is here.
> 
> It's below a possible usage of nodes_allowed as an argument of
> init_nodemask_of_mempolicy(mask), which does
Sorry, I missed that.
> 
>         if (!(mask && current->mempolicy))
>                 return false;
> 
> which itself looks like an error at first sight :)
Yes. I agree.
> 
> > Do we really need to re-arrange the code here for the explicit check? :)
> 
> We don't need it *now* to be correct, but I still find it fragile. Also it
> mixes up the semantics of NULL as a conscious "default" value and NULL as
> a side effect of memory allocation failure. Nothing good can come from that
> in the long term :)
Okay, I think we do need the explicit NULL check for
@nodes_allowed. :)

Thanks
Huang Shijie
Michal Hocko Dec. 2, 2016, 2:03 p.m. UTC | #5
On Wed 16-11-16 14:55:04, Huang Shijie wrote:
> There are three ways we can allocate a new gigantic page:
>
> 1. When NUMA is not enabled, use alloc_gigantic_page() to get
>    the gigantic page.
>
> 2. NUMA is enabled, but the vma is NULL.
>    There is no memory policy we can refer to, so create a
>    @nodes_allowed nodemask, initialize it with init_nodemask_of_mempolicy()
>    or init_nodemask_of_node(), and then use alloc_fresh_gigantic_page()
>    to get the gigantic page.
>
> 3. NUMA is enabled and the vma is valid.
>    We can follow the memory policy of @vma:
>    get @nodes_allowed from huge_nodemask(), and use alloc_fresh_gigantic_page()
>    to get the gigantic page.

Again, __hugetlb_alloc_gigantic_page() is not used anywhere, and it is hard
to deduce its usage from this commit. The above shouldn't really be much
different from what we do in alloc_pages_vma(), so please make sure to check
that before coming up with something hugetlb-specific.
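
For comparison, the generic entry point referred to above resolves the VMA's
mempolicy inside the allocator, so callers never build a nodemask themselves.
Its declaration at around this time (quoted for reference; the buddy allocator
cannot satisfy gigantic orders, so it is the policy handling rather than the
call itself that would carry over):

	/* include/linux/gfp.h */
	struct page *alloc_pages_vma(gfp_t gfp_mask, int order,
			struct vm_area_struct *vma, unsigned long addr,
			int node, bool hugepage);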

> Signed-off-by: Huang Shijie <shijie.huang@arm.com>
> ---
> Since huge_nodemask() has changed, we have to change this function a little.
> 
> ---
>  mm/hugetlb.c | 63 ++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
>  1 file changed, 63 insertions(+)
> 
> diff --git a/mm/hugetlb.c b/mm/hugetlb.c
> index 6995087..c33bddc 100644
> --- a/mm/hugetlb.c
> +++ b/mm/hugetlb.c
> @@ -1502,6 +1502,69 @@ int dissolve_free_huge_pages(unsigned long start_pfn, unsigned long end_pfn)
>  
>  /*
>   * There are 3 ways this can get called:
> + *
> + * 1. When NUMA is not enabled, use alloc_gigantic_page() to get
> + *    the gigantic page.
> + *
> + * 2. NUMA is enabled, but the vma is NULL.
> + *    Create a @nodes_allowed nodemask, and use alloc_fresh_gigantic_page()
> + *    to get the gigantic page.
> + *
> + * 3. NUMA is enabled and the vma is valid.
> + *    Use the @vma's memory policy:
> + *    get @nodes_allowed from huge_nodemask(), and use alloc_fresh_gigantic_page()
> + *    to get the gigantic page.
> + */
> +static struct page *__hugetlb_alloc_gigantic_page(struct hstate *h,
> +		struct vm_area_struct *vma, unsigned long addr, int nid)
> +{
> +	NODEMASK_ALLOC(nodemask_t, nodes_allowed, GFP_KERNEL | __GFP_NORETRY);
> +	struct page *page = NULL;
> +
> +	/* Not NUMA */
> +	if (!IS_ENABLED(CONFIG_NUMA)) {
> +		if (nid == NUMA_NO_NODE)
> +			nid = numa_mem_id();
> +
> +		page = alloc_gigantic_page(nid, huge_page_order(h));
> +		if (page)
> +			prep_compound_gigantic_page(page, huge_page_order(h));
> +
> +		NODEMASK_FREE(nodes_allowed);
> +		return page;
> +	}
> +
> +	/* NUMA && !vma */
> +	if (!vma) {
> +		if (nid == NUMA_NO_NODE) {
> +			if (!init_nodemask_of_mempolicy(nodes_allowed)) {
> +				NODEMASK_FREE(nodes_allowed);
> +				nodes_allowed = &node_states[N_MEMORY];
> +			}
> +		} else if (nodes_allowed) {
> +			init_nodemask_of_node(nodes_allowed, nid);
> +		} else {
> +			nodes_allowed = &node_states[N_MEMORY];
> +		}
> +
> +		page = alloc_fresh_gigantic_page(h, nodes_allowed, true);
> +
> +		if (nodes_allowed != &node_states[N_MEMORY])
> +			NODEMASK_FREE(nodes_allowed);
> +
> +		return page;
> +	}
> +
> +	/* NUMA && vma */
> +	if (huge_nodemask(vma, addr, nodes_allowed))
> +		page = alloc_fresh_gigantic_page(h, nodes_allowed, true);
> +
> +	NODEMASK_FREE(nodes_allowed);
> +	return page;
> +}
> +
> +/*
> + * There are 3 ways this can get called:
>   * 1. With vma+addr: we use the VMA's memory policy
>   * 2. With !vma, but nid=NUMA_NO_NODE:  We try to allocate a huge
>   *    page from any node, and let the buddy allocator itself figure
> -- 
> 2.5.5
>
Huang Shijie Dec. 5, 2016, 3:15 a.m. UTC | #6
On Fri, Dec 02, 2016 at 03:03:30PM +0100, Michal Hocko wrote:
> On Wed 16-11-16 14:55:04, Huang Shijie wrote:
> > There are three ways we can allocate a new gigantic page:
> >
> > 1. When NUMA is not enabled, use alloc_gigantic_page() to get
> >    the gigantic page.
> >
> > 2. NUMA is enabled, but the vma is NULL.
> >    There is no memory policy we can refer to, so create a
> >    @nodes_allowed nodemask, initialize it with init_nodemask_of_mempolicy()
> >    or init_nodemask_of_node(), and then use alloc_fresh_gigantic_page()
> >    to get the gigantic page.
> >
> > 3. NUMA is enabled and the vma is valid.
> >    We can follow the memory policy of @vma:
> >    get @nodes_allowed from huge_nodemask(), and use alloc_fresh_gigantic_page()
> >    to get the gigantic page.
> 
> Again __hugetlb_alloc_gigantic_page is not used and it is hard to deduce
> its usage from this commit. The above shouldn't be really much different from

Okay, I will merge it into the later patch that uses it.

> what we do in alloc_pages_vma so please make sure to check it before
> coming up with something hugetlb specific.
No problem. Thanks for the hint.

Thanks
Huang Shijie

Patch

diff --git a/mm/hugetlb.c b/mm/hugetlb.c
index 6995087..c33bddc 100644
--- a/mm/hugetlb.c
+++ b/mm/hugetlb.c
@@ -1502,6 +1502,69 @@  int dissolve_free_huge_pages(unsigned long start_pfn, unsigned long end_pfn)
 
 /*
  * There are 3 ways this can get called:
+ *
+ * 1. When NUMA is not enabled, use alloc_gigantic_page() to get
+ *    the gigantic page.
+ *
+ * 2. NUMA is enabled, but the vma is NULL.
+ *    Create a @nodes_allowed nodemask, and use alloc_fresh_gigantic_page()
+ *    to get the gigantic page.
+ *
+ * 3. NUMA is enabled and the vma is valid.
+ *    Use the @vma's memory policy:
+ *    get @nodes_allowed from huge_nodemask(), and use alloc_fresh_gigantic_page()
+ *    to get the gigantic page.
+ */
+static struct page *__hugetlb_alloc_gigantic_page(struct hstate *h,
+		struct vm_area_struct *vma, unsigned long addr, int nid)
+{
+	NODEMASK_ALLOC(nodemask_t, nodes_allowed, GFP_KERNEL | __GFP_NORETRY);
+	struct page *page = NULL;
+
+	/* Not NUMA */
+	if (!IS_ENABLED(CONFIG_NUMA)) {
+		if (nid == NUMA_NO_NODE)
+			nid = numa_mem_id();
+
+		page = alloc_gigantic_page(nid, huge_page_order(h));
+		if (page)
+			prep_compound_gigantic_page(page, huge_page_order(h));
+
+		NODEMASK_FREE(nodes_allowed);
+		return page;
+	}
+
+	/* NUMA && !vma */
+	if (!vma) {
+		if (nid == NUMA_NO_NODE) {
+			if (!init_nodemask_of_mempolicy(nodes_allowed)) {
+				NODEMASK_FREE(nodes_allowed);
+				nodes_allowed = &node_states[N_MEMORY];
+			}
+		} else if (nodes_allowed) {
+			init_nodemask_of_node(nodes_allowed, nid);
+		} else {
+			nodes_allowed = &node_states[N_MEMORY];
+		}
+
+		page = alloc_fresh_gigantic_page(h, nodes_allowed, true);
+
+		if (nodes_allowed != &node_states[N_MEMORY])
+			NODEMASK_FREE(nodes_allowed);
+
+		return page;
+	}
+
+	/* NUMA && vma */
+	if (huge_nodemask(vma, addr, nodes_allowed))
+		page = alloc_fresh_gigantic_page(h, nodes_allowed, true);
+
+	NODEMASK_FREE(nodes_allowed);
+	return page;
+}
+
+/*
+ * There are 3 ways this can get called:
  * 1. With vma+addr: we use the VMA's memory policy
  * 2. With !vma, but nid=NUMA_NO_NODE:  We try to allocate a huge
  *    page from any node, and let the buddy allocator itself figure