[1/6] zsmalloc: switch from alloc_vm_area to get_vm_area

Message ID 20200918163724.2511-2-hch@lst.de (mailing list archive)
State New, archived
Series [1/6] zsmalloc: switch from alloc_vm_area to get_vm_area

Commit Message

Christoph Hellwig Sept. 18, 2020, 4:37 p.m. UTC
There is no obvious reason why zsmalloc needs to pre-fault the PTEs
given that it later uses map_kernel_range, just like vmap().

Signed-off-by: Christoph Hellwig <hch@lst.de>
---
 mm/zsmalloc.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)
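
For context, the mapping path this refers to: zs_map_object() wires an
object's two pages into the reserved area with map_kernel_range(), much as
vmap() would. A trimmed sketch of how that looked around this series
(simplified from mm/zsmalloc.c under CONFIG_ZSMALLOC_PGTABLE_MAPPING; not
the verbatim code):

static inline void *__zs_map_object(struct mapping_area *area,
				struct page *pages[2], int off, int size)
{
	unsigned long addr = (unsigned long)area->vm->addr;

	/* map_kernel_range() populates the PTEs, much as vmap() would */
	BUG_ON(map_kernel_range(addr, PAGE_SIZE * 2, PAGE_KERNEL, pages) < 0);
	area->vm_addr = area->vm->addr;
	return area->vm_addr + off;
}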

Comments

Minchan Kim Sept. 21, 2020, 5:42 p.m. UTC | #1
On Fri, Sep 18, 2020 at 06:37:19PM +0200, Christoph Hellwig wrote:
> There is no obvious reason why zsmalloc needs to pre-fault the PTEs
> given that it later uses map_kernel_range, just like vmap().

IIRC, the problem was that runtime pte populating needs GFP_KERNEL but
the zs_map_object API runs in a non-preemptible section.
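
(For reference, a trimmed sketch of that constraint, not the verbatim
code — zs_map_area is the per-cpu mapping_area:)

void *zs_map_object(struct zs_pool *pool, unsigned long handle,
			enum zs_mapmode mm)
{
	struct mapping_area *area;
	...
	/* get_cpu_var() disables preemption ... */
	area = &get_cpu_var(zs_map_area);
	...
	/*
	 * ... so wiring the object's two pages into the area here must
	 * not allocate page tables with GFP_KERNEL; the PTEs need to
	 * exist already.
	 */
	return __zs_map_object(area, pages, off, class->size);
}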

> 
> Signed-off-by: Christoph Hellwig <hch@lst.de>
> ---
>  mm/zsmalloc.c | 2 +-
>  1 file changed, 1 insertion(+), 1 deletion(-)
> 
> diff --git a/mm/zsmalloc.c b/mm/zsmalloc.c
> index c36fdff9a37131..3e4fe3259612fd 100644
> --- a/mm/zsmalloc.c
> +++ b/mm/zsmalloc.c
> @@ -1122,7 +1122,7 @@ static inline int __zs_cpu_up(struct mapping_area *area)
>  	 */
>  	if (area->vm)
>  		return 0;
> -	area->vm = alloc_vm_area(PAGE_SIZE * 2, NULL);
> +	area->vm = get_vm_area(PAGE_SIZE * 2, 0);
>  	if (!area->vm)
>  		return -ENOMEM;
>  	return 0;

I think it should work.

diff --git a/mm/memory.c b/mm/memory.c
index 05789aa4af12..6a1e4d854593 100644
--- a/mm/memory.c
+++ b/mm/memory.c
@@ -2232,7 +2232,7 @@ static int apply_to_pte_range(struct mm_struct *mm, pmd_t *pmd,
 	arch_enter_lazy_mmu_mode();
 
 	do {
-		if (create || !pte_none(*pte)) {
+		if ((create || !pte_none(*pte)) && fn) {
 			err = fn(pte++, addr, data);
 			if (err)
 				break;
diff --git a/mm/zsmalloc.c b/mm/zsmalloc.c
index 3e4fe3259612..9ef7daf3d279 100644
--- a/mm/zsmalloc.c
+++ b/mm/zsmalloc.c
@@ -1116,6 +1116,8 @@ static struct zspage *find_get_zspage(struct size_class *class)
 #ifdef CONFIG_ZSMALLOC_PGTABLE_MAPPING
 static inline int __zs_cpu_up(struct mapping_area *area)
 {
+	int ret;
+
 	/*
 	 * Make sure we don't leak memory if a cpu UP notification
 	 * and zs_init() race and both call zs_cpu_up() on the same cpu
@@ -1125,7 +1127,13 @@ static inline int __zs_cpu_up(struct mapping_area *area)
 	area->vm = get_vm_area(PAGE_SIZE * 2, 0);
 	if (!area->vm)
 		return -ENOMEM;
-	return 0;
+
+	/*
+	 * Populate ptes in advance to avoid pte allocation with GFP_KERNEL
+	 * in the non-preemptible context of zs_map_object.
+	 */
+	ret = apply_to_page_range(&init_mm, NULL, PAGE_SIZE * 2, NULL, NULL);
+	return ret;
 }
 
 static inline void __zs_cpu_down(struct mapping_area *area)
Christoph Hellwig Sept. 21, 2020, 6:17 p.m. UTC | #2
On Mon, Sep 21, 2020 at 10:42:56AM -0700, Minchan Kim wrote:
> IIRC, the problem was that runtime pte populating needs GFP_KERNEL but
> the zs_map_object API runs in a non-preemptible section.

Makes sense.

> > -	area->vm = alloc_vm_area(PAGE_SIZE * 2, NULL);
> > +	area->vm = get_vm_area(PAGE_SIZE * 2, 0);
> >  	if (!area->vm)
> >  		return -ENOMEM;
> >  	return 0;
> 
> I think it should work.
> 
> diff --git a/mm/memory.c b/mm/memory.c
> index 05789aa4af12..6a1e4d854593 100644
> --- a/mm/memory.c
> +++ b/mm/memory.c
> @@ -2232,7 +2232,7 @@ static int apply_to_pte_range(struct mm_struct *mm, pmd_t *pmd,
>  	arch_enter_lazy_mmu_mode();
>  
>  	do {
> -		if (create || !pte_none(*pte)) {
> +		if ((create || !pte_none(*pte)) && fn) {
>  			err = fn(pte++, addr, data);
>  			if (err)
>  				break;
> diff --git a/mm/zsmalloc.c b/mm/zsmalloc.c
> index 3e4fe3259612..9ef7daf3d279 100644
> --- a/mm/zsmalloc.c
> +++ b/mm/zsmalloc.c
> @@ -1116,6 +1116,8 @@ static struct zspage *find_get_zspage(struct size_class *class)
>  #ifdef CONFIG_ZSMALLOC_PGTABLE_MAPPING
>  static inline int __zs_cpu_up(struct mapping_area *area)
>  {
> +	int ret;
> +
>  	/*
>  	 * Make sure we don't leak memory if a cpu UP notification
>  	 * and zs_init() race and both call zs_cpu_up() on the same cpu
> @@ -1125,7 +1127,13 @@ static inline int __zs_cpu_up(struct mapping_area *area)
>  	area->vm = get_vm_area(PAGE_SIZE * 2, 0);
>  	if (!area->vm)
>  		return -ENOMEM;
> -	return 0;
> +
> +	/*
> +	 * Populate ptes in advance to avoid pte allocation with GFP_KERNEL
> +	 * in the non-preemptible context of zs_map_object.
> +	 */
> +	ret = apply_to_page_range(&init_mm, NULL, PAGE_SIZE * 2, NULL, NULL);
> +	return ret;

I think this needs the addr from the vm area somewhere..

We probably want to add a trivial helper to prefault an area instead of
the open-coded variant.
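
Something along these lines, say (the name is invented here, and it leans
on the apply_to_page_range() change above tolerating a NULL fn):

/* Allocate kernel page tables for [addr, addr + size); don't touch PTEs. */
static inline int prefault_page_range(unsigned long addr, unsigned long size)
{
	/*
	 * With fn == NULL, apply_to_page_range() still walks and
	 * allocates the page tables but never writes a PTE.
	 */
	return apply_to_page_range(&init_mm, addr, size, NULL, NULL);
}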
Minchan Kim Sept. 21, 2020, 6:42 p.m. UTC | #3
On Mon, Sep 21, 2020 at 08:17:08PM +0200, Christoph Hellwig wrote:
> On Mon, Sep 21, 2020 at 10:42:56AM -0700, Minchan Kim wrote:
> > IIRC, the problem was that runtime pte populating needs GFP_KERNEL but
> > the zs_map_object API runs in a non-preemptible section.
> 
> Makes sense.
> 
> > > -	area->vm = alloc_vm_area(PAGE_SIZE * 2, NULL);
> > > +	area->vm = get_vm_area(PAGE_SIZE * 2, 0);
> > >  	if (!area->vm)
> > >  		return -ENOMEM;
> > >  	return 0;
> > 
> > I think it should work.
> > 
> > diff --git a/mm/memory.c b/mm/memory.c
> > index 05789aa4af12..6a1e4d854593 100644
> > --- a/mm/memory.c
> > +++ b/mm/memory.c
> > @@ -2232,7 +2232,7 @@ static int apply_to_pte_range(struct mm_struct *mm, pmd_t *pmd,
> >  	arch_enter_lazy_mmu_mode();
> >  
> >  	do {
> > -		if (create || !pte_none(*pte)) {
> > +		if ((create || !pte_none(*pte)) && fn) {
> >  			err = fn(pte++, addr, data);
> >  			if (err)
> >  				break;
> > diff --git a/mm/zsmalloc.c b/mm/zsmalloc.c
> > index 3e4fe3259612..9ef7daf3d279 100644
> > --- a/mm/zsmalloc.c
> > +++ b/mm/zsmalloc.c
> > @@ -1116,6 +1116,8 @@ static struct zspage *find_get_zspage(struct size_class *class)
> >  #ifdef CONFIG_ZSMALLOC_PGTABLE_MAPPING
> >  static inline int __zs_cpu_up(struct mapping_area *area)
> >  {
> > +	int ret;
> > +
> >  	/*
> >  	 * Make sure we don't leak memory if a cpu UP notification
> >  	 * and zs_init() race and both call zs_cpu_up() on the same cpu
> > @@ -1125,7 +1127,13 @@ static inline int __zs_cpu_up(struct mapping_area *area)
> >  	area->vm = get_vm_area(PAGE_SIZE * 2, 0);
> >  	if (!area->vm)
> >  		return -ENOMEM;
> > -	return 0;
> > +
> > +	/*
> > +	 * Populate ptes in advance to avoid pte allocation with GFP_KERNEL
> > +	 * in the non-preemptible context of zs_map_object.
> > +	 */
> > +	ret = apply_to_page_range(&init_mm, NULL, PAGE_SIZE * 2, NULL, NULL);
> > +	return ret;
> 
> I think this needs the addr from the vm area somewhere..

Yeah, let's assign the address we got from get_vm_area.

> 
> We probably want to add a trivial helper to prefault an area instead of
> the open-coded variant.

It seems zsmalloc is the only customer of the function, so let's add the
helper when we see another customer.

If there's no objection, I'd like to ask Andrew to fold this up.

---
 mm/memory.c   | 2 +-
 mm/zsmalloc.c | 8 +++++++-
 2 files changed, 8 insertions(+), 2 deletions(-)

diff --git a/mm/memory.c b/mm/memory.c
index 05789aa4af12..6a1e4d854593 100644
--- a/mm/memory.c
+++ b/mm/memory.c
@@ -2232,7 +2232,7 @@ static int apply_to_pte_range(struct mm_struct *mm, pmd_t *pmd,
 	arch_enter_lazy_mmu_mode();
 
 	do {
-		if (create || !pte_none(*pte)) {
+		if ((create || !pte_none(*pte)) && fn) {
 			err = fn(pte++, addr, data);
 			if (err)
 				break;
diff --git a/mm/zsmalloc.c b/mm/zsmalloc.c
index 3e4fe3259612..918c7b019b3d 100644
--- a/mm/zsmalloc.c
+++ b/mm/zsmalloc.c
@@ -1125,7 +1125,13 @@ static inline int __zs_cpu_up(struct mapping_area *area)
 	area->vm = get_vm_area(PAGE_SIZE * 2, 0);
 	if (!area->vm)
 		return -ENOMEM;
-	return 0;
+
+	/*
+	 * Populate ptes in advance to avoid pte allocation with GFP_KERNEL
+	 * in the non-preemptible context of zs_map_object.
+	 */
+	return apply_to_page_range(&init_mm, (unsigned long)area->vm->addr,
+			PAGE_SIZE * 2, NULL, NULL);
 }
 
 static inline void __zs_cpu_down(struct mapping_area *area)
Christoph Hellwig Sept. 21, 2020, 6:43 p.m. UTC | #4
On Mon, Sep 21, 2020 at 11:42:29AM -0700, Minchan Kim wrote:
> It seems zsmalloc is the only customer of the function, so let's add the
> helper when we see another customer.
> 
> If there's no objection, I'd like to ask Andrew to fold this up.

Fine with me.

Patch

diff --git a/mm/zsmalloc.c b/mm/zsmalloc.c
index c36fdff9a37131..3e4fe3259612fd 100644
--- a/mm/zsmalloc.c
+++ b/mm/zsmalloc.c
@@ -1122,7 +1122,7 @@ static inline int __zs_cpu_up(struct mapping_area *area)
 	 */
 	if (area->vm)
 		return 0;
-	area->vm = alloc_vm_area(PAGE_SIZE * 2, NULL);
+	area->vm = get_vm_area(PAGE_SIZE * 2, 0);
 	if (!area->vm)
 		return -ENOMEM;
 	return 0;