
[1/1] mm/vmalloc: convert vmap_lazy_nr to atomic_long_t

Message ID 20190131162452.25879-1-urezki@gmail.com (mailing list archive)
State New, archived
Series [1/1] mm/vmalloc: convert vmap_lazy_nr to atomic_long_t

Commit Message

Uladzislau Rezki Jan. 31, 2019, 4:24 p.m. UTC
The vmap_lazy_nr variable has type atomic_t, which is a 4-byte integer on
both 32-bit and 64-bit systems. lazy_max_pages() deals with "unsigned long",
which is 8 bytes on 64-bit systems, thus vmap_lazy_nr should be 8 bytes on
64-bit as well.

Signed-off-by: Uladzislau Rezki (Sony) <urezki@gmail.com>
---
 mm/vmalloc.c | 20 ++++++++++----------
 1 file changed, 10 insertions(+), 10 deletions(-)
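
For illustration only, a minimal userspace sketch (not part of the patch,
assuming a 32TB vmalloc region and 4KB pages as on x86-64) showing how a
4-byte counter silently truncates a page count of that size:

    #include <stdio.h>
    #include <limits.h>

    int main(void)
    {
            /* assumption: 32TB of vmalloc space, PAGE_SHIFT == 12 */
            unsigned long pages = (32UL << 40) >> 12;  /* 2^33 pages */
            int truncated = (int)pages;                /* what atomic_t would store */

            printf("pages=%lu INT_MAX=%d truncated=%d\n",
                   pages, INT_MAX, truncated);
            return 0;
    }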

Comments

William Kucharski Jan. 31, 2019, 9:40 p.m. UTC | #1
> On Jan 31, 2019, at 9:24 AM, Uladzislau Rezki (Sony) <urezki@gmail.com> wrote:
> 
> vmap_lazy_nr variable has atomic_t type that is 4 bytes integer
> value on both 32 and 64 bit systems. lazy_max_pages() deals with
> "unsigned long" that is 8 bytes on 64 bit system, thus vmap_lazy_nr
> should be 8 bytes on 64 bit as well.

Looks good.

Reviewed-by: William Kucharski <william.kucharski@oracle.com>
Michal Hocko Feb. 1, 2019, 12:45 p.m. UTC | #2
On Thu 31-01-19 17:24:52, Uladzislau Rezki (Sony) wrote:
> vmap_lazy_nr variable has atomic_t type that is 4 bytes integer
> value on both 32 and 64 bit systems. lazy_max_pages() deals with
> "unsigned long" that is 8 bytes on 64 bit system, thus vmap_lazy_nr
> should be 8 bytes on 64 bit as well.

But do we really need a 64-bit number of _pages_? I have a hard time imagining
that we would accumulate that many lazy pages.

> 
> Signed-off-by: Uladzislau Rezki (Sony) <urezki@gmail.com>
> ---
>  mm/vmalloc.c | 20 ++++++++++----------
>  1 file changed, 10 insertions(+), 10 deletions(-)
> 
> diff --git a/mm/vmalloc.c b/mm/vmalloc.c
> index abe83f885069..755b02983d8d 100644
> --- a/mm/vmalloc.c
> +++ b/mm/vmalloc.c
> @@ -632,7 +632,7 @@ static unsigned long lazy_max_pages(void)
>  	return log * (32UL * 1024 * 1024 / PAGE_SIZE);
>  }
>  
> -static atomic_t vmap_lazy_nr = ATOMIC_INIT(0);
> +static atomic_long_t vmap_lazy_nr = ATOMIC_LONG_INIT(0);
>  
>  /*
>   * Serialize vmap purging.  There is no actual criticial section protected
> @@ -650,7 +650,7 @@ static void purge_fragmented_blocks_allcpus(void);
>   */
>  void set_iounmap_nonlazy(void)
>  {
> -	atomic_set(&vmap_lazy_nr, lazy_max_pages()+1);
> +	atomic_long_set(&vmap_lazy_nr, lazy_max_pages()+1);
>  }
>  
>  /*
> @@ -658,10 +658,10 @@ void set_iounmap_nonlazy(void)
>   */
>  static bool __purge_vmap_area_lazy(unsigned long start, unsigned long end)
>  {
> +	unsigned long resched_threshold;
>  	struct llist_node *valist;
>  	struct vmap_area *va;
>  	struct vmap_area *n_va;
> -	int resched_threshold;
>  
>  	lockdep_assert_held(&vmap_purge_lock);
>  
> @@ -681,16 +681,16 @@ static bool __purge_vmap_area_lazy(unsigned long start, unsigned long end)
>  	}
>  
>  	flush_tlb_kernel_range(start, end);
> -	resched_threshold = (int) lazy_max_pages() << 1;
> +	resched_threshold = lazy_max_pages() << 1;
>  
>  	spin_lock(&vmap_area_lock);
>  	llist_for_each_entry_safe(va, n_va, valist, purge_list) {
> -		int nr = (va->va_end - va->va_start) >> PAGE_SHIFT;
> +		unsigned long nr = (va->va_end - va->va_start) >> PAGE_SHIFT;
>  
>  		__free_vmap_area(va);
> -		atomic_sub(nr, &vmap_lazy_nr);
> +		atomic_long_sub(nr, &vmap_lazy_nr);
>  
> -		if (atomic_read(&vmap_lazy_nr) < resched_threshold)
> +		if (atomic_long_read(&vmap_lazy_nr) < resched_threshold)
>  			cond_resched_lock(&vmap_area_lock);
>  	}
>  	spin_unlock(&vmap_area_lock);
> @@ -727,10 +727,10 @@ static void purge_vmap_area_lazy(void)
>   */
>  static void free_vmap_area_noflush(struct vmap_area *va)
>  {
> -	int nr_lazy;
> +	unsigned long nr_lazy;
>  
> -	nr_lazy = atomic_add_return((va->va_end - va->va_start) >> PAGE_SHIFT,
> -				    &vmap_lazy_nr);
> +	nr_lazy = atomic_long_add_return((va->va_end - va->va_start) >>
> +				PAGE_SHIFT, &vmap_lazy_nr);
>  
>  	/* After this point, we may free va at any time */
>  	llist_add(&va->purge_list, &vmap_purge_list);
> -- 
> 2.11.0
>
Uladzislau Rezki Feb. 4, 2019, 10:49 a.m. UTC | #3
Hello, Michal.

On Fri, Feb 01, 2019 at 01:45:28PM +0100, Michal Hocko wrote:
> On Thu 31-01-19 17:24:52, Uladzislau Rezki (Sony) wrote:
> > vmap_lazy_nr variable has atomic_t type that is 4 bytes integer
> > value on both 32 and 64 bit systems. lazy_max_pages() deals with
> > "unsigned long" that is 8 bytes on 64 bit system, thus vmap_lazy_nr
> > should be 8 bytes on 64 bit as well.
> 
> But do we really need 64b number of _pages_? I have hard time imagine
> that we would have that many lazy pages to accumulate.
> 
That is more about using the same variable type, and thus the same size, on
both 32-bit and 64-bit systems.

<snip>
static void free_vmap_area_noflush(struct vmap_area *va)
{
    int nr_lazy;
 
    nr_lazy = atomic_add_return((va->va_end - va->va_start) >> PAGE_SHIFT,
                                &vmap_lazy_nr);
...
    if (unlikely(nr_lazy > lazy_max_pages()))
        try_purge_vmap_area_lazy();
<snip>

va_end/va_start are "unsigned long", whereas the atomic_t vmap_lazy_nr is "int".
The same goes for lazy_max_pages(): it returns an "unsigned long" value.

Answering your question: on 64-bit, the "vmalloc" address space is ~8589719406
pages if PAGE_SIZE is 4096, i.e. a regular 4-byte integer is not enough to hold
it. I agree it is hard to imagine, but it also depends on how much physical
memory a system has; it would have to be terabytes. I am not sure whether such
systems exist.
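
As a rough calculation (assuming a 32TB vmalloc region and 4096-byte pages,
as on x86-64):

    32TB / 4KB = 2^45 / 2^12 = 2^33 = 8589934592 pages
    INT_MAX    = 2^31 - 1           = 2147483647

so the full page count is roughly four times larger than a signed 32-bit
counter can represent.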

Thank you.

--
Vlad Rezki

> > 
> > Signed-off-by: Uladzislau Rezki (Sony) <urezki@gmail.com>
> > ---
> >  mm/vmalloc.c | 20 ++++++++++----------
> >  1 file changed, 10 insertions(+), 10 deletions(-)
> > 
> > diff --git a/mm/vmalloc.c b/mm/vmalloc.c
> > index abe83f885069..755b02983d8d 100644
> > --- a/mm/vmalloc.c
> > +++ b/mm/vmalloc.c
> > @@ -632,7 +632,7 @@ static unsigned long lazy_max_pages(void)
> >  	return log * (32UL * 1024 * 1024 / PAGE_SIZE);
> >  }
> >  
> > -static atomic_t vmap_lazy_nr = ATOMIC_INIT(0);
> > +static atomic_long_t vmap_lazy_nr = ATOMIC_LONG_INIT(0);
> >  
> >  /*
> >   * Serialize vmap purging.  There is no actual criticial section protected
> > @@ -650,7 +650,7 @@ static void purge_fragmented_blocks_allcpus(void);
> >   */
> >  void set_iounmap_nonlazy(void)
> >  {
> > -	atomic_set(&vmap_lazy_nr, lazy_max_pages()+1);
> > +	atomic_long_set(&vmap_lazy_nr, lazy_max_pages()+1);
> >  }
> >  
> >  /*
> > @@ -658,10 +658,10 @@ void set_iounmap_nonlazy(void)
> >   */
> >  static bool __purge_vmap_area_lazy(unsigned long start, unsigned long end)
> >  {
> > +	unsigned long resched_threshold;
> >  	struct llist_node *valist;
> >  	struct vmap_area *va;
> >  	struct vmap_area *n_va;
> > -	int resched_threshold;
> >  
> >  	lockdep_assert_held(&vmap_purge_lock);
> >  
> > @@ -681,16 +681,16 @@ static bool __purge_vmap_area_lazy(unsigned long start, unsigned long end)
> >  	}
> >  
> >  	flush_tlb_kernel_range(start, end);
> > -	resched_threshold = (int) lazy_max_pages() << 1;
> > +	resched_threshold = lazy_max_pages() << 1;
> >  
> >  	spin_lock(&vmap_area_lock);
> >  	llist_for_each_entry_safe(va, n_va, valist, purge_list) {
> > -		int nr = (va->va_end - va->va_start) >> PAGE_SHIFT;
> > +		unsigned long nr = (va->va_end - va->va_start) >> PAGE_SHIFT;
> >  
> >  		__free_vmap_area(va);
> > -		atomic_sub(nr, &vmap_lazy_nr);
> > +		atomic_long_sub(nr, &vmap_lazy_nr);
> >  
> > -		if (atomic_read(&vmap_lazy_nr) < resched_threshold)
> > +		if (atomic_long_read(&vmap_lazy_nr) < resched_threshold)
> >  			cond_resched_lock(&vmap_area_lock);
> >  	}
> >  	spin_unlock(&vmap_area_lock);
> > @@ -727,10 +727,10 @@ static void purge_vmap_area_lazy(void)
> >   */
> >  static void free_vmap_area_noflush(struct vmap_area *va)
> >  {
> > -	int nr_lazy;
> > +	unsigned long nr_lazy;
> >  
> > -	nr_lazy = atomic_add_return((va->va_end - va->va_start) >> PAGE_SHIFT,
> > -				    &vmap_lazy_nr);
> > +	nr_lazy = atomic_long_add_return((va->va_end - va->va_start) >>
> > +				PAGE_SHIFT, &vmap_lazy_nr);
> >  
> >  	/* After this point, we may free va at any time */
> >  	llist_add(&va->purge_list, &vmap_purge_list);
> > -- 
> > 2.11.0
> > 
> 
> -- 
> Michal Hocko
> SUSE Labs
Matthew Wilcox Feb. 4, 2019, 1:33 p.m. UTC | #4
On Mon, Feb 04, 2019 at 11:49:56AM +0100, Uladzislau Rezki wrote:
> On Fri, Feb 01, 2019 at 01:45:28PM +0100, Michal Hocko wrote:
> > On Thu 31-01-19 17:24:52, Uladzislau Rezki (Sony) wrote:
> > > vmap_lazy_nr variable has atomic_t type that is 4 bytes integer
> > > value on both 32 and 64 bit systems. lazy_max_pages() deals with
> > > "unsigned long" that is 8 bytes on 64 bit system, thus vmap_lazy_nr
> > > should be 8 bytes on 64 bit as well.
> > 
> > But do we really need 64b number of _pages_? I have hard time imagine
> > that we would have that many lazy pages to accumulate.
> > 
> That is more about of using the same type of variables thus the same size
> in 32/64 bit address space.
> 
> <snip>
> static void free_vmap_area_noflush(struct vmap_area *va)
> {
>     int nr_lazy;
>  
>     nr_lazy = atomic_add_return((va->va_end - va->va_start) >> PAGE_SHIFT,
>                                 &vmap_lazy_nr);
> ...
>     if (unlikely(nr_lazy > lazy_max_pages()))
>         try_purge_vmap_area_lazy();
> <snip>
> 
> va_end/va_start are "unsigned long" whereas atomit_t(vmap_lazy_nr) is "int". 
> The same with lazy_max_pages(), it returns "unsigned long" value.
> 
> Answering your question, in 64bit, the "vmalloc" address space is ~8589719406
> pages if PAGE_SIZE is 4096, i.e. a regular 4 byte integer is not enough to hold
> it. I agree it is hard to imagine, but it also depends on physical memory a
> system has, it has to be terabytes. I am not sure if such systems exists.

There are certainly systems with more than 16TB of memory out there.
The question is whether we want to allow individual vmaps of 16TB.
We currently have a 32TB vmap space (on x86-64), so that's one limit.
Should we restrict it further to avoid this ever wrapping past a 32-bit
limit?
Uladzislau Rezki Feb. 4, 2019, 6:06 p.m. UTC | #5
Hello, Matthew.

On Mon, Feb 04, 2019 at 05:33:00AM -0800, Matthew Wilcox wrote:
> On Mon, Feb 04, 2019 at 11:49:56AM +0100, Uladzislau Rezki wrote:
> > On Fri, Feb 01, 2019 at 01:45:28PM +0100, Michal Hocko wrote:
> > > On Thu 31-01-19 17:24:52, Uladzislau Rezki (Sony) wrote:
> > > > vmap_lazy_nr variable has atomic_t type that is 4 bytes integer
> > > > value on both 32 and 64 bit systems. lazy_max_pages() deals with
> > > > "unsigned long" that is 8 bytes on 64 bit system, thus vmap_lazy_nr
> > > > should be 8 bytes on 64 bit as well.
> > > 
> > > But do we really need 64b number of _pages_? I have hard time imagine
> > > that we would have that many lazy pages to accumulate.
> > > 
> > That is more about of using the same type of variables thus the same size
> > in 32/64 bit address space.
> > 
> > <snip>
> > static void free_vmap_area_noflush(struct vmap_area *va)
> > {
> >     int nr_lazy;
> >  
> >     nr_lazy = atomic_add_return((va->va_end - va->va_start) >> PAGE_SHIFT,
> >                                 &vmap_lazy_nr);
> > ...
> >     if (unlikely(nr_lazy > lazy_max_pages()))
> >         try_purge_vmap_area_lazy();
> > <snip>
> > 
> > va_end/va_start are "unsigned long" whereas atomit_t(vmap_lazy_nr) is "int". 
> > The same with lazy_max_pages(), it returns "unsigned long" value.
> > 
> > Answering your question, in 64bit, the "vmalloc" address space is ~8589719406
> > pages if PAGE_SIZE is 4096, i.e. a regular 4 byte integer is not enough to hold
> > it. I agree it is hard to imagine, but it also depends on physical memory a
> > system has, it has to be terabytes. I am not sure if such systems exists.
> 
> There are certainly systems with more than 16TB of memory out there.
> The question is whether we want to allow individual vmaps of 16TB.
Honestly, I do not know. But from what I can see, we are allowed to make an
individual mapping as large as the physical memory we have, unless I am
missing something.

>
> We currently have a 32TB vmap space (on x86-64), so that's one limit.
> Should we restrict it further to avoid this ever wrapping past a 32-bit
> limit?
We could restrict the vmap space to 1 << 32 pages on 64-bit systems, but then
probably all architectures would have to follow that rule and be patched
accordingly. Apart from that, I am not sure how KASAN calculates the start
point of its allocation, i.e. the offset within the VMALLOC_START - VMALLOC_END
address space. The same goes for the kernel module mapping space (if modules
are built to allocate from vmalloc space).

Also, since atomic_t is a signed integer it can go negative, so we would have
to cast to "unsigned int" everywhere we deal with "vmap_lazy_nr".
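
To illustrate, a hypothetical sketch of what the resched check in
__purge_vmap_area_lazy() might look like if vmap_lazy_nr stayed atomic_t and
we relied on casts instead (illustration only, not proposed):

    /* hypothetical: vmap_lazy_nr left as atomic_t (signed 32-bit) */
    unsigned int resched_threshold = (unsigned int)(lazy_max_pages() << 1); /* may truncate */

    if ((unsigned int)atomic_read(&vmap_lazy_nr) < resched_threshold)
            cond_resched_lock(&vmap_area_lock);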

Thank you.

--
Vlad Rezki

Patch

diff --git a/mm/vmalloc.c b/mm/vmalloc.c
index abe83f885069..755b02983d8d 100644
--- a/mm/vmalloc.c
+++ b/mm/vmalloc.c
@@ -632,7 +632,7 @@  static unsigned long lazy_max_pages(void)
 	return log * (32UL * 1024 * 1024 / PAGE_SIZE);
 }
 
-static atomic_t vmap_lazy_nr = ATOMIC_INIT(0);
+static atomic_long_t vmap_lazy_nr = ATOMIC_LONG_INIT(0);
 
 /*
  * Serialize vmap purging.  There is no actual criticial section protected
@@ -650,7 +650,7 @@  static void purge_fragmented_blocks_allcpus(void);
  */
 void set_iounmap_nonlazy(void)
 {
-	atomic_set(&vmap_lazy_nr, lazy_max_pages()+1);
+	atomic_long_set(&vmap_lazy_nr, lazy_max_pages()+1);
 }
 
 /*
@@ -658,10 +658,10 @@  void set_iounmap_nonlazy(void)
  */
 static bool __purge_vmap_area_lazy(unsigned long start, unsigned long end)
 {
+	unsigned long resched_threshold;
 	struct llist_node *valist;
 	struct vmap_area *va;
 	struct vmap_area *n_va;
-	int resched_threshold;
 
 	lockdep_assert_held(&vmap_purge_lock);
 
@@ -681,16 +681,16 @@  static bool __purge_vmap_area_lazy(unsigned long start, unsigned long end)
 	}
 
 	flush_tlb_kernel_range(start, end);
-	resched_threshold = (int) lazy_max_pages() << 1;
+	resched_threshold = lazy_max_pages() << 1;
 
 	spin_lock(&vmap_area_lock);
 	llist_for_each_entry_safe(va, n_va, valist, purge_list) {
-		int nr = (va->va_end - va->va_start) >> PAGE_SHIFT;
+		unsigned long nr = (va->va_end - va->va_start) >> PAGE_SHIFT;
 
 		__free_vmap_area(va);
-		atomic_sub(nr, &vmap_lazy_nr);
+		atomic_long_sub(nr, &vmap_lazy_nr);
 
-		if (atomic_read(&vmap_lazy_nr) < resched_threshold)
+		if (atomic_long_read(&vmap_lazy_nr) < resched_threshold)
 			cond_resched_lock(&vmap_area_lock);
 	}
 	spin_unlock(&vmap_area_lock);
@@ -727,10 +727,10 @@  static void purge_vmap_area_lazy(void)
  */
 static void free_vmap_area_noflush(struct vmap_area *va)
 {
-	int nr_lazy;
+	unsigned long nr_lazy;
 
-	nr_lazy = atomic_add_return((va->va_end - va->va_start) >> PAGE_SHIFT,
-				    &vmap_lazy_nr);
+	nr_lazy = atomic_long_add_return((va->va_end - va->va_start) >>
+				PAGE_SHIFT, &vmap_lazy_nr);
 
 	/* After this point, we may free va at any time */
 	llist_add(&va->purge_list, &vmap_purge_list);