diff mbox series

[v2,1/2] mm/vmalloc: Add a safer version of find_vm_area() for debug

Message ID 20230831171827.2625016-1-joel@joelfernandes.org (mailing list archive)
State New
Headers show
Series [v2,1/2] mm/vmalloc: Add a safer version of find_vm_area() for debug | expand

Commit Message

Joel Fernandes Aug. 31, 2023, 5:18 p.m. UTC
It is unsafe to dump vmalloc area information when trying to do so from
some contexts. Add a safer trylock version of the same function to do a
best-effort VMA finding and use it from vmalloc_dump_obj().

[apply test robot feedback on unused function fix.]

Reported-by: Zhen Lei <thunder.leizhen@huaweicloud.com>
Cc: Paul E. McKenney <paulmck@kernel.org>
Cc: rcu@vger.kernel.org
Cc: Zqiang <qiang.zhang1211@gmail.com>
Reviewed-by: Matthew Wilcox (Oracle) <willy@infradead.org>
Signed-off-by: Joel Fernandes (Google) <joel@joelfernandes.org>
---
v1->v2: Apply review tags and test robot feedback.

 mm/vmalloc.c | 39 ++++++++++++++++++++++++++++++++++++++-
 1 file changed, 38 insertions(+), 1 deletion(-)

Comments

Uladzislau Rezki Aug. 31, 2023, 7:47 p.m. UTC | #1
On Thu, Aug 31, 2023 at 05:18:25PM +0000, Joel Fernandes (Google) wrote:
> It is unsafe to dump vmalloc area information when trying to do so from
> some contexts. Add a safer trylock version of the same function to do a
> best-effort VMA finding and use it from vmalloc_dump_obj().
> 
> [apply test robot feedback on unused function fix.]
> 
> Reported-by: Zhen Lei <thunder.leizhen@huaweicloud.com>
> Cc: Paul E. McKenney <paulmck@kernel.org>
> Cc: rcu@vger.kernel.org
> Cc: Zqiang <qiang.zhang1211@gmail.com>
> Reviewed-by: Matthew Wilcox (Oracle) <willy@infradead.org>
> Signed-off-by: Joel Fernandes (Google) <joel@joelfernandes.org>
> ---
> v1->v2: Apply review tags and test robot feedback.
> 
>  mm/vmalloc.c | 39 ++++++++++++++++++++++++++++++++++++++-
>  1 file changed, 38 insertions(+), 1 deletion(-)
> 
> diff --git a/mm/vmalloc.c b/mm/vmalloc.c
> index 93cf99aba335..f09e882ae3b8 100644
> --- a/mm/vmalloc.c
> +++ b/mm/vmalloc.c
> @@ -1865,6 +1865,20 @@ struct vmap_area *find_vmap_area(unsigned long addr)
>  	return va;
>  }
>  
> +#ifdef CONFIG_PRINTK
> +static struct vmap_area *find_vmap_area_trylock(unsigned long addr)
> +{
> +	struct vmap_area *va;
> +
> +	if (!spin_trylock(&vmap_area_lock))
> +		return NULL;
> +	va = __find_vmap_area(addr, &vmap_area_root);
> +	spin_unlock(&vmap_area_lock);
> +
> +	return va;
> +}
> +#endif
> +
>  static struct vmap_area *find_unlink_vmap_area(unsigned long addr)
>  {
>  	struct vmap_area *va;
> @@ -2671,6 +2685,29 @@ struct vm_struct *find_vm_area(const void *addr)
>  	return va->vm;
>  }
>  
> +/**
> + * try_to_find_vm_area - find a continuous kernel virtual area
> + * @addr:	  base address
> + *
> + * This function is the same as find_vm_area() except that it is
> + * safe to call if vmap_area_lock is already held and returns NULL
> + * if it is. See comments in find_vmap_area() for other details.
> + *
> + * Return: the area descriptor on success or %NULL on failure.
> + */
> +#ifdef CONFIG_PRINTK
> +static struct vm_struct *try_to_find_vm_area(const void *addr)
> +{
> +	struct vmap_area *va;
> +
> +	va = find_vmap_area_trylock((unsigned long)addr);
> +	if (!va)
> +		return NULL;
> +
> +	return va->vm;
> +}
> +#endif
> +
>  /**
>   * remove_vm_area - find and remove a continuous kernel virtual area
>   * @addr:	    base address
> @@ -4277,7 +4314,7 @@ bool vmalloc_dump_obj(void *object)
>  	struct vm_struct *vm;
>  	void *objp = (void *)PAGE_ALIGN((unsigned long)object);
>  
> -	vm = find_vm_area(objp);
> +	vm = try_to_find_vm_area(objp);
>  	if (!vm)
>  		return false;
>  	pr_cont(" %u-page vmalloc region starting at %#lx allocated at %pS\n",
>
I am not sure if this patch makes a lot of sense. I agree, this is a
problem and the patch mitigates it. But it is broken in the sense that,
once you drop the lock, the VA should not be accessed.

Is that a real issue, or does it get triggered by some synthetic test case?

If I were you, I would go with an open-coded version of trylock, because
there is only one user so far.

--
Uladzislau Rezki
Joel Fernandes Sept. 1, 2023, 12:19 a.m. UTC | #2
On Thu, Aug 31, 2023 at 09:47:52PM +0200, Uladzislau Rezki wrote:
> On Thu, Aug 31, 2023 at 05:18:25PM +0000, Joel Fernandes (Google) wrote:
> > It is unsafe to dump vmalloc area information when trying to do so from
> > some contexts. Add a safer trylock version of the same function to do a
> > best-effort VMA finding and use it from vmalloc_dump_obj().
> > 
> > [apply test robot feedback on unused function fix.]
> > 
> > Reported-by: Zhen Lei <thunder.leizhen@huaweicloud.com>
> > Cc: Paul E. McKenney <paulmck@kernel.org>
> > Cc: rcu@vger.kernel.org
> > Cc: Zqiang <qiang.zhang1211@gmail.com>
> > Reviewed-by: Matthew Wilcox (Oracle) <willy@infradead.org>
> > Signed-off-by: Joel Fernandes (Google) <joel@joelfernandes.org>
> > ---
> > v1->v2: Apply review tags and test robot feedback.
> > 
> >  mm/vmalloc.c | 39 ++++++++++++++++++++++++++++++++++++++-
> >  1 file changed, 38 insertions(+), 1 deletion(-)
> > 
> > diff --git a/mm/vmalloc.c b/mm/vmalloc.c
> > index 93cf99aba335..f09e882ae3b8 100644
> > --- a/mm/vmalloc.c
> > +++ b/mm/vmalloc.c
> > @@ -1865,6 +1865,20 @@ struct vmap_area *find_vmap_area(unsigned long addr)
> >  	return va;
> >  }
> >  
> > +#ifdef CONFIG_PRINTK
> > +static struct vmap_area *find_vmap_area_trylock(unsigned long addr)
> > +{
> > +	struct vmap_area *va;
> > +
> > +	if (!spin_trylock(&vmap_area_lock))
> > +		return NULL;
> > +	va = __find_vmap_area(addr, &vmap_area_root);
> > +	spin_unlock(&vmap_area_lock);
> > +
> > +	return va;
> > +}
> > +#endif
> > +
> >  static struct vmap_area *find_unlink_vmap_area(unsigned long addr)
> >  {
> >  	struct vmap_area *va;
> > @@ -2671,6 +2685,29 @@ struct vm_struct *find_vm_area(const void *addr)
> >  	return va->vm;
> >  }
> >  
> > +/**
> > + * try_to_find_vm_area - find a continuous kernel virtual area
> > + * @addr:	  base address
> > + *
> > + * This function is the same as find_vm_area() except that it is
> > + * safe to call if vmap_area_lock is already held and returns NULL
> > + * if it is. See comments in find_vmap_area() for other details.
> > + *
> > + * Return: the area descriptor on success or %NULL on failure.
> > + */
> > +#ifdef CONFIG_PRINTK
> > +static struct vm_struct *try_to_find_vm_area(const void *addr)
> > +{
> > +	struct vmap_area *va;
> > +
> > +	va = find_vmap_area_trylock((unsigned long)addr);
> > +	if (!va)
> > +		return NULL;
> > +
> > +	return va->vm;
> > +}
> > +#endif
> > +
> >  /**
> >   * remove_vm_area - find and remove a continuous kernel virtual area
> >   * @addr:	    base address
> > @@ -4277,7 +4314,7 @@ bool vmalloc_dump_obj(void *object)
> >  	struct vm_struct *vm;
> >  	void *objp = (void *)PAGE_ALIGN((unsigned long)object);
> >  
> > -	vm = find_vm_area(objp);
> > +	vm = try_to_find_vm_area(objp);
> >  	if (!vm)
> >  		return false;
> >  	pr_cont(" %u-page vmalloc region starting at %#lx allocated at %pS\n",

Hi Vlad,
Thanks for taking a look.

> I am not sure if this patch makes a lot of sense. I agree, this is a
> problem and it mitigates it. But it is broken in terms of once you drop
> the lock, the VA should not be accessed.

Just to note the lockless-access issue you are referring to is not introduced
by this patch but is rather in the existing code. Also just to note this is
debug code.

> Is that a real issue, or does it get triggered by some synthetic test case?

It is a real issue. See 2/2.

> If i were you, i would go with open-coded version of trylock. Because
> there is only one user so far.

Taking your open coding and locking suggestions, I came up with the below
which actually results in a smaller patch. Does it look good to you?

diff --git a/mm/vmalloc.c b/mm/vmalloc.c
index 93cf99aba335..aaf6bad997a7 100644
--- a/mm/vmalloc.c
+++ b/mm/vmalloc.c
@@ -4274,14 +4274,31 @@ void pcpu_free_vm_areas(struct vm_struct **vms, int nr_vms)
 #ifdef CONFIG_PRINTK
 bool vmalloc_dump_obj(void *object)
 {
+	void *caller, *objp = (void *)PAGE_ALIGN((unsigned long)object);
 	struct vm_struct *vm;
-	void *objp = (void *)PAGE_ALIGN((unsigned long)object);
+	struct vmap_area *va;
+	unsigned long addr;
+	unsigned int nr_pages;
 
-	vm = find_vm_area(objp);
-	if (!vm)
+	if (!spin_trylock(&vmap_area_lock))
+		return false;
+	va = __find_vmap_area((unsigned long)addr, &vmap_area_root);
+	if (!va) {
+		spin_unlock(&vmap_area_lock);
 		return false;
+	}
+
+	vm = va->vm;
+	if (!vm) {
+		spin_unlock(&vmap_area_lock);
+		return false;
+	}
+	addr = vm->addr;
+	caller = vm->caller;
+	nr_pages = vm->nr_pages;
+	spin_unlock(&vmap_area_lock);
 	pr_cont(" %u-page vmalloc region starting at %#lx allocated at %pS\n",
-		vm->nr_pages, (unsigned long)vm->addr, vm->caller);
+		nr_pages, addr, caller);
 	return true;
 }
 #endif
Joel Fernandes Sept. 1, 2023, 12:33 a.m. UTC | #3
On Fri, Sep 01, 2023 at 12:19:17AM +0000, Joel Fernandes wrote:
> On Thu, Aug 31, 2023 at 09:47:52PM +0200, Uladzislau Rezki wrote:
> > On Thu, Aug 31, 2023 at 05:18:25PM +0000, Joel Fernandes (Google) wrote:
> > > It is unsafe to dump vmalloc area information when trying to do so from
> > > some contexts. Add a safer trylock version of the same function to do a
> > > best-effort VMA finding and use it from vmalloc_dump_obj().
> > > 
> > > [apply test robot feedback on unused function fix.]
> > > 
> > > Reported-by: Zhen Lei <thunder.leizhen@huaweicloud.com>
> > > Cc: Paul E. McKenney <paulmck@kernel.org>
> > > Cc: rcu@vger.kernel.org
> > > Cc: Zqiang <qiang.zhang1211@gmail.com>
> > > Reviewed-by: Matthew Wilcox (Oracle) <willy@infradead.org>
> > > Signed-off-by: Joel Fernandes (Google) <joel@joelfernandes.org>
> > > ---
> > > v1->v2: Apply review tags and test robot feedback.
> > > 
> > >  mm/vmalloc.c | 39 ++++++++++++++++++++++++++++++++++++++-
> > >  1 file changed, 38 insertions(+), 1 deletion(-)
> > > 
> > > diff --git a/mm/vmalloc.c b/mm/vmalloc.c
> > > index 93cf99aba335..f09e882ae3b8 100644
> > > --- a/mm/vmalloc.c
> > > +++ b/mm/vmalloc.c
> > > @@ -1865,6 +1865,20 @@ struct vmap_area *find_vmap_area(unsigned long addr)
> > >  	return va;
> > >  }
> > >  
> > > +#ifdef CONFIG_PRINTK
> > > +static struct vmap_area *find_vmap_area_trylock(unsigned long addr)
> > > +{
> > > +	struct vmap_area *va;
> > > +
> > > +	if (!spin_trylock(&vmap_area_lock))
> > > +		return NULL;
> > > +	va = __find_vmap_area(addr, &vmap_area_root);
> > > +	spin_unlock(&vmap_area_lock);
> > > +
> > > +	return va;
> > > +}
> > > +#endif
> > > +
> > >  static struct vmap_area *find_unlink_vmap_area(unsigned long addr)
> > >  {
> > >  	struct vmap_area *va;
> > > @@ -2671,6 +2685,29 @@ struct vm_struct *find_vm_area(const void *addr)
> > >  	return va->vm;
> > >  }
> > >  
> > > +/**
> > > + * try_to_find_vm_area - find a continuous kernel virtual area
> > > + * @addr:	  base address
> > > + *
> > > + * This function is the same as find_vm_area() except that it is
> > > + * safe to call if vmap_area_lock is already held and returns NULL
> > > + * if it is. See comments in find_vmap_area() for other details.
> > > + *
> > > + * Return: the area descriptor on success or %NULL on failure.
> > > + */
> > > +#ifdef CONFIG_PRINTK
> > > +static struct vm_struct *try_to_find_vm_area(const void *addr)
> > > +{
> > > +	struct vmap_area *va;
> > > +
> > > +	va = find_vmap_area_trylock((unsigned long)addr);
> > > +	if (!va)
> > > +		return NULL;
> > > +
> > > +	return va->vm;
> > > +}
> > > +#endif
> > > +
> > >  /**
> > >   * remove_vm_area - find and remove a continuous kernel virtual area
> > >   * @addr:	    base address
> > > @@ -4277,7 +4314,7 @@ bool vmalloc_dump_obj(void *object)
> > >  	struct vm_struct *vm;
> > >  	void *objp = (void *)PAGE_ALIGN((unsigned long)object);
> > >  
> > > -	vm = find_vm_area(objp);
> > > +	vm = try_to_find_vm_area(objp);
> > >  	if (!vm)
> > >  		return false;
> > >  	pr_cont(" %u-page vmalloc region starting at %#lx allocated at %pS\n",
> 
> Hi Vlad,
> Thanks for taking a look.
> 
> > I am not sure if this patch makes a lot of sense. I agree, this is a
> > problem and it mitigates it. But it is broken in terms of once you drop
> > the lock, the VA should not be accessed.
> 
> Just to note the lockless-access issue you are referring to is not introduced
> by this patch but is rather in the existing code. Also just to note this is
> debug code.
> 
> > Is that a real issue, or does it get triggered by some synthetic test case?
> 
> It is a real issue. See 2/2.
> 
> > If i were you, i would go with open-coded version of trylock. Because
> > there is only one user so far.
> 
> Taking your open coding and locking suggestions, I came up with the below
> which actually results in a smaller patch. Does it look good to you?
> 
> diff --git a/mm/vmalloc.c b/mm/vmalloc.c
> index 93cf99aba335..aaf6bad997a7 100644

And with some trivial compiler errors fixed (sorry should have build tested
but wanted to just share the idea earlier):

diff --git a/mm/vmalloc.c b/mm/vmalloc.c
index 93cf99aba335..2c6a0e2ff404 100644
--- a/mm/vmalloc.c
+++ b/mm/vmalloc.c
@@ -4274,14 +4274,32 @@ void pcpu_free_vm_areas(struct vm_struct **vms, int nr_vms)
 #ifdef CONFIG_PRINTK
 bool vmalloc_dump_obj(void *object)
 {
-	struct vm_struct *vm;
 	void *objp = (void *)PAGE_ALIGN((unsigned long)object);
+	const void *caller;
+	struct vm_struct *vm;
+	struct vmap_area *va;
+	unsigned long addr;
+	unsigned int nr_pages;
 
-	vm = find_vm_area(objp);
-	if (!vm)
+	if (!spin_trylock(&vmap_area_lock))
+		return false;
+	va = __find_vmap_area((unsigned long)objp, &vmap_area_root);
+	if (!va) {
+		spin_unlock(&vmap_area_lock);
 		return false;
+	}
+
+	vm = va->vm;
+	if (!vm) {
+		spin_unlock(&vmap_area_lock);
+		return false;
+	}
+	addr = (unsigned long)vm->addr;
+	caller = vm->caller;
+	nr_pages = vm->nr_pages;
+	spin_unlock(&vmap_area_lock);
 	pr_cont(" %u-page vmalloc region starting at %#lx allocated at %pS\n",
-		vm->nr_pages, (unsigned long)vm->addr, vm->caller);
+		nr_pages, addr, caller);
 	return true;
 }
 #endif
Uladzislau Rezki Sept. 1, 2023, 12:48 p.m. UTC | #4
On Fri, Sep 01, 2023 at 12:33:21AM +0000, Joel Fernandes wrote:
> On Fri, Sep 01, 2023 at 12:19:17AM +0000, Joel Fernandes wrote:
> > On Thu, Aug 31, 2023 at 09:47:52PM +0200, Uladzislau Rezki wrote:
> > > On Thu, Aug 31, 2023 at 05:18:25PM +0000, Joel Fernandes (Google) wrote:
> > > > It is unsafe to dump vmalloc area information when trying to do so from
> > > > some contexts. Add a safer trylock version of the same function to do a
> > > > best-effort VMA finding and use it from vmalloc_dump_obj().
> > > > 
> > > > [apply test robot feedback on unused function fix.]
> > > > 
> > > > Reported-by: Zhen Lei <thunder.leizhen@huaweicloud.com>
> > > > Cc: Paul E. McKenney <paulmck@kernel.org>
> > > > Cc: rcu@vger.kernel.org
> > > > Cc: Zqiang <qiang.zhang1211@gmail.com>
> > > > Reviewed-by: Matthew Wilcox (Oracle) <willy@infradead.org>
> > > > Signed-off-by: Joel Fernandes (Google) <joel@joelfernandes.org>
> > > > ---
> > > > v1->v2: Apply review tags and test robot feedback.
> > > > 
> > > >  mm/vmalloc.c | 39 ++++++++++++++++++++++++++++++++++++++-
> > > >  1 file changed, 38 insertions(+), 1 deletion(-)
> > > > 
> > > > diff --git a/mm/vmalloc.c b/mm/vmalloc.c
> > > > index 93cf99aba335..f09e882ae3b8 100644
> > > > --- a/mm/vmalloc.c
> > > > +++ b/mm/vmalloc.c
> > > > @@ -1865,6 +1865,20 @@ struct vmap_area *find_vmap_area(unsigned long addr)
> > > >  	return va;
> > > >  }
> > > >  
> > > > +#ifdef CONFIG_PRINTK
> > > > +static struct vmap_area *find_vmap_area_trylock(unsigned long addr)
> > > > +{
> > > > +	struct vmap_area *va;
> > > > +
> > > > +	if (!spin_trylock(&vmap_area_lock))
> > > > +		return NULL;
> > > > +	va = __find_vmap_area(addr, &vmap_area_root);
> > > > +	spin_unlock(&vmap_area_lock);
> > > > +
> > > > +	return va;
> > > > +}
> > > > +#endif
> > > > +
> > > >  static struct vmap_area *find_unlink_vmap_area(unsigned long addr)
> > > >  {
> > > >  	struct vmap_area *va;
> > > > @@ -2671,6 +2685,29 @@ struct vm_struct *find_vm_area(const void *addr)
> > > >  	return va->vm;
> > > >  }
> > > >  
> > > > +/**
> > > > + * try_to_find_vm_area - find a continuous kernel virtual area
> > > > + * @addr:	  base address
> > > > + *
> > > > + * This function is the same as find_vm_area() except that it is
> > > > + * safe to call if vmap_area_lock is already held and returns NULL
> > > > + * if it is. See comments in find_vmap_area() for other details.
> > > > + *
> > > > + * Return: the area descriptor on success or %NULL on failure.
> > > > + */
> > > > +#ifdef CONFIG_PRINTK
> > > > +static struct vm_struct *try_to_find_vm_area(const void *addr)
> > > > +{
> > > > +	struct vmap_area *va;
> > > > +
> > > > +	va = find_vmap_area_trylock((unsigned long)addr);
> > > > +	if (!va)
> > > > +		return NULL;
> > > > +
> > > > +	return va->vm;
> > > > +}
> > > > +#endif
> > > > +
> > > >  /**
> > > >   * remove_vm_area - find and remove a continuous kernel virtual area
> > > >   * @addr:	    base address
> > > > @@ -4277,7 +4314,7 @@ bool vmalloc_dump_obj(void *object)
> > > >  	struct vm_struct *vm;
> > > >  	void *objp = (void *)PAGE_ALIGN((unsigned long)object);
> > > >  
> > > > -	vm = find_vm_area(objp);
> > > > +	vm = try_to_find_vm_area(objp);
> > > >  	if (!vm)
> > > >  		return false;
> > > >  	pr_cont(" %u-page vmalloc region starting at %#lx allocated at %pS\n",
> > 
> > Hi Vlad,
> > Thanks for taking a look.
> > 
> > > I am not sure if this patch makes a lot of sense. I agree, this is a
> > > problem and it mitigates it. But it is broken in terms of once you drop
> > > the lock, the VA should not be accessed.
> > 
> > Just to note the lockless-access issue you are referring to is not introduced
> > by this patch but is rather in the existing code. Also just to note this is
> > debug code.
> > 
> > > Is that a real issue, or does it get triggered by some synthetic test case?
> > 
> > It is a real issue. See 2/2.
> > 
> > > If i were you, i would go with open-coded version of trylock. Because
> > > there is only one user so far.
> > 
> > Taking your open coding and locking suggestions, I came up with the below
> > which actually results in a smaller patch. Does it look good to you?
> > 
> > diff --git a/mm/vmalloc.c b/mm/vmalloc.c
> > index 93cf99aba335..aaf6bad997a7 100644
> 
> And with some trivial compiler errors fixed (sorry should have build tested
> but wanted to just share the idea earlier):
> 
> diff --git a/mm/vmalloc.c b/mm/vmalloc.c
> index 93cf99aba335..2c6a0e2ff404 100644
> --- a/mm/vmalloc.c
> +++ b/mm/vmalloc.c
> @@ -4274,14 +4274,32 @@ void pcpu_free_vm_areas(struct vm_struct **vms, int nr_vms)
>  #ifdef CONFIG_PRINTK
>  bool vmalloc_dump_obj(void *object)
>  {
> -	struct vm_struct *vm;
>  	void *objp = (void *)PAGE_ALIGN((unsigned long)object);
> +	const void *caller;
> +	struct vm_struct *vm;
> +	struct vmap_area *va;
> +	unsigned long addr;
> +	unsigned int nr_pages;
>  
> -	vm = find_vm_area(objp);
> -	if (!vm)
> +	if (!spin_trylock(&vmap_area_lock))
> +		return false;
> +	va = __find_vmap_area((unsigned long)objp, &vmap_area_root);
> +	if (!va) {
> +		spin_unlock(&vmap_area_lock);
>  		return false;
> +	}
> +
> +	vm = va->vm;
> +	if (!vm) {
> +		spin_unlock(&vmap_area_lock);
> +		return false;
> +	}
> +	addr = (unsigned long)vm->addr;
> +	caller = vm->caller;
> +	nr_pages = vm->nr_pages;
> +	spin_unlock(&vmap_area_lock);
>  	pr_cont(" %u-page vmalloc region starting at %#lx allocated at %pS\n",
> -		vm->nr_pages, (unsigned long)vm->addr, vm->caller);
> +		nr_pages, addr, caller);
>  	return true;
>  }
>  #endif
>
Looks good to me and thank you for fixing a locking issue :)
I think you will re-spin and resend it one more time?

--
Uladzislau Rezki
Joel Fernandes Sept. 1, 2023, 4:41 p.m. UTC | #5
> On Sep 1, 2023, at 8:48 AM, Uladzislau Rezki <urezki@gmail.com> wrote:
> 
> On Fri, Sep 01, 2023 at 12:33:21AM +0000, Joel Fernandes wrote:
>>> On Fri, Sep 01, 2023 at 12:19:17AM +0000, Joel Fernandes wrote:
>>> On Thu, Aug 31, 2023 at 09:47:52PM +0200, Uladzislau Rezki wrote:
>>>> On Thu, Aug 31, 2023 at 05:18:25PM +0000, Joel Fernandes (Google) wrote:
>>>>> It is unsafe to dump vmalloc area information when trying to do so from
>>>>> some contexts. Add a safer trylock version of the same function to do a
>>>>> best-effort VMA finding and use it from vmalloc_dump_obj().
>>>>> 
>>>>> [apply test robot feedback on unused function fix.]
>>>>> 
>>>>> Reported-by: Zhen Lei <thunder.leizhen@huaweicloud.com>
>>>>> Cc: Paul E. McKenney <paulmck@kernel.org>
>>>>> Cc: rcu@vger.kernel.org
>>>>> Cc: Zqiang <qiang.zhang1211@gmail.com>
>>>>> Reviewed-by: Matthew Wilcox (Oracle) <willy@infradead.org>
>>>>> Signed-off-by: Joel Fernandes (Google) <joel@joelfernandes.org>
>>>>> ---
>>>>> v1->v2: Apply review tags and test robot feedback.
>>>>> 
>>>>> mm/vmalloc.c | 39 ++++++++++++++++++++++++++++++++++++++-
>>>>> 1 file changed, 38 insertions(+), 1 deletion(-)
>>>>> 
>>>>> diff --git a/mm/vmalloc.c b/mm/vmalloc.c
>>>>> index 93cf99aba335..f09e882ae3b8 100644
>>>>> --- a/mm/vmalloc.c
>>>>> +++ b/mm/vmalloc.c
>>>>> @@ -1865,6 +1865,20 @@ struct vmap_area *find_vmap_area(unsigned long addr)
>>>>>    return va;
>>>>> }
>>>>> 
>>>>> +#ifdef CONFIG_PRINTK
>>>>> +static struct vmap_area *find_vmap_area_trylock(unsigned long addr)
>>>>> +{
>>>>> +    struct vmap_area *va;
>>>>> +
>>>>> +    if (!spin_trylock(&vmap_area_lock))
>>>>> +        return NULL;
>>>>> +    va = __find_vmap_area(addr, &vmap_area_root);
>>>>> +    spin_unlock(&vmap_area_lock);
>>>>> +
>>>>> +    return va;
>>>>> +}
>>>>> +#endif
>>>>> +
>>>>> static struct vmap_area *find_unlink_vmap_area(unsigned long addr)
>>>>> {
>>>>>    struct vmap_area *va;
>>>>> @@ -2671,6 +2685,29 @@ struct vm_struct *find_vm_area(const void *addr)
>>>>>    return va->vm;
>>>>> }
>>>>> 
>>>>> +/**
>>>>> + * try_to_find_vm_area - find a continuous kernel virtual area
>>>>> + * @addr:      base address
>>>>> + *
>>>>> + * This function is the same as find_vm_area() except that it is
>>>>> + * safe to call if vmap_area_lock is already held and returns NULL
>>>>> + * if it is. See comments in find_vmap_area() for other details.
>>>>> + *
>>>>> + * Return: the area descriptor on success or %NULL on failure.
>>>>> + */
>>>>> +#ifdef CONFIG_PRINTK
>>>>> +static struct vm_struct *try_to_find_vm_area(const void *addr)
>>>>> +{
>>>>> +    struct vmap_area *va;
>>>>> +
>>>>> +    va = find_vmap_area_trylock((unsigned long)addr);
>>>>> +    if (!va)
>>>>> +        return NULL;
>>>>> +
>>>>> +    return va->vm;
>>>>> +}
>>>>> +#endif
>>>>> +
>>>>> /**
>>>>>  * remove_vm_area - find and remove a continuous kernel virtual area
>>>>>  * @addr:        base address
>>>>> @@ -4277,7 +4314,7 @@ bool vmalloc_dump_obj(void *object)
>>>>>    struct vm_struct *vm;
>>>>>    void *objp = (void *)PAGE_ALIGN((unsigned long)object);
>>>>> 
>>>>> -    vm = find_vm_area(objp);
>>>>> +    vm = try_to_find_vm_area(objp);
>>>>>    if (!vm)
>>>>>        return false;
>>>>>    pr_cont(" %u-page vmalloc region starting at %#lx allocated at %pS\n",
>>> 
>>> Hi Vlad,
>>> Thanks for taking a look.
>>> 
>>>> I am not sure if this patch makes a lot of sense. I agree, this is a
>>>> problem and it mitigates it. But it is broken in terms of once you drop
>>>> the lock, the VA should not be accessed.
>>> 
>>> Just to note the lockless-access issue you are referring to is not introduced
>>> by this patch but is rather in the existing code. Also just to note this is
>>> debug code.
>>> 
>>>> Is that a real issue, or does it get triggered by some synthetic test case?
>>> 
>>> It is a real issue. See 2/2.
>>> 
>>>> If i were you, i would go with open-coded version of trylock. Because
>>>> there is only one user so far.
>>> 
>>> Taking your open coding and locking suggestions, I came up with the below
>>> which actually results in a smaller patch. Does it look good to you?
>>> 
>>> diff --git a/mm/vmalloc.c b/mm/vmalloc.c
>>> index 93cf99aba335..aaf6bad997a7 100644
>> 
>> And with some trivial compiler errors fixed (sorry should have build tested
>> but wanted to just share the idea earlier):
>> 
>> diff --git a/mm/vmalloc.c b/mm/vmalloc.c
>> index 93cf99aba335..2c6a0e2ff404 100644
>> --- a/mm/vmalloc.c
>> +++ b/mm/vmalloc.c
>> @@ -4274,14 +4274,32 @@ void pcpu_free_vm_areas(struct vm_struct **vms, int nr_vms)
>> #ifdef CONFIG_PRINTK
>> bool vmalloc_dump_obj(void *object)
>> {
>> -    struct vm_struct *vm;
>>    void *objp = (void *)PAGE_ALIGN((unsigned long)object);
>> +    const void *caller;
>> +    struct vm_struct *vm;
>> +    struct vmap_area *va;
>> +    unsigned long addr;
>> +    unsigned int nr_pages;
>> 
>> -    vm = find_vm_area(objp);
>> -    if (!vm)
>> +    if (!spin_trylock(&vmap_area_lock))
>> +        return false;
>> +    va = __find_vmap_area((unsigned long)objp, &vmap_area_root);
>> +    if (!va) {
>> +        spin_unlock(&vmap_area_lock);
>>        return false;
>> +    }
>> +
>> +    vm = va->vm;
>> +    if (!vm) {
>> +        spin_unlock(&vmap_area_lock);
>> +        return false;
>> +    }
>> +    addr = (unsigned long)vm->addr;
>> +    caller = vm->caller;
>> +    nr_pages = vm->nr_pages;
>> +    spin_unlock(&vmap_area_lock);
>>    pr_cont(" %u-page vmalloc region starting at %#lx allocated at %pS\n",
>> -        vm->nr_pages, (unsigned long)vm->addr, vm->caller);
>> +        nr_pages, addr, caller);
>>    return true;
>> }
>> #endif
>> 
> Looks good to me and thank you for fixing a locking issue :)
> I think you will re-spin and resend it one more time?

Yes. May I add your Reviewed-by tag to both patches after re-spinning as mentioned above?

thanks!

 - Joel

> 
> --
> Uladzislau Rezki
Uladzislau Rezki Sept. 4, 2023, 8:29 a.m. UTC | #6
On Fri, Sep 01, 2023 at 12:41:24PM -0400, Joel Fernandes wrote:
> 
> 
> > On Sep 1, 2023, at 8:48 AM, Uladzislau Rezki <urezki@gmail.com> wrote:
> > 
> > On Fri, Sep 01, 2023 at 12:33:21AM +0000, Joel Fernandes wrote:
> >>> On Fri, Sep 01, 2023 at 12:19:17AM +0000, Joel Fernandes wrote:
> >>> On Thu, Aug 31, 2023 at 09:47:52PM +0200, Uladzislau Rezki wrote:
> >>>> On Thu, Aug 31, 2023 at 05:18:25PM +0000, Joel Fernandes (Google) wrote:
> >>>>> It is unsafe to dump vmalloc area information when trying to do so from
> >>>>> some contexts. Add a safer trylock version of the same function to do a
> >>>>> best-effort VMA finding and use it from vmalloc_dump_obj().
> >>>>> 
> >>>>> [apply test robot feedback on unused function fix.]
> >>>>> 
> >>>>> Reported-by: Zhen Lei <thunder.leizhen@huaweicloud.com>
> >>>>> Cc: Paul E. McKenney <paulmck@kernel.org>
> >>>>> Cc: rcu@vger.kernel.org
> >>>>> Cc: Zqiang <qiang.zhang1211@gmail.com>
> >>>>> Reviewed-by: Matthew Wilcox (Oracle) <willy@infradead.org>
> >>>>> Signed-off-by: Joel Fernandes (Google) <joel@joelfernandes.org>
> >>>>> ---
> >>>>> v1->v2: Apply review tags and test robot feedback.
> >>>>> 
> >>>>> mm/vmalloc.c | 39 ++++++++++++++++++++++++++++++++++++++-
> >>>>> 1 file changed, 38 insertions(+), 1 deletion(-)
> >>>>> 
> >>>>> diff --git a/mm/vmalloc.c b/mm/vmalloc.c
> >>>>> index 93cf99aba335..f09e882ae3b8 100644
> >>>>> --- a/mm/vmalloc.c
> >>>>> +++ b/mm/vmalloc.c
> >>>>> @@ -1865,6 +1865,20 @@ struct vmap_area *find_vmap_area(unsigned long addr)
> >>>>>    return va;
> >>>>> }
> >>>>> 
> >>>>> +#ifdef CONFIG_PRINTK
> >>>>> +static struct vmap_area *find_vmap_area_trylock(unsigned long addr)
> >>>>> +{
> >>>>> +    struct vmap_area *va;
> >>>>> +
> >>>>> +    if (!spin_trylock(&vmap_area_lock))
> >>>>> +        return NULL;
> >>>>> +    va = __find_vmap_area(addr, &vmap_area_root);
> >>>>> +    spin_unlock(&vmap_area_lock);
> >>>>> +
> >>>>> +    return va;
> >>>>> +}
> >>>>> +#endif
> >>>>> +
> >>>>> static struct vmap_area *find_unlink_vmap_area(unsigned long addr)
> >>>>> {
> >>>>>    struct vmap_area *va;
> >>>>> @@ -2671,6 +2685,29 @@ struct vm_struct *find_vm_area(const void *addr)
> >>>>>    return va->vm;
> >>>>> }
> >>>>> 
> >>>>> +/**
> >>>>> + * try_to_find_vm_area - find a continuous kernel virtual area
> >>>>> + * @addr:      base address
> >>>>> + *
> >>>>> + * This function is the same as find_vm_area() except that it is
> >>>>> + * safe to call if vmap_area_lock is already held and returns NULL
> >>>>> + * if it is. See comments in find_vmap_area() for other details.
> >>>>> + *
> >>>>> + * Return: the area descriptor on success or %NULL on failure.
> >>>>> + */
> >>>>> +#ifdef CONFIG_PRINTK
> >>>>> +static struct vm_struct *try_to_find_vm_area(const void *addr)
> >>>>> +{
> >>>>> +    struct vmap_area *va;
> >>>>> +
> >>>>> +    va = find_vmap_area_trylock((unsigned long)addr);
> >>>>> +    if (!va)
> >>>>> +        return NULL;
> >>>>> +
> >>>>> +    return va->vm;
> >>>>> +}
> >>>>> +#endif
> >>>>> +
> >>>>> /**
> >>>>>  * remove_vm_area - find and remove a continuous kernel virtual area
> >>>>>  * @addr:        base address
> >>>>> @@ -4277,7 +4314,7 @@ bool vmalloc_dump_obj(void *object)
> >>>>>    struct vm_struct *vm;
> >>>>>    void *objp = (void *)PAGE_ALIGN((unsigned long)object);
> >>>>> 
> >>>>> -    vm = find_vm_area(objp);
> >>>>> +    vm = try_to_find_vm_area(objp);
> >>>>>    if (!vm)
> >>>>>        return false;
> >>>>>    pr_cont(" %u-page vmalloc region starting at %#lx allocated at %pS\n",
> >>> 
> >>> Hi Vlad,
> >>> Thanks for taking a look.
> >>> 
> >>>> I am not sure if this patch makes a lot of sense. I agree, this is a
> >>>> problem and it mitigates it. But it is broken in terms of once you drop
> >>>> the lock, the VA should not be accessed.
> >>> 
> >>> Just to note the lockless-access issue you are referring to is not introduced
> >>> by this patch but is rather in the existing code. Also just to note this is
> >>> debug code.
> >>> 
> >>>> Is that a real issue, or does it get triggered by some synthetic test case?
> >>> 
> >>> It is a real issue. See 2/2.
> >>> 
> >>>> If i were you, i would go with open-coded version of trylock. Because
> >>>> there is only one user so far.
> >>> 
> >>> Taking your open coding and locking suggestions, I came up with the below
> >>> which actually results in a smaller patch. Does it look good to you?
> >>> 
> >>> diff --git a/mm/vmalloc.c b/mm/vmalloc.c
> >>> index 93cf99aba335..aaf6bad997a7 100644
> >> 
> >> And with some trivial compiler errors fixed (sorry should have build tested
> >> but wanted to just share the idea earlier):
> >> 
> >> diff --git a/mm/vmalloc.c b/mm/vmalloc.c
> >> index 93cf99aba335..2c6a0e2ff404 100644
> >> --- a/mm/vmalloc.c
> >> +++ b/mm/vmalloc.c
> >> @@ -4274,14 +4274,32 @@ void pcpu_free_vm_areas(struct vm_struct **vms, int nr_vms)
> >> #ifdef CONFIG_PRINTK
> >> bool vmalloc_dump_obj(void *object)
> >> {
> >> -    struct vm_struct *vm;
> >>    void *objp = (void *)PAGE_ALIGN((unsigned long)object);
> >> +    const void *caller;
> >> +    struct vm_struct *vm;
> >> +    struct vmap_area *va;
> >> +    unsigned long addr;
> >> +    unsigned int nr_pages;
> >> 
> >> -    vm = find_vm_area(objp);
> >> -    if (!vm)
> >> +    if (!spin_trylock(&vmap_area_lock))
> >> +        return false;
> >> +    va = __find_vmap_area((unsigned long)objp, &vmap_area_root);
> >> +    if (!va) {
> >> +        spin_unlock(&vmap_area_lock);
> >>        return false;
> >> +    }
> >> +
> >> +    vm = va->vm;
> >> +    if (!vm) {
> >> +        spin_unlock(&vmap_area_lock);
> >> +        return false;
> >> +    }
> >> +    addr = (unsigned long)vm->addr;
> >> +    caller = vm->caller;
> >> +    nr_pages = vm->nr_pages;
> >> +    spin_unlock(&vmap_area_lock);
> >>    pr_cont(" %u-page vmalloc region starting at %#lx allocated at %pS\n",
> >> -        vm->nr_pages, (unsigned long)vm->addr, vm->caller);
> >> +        nr_pages, addr, caller);
> >>    return true;
> >> }
> >> #endif
> >> 
> > Looks good to me and thank you for fixing a locking issue :)
> > I think you will re-spin and resend it one more time?
> 
> Yes. May I add your Reviewed-by tag to both patches after re-spinning as mentioned above?
> 
Reviewed-by: Uladzislau Rezki (Sony) <urezki@gmail.com>

--
Uladzislau Rezki
diff mbox series

Patch

diff --git a/mm/vmalloc.c b/mm/vmalloc.c
index 93cf99aba335..f09e882ae3b8 100644
--- a/mm/vmalloc.c
+++ b/mm/vmalloc.c
@@ -1865,6 +1865,20 @@  struct vmap_area *find_vmap_area(unsigned long addr)
 	return va;
 }
 
+#ifdef CONFIG_PRINTK
+static struct vmap_area *find_vmap_area_trylock(unsigned long addr)
+{
+	struct vmap_area *va;
+
+	if (!spin_trylock(&vmap_area_lock))
+		return NULL;
+	va = __find_vmap_area(addr, &vmap_area_root);
+	spin_unlock(&vmap_area_lock);
+
+	return va;
+}
+#endif
+
 static struct vmap_area *find_unlink_vmap_area(unsigned long addr)
 {
 	struct vmap_area *va;
@@ -2671,6 +2685,29 @@  struct vm_struct *find_vm_area(const void *addr)
 	return va->vm;
 }
 
+/**
+ * try_to_find_vm_area - find a continuous kernel virtual area
+ * @addr:	  base address
+ *
+ * This function is the same as find_vm_area() except that it is
+ * safe to call if vmap_area_lock is already held and returns NULL
+ * if it is. See comments in find_vmap_area() for other details.
+ *
+ * Return: the area descriptor on success or %NULL on failure.
+ */
+#ifdef CONFIG_PRINTK
+static struct vm_struct *try_to_find_vm_area(const void *addr)
+{
+	struct vmap_area *va;
+
+	va = find_vmap_area_trylock((unsigned long)addr);
+	if (!va)
+		return NULL;
+
+	return va->vm;
+}
+#endif
+
 /**
  * remove_vm_area - find and remove a continuous kernel virtual area
  * @addr:	    base address
@@ -4277,7 +4314,7 @@  bool vmalloc_dump_obj(void *object)
 	struct vm_struct *vm;
 	void *objp = (void *)PAGE_ALIGN((unsigned long)object);
 
-	vm = find_vm_area(objp);
+	vm = try_to_find_vm_area(objp);
 	if (!vm)
 		return false;
 	pr_cont(" %u-page vmalloc region starting at %#lx allocated at %pS\n",