diff mbox series

[RFC,02/11] mm/mempolicy: swap cond reference counting logic in do_get_mempolicy

Message ID 20231122211200.31620-3-gregory.price@memverge.com (mailing list archive)
State New
Headers show
Series mm/mempolicy: Make task->mempolicy externally modifiable via syscall and procfs | expand

Commit Message

Gregory Price Nov. 22, 2023, 9:11 p.m. UTC
In preparation for making get/set mempolicy possible from outside the
context of the task being changed, we will need to take a reference
count on the task mempolicy in do_get_mempolicy.

do_get_mempolicy, operations on one of three policies

1) when MPOL_F_ADDR is set, it operates on a vma mempolicy
2) if the task does not have a mempolicy, default_policy is used
3) otherwise the task mempolicy is operated on

When the policy is from a vma, and that vma is a shared memory region,
the __get_vma_policy stack will take an additional reference

Change the behavior of do_get_mempolicy to unconditionally reference
whichever policy is operated on so that the cleanup logic can mpol_put
unconditionally, and mpol_cond_put is only called when a vma policy is
used.

Signed-off-by: Gregory Price <gregory.price@memverge.com>
---
 mm/mempolicy.c | 35 ++++++++++++++++++++++-------------
 1 file changed, 22 insertions(+), 13 deletions(-)

Comments

Michal Hocko Nov. 28, 2023, 2:07 p.m. UTC | #1
On Wed 22-11-23 16:11:51, Gregory Price wrote:
[...]
> @@ -982,11 +991,11 @@ static long do_get_mempolicy(int *policy, nodemask_t *nmask,
>  	}
>  
>   out:
> -	mpol_cond_put(pol);
> +	mpol_put(pol);
>  	if (vma)
>  		mmap_read_unlock(mm);
>  	if (pol_refcount)
> -		mpol_put(pol_refcount);
> +		mpol_cond_put(pol_refcount);

Maybe I am just misreading the patch but pol_refcount should be always
NULL with this patch

>  	return err;
>  }
>  
> -- 
> 2.39.1
>
Michal Hocko Nov. 28, 2023, 2:28 p.m. UTC | #2
[restoring the CC list, I supect you didn't want this to be a private
discussion]

On Tue 28-11-23 09:10:18, Gregory Price wrote:
> On Tue, Nov 28, 2023 at 03:07:10PM +0100, Michal Hocko wrote:
> > On Wed 22-11-23 16:11:51, Gregory Price wrote:
> > [...]
> > > @@ -982,11 +991,11 @@ static long do_get_mempolicy(int *policy, nodemask_t *nmask,
> > >  	}
> > >  
> > >   out:
> > > -	mpol_cond_put(pol);
> > > +	mpol_put(pol);
> > >  	if (vma)
> > >  		mmap_read_unlock(mm);
> > >  	if (pol_refcount)
> > > -		mpol_put(pol_refcount);
> > > +		mpol_cond_put(pol_refcount);
> > 
> > Maybe I am just misreading the patch but pol_refcount should be always
> > NULL with this patch
> > 
> 
> earlier:
> 
> +               pol = pol_refcount = __get_vma_policy(vma, addr, &ilx);
> 
> i can split this into two lines if preferred.
> 
> If addr is not set, then yes pol_refcount is always null.

My bad, missed that. Making that two lines would be easier to read but
nothing I would insist on of course.
diff mbox series

Patch

diff --git a/mm/mempolicy.c b/mm/mempolicy.c
index 410754d56e46..37da712259d7 100644
--- a/mm/mempolicy.c
+++ b/mm/mempolicy.c
@@ -900,9 +900,9 @@  static long do_get_mempolicy(int *policy, nodemask_t *nmask,
 			     unsigned long addr, unsigned long flags)
 {
 	int err;
-	struct mm_struct *mm = current->mm;
+	struct mm_struct *mm;
 	struct vm_area_struct *vma = NULL;
-	struct mempolicy *pol = current->mempolicy, *pol_refcount = NULL;
+	struct mempolicy *pol = NULL, *pol_refcount = NULL;
 
 	if (flags &
 		~(unsigned long)(MPOL_F_NODE|MPOL_F_ADDR|MPOL_F_MEMS_ALLOWED))
@@ -925,29 +925,38 @@  static long do_get_mempolicy(int *policy, nodemask_t *nmask,
 		 * vma/shared policy at addr is NULL.  We
 		 * want to return MPOL_DEFAULT in this case.
 		 */
+		mm = current->mm;
 		mmap_read_lock(mm);
 		vma = vma_lookup(mm, addr);
 		if (!vma) {
 			mmap_read_unlock(mm);
 			return -EFAULT;
 		}
-		pol = __get_vma_policy(vma, addr, &ilx);
+		/*
+		 * __get_vma_policy can refcount if a shared policy is
+		 * referenced.  We'll need to do a cond_put on the way
+		 * out, but we need to reference this policy either way
+		 * because we may drop the mmap read lock.
+		 */
+		pol = pol_refcount = __get_vma_policy(vma, addr, &ilx);
+		mpol_get(pol);
 	} else if (addr)
 		return -EINVAL;
+	else {
+		/* take a reference of the task policy now */
+		pol = current->mempolicy;
+		mpol_get(pol);
+	}
 
-	if (!pol)
+	if (!pol) {
 		pol = &default_policy;	/* indicates default behavior */
+		mpol_get(pol);
+	}
+	/* we now have at least one reference on the policy */
 
 	if (flags & MPOL_F_NODE) {
 		if (flags & MPOL_F_ADDR) {
-			/*
-			 * Take a refcount on the mpol, because we are about to
-			 * drop the mmap_lock, after which only "pol" remains
-			 * valid, "vma" is stale.
-			 */
-			pol_refcount = pol;
 			vma = NULL;
-			mpol_get(pol);
 			mmap_read_unlock(mm);
 			err = lookup_node(mm, addr);
 			if (err < 0)
@@ -982,11 +991,11 @@  static long do_get_mempolicy(int *policy, nodemask_t *nmask,
 	}
 
  out:
-	mpol_cond_put(pol);
+	mpol_put(pol);
 	if (vma)
 		mmap_read_unlock(mm);
 	if (pol_refcount)
-		mpol_put(pol_refcount);
+		mpol_cond_put(pol_refcount);
 	return err;
 }