diff mbox series

[v3,3/8] mm/rmap: Split try_to_munlock from try_to_unmap

Message ID 20210226071832.31547-4-apopple@nvidia.com (mailing list archive)
State New
Headers show
Series Add support for SVM atomics in Nouveau | expand

Commit Message

Alistair Popple Feb. 26, 2021, 7:18 a.m. UTC
The behaviour of try_to_unmap_one() is difficult to follow because it
performs different operations based on a fairly large set of flags used
in different combinations.

TTU_MUNLOCK is one such flag. However it is exclusively used by
try_to_munlock() which specifies no other flags. Therefore rather than
overload try_to_unmap_one() with unrelated behaviour split this out into
it's own function and remove the flag.

Signed-off-by: Alistair Popple <apopple@nvidia.com>

---

Given the comments on not needing to hold mmap_lock it was not 100% clear
to me if it is safe to check vma->vma_flags & VM_LOCKED and if re-checking
under the ptl was significant. I left the extra check in case it was, but
it seems one of the checks is redunant as either the first check is racey
or the second check is unneccsary.
---
 include/linux/rmap.h |  1 -
 mm/rmap.c            | 47 ++++++++++++++++++++++++++++++++++++--------
 2 files changed, 39 insertions(+), 9 deletions(-)

Comments

Christoph Hellwig Feb. 26, 2021, 4:01 p.m. UTC | #1
> +	while (page_vma_mapped_walk(&pvmw)) {
> +		/*
> +		 * If the page is mlock()d, we cannot swap it out.
> +		 * If it's recently referenced (perhaps page_referenced
> +		 * skipped over this mm) then we should reactivate it.
> +		 */
> +		if (vma->vm_flags & VM_LOCKED) {
> +			/* PTE-mapped THP are never mlocked */
> +			if (!PageTransCompound(page)) {
> +				/*
> +				 * Holding pte lock, we do *not* need
> +				 * mmap_lock here
> +				 */
> +				mlock_vma_page(page);
> +			}
> +			ret = false;
> +			page_vma_mapped_walk_done(&pvmw);
> +			break;

Just return false here directly and remove the ret variable?

Very nice cleanup!

Reviewed-by: Christoph Hellwig <hch@lst.de>
Jason Gunthorpe March 1, 2021, 4:10 p.m. UTC | #2
On Fri, Feb 26, 2021 at 06:18:27PM +1100, Alistair Popple wrote:
> The behaviour of try_to_unmap_one() is difficult to follow because it
> performs different operations based on a fairly large set of flags used
> in different combinations.
> 
> TTU_MUNLOCK is one such flag. However it is exclusively used by
> try_to_munlock() which specifies no other flags. Therefore rather than
> overload try_to_unmap_one() with unrelated behaviour split this out into
> it's own function and remove the flag.
> 
> Signed-off-by: Alistair Popple <apopple@nvidia.com>
> 
> 
> Given the comments on not needing to hold mmap_lock it was not 100% clear
> to me if it is safe to check vma->vma_flags & VM_LOCKED and if re-checking
> under the ptl was significant. I left the extra check in case it was, but
> it seems one of the checks is redunant as either the first check is racey
> or the second check is unneccsary.

The rmap doesn't hold the mmap_lock so I think both of these cases are
racey.

eg 

apply_vma_lock_flags()

	vma = find_vma(current->mm, start);
	if (!vma || vma->vm_start > start)
		return -ENOMEM;

	prev = vma->vm_prev;
	if (start > vma->vm_start)
		prev = vma;

	for (nstart = start ; ; ) {
		vm_flags_t newflags = vma->vm_flags & VM_LOCKED_CLEAR_MASK;

		newflags |= flags;
 [...]
mlock_fixup()
	/*
	 * vm_flags is protected by the mmap_lock held in write mode.
	 * It's okay if try_to_unmap_one unmaps a page just after we
	 * set VM_LOCKED, populate_vma_page_range will bring it back.
	 */

	if (lock)
		vma->vm_flags = newflags;
	else
               	vma->vm_flags &= VM_LOCKED_CLEAR_MASK;

Which is only done under the mmap_sem

> +static bool try_to_munlock_one(struct page *page, struct vm_area_struct *vma,
> +		     unsigned long address, void *arg)
> +{
> +	struct page_vma_mapped_walk pvmw = {
> +		.page = page,
> +		.vma = vma,
> +		.address = address,
> +	};
> +	bool ret = true;
> +
> +	/* munlock has nothing to gain from examining un-locked vmas */
> +	if (!(vma->vm_flags & VM_LOCKED))
> +		return true;

The mmap_sem can't be obtained in the rmap walkers due to lock
ordering, the various rmap locks are nested under the mmap_sem

So, when reading data that is not locked it should be written as:

   READ_ONCE(vma->vm_flags) & VM_LOCKED

> +	while (page_vma_mapped_walk(&pvmw)) {
> +		/*
> +		 * If the page is mlock()d, we cannot swap it out.
> +		 * If it's recently referenced (perhaps page_referenced
> +		 * skipped over this mm) then we should reactivate it.
> +		 */
> +		if (vma->vm_flags & VM_LOCKED) {

And since we write the data without holding the PTLs this looks
pointless, unless there is some other VM_LOCKED manipulation

Jason
Alistair Popple March 4, 2021, 4:27 a.m. UTC | #3
On Tuesday, 2 March 2021 3:10:49 AM AEDT Jason Gunthorpe wrote:
> > +       while (page_vma_mapped_walk(&pvmw)) {
> > +               /*
> > +                * If the page is mlock()d, we cannot swap it out.
> > +                * If it's recently referenced (perhaps page_referenced
> > +                * skipped over this mm) then we should reactivate it.
> > +                */
> > +               if (vma->vm_flags & VM_LOCKED) {
> 
> And since we write the data without holding the PTLs this looks
> pointless, unless there is some other VM_LOCKED manipulation
> 

Thanks. I couldn't find any other manipulation of VM_LOCKED whilst holding the 
PTL so I'll remove this redundant check.

 - Alistair
diff mbox series

Patch

diff --git a/include/linux/rmap.h b/include/linux/rmap.h
index 70085ca1a3fc..7f1ee411bd7b 100644
--- a/include/linux/rmap.h
+++ b/include/linux/rmap.h
@@ -87,7 +87,6 @@  struct anon_vma_chain {
 
 enum ttu_flags {
 	TTU_MIGRATION		= 0x1,	/* migration mode */
-	TTU_MUNLOCK		= 0x2,	/* munlock mode */
 
 	TTU_SPLIT_HUGE_PMD	= 0x4,	/* split huge PMD if any */
 	TTU_IGNORE_MLOCK	= 0x8,	/* ignore mlock */
diff --git a/mm/rmap.c b/mm/rmap.c
index ef9ef2694c58..850eecdd866a 100644
--- a/mm/rmap.c
+++ b/mm/rmap.c
@@ -1391,10 +1391,6 @@  static bool try_to_unmap_one(struct page *page, struct vm_area_struct *vma,
 	struct mmu_notifier_range range;
 	enum ttu_flags flags = (enum ttu_flags)(long)arg;
 
-	/* munlock has nothing to gain from examining un-locked vmas */
-	if ((flags & TTU_MUNLOCK) && !(vma->vm_flags & VM_LOCKED))
-		return true;
-
 	if (IS_ENABLED(CONFIG_MIGRATION) && (flags & TTU_MIGRATION) &&
 	    is_zone_device_page(page) && !is_device_private_page(page))
 		return true;
@@ -1455,8 +1451,6 @@  static bool try_to_unmap_one(struct page *page, struct vm_area_struct *vma,
 				page_vma_mapped_walk_done(&pvmw);
 				break;
 			}
-			if (flags & TTU_MUNLOCK)
-				continue;
 		}
 
 		/* Unexpected PMD-mapped THP? */
@@ -1775,6 +1769,44 @@  static int page_not_mapped(struct page *page)
 	return !page_mapped(page);
 };
 
+static bool try_to_munlock_one(struct page *page, struct vm_area_struct *vma,
+		     unsigned long address, void *arg)
+{
+	struct page_vma_mapped_walk pvmw = {
+		.page = page,
+		.vma = vma,
+		.address = address,
+	};
+	bool ret = true;
+
+	/* munlock has nothing to gain from examining un-locked vmas */
+	if (!(vma->vm_flags & VM_LOCKED))
+		return true;
+
+	while (page_vma_mapped_walk(&pvmw)) {
+		/*
+		 * If the page is mlock()d, we cannot swap it out.
+		 * If it's recently referenced (perhaps page_referenced
+		 * skipped over this mm) then we should reactivate it.
+		 */
+		if (vma->vm_flags & VM_LOCKED) {
+			/* PTE-mapped THP are never mlocked */
+			if (!PageTransCompound(page)) {
+				/*
+				 * Holding pte lock, we do *not* need
+				 * mmap_lock here
+				 */
+				mlock_vma_page(page);
+			}
+			ret = false;
+			page_vma_mapped_walk_done(&pvmw);
+			break;
+		}
+	}
+
+	return ret;
+}
+
 /**
  * try_to_munlock - try to munlock a page
  * @page: the page to be munlocked
@@ -1787,8 +1819,7 @@  static int page_not_mapped(struct page *page)
 void try_to_munlock(struct page *page)
 {
 	struct rmap_walk_control rwc = {
-		.rmap_one = try_to_unmap_one,
-		.arg = (void *)TTU_MUNLOCK,
+		.rmap_one = try_to_munlock_one,
 		.done = page_not_mapped,
 		.anon_lock = page_lock_anon_vma_read,