diff mbox series

[v3,5/5] mm: Add ZAP_FLAG_SKIP_SWAP and zap_flags

Message ID 20210908163628.215052-1-peterx@redhat.com (mailing list archive)
State New
Headers show
Series mm: A few cleanup patches around zap, shmem and uffd | expand

Commit Message

Peter Xu Sept. 8, 2021, 4:36 p.m. UTC
Firstly, the comment in zap_pte_range() is misleading: it refers to
details->check_mapping, whereas the code only checks whether details is
non-NULL, so the comment does not match what the code does.

Meanwhile, there's no explicit reason why passing in the details pointer should
mean skipping all swap entries.  A new user of zap_details could very possibly
miss this fact unless they read all the way down to zap_pte_range(), because
there's no comment at zap_details mentioning it at all, so swap entries could be
erroneously skipped without anyone noticing.

This partly reverts 3e8715fdc03e ("mm: drop zap_details::check_swap_entries"),
but introduces a ZAP_FLAG_SKIP_SWAP flag, which means the opposite of the
previous "details" parameter: the caller must explicitly set it to skip swap
entries, otherwise swap entries will always be considered (which should still
be the major case here).

We may want to look into when exactly we need ZAP_FLAG_SKIP_SWAP and we should
have it in a synchronous manner, e.g., currently even if ZAP_FLAG_SKIP_SWAP is
set we'll still look into swap pmds no matter what.  But that should be a
separate effort from this patch.

The flag introduced in this patch prepares for more bits to be defined
in the future, e.g., a new flag bit indicating whether to persist the
upcoming uffd-wp bit in pgtable entries.

Cc: Kirill A. Shutemov <kirill@shutemov.name>
Cc: Hugh Dickins <hughd@google.com>
Signed-off-by: Peter Xu <peterx@redhat.com>
---
 include/linux/mm.h | 16 ++++++++++++++++
 mm/memory.c        |  6 +++---
 2 files changed, 19 insertions(+), 3 deletions(-)

Comments

Alistair Popple Sept. 15, 2021, 2:25 a.m. UTC | #1
On Thursday, 9 September 2021 2:36:28 AM AEST Peter Xu wrote:
> Firstly, the comment in zap_pte_range() is misleading because it checks against
> details rather than check_mappings, so it's against what the code did.
> 
> Meanwhile, there's no explicit reason why passing in the details pointer should
> mean to skip all swap entries.  New user of zap_details could very possibly
> miss this fact if they don't read deep until zap_pte_range() because there's no
> comment at zap_details talking about it at all, so swap entries could be
> erroneously skipped without being noticed.
> 
> This partly reverts 3e8715fdc03e ("mm: drop zap_details::check_swap_entries"),
> but introduce ZAP_FLAG_SKIP_SWAP flag, which means the opposite of previous
> "details" parameter: the caller should explicitly set this to skip swap
> entries, otherwise swap entries will always be considered (which should still
> be the major case here).
> 
> We may want to look into when exactly we need ZAP_FLAG_SKIP_SWAP and we should
> have it in a synchronous manner, e.g., currently even if ZAP_FLAG_SKIP_SWAP is
> set we'll still look into swap pmds no matter what.  But that should be a
> separate effort of this patch.

I didn't really follow what you mean by "synchronous" here, although the
explanation about pmds makes sense so it's probably just terminology.
 
> The flag introduced in this patch will be a preparation for more bits defined
> in the future, e.g., for a new bit in flag to show whether to persist the
> upcoming uffd-wp bit in pgtable entries.

That's kind of the problem. The patch itself looks correct to me however as
mentioned it is mostly reverting a previous cleanup and it's hard to tell why
that's justified without the subsequent patches. Perhaps it makes the usage of
zap_details a bit clearer, but a comment would do that too, with less code.

I know you want to try and shrink the uffd-wp series but I think this patch
might be easier to review if it was included as part of that series.

> Cc: Kirill A. Shutemov <kirill@shutemov.name>
> Cc: Hugh Dickins <hughd@google.com>
> Signed-off-by: Peter Xu <peterx@redhat.com>
> ---
>  include/linux/mm.h | 16 ++++++++++++++++
>  mm/memory.c        |  6 +++---
>  2 files changed, 19 insertions(+), 3 deletions(-)
> 
> diff --git a/include/linux/mm.h b/include/linux/mm.h
> index ed44f31615d9..beb784ce35b9 100644
> --- a/include/linux/mm.h
> +++ b/include/linux/mm.h
> @@ -1717,12 +1717,18 @@ static inline bool can_do_mlock(void) { return false; }
>  extern int user_shm_lock(size_t, struct ucounts *);
>  extern void user_shm_unlock(size_t, struct ucounts *);
>  
> +typedef unsigned int __bitwise zap_flags_t;
> +
> +/* Whether to skip zapping swap entries */
> +#define  ZAP_FLAG_SKIP_SWAP  ((__force zap_flags_t) BIT(0))
> +
>  /*
>   * Parameter block passed down to zap_pte_range in exceptional cases.
>   */
>  struct zap_details {
>  	struct address_space *zap_mapping;	/* Check page->mapping if set */
>  	struct page *single_page;		/* Locked page to be unmapped */
> +	zap_flags_t zap_flags;			/* Extra flags for zapping */
>  };
>  
>  /*
> @@ -1739,6 +1745,16 @@ zap_skip_check_mapping(struct zap_details *details, struct page *page)
>  	    (details->zap_mapping != page_rmapping(page));
>  }
>  
> +/* Return true if skip swap entries, false otherwise */
> +static inline bool
> +zap_skip_swap(struct zap_details *details)
> +{
> +	if (!details)
> +		return false;
> +
> +	return details->zap_flags & ZAP_FLAG_SKIP_SWAP;
> +}
> +
>  struct page *vm_normal_page(struct vm_area_struct *vma, unsigned long addr,
>  			     pte_t pte);
>  struct page *vm_normal_page_pmd(struct vm_area_struct *vma, unsigned long addr,
> diff --git a/mm/memory.c b/mm/memory.c
> index e5ee8399d270..26e37bef1888 100644
> --- a/mm/memory.c
> +++ b/mm/memory.c
> @@ -1379,8 +1379,7 @@ static unsigned long zap_pte_range(struct mmu_gather *tlb,
>  			continue;
>  		}
>  
> -		/* If details->check_mapping, we leave swap entries. */
> -		if (unlikely(details))
> +		if (unlikely(zap_skip_swap(details)))
>  			continue;
>  
>  		if (!non_swap_entry(entry))
> @@ -3353,6 +3352,7 @@ void unmap_mapping_page(struct page *page)
>  
>  	details.zap_mapping = mapping;
>  	details.single_page = page;
> +	details.zap_flags = ZAP_FLAG_SKIP_SWAP;
>  
>  	i_mmap_lock_write(mapping);
>  	if (unlikely(!RB_EMPTY_ROOT(&mapping->i_mmap.rb_root)))
> @@ -3377,7 +3377,7 @@ void unmap_mapping_pages(struct address_space *mapping, pgoff_t start,
>  		pgoff_t nr, bool even_cows)
>  {
>  	pgoff_t	first_index = start, last_index = start + nr - 1;
> -	struct zap_details details = { };
> +	struct zap_details details = { .zap_flags = ZAP_FLAG_SKIP_SWAP };
>  
>  	details.zap_mapping = even_cows ? NULL : mapping;
>  	if (last_index < first_index)
>
Peter Xu Sept. 15, 2021, 2:52 a.m. UTC | #2
Hi, Alistair,

On Wed, Sep 15, 2021 at 12:25:07PM +1000, Alistair Popple wrote:
> On Thursday, 9 September 2021 2:36:28 AM AEST Peter Xu wrote:
> > Firstly, the comment in zap_pte_range() is misleading because it checks against
> > details rather than check_mappings, so it's against what the code did.
> > 
> > Meanwhile, there's no explicit reason why passing in the details pointer should
> > mean to skip all swap entries.  New user of zap_details could very possibly
> > miss this fact if they don't read deep until zap_pte_range() because there's no
> > comment at zap_details talking about it at all, so swap entries could be
> > erroneously skipped without being noticed.
> > 
> > This partly reverts 3e8715fdc03e ("mm: drop zap_details::check_swap_entries"),
> > but introduce ZAP_FLAG_SKIP_SWAP flag, which means the opposite of previous
> > "details" parameter: the caller should explicitly set this to skip swap
> > entries, otherwise swap entries will always be considered (which should still
> > be the major case here).
> > 
> > We may want to look into when exactly we need ZAP_FLAG_SKIP_SWAP and we should
> > have it in a synchronous manner, e.g., currently even if ZAP_FLAG_SKIP_SWAP is
> > set we'll still look into swap pmds no matter what.  But that should be a
> > separate effort of this patch.
> 
> I didn't really follow what you mean by "synchronous" here, although the
> explanation about pmds makes sense so it's probably just terminology.

Yes, maybe I should use "aligned manner", or please suggest anything that
sounds better; sorry for my awkward English.

>  
> > The flag introduced in this patch will be a preparation for more bits defined
> > in the future, e.g., for a new bit in flag to show whether to persist the
> > upcoming uffd-wp bit in pgtable entries.
> 
> That's kind of the problem. The patch itself looks correct to me however as
> mentioned it is mostly reverting a previous cleanup and it's hard to tell why
> that's justified without the subsequent patches. Perhaps it makes the usage of
> zap_details a bit clearer, but a comment also would with less code.
> 
> I know you want to try and shrink the uffd-wp series but I think this patch
> might be easier to review if it was included as part of that series.

I posted it because I think it's suitable to have it even without uffd-wp.

I tried to explain it above on two things this patch wanted to fix:

Firstly the comment is wrong; we've gone back and forth on the
zap_details flags, but the comment was never updated along the way, so it no
longer matches the code.

Secondly I do think we should have a flag showing explicit willingness to skip
swap entries.  Yes, uffd-wp is the planned new one, but my point is that anyone
who introduces a new user of the zap_details pointer could overlook this fact.
The new flag helps make sure they will at least read the flags and know
what will happen with them.

For the second reason, I explicitly CCed Kirill too, so Kirill can provide
any comment if he disagrees.  For now, I still think we should keep such
a flag; otherwise it could be error-prone.

Could you buy into the above reasoning?

Basically the above is what I wanted to express in my commit message.  I hope
that can justify that this patch (even if extremely simple) can still be
considered acceptable upstream even without the uffd-wp series.

If you still insist that this patch is not suitable for standalone merging, and
especially if some other reviewer thinks the same, I can certainly move it back
to the uffd-wp series.  Then I'll repost this series with 4 patches only.

In all cases, thanks for looking at the series.
Alistair Popple Sept. 15, 2021, 3:21 a.m. UTC | #3
On Wednesday, 15 September 2021 12:52:48 PM AEST Peter Xu wrote:

> > > The flag introduced in this patch will be a preparation for more bits defined
> > > in the future, e.g., for a new bit in flag to show whether to persist the
> > > upcoming uffd-wp bit in pgtable entries.
> > 
> > That's kind of the problem. The patch itself looks correct to me however as
> > mentioned it is mostly reverting a previous cleanup and it's hard to tell why
> > that's justified without the subsequent patches. Perhaps it makes the usage of
> > zap_details a bit clearer, but a comment also would with less code.
> > 
> > I know you want to try and shrink the uffd-wp series but I think this patch
> > might be easier to review if it was included as part of that series.
> 
> I posted it because I think it's suitable to have it even without uffd-wp.
> 
> I tried to explain it above on two things this patch wanted to fix:
> 
> Firstly the comment is wrong; we've moved back and forth on changing the
> zap_details flags but the comment is not changing along the way and it's not
> matching the code right now.
> 
> Secondly I do think we should have a flag showing explicit willingness to skip
> swap entries.  Yes, uffd-wp is the planned new one, but my point is anyone who
> will introduce a new user of zap_details pointer could overlook this fact.  The
> new flag helps us to make sure someone will at least read the flags and know
> what'll happen with it.
> 
> For the 2nd reasoning, I also explicitly CCed Kirill too, so Kirill can provide
> any comment if he disagrees.  For now, I still think we should keep having such
> a flag otherwise it could be error-prone.
> 
> Could you buy-in above reasoning?

Kind of, I do think it makes the usage of details a bit clearer or at least
harder to miss. It is just that if that was the sole aim of this patch I think
there might be simpler (less code) ways of doing so.

> Basically above is what I wanted to express in my commit message.  I hope that
> can justify that this patch (even if extremly simple) can still be considered
> as acceptable upstream even without uffd-wp series.
> 
> If you still insist on this patch not suitable for standalone merging and
> especially if some other reviewer would think the same, I can move it back to
> uffd-wp series for sure.  Then I'll repost this series with 4 patches only.

I won't insist, the code looks correct and it doesn't make things any less
clear so you can put my Reviewed-by on it and perhaps leave it to Andrew or
another reviewer to determine if this should be taken in this series or as part
of a future uffd-wp series.

 - Alistair

> In all cases, thanks for looking at the series.
> 
>
Peter Xu Sept. 15, 2021, 4:01 a.m. UTC | #4
On Wed, Sep 15, 2021 at 01:21:30PM +1000, Alistair Popple wrote:
> On Wednesday, 15 September 2021 12:52:48 PM AEST Peter Xu wrote:
> 
> > > > The flag introduced in this patch will be a preparation for more bits defined
> > > > in the future, e.g., for a new bit in flag to show whether to persist the
> > > > upcoming uffd-wp bit in pgtable entries.
> > > 
> > > That's kind of the problem. The patch itself looks correct to me however as
> > > mentioned it is mostly reverting a previous cleanup and it's hard to tell why
> > > that's justified without the subsequent patches. Perhaps it makes the usage of
> > > zap_details a bit clearer, but a comment also would with less code.
> > > 
> > > I know you want to try and shrink the uffd-wp series but I think this patch
> > > might be easier to review if it was included as part of that series.
> > 
> > I posted it because I think it's suitable to have it even without uffd-wp.
> > 
> > I tried to explain it above on two things this patch wanted to fix:
> > 
> > Firstly the comment is wrong; we've moved back and forth on changing the
> > zap_details flags but the comment is not changing along the way and it's not
> > matching the code right now.
> > 
> > Secondly I do think we should have a flag showing explicit willingness to skip
> > swap entries.  Yes, uffd-wp is the planned new one, but my point is anyone who
> > will introduce a new user of zap_details pointer could overlook this fact.  The
> > new flag helps us to make sure someone will at least read the flags and know
> > what'll happen with it.
> > 
> > For the 2nd reasoning, I also explicitly CCed Kirill too, so Kirill can provide
> > any comment if he disagrees.  For now, I still think we should keep having such
> > a flag otherwise it could be error-prone.
> > 
> > Could you buy-in above reasoning?
> 
> Kind of, I do think it makes the usage of details a bit clearer or at least
> harder to miss. It is just that if that was the sole aim of this patch I think
> there might be simpler (less code) ways of doing so.

Yes you're right, we can add a big enough comment above zap_details to state
that, but then it'll be reverted when adding the uffd-wp flag in the other
series, because uffd-wp will still need a way to specify !SKIP_SWAP and
KEEP_UFFD_WP.  Then the "series split" would make less sense, as you said.

I split the series only because I hope it will make things easier for
reviewers, and also because that's probably the only thing I can do now to
smooth the process of getting the complete uffd-wp work properly reviewed and
merged.

> 
> > Basically above is what I wanted to express in my commit message.  I hope that
> > can justify that this patch (even if extremly simple) can still be considered
> > as acceptable upstream even without uffd-wp series.
> > 
> > If you still insist on this patch not suitable for standalone merging and
> > especially if some other reviewer would think the same, I can move it back to
> > uffd-wp series for sure.  Then I'll repost this series with 4 patches only.
> 
> I won't insist, the code looks correct and it doesn't make things any less
> clear so you can put my Reviewed-by on it and perhaps leave it to Andrew or
> another reviewer to determine if this should be taken in this series or as part
> of a future uffd-wp series.

Will do; thanks.
diff mbox series

Patch

diff --git a/include/linux/mm.h b/include/linux/mm.h
index ed44f31615d9..beb784ce35b9 100644
--- a/include/linux/mm.h
+++ b/include/linux/mm.h
@@ -1717,12 +1717,18 @@  static inline bool can_do_mlock(void) { return false; }
 extern int user_shm_lock(size_t, struct ucounts *);
 extern void user_shm_unlock(size_t, struct ucounts *);
 
+typedef unsigned int __bitwise zap_flags_t;
+
+/* Whether to skip zapping swap entries */
+#define  ZAP_FLAG_SKIP_SWAP  ((__force zap_flags_t) BIT(0))
+
 /*
  * Parameter block passed down to zap_pte_range in exceptional cases.
  */
 struct zap_details {
 	struct address_space *zap_mapping;	/* Check page->mapping if set */
 	struct page *single_page;		/* Locked page to be unmapped */
+	zap_flags_t zap_flags;			/* Extra flags for zapping */
 };
 
 /*
@@ -1739,6 +1745,16 @@  zap_skip_check_mapping(struct zap_details *details, struct page *page)
 	    (details->zap_mapping != page_rmapping(page));
 }
 
+/* Return true if skip swap entries, false otherwise */
+static inline bool
+zap_skip_swap(struct zap_details *details)
+{
+	if (!details)
+		return false;
+
+	return details->zap_flags & ZAP_FLAG_SKIP_SWAP;
+}
+
 struct page *vm_normal_page(struct vm_area_struct *vma, unsigned long addr,
 			     pte_t pte);
 struct page *vm_normal_page_pmd(struct vm_area_struct *vma, unsigned long addr,
diff --git a/mm/memory.c b/mm/memory.c
index e5ee8399d270..26e37bef1888 100644
--- a/mm/memory.c
+++ b/mm/memory.c
@@ -1379,8 +1379,7 @@  static unsigned long zap_pte_range(struct mmu_gather *tlb,
 			continue;
 		}
 
-		/* If details->check_mapping, we leave swap entries. */
-		if (unlikely(details))
+		if (unlikely(zap_skip_swap(details)))
 			continue;
 
 		if (!non_swap_entry(entry))
@@ -3353,6 +3352,7 @@  void unmap_mapping_page(struct page *page)
 
 	details.zap_mapping = mapping;
 	details.single_page = page;
+	details.zap_flags = ZAP_FLAG_SKIP_SWAP;
 
 	i_mmap_lock_write(mapping);
 	if (unlikely(!RB_EMPTY_ROOT(&mapping->i_mmap.rb_root)))
@@ -3377,7 +3377,7 @@  void unmap_mapping_pages(struct address_space *mapping, pgoff_t start,
 		pgoff_t nr, bool even_cows)
 {
 	pgoff_t	first_index = start, last_index = start + nr - 1;
-	struct zap_details details = { };
+	struct zap_details details = { .zap_flags = ZAP_FLAG_SKIP_SWAP };
 
 	details.zap_mapping = even_cows ? NULL : mapping;
 	if (last_index < first_index)