diff mbox series

[v2,28/40] mm/memory: page_remove_rmap() -> folio_remove_rmap_pte()

Message ID 20231220224504.646757-29-david@redhat.com (mailing list archive)
State New
Headers show
Series mm/rmap: interface overhaul | expand

Commit Message

David Hildenbrand Dec. 20, 2023, 10:44 p.m. UTC
Let's convert zap_pte_range() and closely-related
tlb_flush_rmap_batch(). While at it, perform some more folio conversion
in zap_pte_range().

Signed-off-by: David Hildenbrand <david@redhat.com>
---
 mm/memory.c     | 23 +++++++++++++----------
 mm/mmu_gather.c |  2 +-
 2 files changed, 14 insertions(+), 11 deletions(-)

Comments

Ryan Roberts Jan. 22, 2024, 4:58 p.m. UTC | #1
On 20/12/2023 22:44, David Hildenbrand wrote:
> Let's convert zap_pte_range() and closely-related
> tlb_flush_rmap_batch(). While at it, perform some more folio conversion
> in zap_pte_range().
> 
> Signed-off-by: David Hildenbrand <david@redhat.com>
> ---
>  mm/memory.c     | 23 +++++++++++++----------
>  mm/mmu_gather.c |  2 +-
>  2 files changed, 14 insertions(+), 11 deletions(-)
> 
> diff --git a/mm/memory.c b/mm/memory.c
> index 6552ea27b0bfa..eda2181275d9b 100644
> --- a/mm/memory.c
> +++ b/mm/memory.c
> @@ -1434,6 +1434,7 @@ static unsigned long zap_pte_range(struct mmu_gather *tlb,
>  	arch_enter_lazy_mmu_mode();
>  	do {
>  		pte_t ptent = ptep_get(pte);
> +		struct folio *folio;
>  		struct page *page;
>  
>  		if (pte_none(ptent))
> @@ -1459,21 +1460,22 @@ static unsigned long zap_pte_range(struct mmu_gather *tlb,
>  				continue;
>  			}
>  
> +			folio = page_folio(page);
>  			delay_rmap = 0;
> -			if (!PageAnon(page)) {
> +			if (!folio_test_anon(folio)) {
>  				if (pte_dirty(ptent)) {
> -					set_page_dirty(page);
> +					folio_set_dirty(folio);

Is this foliation change definitely correct? I note that set_page_dirty() is
defined as this:

bool set_page_dirty(struct page *page)
{
	return folio_mark_dirty(page_folio(page));
}

And folio_mark_dirty() is doing more than just setting teh PG_dirty bit. In my
equivalent change, as part of the contpte series, I've swapped set_page_dirty()
for folio_mark_dirty().


>  					if (tlb_delay_rmap(tlb)) {
>  						delay_rmap = 1;
>  						force_flush = 1;
>  					}
>  				}
>  				if (pte_young(ptent) && likely(vma_has_recency(vma)))
> -					mark_page_accessed(page);
> +					folio_mark_accessed(folio);
>  			}
>  			rss[mm_counter(page)]--;
>  			if (!delay_rmap) {
> -				page_remove_rmap(page, vma, false);
> +				folio_remove_rmap_pte(folio, page, vma);
>  				if (unlikely(page_mapcount(page) < 0))
>  					print_bad_pte(vma, addr, ptent, page);
>  			}
> @@ -1489,6 +1491,7 @@ static unsigned long zap_pte_range(struct mmu_gather *tlb,
>  		if (is_device_private_entry(entry) ||
>  		    is_device_exclusive_entry(entry)) {
>  			page = pfn_swap_entry_to_page(entry);
> +			folio = page_folio(page);
>  			if (unlikely(!should_zap_page(details, page)))
>  				continue;
>  			/*
> @@ -1500,8 +1503,8 @@ static unsigned long zap_pte_range(struct mmu_gather *tlb,
>  			WARN_ON_ONCE(!vma_is_anonymous(vma));
>  			rss[mm_counter(page)]--;
>  			if (is_device_private_entry(entry))
> -				page_remove_rmap(page, vma, false);
> -			put_page(page);
> +				folio_remove_rmap_pte(folio, page, vma);
> +			folio_put(folio);
>  		} else if (!non_swap_entry(entry)) {
>  			/* Genuine swap entry, hence a private anon page */
>  			if (!should_zap_cows(details))
> @@ -3220,10 +3223,10 @@ static vm_fault_t wp_page_copy(struct vm_fault *vmf)
>  			 * threads.
>  			 *
>  			 * The critical issue is to order this
> -			 * page_remove_rmap with the ptp_clear_flush above.
> -			 * Those stores are ordered by (if nothing else,)
> +			 * folio_remove_rmap_pte() with the ptp_clear_flush
> +			 * above. Those stores are ordered by (if nothing else,)
>  			 * the barrier present in the atomic_add_negative
> -			 * in page_remove_rmap.
> +			 * in folio_remove_rmap_pte();
>  			 *
>  			 * Then the TLB flush in ptep_clear_flush ensures that
>  			 * no process can access the old page before the
> @@ -3232,7 +3235,7 @@ static vm_fault_t wp_page_copy(struct vm_fault *vmf)
>  			 * mapcount is visible. So transitively, TLBs to
>  			 * old page will be flushed before it can be reused.
>  			 */
> -			page_remove_rmap(vmf->page, vma, false);
> +			folio_remove_rmap_pte(old_folio, vmf->page, vma);
>  		}
>  
>  		/* Free the old page.. */
> diff --git a/mm/mmu_gather.c b/mm/mmu_gather.c
> index 4f559f4ddd217..604ddf08affed 100644
> --- a/mm/mmu_gather.c
> +++ b/mm/mmu_gather.c
> @@ -55,7 +55,7 @@ static void tlb_flush_rmap_batch(struct mmu_gather_batch *batch, struct vm_area_
>  
>  		if (encoded_page_flags(enc)) {
>  			struct page *page = encoded_page_ptr(enc);
> -			page_remove_rmap(page, vma, false);
> +			folio_remove_rmap_pte(page_folio(page), page, vma);
>  		}
>  	}
>  }
David Hildenbrand Jan. 22, 2024, 5:01 p.m. UTC | #2
On 22.01.24 17:58, Ryan Roberts wrote:
> On 20/12/2023 22:44, David Hildenbrand wrote:
>> Let's convert zap_pte_range() and closely-related
>> tlb_flush_rmap_batch(). While at it, perform some more folio conversion
>> in zap_pte_range().
>>
>> Signed-off-by: David Hildenbrand <david@redhat.com>
>> ---
>>   mm/memory.c     | 23 +++++++++++++----------
>>   mm/mmu_gather.c |  2 +-
>>   2 files changed, 14 insertions(+), 11 deletions(-)
>>
>> diff --git a/mm/memory.c b/mm/memory.c
>> index 6552ea27b0bfa..eda2181275d9b 100644
>> --- a/mm/memory.c
>> +++ b/mm/memory.c
>> @@ -1434,6 +1434,7 @@ static unsigned long zap_pte_range(struct mmu_gather *tlb,
>>   	arch_enter_lazy_mmu_mode();
>>   	do {
>>   		pte_t ptent = ptep_get(pte);
>> +		struct folio *folio;
>>   		struct page *page;
>>   
>>   		if (pte_none(ptent))
>> @@ -1459,21 +1460,22 @@ static unsigned long zap_pte_range(struct mmu_gather *tlb,
>>   				continue;
>>   			}
>>   
>> +			folio = page_folio(page);
>>   			delay_rmap = 0;
>> -			if (!PageAnon(page)) {
>> +			if (!folio_test_anon(folio)) {
>>   				if (pte_dirty(ptent)) {
>> -					set_page_dirty(page);
>> +					folio_set_dirty(folio);
> 
> Is this foliation change definitely correct? I note that set_page_dirty() is
> defined as this: >
> bool set_page_dirty(struct page *page)
> {
> 	return folio_mark_dirty(page_folio(page));
> }
> 
> And folio_mark_dirty() is doing more than just setting teh PG_dirty bit. In my
> equivalent change, as part of the contpte series, I've swapped set_page_dirty()
> for folio_mark_dirty().

Good catch, that should be folio_mark_dirty(). Let me send a fixup.

(the difference in naming for both functions really is bad)
Matthew Wilcox Jan. 22, 2024, 5:20 p.m. UTC | #3
On Mon, Jan 22, 2024 at 06:01:58PM +0100, David Hildenbrand wrote:
> > And folio_mark_dirty() is doing more than just setting teh PG_dirty bit. In my
> > equivalent change, as part of the contpte series, I've swapped set_page_dirty()
> > for folio_mark_dirty().
> 
> Good catch, that should be folio_mark_dirty(). Let me send a fixup.
> 
> (the difference in naming for both functions really is bad)

It really is, and I don't know what to do about it.

We need a function that literally just sets the flag.  For every other
flag, that's folio_set_FLAG.  We can't use __folio_set_flag because that
means "set the flag non-atomically".

We need a function that does all of the work involved with tracking
dirty folios.  I chose folio_mark_dirty() to align with
folio_mark_uptodate() (ie mark is not just 'set" but also "do some extra
work").

But because we're converting from set_page_dirty(), the OBVIOUS rename
is to folio_set_dirty(), which is WRONG.

So we're in the part of the design space where the consistent naming and
the-obvious-thing-to-do-is-wrong are in collision, and I do not have a
good answer.

Maybe we can call the first function _folio_set_dirty(), and we don't
have a folio_set_dirty() at all?  We don't have a folio_set_uptodate(),
so there's some precedent there.
Ryan Roberts Jan. 22, 2024, 5:26 p.m. UTC | #4
On 22/01/2024 17:20, Matthew Wilcox wrote:
> On Mon, Jan 22, 2024 at 06:01:58PM +0100, David Hildenbrand wrote:
>>> And folio_mark_dirty() is doing more than just setting teh PG_dirty bit. In my
>>> equivalent change, as part of the contpte series, I've swapped set_page_dirty()
>>> for folio_mark_dirty().
>>
>> Good catch, that should be folio_mark_dirty(). Let me send a fixup.
>>
>> (the difference in naming for both functions really is bad)
> 
> It really is, and I don't know what to do about it.
> 
> We need a function that literally just sets the flag.  For every other
> flag, that's folio_set_FLAG.  We can't use __folio_set_flag because that
> means "set the flag non-atomically".
> 
> We need a function that does all of the work involved with tracking
> dirty folios.  I chose folio_mark_dirty() to align with
> folio_mark_uptodate() (ie mark is not just 'set" but also "do some extra
> work").
> 
> But because we're converting from set_page_dirty(), the OBVIOUS rename
> is to folio_set_dirty(), which is WRONG.
> 
> So we're in the part of the design space where the consistent naming and
> the-obvious-thing-to-do-is-wrong are in collision, and I do not have a
> good answer.
> 
> Maybe we can call the first function _folio_set_dirty(), and we don't
> have a folio_set_dirty() at all?  We don't have a folio_set_uptodate(),
> so there's some precedent there.

Is there anything stopping us from renaming set_page_dirty() to
mark_page_dirty() (or page_mark_dirty())? For me the folio naming is consistent,
but the page names suck; presumably PageSetDirty() and set_page_dirty()... yuk.
Matthew Wilcox Jan. 22, 2024, 5:32 p.m. UTC | #5
On Mon, Jan 22, 2024 at 05:26:00PM +0000, Ryan Roberts wrote:
> On 22/01/2024 17:20, Matthew Wilcox wrote:
> > On Mon, Jan 22, 2024 at 06:01:58PM +0100, David Hildenbrand wrote:
> >>> And folio_mark_dirty() is doing more than just setting teh PG_dirty bit. In my
> >>> equivalent change, as part of the contpte series, I've swapped set_page_dirty()
> >>> for folio_mark_dirty().
> >>
> >> Good catch, that should be folio_mark_dirty(). Let me send a fixup.
> >>
> >> (the difference in naming for both functions really is bad)
> > 
> > It really is, and I don't know what to do about it.
> > 
> > We need a function that literally just sets the flag.  For every other
> > flag, that's folio_set_FLAG.  We can't use __folio_set_flag because that
> > means "set the flag non-atomically".
> > 
> > We need a function that does all of the work involved with tracking
> > dirty folios.  I chose folio_mark_dirty() to align with
> > folio_mark_uptodate() (ie mark is not just 'set" but also "do some extra
> > work").
> > 
> > But because we're converting from set_page_dirty(), the OBVIOUS rename
> > is to folio_set_dirty(), which is WRONG.
> > 
> > So we're in the part of the design space where the consistent naming and
> > the-obvious-thing-to-do-is-wrong are in collision, and I do not have a
> > good answer.
> > 
> > Maybe we can call the first function _folio_set_dirty(), and we don't
> > have a folio_set_dirty() at all?  We don't have a folio_set_uptodate(),
> > so there's some precedent there.
> 
> Is there anything stopping us from renaming set_page_dirty() to
> mark_page_dirty() (or page_mark_dirty())? For me the folio naming is consistent,
> but the page names suck; presumably PageSetDirty() and set_page_dirty()... yuk.

Well, laziness.  There's about 150 places where we mention
set_page_dirty() and all of them need to be converted to
folio_mark_dirty().  I don't particularly like converting code twice;
I get the impression it annoys people.

The important thing is what does it look like when someone writes
a new filesystem in 2030.  I fear that they may get confused and
call folio_set_dirty(), not realising that they should be calling
folio_mark_dirty().  It doesn't help that btrfs have decided to introduce
btrfs_folio_set_dirty().

I think MM people can afford to add a leading '_' to folio_set_dirty()
so that's my current favourite option for fixing this mess.
David Hildenbrand Jan. 22, 2024, 5:34 p.m. UTC | #6
On 22.01.24 18:20, Matthew Wilcox wrote:
> On Mon, Jan 22, 2024 at 06:01:58PM +0100, David Hildenbrand wrote:
>>> And folio_mark_dirty() is doing more than just setting teh PG_dirty bit. In my
>>> equivalent change, as part of the contpte series, I've swapped set_page_dirty()
>>> for folio_mark_dirty().
>>
>> Good catch, that should be folio_mark_dirty(). Let me send a fixup.
>>
>> (the difference in naming for both functions really is bad)
> 
> It really is, and I don't know what to do about it.
> 
> We need a function that literally just sets the flag.  For every other
> flag, that's folio_set_FLAG.  We can't use __folio_set_flag because that
> means "set the flag non-atomically".
> 
> We need a function that does all of the work involved with tracking
> dirty folios.  I chose folio_mark_dirty() to align with
> folio_mark_uptodate() (ie mark is not just 'set" but also "do some extra
> work").
> 
> But because we're converting from set_page_dirty(), the OBVIOUS rename
> is to folio_set_dirty(), which is WRONG.

And I made the same mistake at least also in "mm/huge_memory: 
page_remove_rmap() -> folio_remove_rmap_pmd()".

I better double check all these so-simple-looking conversions that just 
went upstream.

Interestingly, __split_huge_pmd_locked() used SetPageReferenced() 
instead of

> 
> So we're in the part of the design space where the consistent naming and
> the-obvious-thing-to-do-is-wrong are in collision, and I do not have a
> good answer.
> 
> Maybe we can call the first function _folio_set_dirty(), and we don't
> have a folio_set_dirty() at all?  We don't have a folio_set_uptodate(),
> so there's some precedent there.

Good question. This mark vs. set is confusing. We want some way to 
highlight that folio_set_dirty() is the one that we usually do not want 
to use.
David Hildenbrand Jan. 22, 2024, 5:40 p.m. UTC | #7
On 22.01.24 18:34, David Hildenbrand wrote:
> On 22.01.24 18:20, Matthew Wilcox wrote:
>> On Mon, Jan 22, 2024 at 06:01:58PM +0100, David Hildenbrand wrote:
>>>> And folio_mark_dirty() is doing more than just setting teh PG_dirty bit. In my
>>>> equivalent change, as part of the contpte series, I've swapped set_page_dirty()
>>>> for folio_mark_dirty().
>>>
>>> Good catch, that should be folio_mark_dirty(). Let me send a fixup.
>>>
>>> (the difference in naming for both functions really is bad)
>>
>> It really is, and I don't know what to do about it.
>>
>> We need a function that literally just sets the flag.  For every other
>> flag, that's folio_set_FLAG.  We can't use __folio_set_flag because that
>> means "set the flag non-atomically".
>>
>> We need a function that does all of the work involved with tracking
>> dirty folios.  I chose folio_mark_dirty() to align with
>> folio_mark_uptodate() (ie mark is not just 'set" but also "do some extra
>> work").
>>
>> But because we're converting from set_page_dirty(), the OBVIOUS rename
>> is to folio_set_dirty(), which is WRONG.
> 
> And I made the same mistake at least also in "mm/huge_memory:
> page_remove_rmap() -> folio_remove_rmap_pmd()".
> 
> I better double check all these so-simple-looking conversions that just
> went upstream.
> 
> Interestingly, __split_huge_pmd_locked() used SetPageReferenced()
> instead of

Forgot to delete that sentence.

Anyhow, it's all confusing. My replacement in 91b2978a34807 from 
SetPageDirty -> folio_set_dirty() was correct. It only operates on anon 
folios, likely that's why folio_set_dirty() is okay there.

Oh my.
diff mbox series

Patch

diff --git a/mm/memory.c b/mm/memory.c
index 6552ea27b0bfa..eda2181275d9b 100644
--- a/mm/memory.c
+++ b/mm/memory.c
@@ -1434,6 +1434,7 @@  static unsigned long zap_pte_range(struct mmu_gather *tlb,
 	arch_enter_lazy_mmu_mode();
 	do {
 		pte_t ptent = ptep_get(pte);
+		struct folio *folio;
 		struct page *page;
 
 		if (pte_none(ptent))
@@ -1459,21 +1460,22 @@  static unsigned long zap_pte_range(struct mmu_gather *tlb,
 				continue;
 			}
 
+			folio = page_folio(page);
 			delay_rmap = 0;
-			if (!PageAnon(page)) {
+			if (!folio_test_anon(folio)) {
 				if (pte_dirty(ptent)) {
-					set_page_dirty(page);
+					folio_set_dirty(folio);
 					if (tlb_delay_rmap(tlb)) {
 						delay_rmap = 1;
 						force_flush = 1;
 					}
 				}
 				if (pte_young(ptent) && likely(vma_has_recency(vma)))
-					mark_page_accessed(page);
+					folio_mark_accessed(folio);
 			}
 			rss[mm_counter(page)]--;
 			if (!delay_rmap) {
-				page_remove_rmap(page, vma, false);
+				folio_remove_rmap_pte(folio, page, vma);
 				if (unlikely(page_mapcount(page) < 0))
 					print_bad_pte(vma, addr, ptent, page);
 			}
@@ -1489,6 +1491,7 @@  static unsigned long zap_pte_range(struct mmu_gather *tlb,
 		if (is_device_private_entry(entry) ||
 		    is_device_exclusive_entry(entry)) {
 			page = pfn_swap_entry_to_page(entry);
+			folio = page_folio(page);
 			if (unlikely(!should_zap_page(details, page)))
 				continue;
 			/*
@@ -1500,8 +1503,8 @@  static unsigned long zap_pte_range(struct mmu_gather *tlb,
 			WARN_ON_ONCE(!vma_is_anonymous(vma));
 			rss[mm_counter(page)]--;
 			if (is_device_private_entry(entry))
-				page_remove_rmap(page, vma, false);
-			put_page(page);
+				folio_remove_rmap_pte(folio, page, vma);
+			folio_put(folio);
 		} else if (!non_swap_entry(entry)) {
 			/* Genuine swap entry, hence a private anon page */
 			if (!should_zap_cows(details))
@@ -3220,10 +3223,10 @@  static vm_fault_t wp_page_copy(struct vm_fault *vmf)
 			 * threads.
 			 *
 			 * The critical issue is to order this
-			 * page_remove_rmap with the ptp_clear_flush above.
-			 * Those stores are ordered by (if nothing else,)
+			 * folio_remove_rmap_pte() with the ptp_clear_flush
+			 * above. Those stores are ordered by (if nothing else,)
 			 * the barrier present in the atomic_add_negative
-			 * in page_remove_rmap.
+			 * in folio_remove_rmap_pte();
 			 *
 			 * Then the TLB flush in ptep_clear_flush ensures that
 			 * no process can access the old page before the
@@ -3232,7 +3235,7 @@  static vm_fault_t wp_page_copy(struct vm_fault *vmf)
 			 * mapcount is visible. So transitively, TLBs to
 			 * old page will be flushed before it can be reused.
 			 */
-			page_remove_rmap(vmf->page, vma, false);
+			folio_remove_rmap_pte(old_folio, vmf->page, vma);
 		}
 
 		/* Free the old page.. */
diff --git a/mm/mmu_gather.c b/mm/mmu_gather.c
index 4f559f4ddd217..604ddf08affed 100644
--- a/mm/mmu_gather.c
+++ b/mm/mmu_gather.c
@@ -55,7 +55,7 @@  static void tlb_flush_rmap_batch(struct mmu_gather_batch *batch, struct vm_area_
 
 		if (encoded_page_flags(enc)) {
 			struct page *page = encoded_page_ptr(enc);
-			page_remove_rmap(page, vma, false);
+			folio_remove_rmap_pte(page_folio(page), page, vma);
 		}
 	}
 }