
[09/10] mm/ksm: calc_checksum for folio

Message ID 20240604042454.2012091-10-alexs@kernel.org (mailing list archive)
State New
Series: use folio in ksm

Commit Message

alexs@kernel.org June 4, 2024, 4:24 a.m. UTC
From: "Alex Shi (tencent)" <alexs@kernel.org>

Let's check the whole folio for content changes, and don't count it if the
folio has changed.

Signed-off-by: Alex Shi (tencent) <alexs@kernel.org>
---
 mm/ksm.c | 14 ++++++++------
 1 file changed, 8 insertions(+), 6 deletions(-)

Comments

David Hildenbrand June 4, 2024, 1:18 p.m. UTC | #1
On 04.06.24 06:24, alexs@kernel.org wrote:
> From: "Alex Shi (tencent)" <alexs@kernel.org>
> 
> Let's check the whole folio for content changes, and don't count it if the
> folio has changed.
> 
> Signed-off-by: Alex Shi (tencent) <alexs@kernel.org>
> ---
>   mm/ksm.c | 14 ++++++++------
>   1 file changed, 8 insertions(+), 6 deletions(-)
> 
> diff --git a/mm/ksm.c b/mm/ksm.c
> index b9c04ce677b9..dc2b5e6a9659 100644
> --- a/mm/ksm.c
> +++ b/mm/ksm.c
> @@ -1258,11 +1258,13 @@ static int unmerge_and_remove_all_rmap_items(void)
>   }
>   #endif /* CONFIG_SYSFS */
>   
> -static u32 calc_checksum(struct page *page)
> +static u32 calc_checksum(struct folio *folio)
>   {
>   	u32 checksum;
> -	void *addr = kmap_local_page(page);
> -	checksum = xxhash(addr, PAGE_SIZE, 0);
> +	int nr = folio_nr_pages(folio);
> +	void *addr = kmap_local_page(folio_page(folio, 0));
> +
> +	checksum = xxhash(addr, nr * PAGE_SIZE, 0);
>   	kunmap_local(addr);
>   	return checksum;
>   }
> @@ -2369,7 +2371,7 @@ static void cmp_and_merge_page(struct page *page, struct ksm_rmap_item *rmap_ite
>   	 * don't want to insert it in the unstable tree, and we don't want
>   	 * to waste our time searching for something identical to it there.
>   	 */
> -	checksum = calc_checksum(page);
> +	checksum = calc_checksum(folio);

So for a large folio you suddenly checksum more than a single page? 
That's wrong.

Or am I missing something?
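
For context, a minimal sketch (not part of the patch) of how the checksum could stay per-page even when the caller holds a possibly-large folio; the helper name and the page-index parameter are hypothetical:

	/*
	 * Sketch: hash only one page of the folio, so each subpage of a
	 * large folio keeps its own checksum, matching the current
	 * per-page behaviour.
	 */
	static u32 calc_checksum_one_page(struct folio *folio, unsigned long idx)
	{
		u32 checksum;
		void *addr = kmap_local_page(folio_page(folio, idx));

		checksum = xxhash(addr, PAGE_SIZE, 0);
		kunmap_local(addr);
		return checksum;
	}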
Alex Shi June 5, 2024, 3:44 a.m. UTC | #2
On 6/4/24 9:18 PM, David Hildenbrand wrote:
>> @@ -2369,7 +2371,7 @@ static void cmp_and_merge_page(struct page *page, struct ksm_rmap_item *rmap_ite
>>        * don't want to insert it in the unstable tree, and we don't want
>>        * to waste our time searching for something identical to it there.
>>        */
>> -    checksum = calc_checksum(page);
>> +    checksum = calc_checksum(folio);
> 
> So for a large folio you suddenly checksum more than a single page? That's wrong.
> 
> Or am I missing something?

I am not sure this change is a good one either; comparing the whole folio may have efficiency advantages, but more splitting does save more pages.

Anyway, this change could be dropped.

Thanks!
David Hildenbrand June 5, 2024, 7:53 a.m. UTC | #3
On 05.06.24 05:44, Alex Shi wrote:
> 
> 
> On 6/4/24 9:18 PM, David Hildenbrand wrote:
>>> @@ -2369,7 +2371,7 @@ static void cmp_and_merge_page(struct page *page, struct ksm_rmap_item *rmap_ite
>>>         * don't want to insert it in the unstable tree, and we don't want
>>>         * to waste our time searching for something identical to it there.
>>>         */
>>> -    checksum = calc_checksum(page);
>>> +    checksum = calc_checksum(folio);
>>
>> So for a large folio you suddenly checksum more than a single page? That's wrong.
>>
>> Or am I missing something?
> 
> I am not sure this change is a good one either; comparing the whole folio may have efficiency advantages, but more splitting does save more pages.

Calculating the checksum of something that could be a large folio, when 
you might want to deduplicate subpages of the folio (and decide if you 
might want to split it), sounds very wrong.

Please pay more attention to the details of how the current code works and 
how it all works with tail pages.
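
Concretely, the call site in cmp_and_merge_page() would then keep checksumming only the subpage being scanned; a sketch using the hypothetical helper above together with folio_page_idx() from the folio API:

	/*
	 * Sketch: checksum only the subpage rmap_item refers to, even if it
	 * sits inside a large folio.
	 */
	struct folio *folio = page_folio(page);

	checksum = calc_checksum_one_page(folio, folio_page_idx(folio, page));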

Patch

diff --git a/mm/ksm.c b/mm/ksm.c
index b9c04ce677b9..dc2b5e6a9659 100644
--- a/mm/ksm.c
+++ b/mm/ksm.c
@@ -1258,11 +1258,13 @@  static int unmerge_and_remove_all_rmap_items(void)
 }
 #endif /* CONFIG_SYSFS */
 
-static u32 calc_checksum(struct page *page)
+static u32 calc_checksum(struct folio *folio)
 {
 	u32 checksum;
-	void *addr = kmap_local_page(page);
-	checksum = xxhash(addr, PAGE_SIZE, 0);
+	int nr = folio_nr_pages(folio);
+	void *addr = kmap_local_page(folio_page(folio, 0));
+
+	checksum = xxhash(addr, nr * PAGE_SIZE, 0);
 	kunmap_local(addr);
 	return checksum;
 }
@@ -2369,7 +2371,7 @@  static void cmp_and_merge_page(struct page *page, struct ksm_rmap_item *rmap_ite
 	 * don't want to insert it in the unstable tree, and we don't want
 	 * to waste our time searching for something identical to it there.
 	 */
-	checksum = calc_checksum(page);
+	checksum = calc_checksum(folio);
 	if (rmap_item->oldchecksum != checksum) {
 		rmap_item->oldchecksum = checksum;
 		return;
@@ -2385,7 +2387,7 @@  static void cmp_and_merge_page(struct page *page, struct ksm_rmap_item *rmap_ite
 		mmap_read_lock(mm);
 		vma = find_mergeable_vma(mm, rmap_item->address);
 		if (vma) {
-			err = try_to_merge_one_page(vma, page_folio(page), rmap_item,
+			err = try_to_merge_one_page(vma, folio, rmap_item,
 						    page_folio(ZERO_PAGE(rmap_item->address)));
 			trace_ksm_merge_one_page(
 				page_to_pfn(ZERO_PAGE(rmap_item->address)),
@@ -3916,7 +3918,7 @@  static int __init ksm_init(void)
 	int err;
 
 	/* The correct value depends on page size and endianness */
-	zero_checksum = calc_checksum(ZERO_PAGE(0));
+	zero_checksum = calc_checksum(page_folio(ZERO_PAGE(0)));
 	/* Default to false for backwards compatibility */
 	ksm_use_zero_pages = false;