diff mbox series

[RFC,v4,4/4] filemap: batched update mm counter,rmap when map file folio

Message ID 20230206140639.538867-5-fengwei.yin@intel.com (mailing list archive)
State New
Headers show
Series folio based filemap_map_pages() | expand

Commit Message

Yin, Fengwei Feb. 6, 2023, 2:06 p.m. UTC
Use do_set_pte_range() in filemap_map_folio_range(). Which
batched updates mm counter, rmap for large folio.

With a will-it-scale.page_fault3 like app (change file write
fault testing to read fault testing. Trying to upstream it to
will-it-scale at [1]) got 15% performance gain on a 48C/96T
Cascade Lake test box with 96 processes running against xfs.

Perf data collected before/after the change:
  18.73%--page_add_file_rmap
          |
           --11.60%--__mod_lruvec_page_state
                     |
                     |--7.40%--__mod_memcg_lruvec_state
                     |          |
                     |           --5.58%--cgroup_rstat_updated
                     |
                      --2.53%--__mod_lruvec_state
                                |
                                 --1.48%--__mod_node_page_state

  9.93%--page_add_file_rmap_range
         |
          --2.67%--__mod_lruvec_page_state
                    |
                    |--1.95%--__mod_memcg_lruvec_state
                    |          |
                    |           --1.57%--cgroup_rstat_updated
                    |
                     --0.61%--__mod_lruvec_state
                               |
                                --0.54%--__mod_node_page_state

The running time of __mode_lruvec_page_state() is reduced about 9%.

[1]: https://github.com/antonblanchard/will-it-scale/pull/37

Signed-off-by: Yin Fengwei <fengwei.yin@intel.com>
---
 mm/filemap.c | 35 +++++++++++++++++++++++++----------
 1 file changed, 25 insertions(+), 10 deletions(-)

Comments

Matthew Wilcox (Oracle) Feb. 6, 2023, 2:34 p.m. UTC | #1
On Mon, Feb 06, 2023 at 10:06:39PM +0800, Yin Fengwei wrote:
> @@ -3354,11 +3354,12 @@ static vm_fault_t filemap_map_folio_range(struct vm_fault *vmf,
>  	struct file *file = vma->vm_file;
>  	struct page *page = folio_page(folio, start);
>  	unsigned int mmap_miss = READ_ONCE(file->f_ra.mmap_miss);
> -	unsigned int ref_count = 0, count = 0;
> +	unsigned int mapped = 0;
> +	pte_t *pte = vmf->pte;
>  
>  	do {
>  		if (PageHWPoison(page))
> -			continue;
> +			goto map;
>  
>  		if (mmap_miss > 0)
>  			mmap_miss--;
> @@ -3368,20 +3369,34 @@ static vm_fault_t filemap_map_folio_range(struct vm_fault *vmf,
>  		 * handled in the specific fault path, and it'll prohibit the
>  		 * fault-around logic.
>  		 */
> -		if (!pte_none(*vmf->pte))
> -			continue;
> +		if (!pte_none(pte[mapped]))
> +			goto map;

I see what you're trying to do here, but do_set_pte_range() uses the
pte from vmf->pte.  Perhaps best to save it at the beginning of the
function and restore it at the end.  ie:

	pte_t *old_ptep = vmf->pte;

	} while (vmf->pte++);

	vmf->pte = old_ptep;

The only other thing that bugs me about this patch is the use of
'mapped' as the variable name.  It's in the past tense, not the future
tense, so it gives me the wrong impression about what it's counting.
While we could use 'tomap', that's kind of clunky.  'count' or even just
'i' would be better.
Yin, Fengwei Feb. 6, 2023, 3:03 p.m. UTC | #2
On 2/6/2023 10:34 PM, Matthew Wilcox wrote:
> On Mon, Feb 06, 2023 at 10:06:39PM +0800, Yin Fengwei wrote:
>> @@ -3354,11 +3354,12 @@ static vm_fault_t filemap_map_folio_range(struct vm_fault *vmf,
>>  	struct file *file = vma->vm_file;
>>  	struct page *page = folio_page(folio, start);
>>  	unsigned int mmap_miss = READ_ONCE(file->f_ra.mmap_miss);
>> -	unsigned int ref_count = 0, count = 0;
>> +	unsigned int mapped = 0;
>> +	pte_t *pte = vmf->pte;
>>  
>>  	do {
>>  		if (PageHWPoison(page))
>> -			continue;
>> +			goto map;
>>  
>>  		if (mmap_miss > 0)
>>  			mmap_miss--;
>> @@ -3368,20 +3369,34 @@ static vm_fault_t filemap_map_folio_range(struct vm_fault *vmf,
>>  		 * handled in the specific fault path, and it'll prohibit the
>>  		 * fault-around logic.
>>  		 */
>> -		if (!pte_none(*vmf->pte))
>> -			continue;
>> +		if (!pte_none(pte[mapped]))
>> +			goto map;
> 
> I see what you're trying to do here, but do_set_pte_range() uses the
> pte from vmf->pte.  Perhaps best to save it at the beginning of the
> function and restore it at the end.  ie:
> 
> 	pte_t *old_ptep = vmf->pte;
> 
> 	} while (vmf->pte++);
> 
> 	vmf->pte = old_ptep;
Yes. This also works.

> 
> The only other thing that bugs me about this patch is the use of
> 'mapped' as the variable name.  It's in the past tense, not the future
> tense, so it gives me the wrong impression about what it's counting.
> While we could use 'tomap', that's kind of clunky.  'count' or even just
> 'i' would be better.
OK. Will use count in next version.


Regards
Yin, Fengwei
diff mbox series

Patch

diff --git a/mm/filemap.c b/mm/filemap.c
index 6f110b9e5d27..4452361e8858 100644
--- a/mm/filemap.c
+++ b/mm/filemap.c
@@ -3354,11 +3354,12 @@  static vm_fault_t filemap_map_folio_range(struct vm_fault *vmf,
 	struct file *file = vma->vm_file;
 	struct page *page = folio_page(folio, start);
 	unsigned int mmap_miss = READ_ONCE(file->f_ra.mmap_miss);
-	unsigned int ref_count = 0, count = 0;
+	unsigned int mapped = 0;
+	pte_t *pte = vmf->pte;
 
 	do {
 		if (PageHWPoison(page))
-			continue;
+			goto map;
 
 		if (mmap_miss > 0)
 			mmap_miss--;
@@ -3368,20 +3369,34 @@  static vm_fault_t filemap_map_folio_range(struct vm_fault *vmf,
 		 * handled in the specific fault path, and it'll prohibit the
 		 * fault-around logic.
 		 */
-		if (!pte_none(*vmf->pte))
-			continue;
+		if (!pte_none(pte[mapped]))
+			goto map;
 
 		if (vmf->address == addr)
 			ret = VM_FAULT_NOPAGE;
 
-		ref_count++;
-		do_set_pte(vmf, page, addr);
-	} while (vmf->pte++, page++, addr += PAGE_SIZE, ++count < nr_pages);
+		mapped++;
+		continue;
 
-	/* Restore the vmf->pte */
-	vmf->pte -= nr_pages;
+map:
+		if (mapped) {
+			do_set_pte_range(vmf, folio, addr, pte, start, mapped);
+			folio_ref_add(folio, mapped);
+		}
+
+		/* advance 1 to jump over the HWPoison or !pte_none entry */
+		mapped++;
+		start += mapped;
+		pte += mapped;
+		addr += mapped * PAGE_SIZE;
+		mapped = 0;
+	} while (page++, --nr_pages > 0);
+
+	if (mapped) {
+		do_set_pte_range(vmf, folio, addr, pte, start, mapped);
+		folio_ref_add(folio, mapped);
+	}
 
-	folio_ref_add(folio, ref_count);
 	WRITE_ONCE(file->f_ra.mmap_miss, mmap_miss);
 
 	return ret;