diff mbox series

[rfc,4/4] mm: filemap: try to batch lruvec stat updating

Message ID 20240429072417.2146732-5-wangkefeng.wang@huawei.com (mailing list archive)
State New
Headers show
Series mm: filemap: try to batch lruvec stat updating | expand

Commit Message

Kefeng Wang April 29, 2024, 7:24 a.m. UTC
The filemap_map_pages() tries to map few pages(eg, 16 pages), but the
lruvec stat updating is called on each mapping, since the updating is
time-consuming, especially with memcg, so try to batch it when the memcg
and pgdat are same during the mapping, if luckily, we could save most of
time of lruvec stat updating, the lat_pagefault shows 3~4% improvement.

Signed-off-by: Kefeng Wang <wangkefeng.wang@huawei.com>
---
 mm/filemap.c | 33 ++++++++++++++++++++++++++++++---
 1 file changed, 30 insertions(+), 3 deletions(-)

Comments

Kefeng Wang May 7, 2024, 9:06 a.m. UTC | #1
+ memcg maintainers and David too, please check all patches from link

https://lore.kernel.org/linux-mm/20240429072417.2146732-1-wangkefeng.wang@huawei.com/

Thanks

On 2024/4/29 15:24, Kefeng Wang wrote:
> The filemap_map_pages() tries to map few pages(eg, 16 pages), but the
> lruvec stat updating is called on each mapping, since the updating is
> time-consuming, especially with memcg, so try to batch it when the memcg
> and pgdat are same during the mapping, if luckily, we could save most of
> time of lruvec stat updating, the lat_pagefault shows 3~4% improvement.
> 
> Signed-off-by: Kefeng Wang <wangkefeng.wang@huawei.com>
> ---
>   mm/filemap.c | 33 ++++++++++++++++++++++++++++++---
>   1 file changed, 30 insertions(+), 3 deletions(-)
> 
> diff --git a/mm/filemap.c b/mm/filemap.c
> index 3966b6616d02..b27281707098 100644
> --- a/mm/filemap.c
> +++ b/mm/filemap.c
> @@ -3615,6 +3615,20 @@ static vm_fault_t filemap_map_order0_folio(struct vm_fault *vmf,
>   	return ret;
>   }
>   
> +static void filemap_lruvec_stat_update(struct mem_cgroup *memcg,
> +				       pg_data_t *pgdat, int nr)
> +{
> +	struct lruvec *lruvec;
> +
> +	if (!memcg) {
> +		__mod_node_page_state(pgdat, NR_FILE_MAPPED, nr);
> +		return;
> +	}
> +
> +	lruvec = mem_cgroup_lruvec(memcg, pgdat);
> +	__mod_lruvec_state(lruvec, NR_FILE_MAPPED, nr);
> +}
> +
>   vm_fault_t filemap_map_pages(struct vm_fault *vmf,
>   			     pgoff_t start_pgoff, pgoff_t end_pgoff)
>   {
> @@ -3628,6 +3642,9 @@ vm_fault_t filemap_map_pages(struct vm_fault *vmf,
>   	vm_fault_t ret = 0;
>   	unsigned long rss = 0;
>   	unsigned int nr_pages = 0, mmap_miss = 0, mmap_miss_saved, folio_type;
> +	struct mem_cgroup *memcg, *memcg_cur;
> +	pg_data_t *pgdat, *pgdat_cur;
> +	int nr_mapped = 0;
>   
>   	rcu_read_lock();
>   	folio = next_uptodate_folio(&xas, mapping, end_pgoff);
> @@ -3648,9 +3665,20 @@ vm_fault_t filemap_map_pages(struct vm_fault *vmf,
>   	}
>   
>   	folio_type = mm_counter_file(folio);
> +	memcg = folio_memcg(folio);
> +	pgdat = folio_pgdat(folio);
>   	do {
>   		unsigned long end;
> -		int nr_mapped = 0;
> +
> +		memcg_cur = folio_memcg(folio);
> +		pgdat_cur = folio_pgdat(folio);
> +
> +		if (unlikely(memcg != memcg_cur || pgdat != pgdat_cur)) {
> +			filemap_lruvec_stat_update(memcg, pgdat, nr_mapped);
> +			nr_mapped = 0;
> +			memcg = memcg_cur;
> +			pgdat = pgdat_cur;
> +		}
>   
>   		addr += (xas.xa_index - last_pgoff) << PAGE_SHIFT;
>   		vmf->pte += xas.xa_index - last_pgoff;
> @@ -3668,11 +3696,10 @@ vm_fault_t filemap_map_pages(struct vm_fault *vmf,
>   					nr_pages, &rss, &nr_mapped,
>   					&mmap_miss);
>   
> -		__lruvec_stat_mod_folio(folio, NR_FILE_MAPPED, nr_mapped);
> -
>   		folio_unlock(folio);
>   		folio_put(folio);
>   	} while ((folio = next_uptodate_folio(&xas, mapping, end_pgoff)) != NULL);
> +	filemap_lruvec_stat_update(memcg, pgdat, nr_mapped);
>   	add_mm_counter(vma->vm_mm, folio_type, rss);
>   	pte_unmap_unlock(vmf->pte, vmf->ptl);
>   out:
Johannes Weiner May 9, 2024, 2:01 p.m. UTC | #2
On Tue, May 07, 2024 at 05:06:57PM +0800, Kefeng Wang wrote:
> > +static void filemap_lruvec_stat_update(struct mem_cgroup *memcg,
> > +				       pg_data_t *pgdat, int nr)
> > +{
> > +	struct lruvec *lruvec;
> > +
> > +	if (!memcg) {
> > +		__mod_node_page_state(pgdat, NR_FILE_MAPPED, nr);
> > +		return;
> > +	}
> > +
> > +	lruvec = mem_cgroup_lruvec(memcg, pgdat);
> > +	__mod_lruvec_state(lruvec, NR_FILE_MAPPED, nr);
> > +}
> > +
> >   vm_fault_t filemap_map_pages(struct vm_fault *vmf,
> >   			     pgoff_t start_pgoff, pgoff_t end_pgoff)
> >   {
> > @@ -3628,6 +3642,9 @@ vm_fault_t filemap_map_pages(struct vm_fault *vmf,
> >   	vm_fault_t ret = 0;
> >   	unsigned long rss = 0;
> >   	unsigned int nr_pages = 0, mmap_miss = 0, mmap_miss_saved, folio_type;
> > +	struct mem_cgroup *memcg, *memcg_cur;
> > +	pg_data_t *pgdat, *pgdat_cur;
> > +	int nr_mapped = 0;
> >   
> >   	rcu_read_lock();
> >   	folio = next_uptodate_folio(&xas, mapping, end_pgoff);
> > @@ -3648,9 +3665,20 @@ vm_fault_t filemap_map_pages(struct vm_fault *vmf,
> >   	}
> >   
> >   	folio_type = mm_counter_file(folio);
> > +	memcg = folio_memcg(folio);
> > +	pgdat = folio_pgdat(folio);

You should be able to do:

	lruvec = folio_lruvec(folio);

and then pass that directly to filemap_lruvec_stat_update().
Kefeng Wang May 10, 2024, 1:55 a.m. UTC | #3
On 2024/5/9 22:01, Johannes Weiner wrote:
> On Tue, May 07, 2024 at 05:06:57PM +0800, Kefeng Wang wrote:
>>> +static void filemap_lruvec_stat_update(struct mem_cgroup *memcg,
>>> +				       pg_data_t *pgdat, int nr)
>>> +{
>>> +	struct lruvec *lruvec;
>>> +
>>> +	if (!memcg) {
>>> +		__mod_node_page_state(pgdat, NR_FILE_MAPPED, nr);
>>> +		return;
>>> +	}
>>> +
>>> +	lruvec = mem_cgroup_lruvec(memcg, pgdat);
>>> +	__mod_lruvec_state(lruvec, NR_FILE_MAPPED, nr);
>>> +}
>>> +
>>>    vm_fault_t filemap_map_pages(struct vm_fault *vmf,
>>>    			     pgoff_t start_pgoff, pgoff_t end_pgoff)
>>>    {
>>> @@ -3628,6 +3642,9 @@ vm_fault_t filemap_map_pages(struct vm_fault *vmf,
>>>    	vm_fault_t ret = 0;
>>>    	unsigned long rss = 0;
>>>    	unsigned int nr_pages = 0, mmap_miss = 0, mmap_miss_saved, folio_type;
>>> +	struct mem_cgroup *memcg, *memcg_cur;
>>> +	pg_data_t *pgdat, *pgdat_cur;
>>> +	int nr_mapped = 0;
>>>    
>>>    	rcu_read_lock();
>>>    	folio = next_uptodate_folio(&xas, mapping, end_pgoff);
>>> @@ -3648,9 +3665,20 @@ vm_fault_t filemap_map_pages(struct vm_fault *vmf,
>>>    	}
>>>    
>>>    	folio_type = mm_counter_file(folio);
>>> +	memcg = folio_memcg(folio);
>>> +	pgdat = folio_pgdat(folio);
> 
> You should be able to do:
> 
> 	lruvec = folio_lruvec(folio);
> 
> and then pass that directly to filemap_lruvec_stat_update().

It's obviously better, will update and address David's comment in 
patch3, thank you.
diff mbox series

Patch

diff --git a/mm/filemap.c b/mm/filemap.c
index 3966b6616d02..b27281707098 100644
--- a/mm/filemap.c
+++ b/mm/filemap.c
@@ -3615,6 +3615,20 @@  static vm_fault_t filemap_map_order0_folio(struct vm_fault *vmf,
 	return ret;
 }
 
+static void filemap_lruvec_stat_update(struct mem_cgroup *memcg,
+				       pg_data_t *pgdat, int nr)
+{
+	struct lruvec *lruvec;
+
+	if (!memcg) {
+		__mod_node_page_state(pgdat, NR_FILE_MAPPED, nr);
+		return;
+	}
+
+	lruvec = mem_cgroup_lruvec(memcg, pgdat);
+	__mod_lruvec_state(lruvec, NR_FILE_MAPPED, nr);
+}
+
 vm_fault_t filemap_map_pages(struct vm_fault *vmf,
 			     pgoff_t start_pgoff, pgoff_t end_pgoff)
 {
@@ -3628,6 +3642,9 @@  vm_fault_t filemap_map_pages(struct vm_fault *vmf,
 	vm_fault_t ret = 0;
 	unsigned long rss = 0;
 	unsigned int nr_pages = 0, mmap_miss = 0, mmap_miss_saved, folio_type;
+	struct mem_cgroup *memcg, *memcg_cur;
+	pg_data_t *pgdat, *pgdat_cur;
+	int nr_mapped = 0;
 
 	rcu_read_lock();
 	folio = next_uptodate_folio(&xas, mapping, end_pgoff);
@@ -3648,9 +3665,20 @@  vm_fault_t filemap_map_pages(struct vm_fault *vmf,
 	}
 
 	folio_type = mm_counter_file(folio);
+	memcg = folio_memcg(folio);
+	pgdat = folio_pgdat(folio);
 	do {
 		unsigned long end;
-		int nr_mapped = 0;
+
+		memcg_cur = folio_memcg(folio);
+		pgdat_cur = folio_pgdat(folio);
+
+		if (unlikely(memcg != memcg_cur || pgdat != pgdat_cur)) {
+			filemap_lruvec_stat_update(memcg, pgdat, nr_mapped);
+			nr_mapped = 0;
+			memcg = memcg_cur;
+			pgdat = pgdat_cur;
+		}
 
 		addr += (xas.xa_index - last_pgoff) << PAGE_SHIFT;
 		vmf->pte += xas.xa_index - last_pgoff;
@@ -3668,11 +3696,10 @@  vm_fault_t filemap_map_pages(struct vm_fault *vmf,
 					nr_pages, &rss, &nr_mapped,
 					&mmap_miss);
 
-		__lruvec_stat_mod_folio(folio, NR_FILE_MAPPED, nr_mapped);
-
 		folio_unlock(folio);
 		folio_put(folio);
 	} while ((folio = next_uptodate_folio(&xas, mapping, end_pgoff)) != NULL);
+	filemap_lruvec_stat_update(memcg, pgdat, nr_mapped);
 	add_mm_counter(vma->vm_mm, folio_type, rss);
 	pte_unmap_unlock(vmf->pte, vmf->ptl);
 out: