diff mbox series

filemap: add filemap_map_order0_folio() to handle order0 folio

Message ID 20230914134741.1937654-1-fengwei.yin@intel.com (mailing list archive)
State New
Headers show
Series filemap: add filemap_map_order0_folio() to handle order0 folio | expand

Commit Message

Yin, Fengwei Sept. 14, 2023, 1:47 p.m. UTC
Kernel test robot reported regressions for several benchmarks [1].
The regression are related with commit:
de74976eb65151a2f568e477fc2e0032df5b22b4 ("filemap: add filemap_map_folio_range()")

It turned out that function filemap_map_folio_range() brings these
regressions when handle folio with order0.

Add filemap_map_order0_folio() to handle order0 folio. The benefit
come from two perspectives:
  - the code size is smaller (around 126 bytes)
  - no loop

Testing showed the regressions reported by 0day [1] all are fixed:
commit 9f1f5b60e76d44fa: parent commit of de74976eb65151a2
commit fbdf9263a3d7fdbd: latest mm-unstable commit
commit 7fbfe2003f84686d: this fixing patch

9f1f5b60e76d44fa fbdf9263a3d7fdbd            7fbfe2003f84686d
---------------- --------------------------- ---------------------------
   3843810           -21.4%    3020268            +4.6%    4018708      stress-ng.bad-altstack.ops
     64061           -21.4%      50336            +4.6%      66977      stress-ng.bad-altstack.ops_per_sec

   1709026           -14.4%    1462102            +2.4%    1750757      stress-ng.fork.ops
     28483           -14.4%      24368            +2.4%      29179      stress-ng.fork.ops_per_sec

   3685088           -53.6%    1710976            +0.5%    3702454      stress-ng.zombie.ops
     56732           -65.3%      19667            +0.7%      57107      stress-ng.zombie.ops_per_sec

     61874           -12.1%      54416            +0.4%      62136      vm-scalability.median
  13527663           -11.7%   11942117            -0.1%   13513946      vm-scalability.throughput
 4.066e+09           -11.7%   3.59e+09            -0.1%  4.061e+09      vm-scalability.workload

[1]:
https://lore.kernel.org/oe-lkp/72e017b9-deb6-44fa-91d6-716ee2c39cbc@intel.com/T/#m7d2bba30f75a9cee8eab07e5809abd9b3b206c84

Reported-by: kernel test robot <oliver.sang@intel.com>
Closes: https://lore.kernel.org/oe-lkp/202309111556.b2aa3d7a-oliver.sang@intel.com
Signed-off-by: Yin Fengwei <fengwei.yin@intel.com>
---
 mm/filemap.c | 69 ++++++++++++++++++++++++++++++++++++----------------
 1 file changed, 48 insertions(+), 21 deletions(-)
diff mbox series

Patch

diff --git a/mm/filemap.c b/mm/filemap.c
index 8962d1255905..e712c99717de 100644
--- a/mm/filemap.c
+++ b/mm/filemap.c
@@ -3461,13 +3461,11 @@  static struct folio *next_uptodate_folio(struct xa_state *xas,
  */
 static vm_fault_t filemap_map_folio_range(struct vm_fault *vmf,
 			struct folio *folio, unsigned long start,
-			unsigned long addr, unsigned int nr_pages)
+			unsigned long addr, unsigned int nr_pages,
+			unsigned int *mmap_miss)
 {
 	vm_fault_t ret = 0;
-	struct vm_area_struct *vma = vmf->vma;
-	struct file *file = vma->vm_file;
 	struct page *page = folio_page(folio, start);
-	unsigned int mmap_miss = READ_ONCE(file->f_ra.mmap_miss);
 	unsigned int count = 0;
 	pte_t *old_ptep = vmf->pte;
 
@@ -3475,8 +3473,7 @@  static vm_fault_t filemap_map_folio_range(struct vm_fault *vmf,
 		if (PageHWPoison(page + count))
 			goto skip;
 
-		if (mmap_miss > 0)
-			mmap_miss--;
+		(*mmap_miss)++;
 
 		/*
 		 * NOTE: If there're PTE markers, we'll leave them to be
@@ -3511,7 +3508,35 @@  static vm_fault_t filemap_map_folio_range(struct vm_fault *vmf,
 	}
 
 	vmf->pte = old_ptep;
-	WRITE_ONCE(file->f_ra.mmap_miss, mmap_miss);
+
+	return ret;
+}
+
+static vm_fault_t filemap_map_order0_folio(struct vm_fault *vmf,
+		struct folio *folio, unsigned long addr,
+		unsigned int *mmap_miss)
+{
+	vm_fault_t ret = 0;
+	struct page *page = &folio->page;
+
+	if (PageHWPoison(page))
+		return ret;
+
+	(*mmap_miss)++;
+
+	/*
+	 * NOTE: If there're PTE markers, we'll leave them to be
+	 * handled in the specific fault path, and it'll prohibit
+	 * the fault-around logic.
+	 */
+	if (!pte_none(ptep_get(vmf->pte)))
+		return ret;
+
+	if (vmf->address == addr)
+		ret = VM_FAULT_NOPAGE;
+
+	set_pte_range(vmf, folio, page, 1, addr);
+	folio_ref_inc(folio);
 
 	return ret;
 }
@@ -3527,7 +3552,7 @@  vm_fault_t filemap_map_pages(struct vm_fault *vmf,
 	XA_STATE(xas, &mapping->i_pages, start_pgoff);
 	struct folio *folio;
 	vm_fault_t ret = 0;
-	int nr_pages = 0;
+	unsigned int nr_pages = 0, mmap_miss = 0, mmap_miss_saved;
 
 	rcu_read_lock();
 	folio = next_uptodate_folio(&xas, mapping, end_pgoff);
@@ -3555,25 +3580,27 @@  vm_fault_t filemap_map_pages(struct vm_fault *vmf,
 		end = folio->index + folio_nr_pages(folio) - 1;
 		nr_pages = min(end, end_pgoff) - xas.xa_index + 1;
 
-		/*
-		 * NOTE: If there're PTE markers, we'll leave them to be
-		 * handled in the specific fault path, and it'll prohibit the
-		 * fault-around logic.
-		 */
-		if (!pte_none(ptep_get(vmf->pte)))
-			goto unlock;
-
-		ret |= filemap_map_folio_range(vmf, folio,
-				xas.xa_index - folio->index, addr, nr_pages);
+		if (!folio_test_large(folio))
+			ret |= filemap_map_order0_folio(vmf,
+					folio, addr, &mmap_miss);
+		else
+			ret |= filemap_map_folio_range(vmf, folio,
+					xas.xa_index - folio->index, addr,
+					nr_pages, &mmap_miss);
 
-unlock:
 		folio_unlock(folio);
 		folio_put(folio);
-		folio = next_uptodate_folio(&xas, mapping, end_pgoff);
-	} while (folio);
+	} while ((folio = next_uptodate_folio(&xas, mapping, end_pgoff)) != NULL);
 	pte_unmap_unlock(vmf->pte, vmf->ptl);
 out:
 	rcu_read_unlock();
+
+	mmap_miss_saved = READ_ONCE(file->f_ra.mmap_miss);
+	if (mmap_miss >= mmap_miss_saved)
+		WRITE_ONCE(file->f_ra.mmap_miss, 0);
+	else
+		WRITE_ONCE(file->f_ra.mmap_miss, mmap_miss_saved - mmap_miss);
+
 	return ret;
 }
 EXPORT_SYMBOL(filemap_map_pages);