
[RFC,4/5] mm: add do_set_pte_entry()

Message ID 20230130125504.2509710-5-fengwei.yin@intel.com (mailing list archive)
State New
Series folio based filemap_map_pages()

Commit Message

Yin Fengwei Jan. 30, 2023, 12:55 p.m. UTC
do_set_pte_entry() does the same as do_set_pte() but does not update
the mm_counter or add the page to the file rmap. This allows the
mm_counter and rmap operations to be batched later.

Signed-off-by: Yin Fengwei <fengwei.yin@intel.com>
---
 include/linux/mm.h |  2 ++
 mm/memory.c        | 14 ++++++++++++--
 2 files changed, 14 insertions(+), 2 deletions(-)
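
For context, a minimal sketch of how a batching caller might use
do_set_pte_entry() (an illustration only, not code from this series;
folio_add_file_rmap_range() is an assumed batched rmap helper and the
actual series may use a different name):

	/*
	 * Hypothetical sketch: map nr consecutive pages of a folio, then do
	 * the accounting once for the whole batch instead of once per page.
	 */
	static void map_folio_range_sketch(struct vm_fault *vmf, struct folio *folio,
			unsigned long addr, unsigned long start, unsigned int nr)
	{
		struct vm_area_struct *vma = vmf->vma;
		unsigned int i;

		for (i = 0; i < nr; i++, addr += PAGE_SIZE, vmf->pte++)
			do_set_pte_entry(vmf, folio_page(folio, start + i), addr);

		/* One mm_counter update and one rmap call for the whole batch. */
		add_mm_counter(vma->vm_mm, mm_counter_file(&folio->page), nr);
		folio_add_file_rmap_range(folio, start, nr, vma);	/* assumed helper */
	}

Note that, per the review below, the rmap setup would need to happen
before the PTEs are made visible, so the final ordering in the series
may differ.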

Comments

Matthew Wilcox Jan. 30, 2023, 1:53 p.m. UTC | #1
On Mon, Jan 30, 2023 at 08:55:03PM +0800, Yin Fengwei wrote:
> +++ b/include/linux/mm.h
> @@ -1061,6 +1061,8 @@ static inline pte_t maybe_mkwrite(pte_t pte, struct vm_area_struct *vma)
>  
>  vm_fault_t do_set_pmd(struct vm_fault *vmf, struct page *page);
>  void do_set_pte(struct vm_fault *vmf, struct page *page, unsigned long addr);
> +void do_set_pte_entry(struct vm_fault *vmf, struct page *page,
> +	 unsigned long addr);

indentation

> -void do_set_pte(struct vm_fault *vmf, struct page *page, unsigned long addr)
> +void do_set_pte_entry(struct vm_fault *vmf, struct page *page,
> +	unsigned long addr)

ditto

>  {
>  	struct vm_area_struct *vma = vmf->vma;
>  	bool uffd_wp = pte_marker_uffd_wp(vmf->orig_pte);
> @@ -4276,6 +4277,16 @@ void do_set_pte(struct vm_fault *vmf, struct page *page, unsigned long addr)
>  		entry = maybe_mkwrite(pte_mkdirty(entry), vma);
>  	if (unlikely(uffd_wp))
>  		entry = pte_mkuffd_wp(entry);
> +	set_pte_at(vma->vm_mm, addr, vmf->pte, entry);

I'm not sure this is safe.  As soon as you call set_pte_at(), the page
can be found by GUP.  If it is, and it doesn't have rmap set up, aren't
things going to go horribly wrong?

Yin Fengwei Jan. 31, 2023, 1:06 a.m. UTC | #2
On 1/30/2023 9:53 PM, Matthew Wilcox wrote:
> On Mon, Jan 30, 2023 at 08:55:03PM +0800, Yin Fengwei wrote:
>> +++ b/include/linux/mm.h
>> @@ -1061,6 +1061,8 @@ static inline pte_t maybe_mkwrite(pte_t pte, struct vm_area_struct *vma)
>>  
>>  vm_fault_t do_set_pmd(struct vm_fault *vmf, struct page *page);
>>  void do_set_pte(struct vm_fault *vmf, struct page *page, unsigned long addr);
>> +void do_set_pte_entry(struct vm_fault *vmf, struct page *page,
>> +	 unsigned long addr);
> 
> indentation
> 
>> -void do_set_pte(struct vm_fault *vmf, struct page *page, unsigned long addr)
>> +void do_set_pte_entry(struct vm_fault *vmf, struct page *page,
>> +	unsigned long addr)
> 
> ditto
> 
>>  {
>>  	struct vm_area_struct *vma = vmf->vma;
>>  	bool uffd_wp = pte_marker_uffd_wp(vmf->orig_pte);
>> @@ -4276,6 +4277,16 @@ void do_set_pte(struct vm_fault *vmf, struct page *page, unsigned long addr)
>>  		entry = maybe_mkwrite(pte_mkdirty(entry), vma);
>>  	if (unlikely(uffd_wp))
>>  		entry = pte_mkuffd_wp(entry);
>> +	set_pte_at(vma->vm_mm, addr, vmf->pte, entry);
> 
> I'm not sure this is safe.  As soon as you call set_pte_at(), the page
> can be found by GUP.  If it is, and it doesn't have rmap set up, aren't
> things going to go horribly wrong?
Thanks a lot for pointing this out. I was not aware of the connection
between this ordering and GUP. I will take care of it in the next version
by setting up the rmap before set_pte_at().

Regards
Yin, Fengwei

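As a minimal sketch of that fix, using the existing non-batched helpers
(an illustration only, not code from a later revision of the series):

	static void set_file_pte_sketch(struct vm_fault *vmf, struct page *page,
			unsigned long addr, pte_t entry)
	{
		struct vm_area_struct *vma = vmf->vma;

		/* Establish the mm_counter and rmap state first ... */
		inc_mm_counter(vma->vm_mm, mm_counter_file(page));
		page_add_file_rmap(page, vma, false);

		/*
		 * ... and only then make the PTE visible, so a concurrent GUP
		 * walker that finds this mapping always sees rmap already set up.
		 */
		set_pte_at(vma->vm_mm, addr, vmf->pte, entry);
	}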

Patch

diff --git a/include/linux/mm.h b/include/linux/mm.h
index 63c0e645f37c..f31426a40e05 100644
--- a/include/linux/mm.h
+++ b/include/linux/mm.h
@@ -1061,6 +1061,8 @@ static inline pte_t maybe_mkwrite(pte_t pte, struct vm_area_struct *vma)
 
 vm_fault_t do_set_pmd(struct vm_fault *vmf, struct page *page);
 void do_set_pte(struct vm_fault *vmf, struct page *page, unsigned long addr);
+void do_set_pte_entry(struct vm_fault *vmf, struct page *page,
+	 unsigned long addr);
 
 vm_fault_t finish_fault(struct vm_fault *vmf);
 vm_fault_t finish_mkwrite_fault(struct vm_fault *vmf);
diff --git a/mm/memory.c b/mm/memory.c
index 61ccd2d7e6a6..d0c27e11fab4 100644
--- a/mm/memory.c
+++ b/mm/memory.c
@@ -4256,7 +4256,8 @@ vm_fault_t do_set_pmd(struct vm_fault *vmf, struct page *page)
 }
 #endif
 
-void do_set_pte(struct vm_fault *vmf, struct page *page, unsigned long addr)
+void do_set_pte_entry(struct vm_fault *vmf, struct page *page,
+	unsigned long addr)
 {
 	struct vm_area_struct *vma = vmf->vma;
 	bool uffd_wp = pte_marker_uffd_wp(vmf->orig_pte);
@@ -4276,6 +4277,16 @@ void do_set_pte(struct vm_fault *vmf, struct page *page, unsigned long addr)
 		entry = maybe_mkwrite(pte_mkdirty(entry), vma);
 	if (unlikely(uffd_wp))
 		entry = pte_mkuffd_wp(entry);
+	set_pte_at(vma->vm_mm, addr, vmf->pte, entry);
+}
+
+void do_set_pte(struct vm_fault *vmf, struct page *page, unsigned long addr)
+{
+	struct vm_area_struct *vma = vmf->vma;
+	bool write = vmf->flags & FAULT_FLAG_WRITE;
+
+	do_set_pte_entry(vmf, page, addr);
+
 	/* copy-on-write page */
 	if (write && !(vma->vm_flags & VM_SHARED)) {
 		inc_mm_counter(vma->vm_mm, MM_ANONPAGES);
@@ -4285,7 +4296,6 @@ void do_set_pte(struct vm_fault *vmf, struct page *page, unsigned long addr)
 		inc_mm_counter(vma->vm_mm, mm_counter_file(page));
 		page_add_file_rmap(page, vma, false);
 	}
-	set_pte_at(vma->vm_mm, addr, vmf->pte, entry);
 }
 
 static bool vmf_pte_changed(struct vm_fault *vmf)