Message ID | 1474992504-20133-17-git-send-email-jack@suse.cz (mailing list archive) |
---|---|
State | Not Applicable |
Headers | show |
On Tue, Sep 27, 2016 at 06:08:20PM +0200, Jan Kara wrote: > Provide a helper function for finishing write faults due to PTE being > read-only. The helper will be used by DAX to avoid the need of > complicating generic MM code with DAX locking specifics. > > Signed-off-by: Jan Kara <jack@suse.cz> > --- > include/linux/mm.h | 1 + > mm/memory.c | 65 +++++++++++++++++++++++++++++++----------------------- > 2 files changed, 39 insertions(+), 27 deletions(-) > > diff --git a/include/linux/mm.h b/include/linux/mm.h > index 1055f2ece80d..e5a014be8932 100644 > --- a/include/linux/mm.h > +++ b/include/linux/mm.h > @@ -617,6 +617,7 @@ static inline pte_t maybe_mkwrite(pte_t pte, struct vm_area_struct *vma) > int alloc_set_pte(struct vm_fault *vmf, struct mem_cgroup *memcg, > struct page *page); > int finish_fault(struct vm_fault *vmf); > +int finish_mkwrite_fault(struct vm_fault *vmf); > #endif > > /* > diff --git a/mm/memory.c b/mm/memory.c > index f49e736d6a36..8c8cb7f2133e 100644 > --- a/mm/memory.c > +++ b/mm/memory.c > @@ -2266,6 +2266,36 @@ oom: > return VM_FAULT_OOM; > } > > +/** > + * finish_mkrite_fault - finish page fault making PTE writeable once the page finish_mkwrite_fault > @@ -2315,26 +2335,17 @@ static int wp_page_shared(struct vm_fault *vmf) > put_page(vmf->page); > return tmp; > } > - /* > - * Since we dropped the lock we need to revalidate > - * the PTE as someone else may have changed it. If > - * they did, we just return, as we can count on the > - * MMU to tell us if they didn't also make it writable. > - */ > - vmf->pte = pte_offset_map_lock(vma->vm_mm, vmf->pmd, > - vmf->address, &vmf->ptl); > - if (!pte_same(*vmf->pte, vmf->orig_pte)) { > + tmp = finish_mkwrite_fault(vmf); > + if (unlikely(!tmp || (tmp & > + (VM_FAULT_ERROR | VM_FAULT_NOPAGE)))) { The 'tmp' return from finish_mkwrite_fault() can only be 0 or VM_FAULT_WRITE. 
I think this test should just be if (unlikely(!tmp)) { With that and the small spelling fix: Reviewed-by: Ross Zwisler <ross.zwisler@linux.intel.com>
On Tue 18-10-16 12:35:25, Ross Zwisler wrote: > On Tue, Sep 27, 2016 at 06:08:20PM +0200, Jan Kara wrote: > > Provide a helper function for finishing write faults due to PTE being > > read-only. The helper will be used by DAX to avoid the need of > > complicating generic MM code with DAX locking specifics. > > > > Signed-off-by: Jan Kara <jack@suse.cz> > > --- > > include/linux/mm.h | 1 + > > mm/memory.c | 65 +++++++++++++++++++++++++++++++----------------------- > > 2 files changed, 39 insertions(+), 27 deletions(-) > > > > diff --git a/include/linux/mm.h b/include/linux/mm.h > > index 1055f2ece80d..e5a014be8932 100644 > > --- a/include/linux/mm.h > > +++ b/include/linux/mm.h > > @@ -617,6 +617,7 @@ static inline pte_t maybe_mkwrite(pte_t pte, struct vm_area_struct *vma) > > int alloc_set_pte(struct vm_fault *vmf, struct mem_cgroup *memcg, > > struct page *page); > > int finish_fault(struct vm_fault *vmf); > > +int finish_mkwrite_fault(struct vm_fault *vmf); > > #endif > > > > /* > > diff --git a/mm/memory.c b/mm/memory.c > > index f49e736d6a36..8c8cb7f2133e 100644 > > --- a/mm/memory.c > > +++ b/mm/memory.c > > @@ -2266,6 +2266,36 @@ oom: > > return VM_FAULT_OOM; > > } > > > > +/** > > + * finish_mkrite_fault - finish page fault making PTE writeable once the page > finish_mkwrite_fault Fixed, thanks. > > @@ -2315,26 +2335,17 @@ static int wp_page_shared(struct vm_fault *vmf) > > put_page(vmf->page); > > return tmp; > > } > > - /* > > - * Since we dropped the lock we need to revalidate > > - * the PTE as someone else may have changed it. If > > - * they did, we just return, as we can count on the > > - * MMU to tell us if they didn't also make it writable. 
> > - */ > > - vmf->pte = pte_offset_map_lock(vma->vm_mm, vmf->pmd, > > - vmf->address, &vmf->ptl); > > - if (!pte_same(*vmf->pte, vmf->orig_pte)) { > > + tmp = finish_mkwrite_fault(vmf); > > + if (unlikely(!tmp || (tmp & > > + (VM_FAULT_ERROR | VM_FAULT_NOPAGE)))) { > > The 'tmp' return from finish_mkwrite_fault() can only be 0 or VM_FAULT_WRITE. > I think this test should just be > > if (unlikely(!tmp)) { Right, finish_mkwrite_fault() cannot currently throw other errors than "retry needed" which is indicated by tmp == 0. However I'd prefer to keep symmetry with finish_fault() handler which can throw other errors and better be prepared to handle them from finish_mkwrite_fault() as well. Honza
On Wed, Oct 19, 2016 at 09:16:00AM +0200, Jan Kara wrote: > On Tue 18-10-16 12:35:25, Ross Zwisler wrote: > > On Tue, Sep 27, 2016 at 06:08:20PM +0200, Jan Kara wrote: > > > Provide a helper function for finishing write faults due to PTE being > > > read-only. The helper will be used by DAX to avoid the need of > > > complicating generic MM code with DAX locking specifics. > > > > > > Signed-off-by: Jan Kara <jack@suse.cz> > > > --- > > > include/linux/mm.h | 1 + > > > mm/memory.c | 65 +++++++++++++++++++++++++++++++----------------------- > > > 2 files changed, 39 insertions(+), 27 deletions(-) > > > > > > diff --git a/include/linux/mm.h b/include/linux/mm.h > > > index 1055f2ece80d..e5a014be8932 100644 > > > --- a/include/linux/mm.h > > > +++ b/include/linux/mm.h > > > @@ -617,6 +617,7 @@ static inline pte_t maybe_mkwrite(pte_t pte, struct vm_area_struct *vma) > > > int alloc_set_pte(struct vm_fault *vmf, struct mem_cgroup *memcg, > > > struct page *page); > > > int finish_fault(struct vm_fault *vmf); > > > +int finish_mkwrite_fault(struct vm_fault *vmf); > > > #endif > > > > > > /* > > > diff --git a/mm/memory.c b/mm/memory.c > > > index f49e736d6a36..8c8cb7f2133e 100644 > > > --- a/mm/memory.c > > > +++ b/mm/memory.c > > > @@ -2266,6 +2266,36 @@ oom: > > > return VM_FAULT_OOM; > > > } > > > > > > +/** > > > + * finish_mkrite_fault - finish page fault making PTE writeable once the page > > finish_mkwrite_fault > > Fixed, thanks. > > > > @@ -2315,26 +2335,17 @@ static int wp_page_shared(struct vm_fault *vmf) > > > put_page(vmf->page); > > > return tmp; > > > } > > > - /* > > > - * Since we dropped the lock we need to revalidate > > > - * the PTE as someone else may have changed it. If > > > - * they did, we just return, as we can count on the > > > - * MMU to tell us if they didn't also make it writable. 
> > > - */ > > > - vmf->pte = pte_offset_map_lock(vma->vm_mm, vmf->pmd, > > > - vmf->address, &vmf->ptl); > > > - if (!pte_same(*vmf->pte, vmf->orig_pte)) { > > > + tmp = finish_mkwrite_fault(vmf); > > > + if (unlikely(!tmp || (tmp & > > > + (VM_FAULT_ERROR | VM_FAULT_NOPAGE)))) { > > > > The 'tmp' return from finish_mkwrite_fault() can only be 0 or VM_FAULT_WRITE. > > I think this test should just be > > > > if (unlikely(!tmp)) { > > Right, finish_mkwrite_fault() cannot currently throw other errors than > "retry needed" which is indicated by tmp == 0. However I'd prefer to keep > symmetry with finish_fault() handler which can throw other errors and > better be prepared to handle them from finish_mkwrite_fault() as well. Fair enough. You can add: Reviewed-by: Ross Zwisler <ross.zwisler@linux.intel.com>
On Wed 19-10-16 11:21:52, Ross Zwisler wrote: > On Wed, Oct 19, 2016 at 09:16:00AM +0200, Jan Kara wrote: > > > > @@ -2315,26 +2335,17 @@ static int wp_page_shared(struct vm_fault *vmf) > > > > put_page(vmf->page); > > > > return tmp; > > > > } > > > > - /* > > > > - * Since we dropped the lock we need to revalidate > > > > - * the PTE as someone else may have changed it. If > > > > - * they did, we just return, as we can count on the > > > > - * MMU to tell us if they didn't also make it writable. > > > > - */ > > > > - vmf->pte = pte_offset_map_lock(vma->vm_mm, vmf->pmd, > > > > - vmf->address, &vmf->ptl); > > > > - if (!pte_same(*vmf->pte, vmf->orig_pte)) { > > > > + tmp = finish_mkwrite_fault(vmf); > > > > + if (unlikely(!tmp || (tmp & > > > > + (VM_FAULT_ERROR | VM_FAULT_NOPAGE)))) { > > > > > > The 'tmp' return from finish_mkwrite_fault() can only be 0 or VM_FAULT_WRITE. > > > I think this test should just be > > > > > > if (unlikely(!tmp)) { > > > > Right, finish_mkwrite_fault() cannot currently throw other errors than > > "retry needed" which is indicated by tmp == 0. However I'd prefer to keep > > symmetry with finish_fault() handler which can throw other errors and > > better be prepared to handle them from finish_mkwrite_fault() as well. > > Fair enough. You can add: > > Reviewed-by: Ross Zwisler <ross.zwisler@linux.intel.com> Thanks. Actually, your question made me have a harder look at return values from finish_mkwrite_fault() and I've added one more commit switching the return values so that finish_mkwrite_fault() returns 0 on success and VM_FAULT_NOPAGE if PTE changed. That is less confusing and even more consistent with what finish_fault() returns. Honza
diff --git a/include/linux/mm.h b/include/linux/mm.h index 1055f2ece80d..e5a014be8932 100644 --- a/include/linux/mm.h +++ b/include/linux/mm.h @@ -617,6 +617,7 @@ static inline pte_t maybe_mkwrite(pte_t pte, struct vm_area_struct *vma) int alloc_set_pte(struct vm_fault *vmf, struct mem_cgroup *memcg, struct page *page); int finish_fault(struct vm_fault *vmf); +int finish_mkwrite_fault(struct vm_fault *vmf); #endif /* diff --git a/mm/memory.c b/mm/memory.c index f49e736d6a36..8c8cb7f2133e 100644 --- a/mm/memory.c +++ b/mm/memory.c @@ -2266,6 +2266,36 @@ oom: return VM_FAULT_OOM; } +/** + * finish_mkwrite_fault - finish page fault making PTE writeable once the page + * is prepared + * + * @vmf: structure describing the fault + * + * This function handles all that is needed to finish a write page fault due + * to PTE being read-only once the mapped page is prepared. It handles locking + * of PTE and modifying it. The function returns VM_FAULT_WRITE on success, + * 0 when PTE got changed before we acquired PTE lock. + * + * The function expects the page to be locked or other protection against + * concurrent faults / writeback (such as DAX radix tree locks). + */ +int finish_mkwrite_fault(struct vm_fault *vmf) +{ + vmf->pte = pte_offset_map_lock(vmf->vma->vm_mm, vmf->pmd, vmf->address, + &vmf->ptl); + /* + * We might have raced with another page fault while we released the + * pte_offset_map_lock. + */ + if (!pte_same(*vmf->pte, vmf->orig_pte)) { + pte_unmap_unlock(vmf->pte, vmf->ptl); + return 0; + } + wp_page_reuse(vmf); + return VM_FAULT_WRITE; +} + /* * Handle write page faults for VM_MIXEDMAP or VM_PFNMAP for a VM_SHARED * mapping @@ -2282,16 +2312,7 @@ static int wp_pfn_shared(struct vm_fault *vmf) ret = vma->vm_ops->pfn_mkwrite(vma, vmf); if (ret & VM_FAULT_ERROR) return ret; - vmf->pte = pte_offset_map_lock(vma->vm_mm, vmf->pmd, - vmf->address, &vmf->ptl); - /* - * We might have raced with another page fault while we - * released the pte_offset_map_lock. 
- */ - if (!pte_same(*vmf->pte, vmf->orig_pte)) { - pte_unmap_unlock(vmf->pte, vmf->ptl); - return 0; - } + return finish_mkwrite_fault(vmf); } wp_page_reuse(vmf); return VM_FAULT_WRITE; @@ -2301,7 +2322,6 @@ static int wp_page_shared(struct vm_fault *vmf) __releases(vmf->ptl) { struct vm_area_struct *vma = vmf->vma; - int page_mkwrite = 0; get_page(vmf->page); @@ -2315,26 +2335,17 @@ static int wp_page_shared(struct vm_fault *vmf) put_page(vmf->page); return tmp; } - /* - * Since we dropped the lock we need to revalidate - * the PTE as someone else may have changed it. If - * they did, we just return, as we can count on the - * MMU to tell us if they didn't also make it writable. - */ - vmf->pte = pte_offset_map_lock(vma->vm_mm, vmf->pmd, - vmf->address, &vmf->ptl); - if (!pte_same(*vmf->pte, vmf->orig_pte)) { + tmp = finish_mkwrite_fault(vmf); + if (unlikely(!tmp || (tmp & + (VM_FAULT_ERROR | VM_FAULT_NOPAGE)))) { unlock_page(vmf->page); - pte_unmap_unlock(vmf->pte, vmf->ptl); put_page(vmf->page); - return 0; + return tmp; } - page_mkwrite = 1; - } - - wp_page_reuse(vmf); - if (!page_mkwrite) + } else { + wp_page_reuse(vmf); lock_page(vmf->page); + } fault_dirty_shared_page(vma, vmf->page); put_page(vmf->page);
Provide a helper function for finishing write faults due to PTE being read-only. The helper will be used by DAX to avoid the need of complicating generic MM code with DAX locking specifics. Signed-off-by: Jan Kara <jack@suse.cz> --- include/linux/mm.h | 1 + mm/memory.c | 65 +++++++++++++++++++++++++++++++----------------------- 2 files changed, 39 insertions(+), 27 deletions(-)