[RFC,v9,14/27] mm: Handle Shadow Stack page fault

Message ID 20200205181935.3712-15-yu-cheng.yu@intel.com (mailing list archive)
State New, archived
Series Control-flow Enforcement: Shadow Stack

Commit Message

Yu-cheng Yu Feb. 5, 2020, 6:19 p.m. UTC
When a task does fork(), its Shadow Stack (SHSTK) must be duplicated for
the child.  This patch implements a flow similar to copy-on-write of an
anonymous page, but for SHSTK.

A SHSTK PTE must be RO and Dirty.  This Dirty bit requirement is used to
effect the copying.  In copy_one_pte(), clear the Dirty bit from a SHSTK
PTE to cause a page fault upon the next SHSTK access.  At that time, fix
the PTE and copy/re-use the page.

Signed-off-by: Yu-cheng Yu <yu-cheng.yu@intel.com>
---
 arch/x86/mm/pgtable.c         | 15 +++++++++++++++
 include/asm-generic/pgtable.h | 17 +++++++++++++++++
 mm/memory.c                   |  7 ++++++-
 3 files changed, 38 insertions(+), 1 deletion(-)

Comments

Kees Cook Feb. 25, 2020, 8:20 p.m. UTC | #1
On Wed, Feb 05, 2020 at 10:19:22AM -0800, Yu-cheng Yu wrote:
> When a task does fork(), its Shadow Stack (SHSTK) must be duplicated for
> the child.  This patch implements a flow similar to copy-on-write of an
> anonymous page, but for SHSTK.
> 
> A SHSTK PTE must be RO and Dirty.  This Dirty bit requirement is used to
> effect the copying.  In copy_one_pte(), clear the Dirty bit from a SHSTK
> PTE to cause a page fault upon the next SHSTK access.  At that time, fix
> the PTE and copy/re-use the page.

Just to confirm, during the fork, it's really not a SHSTK for a moment
(it's still RO, but not dirty). Can other racing threads muck this up,
or is this bit removed only on the copied side?

-Kees

> 
> Signed-off-by: Yu-cheng Yu <yu-cheng.yu@intel.com>
> ---
>  arch/x86/mm/pgtable.c         | 15 +++++++++++++++
>  include/asm-generic/pgtable.h | 17 +++++++++++++++++
>  mm/memory.c                   |  7 ++++++-
>  3 files changed, 38 insertions(+), 1 deletion(-)
> 
> diff --git a/arch/x86/mm/pgtable.c b/arch/x86/mm/pgtable.c
> index 7bd2c3a52297..2eb33794c08d 100644
> --- a/arch/x86/mm/pgtable.c
> +++ b/arch/x86/mm/pgtable.c
> @@ -872,3 +872,18 @@ int pmd_free_pte_page(pmd_t *pmd, unsigned long addr)
>  
>  #endif /* CONFIG_X86_64 */
>  #endif	/* CONFIG_HAVE_ARCH_HUGE_VMAP */
> +
> +#ifdef CONFIG_X86_INTEL_SHADOW_STACK_USER
> +inline bool arch_copy_pte_mapping(vm_flags_t vm_flags)
> +{
> +	return (vm_flags & VM_SHSTK);
> +}
> +
> +inline pte_t pte_set_vma_features(pte_t pte, struct vm_area_struct *vma)
> +{
> +	if (vma->vm_flags & VM_SHSTK)
> +		return pte_mkdirty_shstk(pte);
> +	else
> +		return pte;
> +}
> +#endif /* CONFIG_X86_INTEL_SHADOW_STACK_USER */
> diff --git a/include/asm-generic/pgtable.h b/include/asm-generic/pgtable.h
> index 798ea36a0549..9cb2f9ba5895 100644
> --- a/include/asm-generic/pgtable.h
> +++ b/include/asm-generic/pgtable.h
> @@ -1190,6 +1190,23 @@ static inline bool arch_has_pfn_modify_check(void)
>  }
>  #endif /* !_HAVE_ARCH_PFN_MODIFY_ALLOWED */
>  
> +#ifdef CONFIG_MMU
> +#ifndef CONFIG_ARCH_HAS_SHSTK
> +static inline bool arch_copy_pte_mapping(vm_flags_t vm_flags)
> +{
> +	return false;
> +}
> +
> +static inline pte_t pte_set_vma_features(pte_t pte, struct vm_area_struct *vma)
> +{
> +	return pte;
> +}
> +#else
> +bool arch_copy_pte_mapping(vm_flags_t vm_flags);
> +pte_t pte_set_vma_features(pte_t pte, struct vm_area_struct *vma);
> +#endif
> +#endif /* CONFIG_MMU */
> +
>  /*
>   * Architecture PAGE_KERNEL_* fallbacks
>   *
> diff --git a/mm/memory.c b/mm/memory.c
> index 45442d9a4f52..6daa28614327 100644
> --- a/mm/memory.c
> +++ b/mm/memory.c
> @@ -772,7 +772,8 @@ copy_one_pte(struct mm_struct *dst_mm, struct mm_struct *src_mm,
>  	 * If it's a COW mapping, write protect it both
>  	 * in the parent and the child
>  	 */
> -	if (is_cow_mapping(vm_flags) && pte_write(pte)) {
> +	if ((is_cow_mapping(vm_flags) && pte_write(pte)) ||
> +	    arch_copy_pte_mapping(vm_flags)) {
>  		ptep_set_wrprotect(src_mm, addr, src_pte);
>  		pte = pte_wrprotect(pte);
>  	}
> @@ -2417,6 +2418,7 @@ static inline void wp_page_reuse(struct vm_fault *vmf)
>  	flush_cache_page(vma, vmf->address, pte_pfn(vmf->orig_pte));
>  	entry = pte_mkyoung(vmf->orig_pte);
>  	entry = maybe_mkwrite(pte_mkdirty(entry), vma);
> +	entry = pte_set_vma_features(entry, vma);
>  	if (ptep_set_access_flags(vma, vmf->address, vmf->pte, entry, 1))
>  		update_mmu_cache(vma, vmf->address, vmf->pte);
>  	pte_unmap_unlock(vmf->pte, vmf->ptl);
> @@ -2504,6 +2506,7 @@ static vm_fault_t wp_page_copy(struct vm_fault *vmf)
>  		flush_cache_page(vma, vmf->address, pte_pfn(vmf->orig_pte));
>  		entry = mk_pte(new_page, vma->vm_page_prot);
>  		entry = maybe_mkwrite(pte_mkdirty(entry), vma);
> +		entry = pte_set_vma_features(entry, vma);
>  		/*
>  		 * Clear the pte entry and flush it first, before updating the
>  		 * pte with the new entry. This will avoid a race condition
> @@ -3023,6 +3026,7 @@ vm_fault_t do_swap_page(struct vm_fault *vmf)
>  	pte = mk_pte(page, vma->vm_page_prot);
>  	if ((vmf->flags & FAULT_FLAG_WRITE) && reuse_swap_page(page, NULL)) {
>  		pte = maybe_mkwrite(pte_mkdirty(pte), vma);
> +		pte = pte_set_vma_features(pte, vma);
>  		vmf->flags &= ~FAULT_FLAG_WRITE;
>  		ret |= VM_FAULT_WRITE;
>  		exclusive = RMAP_EXCLUSIVE;
> @@ -3165,6 +3169,7 @@ static vm_fault_t do_anonymous_page(struct vm_fault *vmf)
>  	entry = mk_pte(page, vma->vm_page_prot);
>  	if (vma->vm_flags & VM_WRITE)
>  		entry = pte_mkwrite(pte_mkdirty(entry));
> +	entry = pte_set_vma_features(entry, vma);
>  
>  	vmf->pte = pte_offset_map_lock(vma->vm_mm, vmf->pmd, vmf->address,
>  			&vmf->ptl);
> -- 
> 2.21.0
> 
>
Dave Hansen Feb. 27, 2020, 12:08 a.m. UTC | #2
> diff --git a/mm/memory.c b/mm/memory.c
> index 45442d9a4f52..6daa28614327 100644
> --- a/mm/memory.c
> +++ b/mm/memory.c
> @@ -772,7 +772,8 @@ copy_one_pte(struct mm_struct *dst_mm, struct mm_struct *src_mm,
>  	 * If it's a COW mapping, write protect it both
>  	 * in the parent and the child
>  	 */
> -	if (is_cow_mapping(vm_flags) && pte_write(pte)) {
> +	if ((is_cow_mapping(vm_flags) && pte_write(pte)) ||
> +	    arch_copy_pte_mapping(vm_flags)) {
>  		ptep_set_wrprotect(src_mm, addr, src_pte);
>  		pte = pte_wrprotect(pte);
>  	}

You have to modify this because pte_write()==0 for shadow stack PTEs, right?

Aren't shadow stack ptes *logically* writable, even if they don't have
the write bit set?  What would happen if we made pte_write()==1 for them?

> @@ -2417,6 +2418,7 @@ static inline void wp_page_reuse(struct vm_fault *vmf)
>  	flush_cache_page(vma, vmf->address, pte_pfn(vmf->orig_pte));
>  	entry = pte_mkyoung(vmf->orig_pte);
>  	entry = maybe_mkwrite(pte_mkdirty(entry), vma);
> +	entry = pte_set_vma_features(entry, vma);
>  	if (ptep_set_access_flags(vma, vmf->address, vmf->pte, entry, 1))
>  		update_mmu_cache(vma, vmf->address, vmf->pte);
>  	pte_unmap_unlock(vmf->pte, vmf->ptl);
> @@ -2504,6 +2506,7 @@ static vm_fault_t wp_page_copy(struct vm_fault *vmf)
>  		flush_cache_page(vma, vmf->address, pte_pfn(vmf->orig_pte));
>  		entry = mk_pte(new_page, vma->vm_page_prot);
>  		entry = maybe_mkwrite(pte_mkdirty(entry), vma);
> +		entry = pte_set_vma_features(entry, vma);
>  		/*
>  		 * Clear the pte entry and flush it first, before updating the
>  		 * pte with the new entry. This will avoid a race condition
> @@ -3023,6 +3026,7 @@ vm_fault_t do_swap_page(struct vm_fault *vmf)
>  	pte = mk_pte(page, vma->vm_page_prot);
>  	if ((vmf->flags & FAULT_FLAG_WRITE) && reuse_swap_page(page, NULL)) {
>  		pte = maybe_mkwrite(pte_mkdirty(pte), vma);
> +		pte = pte_set_vma_features(pte, vma);
>  		vmf->flags &= ~FAULT_FLAG_WRITE;
>  		ret |= VM_FAULT_WRITE;
>  		exclusive = RMAP_EXCLUSIVE;
> @@ -3165,6 +3169,7 @@ static vm_fault_t do_anonymous_page(struct vm_fault *vmf)
>  	entry = mk_pte(page, vma->vm_page_prot);
>  	if (vma->vm_flags & VM_WRITE)
>  		entry = pte_mkwrite(pte_mkdirty(entry));
> +	entry = pte_set_vma_features(entry, vma);
>  
>  	vmf->pte = pte_offset_map_lock(vma->vm_mm, vmf->pmd, vmf->address,
>  			&vmf->ptl);
> 

These seem wrong, or at best inconsistent with what's already done.

We don't need anything like pte_set_vma_features() today because we have
vma->vm_page_prot.  We could easily have done what you suggest here for
things like protection keys: ignore the pkey PTE bits until we create
the final PTE then shove them in there.

What are the bit patterns of the shadow stack bits that come out of
these sites?  Can they be represented in ->vm_page_prot?
Yu-cheng Yu March 5, 2020, 6:30 p.m. UTC | #3
On Tue, 2020-02-25 at 12:20 -0800, Kees Cook wrote:
> On Wed, Feb 05, 2020 at 10:19:22AM -0800, Yu-cheng Yu wrote:
> > When a task does fork(), its Shadow Stack (SHSTK) must be duplicated for
> > the child.  This patch implements a flow similar to copy-on-write of an
> > anonymous page, but for SHSTK.
> > 
> > A SHSTK PTE must be RO and Dirty.  This Dirty bit requirement is used to
> > effect the copying.  In copy_one_pte(), clear the Dirty bit from a SHSTK
> > PTE to cause a page fault upon the next SHSTK access.  At that time, fix
> > the PTE and copy/re-use the page.
> 
> Just to confirm, during the fork, it's really not a SHSTK for a moment
> (it's still RO, but not dirty). Can other racing threads muck this up,
> or is this bit removed only on the copied side?

In "[RFC PATCH v9 12/27] x86/mm: Modify ptep_set_wrprotect and
pmdp_set_wrprotect for _PAGE_DIRTY_SW", _PAGE_DIRTY_HW is changed to
_PAGE_DIRTY_SW with cmpxchg.  That prevents the race.

The hw dirty bit is removed from the original copy first.  The next shadow
stack access to the page causes copying.  The copied page gets the hw dirty
bit again.
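
The shape of that change is roughly the following (a sketch only, not the
exact code from 12/27; _PAGE_DIRTY_HW and _PAGE_DIRTY_SW are the hardware
and software dirty bits the series introduces):

	/*
	 * Sketch: write-protect and move the hardware dirty bit to the
	 * software dirty bit in one atomic step, so no other thread can
	 * observe a transient RO + hardware-dirty (shadow stack) PTE.
	 */
	static inline void ptep_set_wrprotect(struct mm_struct *mm,
					      unsigned long addr, pte_t *ptep)
	{
		pteval_t old, new;

		do {
			old = READ_ONCE(ptep->pte);
			new = old & ~(_PAGE_RW | _PAGE_DIRTY_HW);
			if (old & _PAGE_DIRTY_HW)
				new |= _PAGE_DIRTY_SW;
		} while (cmpxchg(&ptep->pte, old, new) != old);
	}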

Yu-cheng
Yu-cheng Yu April 7, 2020, 6:14 p.m. UTC | #4
On Wed, 2020-02-26 at 16:08 -0800, Dave Hansen wrote:
> > diff --git a/mm/memory.c b/mm/memory.c
> > index 45442d9a4f52..6daa28614327 100644
> > --- a/mm/memory.c
> > +++ b/mm/memory.c
> > @@ -772,7 +772,8 @@ copy_one_pte(struct mm_struct *dst_mm, struct mm_struct *src_mm,
> >  	 * If it's a COW mapping, write protect it both
> >  	 * in the parent and the child
> >  	 */
> > -	if (is_cow_mapping(vm_flags) && pte_write(pte)) {
> > +	if ((is_cow_mapping(vm_flags) && pte_write(pte)) ||
> > +	    arch_copy_pte_mapping(vm_flags)) {
> >  		ptep_set_wrprotect(src_mm, addr, src_pte);
> >  		pte = pte_wrprotect(pte);
> >  	}
> 
> You have to modify this because pte_write()==0 for shadow stack PTEs, right?
> 
> Aren't shadow stack ptes *logically* writable, even if they don't have
> the write bit set?  What would happen if we made pte_write()==1 for them?

Here the vm_flags needs to have VM_MAYWRITE, and the PTE needs to have
_PAGE_WRITE.  A shadow stack does not have either.

To fix checking vm_flags, what about adding a "arch_is_cow_mappping()" to the
generic is_cow_mapping()?

For the PTE, the check actually tries to determine whether the PTE is not
already in copy-on-write state, which would be:

	(!_PAGE_RW && !_PAGE_DIRTY_HW)

So what about making it pte_cow()?

	/*
	 * The PTE is in copy-on-write status.
	 */
	static inline int pte_cow(pte_t pte)
	{
		return !(pte_flags(pte) & (_PAGE_RW | _PAGE_DIRTY_HW));
	}
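
Putting the two together, a minimal sketch (the names are not final) could
look like:

	/* Generic side (sketch): let the architecture extend the COW test. */
	#ifndef arch_is_cow_mapping
	static inline bool arch_is_cow_mapping(vm_flags_t flags)
	{
		return false;
	}
	#endif

	static inline bool is_cow_mapping(vm_flags_t flags)
	{
		return ((flags & (VM_SHARED | VM_MAYWRITE)) == VM_MAYWRITE) ||
		       arch_is_cow_mapping(flags);
	}

	/* x86 side (sketch): shadow stack VMAs get the COW treatment too. */
	#define arch_is_cow_mapping arch_is_cow_mapping
	static inline bool arch_is_cow_mapping(vm_flags_t flags)
	{
		return flags & VM_SHSTK;
	}

The PTE side of the copy_one_pte() check would then use !pte_cow(pte), or
stay pte_write(pte) if pte_write() is changed as you suggest.
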
> 
> > @@ -2417,6 +2418,7 @@ static inline void wp_page_reuse(struct vm_fault *vmf)
> >  	flush_cache_page(vma, vmf->address, pte_pfn(vmf->orig_pte));
> >  	entry = pte_mkyoung(vmf->orig_pte);
> >  	entry = maybe_mkwrite(pte_mkdirty(entry), vma);
> > +	entry = pte_set_vma_features(entry, vma);
> >  	if (ptep_set_access_flags(vma, vmf->address, vmf->pte, entry, 1))
> >  		update_mmu_cache(vma, vmf->address, vmf->pte);
> >  	pte_unmap_unlock(vmf->pte, vmf->ptl);
> > @@ -2504,6 +2506,7 @@ static vm_fault_t wp_page_copy(struct vm_fault *vmf)
> >  		flush_cache_page(vma, vmf->address, pte_pfn(vmf->orig_pte));
> >  		entry = mk_pte(new_page, vma->vm_page_prot);
> >  		entry = maybe_mkwrite(pte_mkdirty(entry), vma);
> > +		entry = pte_set_vma_features(entry, vma);
> >  		/*
> >  		 * Clear the pte entry and flush it first, before updating the
> >  		 * pte with the new entry. This will avoid a race condition
> > @@ -3023,6 +3026,7 @@ vm_fault_t do_swap_page(struct vm_fault *vmf)
> >  	pte = mk_pte(page, vma->vm_page_prot);
> >  	if ((vmf->flags & FAULT_FLAG_WRITE) && reuse_swap_page(page, NULL)) {
> >  		pte = maybe_mkwrite(pte_mkdirty(pte), vma);
> > +		pte = pte_set_vma_features(pte, vma);
> >  		vmf->flags &= ~FAULT_FLAG_WRITE;
> >  		ret |= VM_FAULT_WRITE;
> >  		exclusive = RMAP_EXCLUSIVE;
> > @@ -3165,6 +3169,7 @@ static vm_fault_t do_anonymous_page(struct vm_fault *vmf)
> >  	entry = mk_pte(page, vma->vm_page_prot);
> >  	if (vma->vm_flags & VM_WRITE)
> >  		entry = pte_mkwrite(pte_mkdirty(entry));
> > +	entry = pte_set_vma_features(entry, vma);
> >  
> >  	vmf->pte = pte_offset_map_lock(vma->vm_mm, vmf->pmd, vmf->address,
> >  			&vmf->ptl);
> > 
> 
> These seem wrong, or at best inconsistent with what's already done.
> 
> We don't need anything like pte_set_vma_features() today because we have
> vma->vm_page_prot.  We could easily have done what you suggest here for
> things like protection keys: ignore the pkey PTE bits until we create
> the final PTE then shove them in there.
> 
> What are the bit patterns of the shadow stack bits that come out of
> these sites?  Can they be represented in ->vm_page_prot?

Yes, we can put _PAGE_DIRTY_HW in vm_page_prot.  Also set the bit in
ptep_set_access_flags() for shadow stack PTEs.
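
For example (a sketch; it reuses the arch_vm_get_page_prot() hook that
protection keys already use, and assumes the VM_SHSTK and _PAGE_DIRTY_HW
names from this series):

	/*
	 * Sketch: fold the shadow stack dirty bit into the protections
	 * derived from the VMA flags, the same way the pkey bits are
	 * folded in today, so mk_pte(page, vma->vm_page_prot) already
	 * yields an RO + dirty (shadow stack) PTE for VM_SHSTK VMAs.
	 */
	#define arch_vm_get_page_prot(vm_flags) __pgprot(			\
			((vm_flags) & VM_PKEY_BIT0 ? _PAGE_PKEY_BIT0 : 0) |	\
			((vm_flags) & VM_PKEY_BIT1 ? _PAGE_PKEY_BIT1 : 0) |	\
			((vm_flags) & VM_PKEY_BIT2 ? _PAGE_PKEY_BIT2 : 0) |	\
			((vm_flags) & VM_PKEY_BIT3 ? _PAGE_PKEY_BIT3 : 0) |	\
			((vm_flags) & VM_SHSTK     ? _PAGE_DIRTY_HW  : 0))

Together with the ptep_set_access_flags() change, the pte_set_vma_features()
calls in this patch could then go away.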
Dave Hansen April 7, 2020, 10:21 p.m. UTC | #5
On 4/7/20 11:14 AM, Yu-cheng Yu wrote:
> On Wed, 2020-02-26 at 16:08 -0800, Dave Hansen wrote:
>>> diff --git a/mm/memory.c b/mm/memory.c
>>> index 45442d9a4f52..6daa28614327 100644
>>> --- a/mm/memory.c
>>> +++ b/mm/memory.c
>>> @@ -772,7 +772,8 @@ copy_one_pte(struct mm_struct *dst_mm, struct mm_struct *src_mm,
>>>  	 * If it's a COW mapping, write protect it both
>>>  	 * in the parent and the child
>>>  	 */
>>> -	if (is_cow_mapping(vm_flags) && pte_write(pte)) {
>>> +	if ((is_cow_mapping(vm_flags) && pte_write(pte)) ||
>>> +	    arch_copy_pte_mapping(vm_flags)) {
>>>  		ptep_set_wrprotect(src_mm, addr, src_pte);
>>>  		pte = pte_wrprotect(pte);
>>>  	}
>>
>> You have to modify this because pte_write()==0 for shadow stack PTEs, right?
>>
>> Aren't shadow stack ptes *logically* writable, even if they don't have
>> the write bit set?  What would happen if we made pte_write()==1 for them?
> 
> Here the vm_flags needs to have VM_MAYWRITE, and the PTE needs to have
> _PAGE_WRITE.  A shadow stack does not have either.

I literally mean taking pte_write(), and doing something like this:

static inline int pte_write(pte_t pte)
{
	if (pte_present(pte) && pte_is_shadow_stack(pte))
		return 1;

	return pte_flags(pte) & _PAGE_RW;
}

Then if is_cow_mapping() returns true for shadow stack VMAs, the above
code doesn't need to change.

> To fix checking vm_flags, what about adding a "arch_is_cow_mappping()" to the
> generic is_cow_mapping()?

That makes good sense to me.

> For the PTE, the check actually tries to determine whether the PTE is not
> already in copy-on-write state, which would be:
> 
> 	(!_PAGE_RW && !_PAGE_DIRTY_HW)
> 
> So what about making it pte_cow()?
> 
> 	/*
> 	 * The PTE is in copy-on-write status.
> 	 */
> 	static inline int pte_cow(pte_t pte)
> 	{
> 		return !(pte_flags(pte) & (_PAGE_RW | _PAGE_DIRTY_HW));
> 	}

... with appropriate comments that seems fine to me.
Yu-cheng Yu April 8, 2020, 6:18 p.m. UTC | #6
On Tue, 2020-04-07 at 15:21 -0700, Dave Hansen wrote:
> On 4/7/20 11:14 AM, Yu-cheng Yu wrote:
> > On Wed, 2020-02-26 at 16:08 -0800, Dave Hansen wrote:
> > > > diff --git a/mm/memory.c b/mm/memory.c
> > > > index 45442d9a4f52..6daa28614327 100644
> > > > --- a/mm/memory.c
> > > > +++ b/mm/memory.c
> > > > @@ -772,7 +772,8 @@ copy_one_pte(struct mm_struct *dst_mm, struct mm_struct *src_mm,
> > > >  	 * If it's a COW mapping, write protect it both
> > > >  	 * in the parent and the child
> > > >  	 */
> > > > -	if (is_cow_mapping(vm_flags) && pte_write(pte)) {
> > > > +	if ((is_cow_mapping(vm_flags) && pte_write(pte)) ||
> > > > +	    arch_copy_pte_mapping(vm_flags)) {
> > > >  		ptep_set_wrprotect(src_mm, addr, src_pte);
> > > >  		pte = pte_wrprotect(pte);
> > > >  	}
> > > 
> > > You have to modify this because pte_write()==0 for shadow stack PTEs, right?
> > > 
> > > Aren't shadow stack ptes *logically* writable, even if they don't have
> > > the write bit set?  What would happen if we made pte_write()==1 for them?
> > 
> > Here the vm_flags needs to have VM_MAYWRITE, and the PTE needs to have
> > _PAGE_WRITE.  A shadow stack does not have either.
> 
> I literally mean taking pte_write(), and doing something like this:
> 
> static inline int pte_write(pte_t pte)
> {
> 	if (pte_present(pte) && pte_is_shadow_stack(pte))
> 		return 1;
> 
> 	return pte_flags(pte) & _PAGE_RW;
> }
> 
> Then if is_cow_mapping() returns true for shadow stack VMAs, the above
> code doesn't need to change.

One benefit of this change is that can_follow_write_pte() does not need any
changes: a shadow stack PTE that is not in copy-on-write state passes
pte_write().

However, there are places that use pte_write() to determine if the PTE can be
made _PAGE_RW.  One such case is in change_pte_range(), where

	preserve_write = prot_numa && pte_write(oldpte);

and later,

	if (preserve_write)
		ptent = pte_mk_savedwrite(ptent);

Currently, other checks prevent shadow stack PTEs from becoming _PAGE_RW, but
I am wondering whether this could be overlooked later when the code is
modified.

Another potential issue is that, with pte_write()==1, a shadow stack PTE would
be made a write migration entry and could later accidentally become _PAGE_RW.
I think the page fault handler would catch that, but I still want to call it
out in case I missed anything.
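
If pte_write() is changed that way, the prot_numa path could make the intent
explicit with something like this (illustrative only; arch_shadow_stack_mapping()
is a made-up helper name):

	/*
	 * Illustrative only: with pte_write() == 1 for shadow stack PTEs,
	 * keep prot_numa from treating them as "saved write" pages.
	 */
	preserve_write = prot_numa && pte_write(oldpte) &&
			 !arch_shadow_stack_mapping(vma->vm_flags);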

Yu-cheng
Patch

diff --git a/arch/x86/mm/pgtable.c b/arch/x86/mm/pgtable.c
index 7bd2c3a52297..2eb33794c08d 100644
--- a/arch/x86/mm/pgtable.c
+++ b/arch/x86/mm/pgtable.c
@@ -872,3 +872,18 @@  int pmd_free_pte_page(pmd_t *pmd, unsigned long addr)
 
 #endif /* CONFIG_X86_64 */
 #endif	/* CONFIG_HAVE_ARCH_HUGE_VMAP */
+
+#ifdef CONFIG_X86_INTEL_SHADOW_STACK_USER
+inline bool arch_copy_pte_mapping(vm_flags_t vm_flags)
+{
+	return (vm_flags & VM_SHSTK);
+}
+
+inline pte_t pte_set_vma_features(pte_t pte, struct vm_area_struct *vma)
+{
+	if (vma->vm_flags & VM_SHSTK)
+		return pte_mkdirty_shstk(pte);
+	else
+		return pte;
+}
+#endif /* CONFIG_X86_INTEL_SHADOW_STACK_USER */
diff --git a/include/asm-generic/pgtable.h b/include/asm-generic/pgtable.h
index 798ea36a0549..9cb2f9ba5895 100644
--- a/include/asm-generic/pgtable.h
+++ b/include/asm-generic/pgtable.h
@@ -1190,6 +1190,23 @@  static inline bool arch_has_pfn_modify_check(void)
 }
 #endif /* !_HAVE_ARCH_PFN_MODIFY_ALLOWED */
 
+#ifdef CONFIG_MMU
+#ifndef CONFIG_ARCH_HAS_SHSTK
+static inline bool arch_copy_pte_mapping(vm_flags_t vm_flags)
+{
+	return false;
+}
+
+static inline pte_t pte_set_vma_features(pte_t pte, struct vm_area_struct *vma)
+{
+	return pte;
+}
+#else
+bool arch_copy_pte_mapping(vm_flags_t vm_flags);
+pte_t pte_set_vma_features(pte_t pte, struct vm_area_struct *vma);
+#endif
+#endif /* CONFIG_MMU */
+
 /*
  * Architecture PAGE_KERNEL_* fallbacks
  *
diff --git a/mm/memory.c b/mm/memory.c
index 45442d9a4f52..6daa28614327 100644
--- a/mm/memory.c
+++ b/mm/memory.c
@@ -772,7 +772,8 @@  copy_one_pte(struct mm_struct *dst_mm, struct mm_struct *src_mm,
 	 * If it's a COW mapping, write protect it both
 	 * in the parent and the child
 	 */
-	if (is_cow_mapping(vm_flags) && pte_write(pte)) {
+	if ((is_cow_mapping(vm_flags) && pte_write(pte)) ||
+	    arch_copy_pte_mapping(vm_flags)) {
 		ptep_set_wrprotect(src_mm, addr, src_pte);
 		pte = pte_wrprotect(pte);
 	}
@@ -2417,6 +2418,7 @@  static inline void wp_page_reuse(struct vm_fault *vmf)
 	flush_cache_page(vma, vmf->address, pte_pfn(vmf->orig_pte));
 	entry = pte_mkyoung(vmf->orig_pte);
 	entry = maybe_mkwrite(pte_mkdirty(entry), vma);
+	entry = pte_set_vma_features(entry, vma);
 	if (ptep_set_access_flags(vma, vmf->address, vmf->pte, entry, 1))
 		update_mmu_cache(vma, vmf->address, vmf->pte);
 	pte_unmap_unlock(vmf->pte, vmf->ptl);
@@ -2504,6 +2506,7 @@  static vm_fault_t wp_page_copy(struct vm_fault *vmf)
 		flush_cache_page(vma, vmf->address, pte_pfn(vmf->orig_pte));
 		entry = mk_pte(new_page, vma->vm_page_prot);
 		entry = maybe_mkwrite(pte_mkdirty(entry), vma);
+		entry = pte_set_vma_features(entry, vma);
 		/*
 		 * Clear the pte entry and flush it first, before updating the
 		 * pte with the new entry. This will avoid a race condition
@@ -3023,6 +3026,7 @@  vm_fault_t do_swap_page(struct vm_fault *vmf)
 	pte = mk_pte(page, vma->vm_page_prot);
 	if ((vmf->flags & FAULT_FLAG_WRITE) && reuse_swap_page(page, NULL)) {
 		pte = maybe_mkwrite(pte_mkdirty(pte), vma);
+		pte = pte_set_vma_features(pte, vma);
 		vmf->flags &= ~FAULT_FLAG_WRITE;
 		ret |= VM_FAULT_WRITE;
 		exclusive = RMAP_EXCLUSIVE;
@@ -3165,6 +3169,7 @@  static vm_fault_t do_anonymous_page(struct vm_fault *vmf)
 	entry = mk_pte(page, vma->vm_page_prot);
 	if (vma->vm_flags & VM_WRITE)
 		entry = pte_mkwrite(pte_mkdirty(entry));
+	entry = pte_set_vma_features(entry, vma);
 
 	vmf->pte = pte_offset_map_lock(vma->vm_mm, vmf->pmd, vmf->address,
 			&vmf->ptl);