
[v5,4/9] arm64: hugetlb: Add break-before-make logic for contiguous entries

Message ID 20170802094904.27749-5-punit.agrawal@arm.com
State New, archived

Commit Message

Punit Agrawal Aug. 2, 2017, 9:48 a.m. UTC
From: Steve Capper <steve.capper@arm.com>

It has become apparent that one has to take special care when modifying
attributes of memory mappings that employ the contiguous bit.

Both the requirement and the architecturally correct "Break-Before-Make"
technique for updating contiguous entries are described in
ARM DDI 0487A.k_iss10775, "Misprogramming of the Contiguous bit",
page D4-1762.

The huge pte accessors currently replace the attributes of contiguous
pte entries in place and can thus, on certain platforms, lead to TLB
conflict aborts or even erroneous results being returned from TLB
lookups.

This patch adds a helper function get_clear_flush(.) that clears a
contiguous set of entries and returns the head pte (whilst taking
care to retain dirty bit information that could have been modified
by DBM). A TLB invalidation is then performed to ensure that there
is no possibility of multiple TLB entries being present for the
same region.
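
For reference, the update sequence this boils down to is, in outline
(an illustrative sketch using the names from the patch below):

	/*
	 * Updating a contiguous set of N huge pte entries:
	 *
	 *  1. Break: clear all N entries, folding any hardware-set
	 *     dirty bits into the saved head pte.
	 *  2. Flush: invalidate the TLB for the whole range so that
	 *     no stale or conflicting entries can remain (steps 1-2
	 *     are what get_clear_flush() performs).
	 *  3. Make: write the N new entries with set_pte_at().
	 */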

Cc: David Woods <dwoods@mellanox.com>
Signed-off-by: Steve Capper <steve.capper@arm.com>
(Fixed indentation and cleaned up some comments)
Signed-off-by: Punit Agrawal <punit.agrawal@arm.com>
Reviewed-by: Mark Rutland <mark.rutland@arm.com>
---
 arch/arm64/mm/hugetlbpage.c | 81 +++++++++++++++++++++++++++++++++++----------
 1 file changed, 64 insertions(+), 17 deletions(-)

Comments

Catalin Marinas Aug. 7, 2017, 5:19 p.m. UTC | #1
On Wed, Aug 02, 2017 at 10:48:59AM +0100, Punit Agrawal wrote:
> diff --git a/arch/arm64/mm/hugetlbpage.c b/arch/arm64/mm/hugetlbpage.c
> index 08deed7c71f0..f2c976464f39 100644
> --- a/arch/arm64/mm/hugetlbpage.c
> +++ b/arch/arm64/mm/hugetlbpage.c
> @@ -68,6 +68,47 @@ static int find_num_contig(struct mm_struct *mm, unsigned long addr,
>  	return CONT_PTES;
>  }
>  
> +/*
> + * Changing some bits of contiguous entries requires us to follow a
> + * Break-Before-Make approach, breaking the whole contiguous set
> + * before we can change any entries. See ARM DDI 0487A.k_iss10775,
> + * "Misprogramming of the Contiguous bit", page D4-1762.
> + *
> + * This helper performs the break step.
> + */
> +static pte_t get_clear_flush(struct mm_struct *mm,
> +			     unsigned long addr,
> +			     pte_t *ptep,
> +			     unsigned long pgsize,
> +			     unsigned long ncontig)
> +{
> +	unsigned long i, saddr = addr;
> +	struct vm_area_struct vma = { .vm_mm = mm };
> +	pte_t orig_pte = huge_ptep_get(ptep);
> +
> +	/*
> +	 * If we already have a faulting entry then we don't need
> +	 * to break before make (there won't be a tlb entry cached).
> +	 */
> +	if (!pte_present(orig_pte))
> +		return orig_pte;
> +
> +	for (i = 0; i < ncontig; i++, addr += pgsize, ptep++) {
> +		pte_t pte = ptep_get_and_clear(mm, addr, ptep);
> +
> +		/*
> +		 * If HW_AFDBM is enabled, then the HW could turn on
> +		 * the dirty bit for any page in the set, so check
> +		 * them all.  All hugetlb entries are already young.
> +		 */
> +		if (IS_ENABLED(CONFIG_ARM64_HW_AFDBM) && pte_dirty(pte))
> +			orig_pte = pte_mkdirty(orig_pte);
> +	}
> +
> +	flush_tlb_range(&vma, saddr, addr);
> +	return orig_pte;
> +}
> +
>  void set_huge_pte_at(struct mm_struct *mm, unsigned long addr,
>  			    pte_t *ptep, pte_t pte)
>  {
> @@ -93,6 +134,8 @@ void set_huge_pte_at(struct mm_struct *mm, unsigned long addr,
>  	dpfn = pgsize >> PAGE_SHIFT;
>  	hugeprot = pte_pgprot(pte);
>  
> +	get_clear_flush(mm, addr, ptep, pgsize, ncontig);
> +
>  	for (i = 0; i < ncontig; i++, ptep++, addr += pgsize, pfn += dpfn) {
>  		pr_debug("%s: set pte %p to 0x%llx\n", __func__, ptep,
>  			 pte_val(pfn_pte(pfn, hugeprot)));

Is there any risk of the huge pte being accessed (from user space on
another CPU) in the short break-before-make window? Not that we can do
much about it but just checking.

BTW, it seems a bit overkill to use ptep_get_and_clear() (via
get_clear_flush) when we just want to zero the entries. Probably not
much overhead though.

> @@ -222,6 +256,7 @@ int huge_ptep_set_access_flags(struct vm_area_struct *vma,
>  	int ncontig, i, changed = 0;
>  	size_t pgsize = 0;
>  	unsigned long pfn = pte_pfn(pte), dpfn;
> +	pte_t orig_pte;
>  	pgprot_t hugeprot;
>  
>  	if (!pte_cont(pte))
> @@ -231,10 +266,12 @@ int huge_ptep_set_access_flags(struct vm_area_struct *vma,
>  	dpfn = pgsize >> PAGE_SHIFT;
>  	hugeprot = pte_pgprot(pte);
>  
> -	for (i = 0; i < ncontig; i++, ptep++, addr += pgsize, pfn += dpfn) {
> -		changed |= ptep_set_access_flags(vma, addr, ptep,
> -				pfn_pte(pfn, hugeprot), dirty);
> -	}
> +	orig_pte = get_clear_flush(vma->vm_mm, addr, ptep, pgsize, ncontig);
> +	if (!pte_same(orig_pte, pte))
> +		changed = 1;
> +
> +	for (i = 0; i < ncontig; i++, ptep++, addr += pgsize, pfn += dpfn)
> +		set_pte_at(vma->vm_mm, addr, ptep, pfn_pte(pfn, hugeprot));
>  
>  	return changed;
>  }

If hugeprot isn't dirty but orig_pte became dirty, it looks like we just
drop such information from the new pte.

Same comment here about the window. huge_ptep_set_access_flags() is
called on a present (huge) pte and we briefly make it invalid. Can the
mm subsystem cope with a fault on another CPU here? Same for the
huge_ptep_set_wrprotect() below.

> @@ -244,6 +281,9 @@ void huge_ptep_set_wrprotect(struct mm_struct *mm,
>  {
>  	int ncontig, i;
>  	size_t pgsize;
> +	pte_t pte = pte_wrprotect(huge_ptep_get(ptep)), orig_pte;
> +	unsigned long pfn = pte_pfn(pte), dpfn;
> +	pgprot_t hugeprot;
>  
>  	if (!pte_cont(*ptep)) {
>  		ptep_set_wrprotect(mm, addr, ptep);
> @@ -251,8 +291,15 @@ void huge_ptep_set_wrprotect(struct mm_struct *mm,
>  	}
>  
>  	ncontig = find_num_contig(mm, addr, ptep, &pgsize);
> -	for (i = 0; i < ncontig; i++, ptep++, addr += pgsize)
> -		ptep_set_wrprotect(mm, addr, ptep);
> +	dpfn = pgsize >> PAGE_SHIFT;
> +
> +	orig_pte = get_clear_flush(mm, addr, ptep, pgsize, ncontig);
> +	if (pte_dirty(orig_pte))
> +		pte = pte_mkdirty(pte);
> +
> +	hugeprot = pte_pgprot(pte);
> +	for (i = 0; i < ncontig; i++, ptep++, addr += pgsize, pfn += dpfn)
> +		set_pte_at(mm, addr, ptep, pfn_pte(pfn, hugeprot));
>  }
>  
>  void huge_ptep_clear_flush(struct vm_area_struct *vma,
Steve Capper Aug. 9, 2017, 12:13 p.m. UTC | #2
Hi Catalin,

On Mon, Aug 07, 2017 at 06:19:17PM +0100, Catalin Marinas wrote:
> On Wed, Aug 02, 2017 at 10:48:59AM +0100, Punit Agrawal wrote:
> > diff --git a/arch/arm64/mm/hugetlbpage.c b/arch/arm64/mm/hugetlbpage.c
> > index 08deed7c71f0..f2c976464f39 100644
> > --- a/arch/arm64/mm/hugetlbpage.c
> > +++ b/arch/arm64/mm/hugetlbpage.c
> > @@ -68,6 +68,47 @@ static int find_num_contig(struct mm_struct *mm, unsigned long addr,
> >  	return CONT_PTES;
> >  }
> >  
> > +/*
> > + * Changing some bits of contiguous entries requires us to follow a
> > + * Break-Before-Make approach, breaking the whole contiguous set
> > + * before we can change any entries. See ARM DDI 0487A.k_iss10775,
> > + * "Misprogramming of the Contiguous bit", page D4-1762.
> > + *
> > + * This helper performs the break step.
> > + */
> > +static pte_t get_clear_flush(struct mm_struct *mm,
> > +			     unsigned long addr,
> > +			     pte_t *ptep,
> > +			     unsigned long pgsize,
> > +			     unsigned long ncontig)
> > +{
> > +	unsigned long i, saddr = addr;
> > +	struct vm_area_struct vma = { .vm_mm = mm };
> > +	pte_t orig_pte = huge_ptep_get(ptep);
> > +
> > +	/*
> > +	 * If we already have a faulting entry then we don't need
> > +	 * to break before make (there won't be a tlb entry cached).
> > +	 */
> > +	if (!pte_present(orig_pte))
> > +		return orig_pte;
> > +
> > +	for (i = 0; i < ncontig; i++, addr += pgsize, ptep++) {
> > +		pte_t pte = ptep_get_and_clear(mm, addr, ptep);
> > +
> > +		/*
> > +		 * If HW_AFDBM is enabled, then the HW could turn on
> > +		 * the dirty bit for any page in the set, so check
> > +		 * them all.  All hugetlb entries are already young.
> > +		 */
> > +		if (IS_ENABLED(CONFIG_ARM64_HW_AFDBM) && pte_dirty(pte))
> > +			orig_pte = pte_mkdirty(orig_pte);
> > +	}
> > +
> > +	flush_tlb_range(&vma, saddr, addr);
> > +	return orig_pte;
> > +}
> > +
> >  void set_huge_pte_at(struct mm_struct *mm, unsigned long addr,
> >  			    pte_t *ptep, pte_t pte)
> >  {
> > @@ -93,6 +134,8 @@ void set_huge_pte_at(struct mm_struct *mm, unsigned long addr,
> >  	dpfn = pgsize >> PAGE_SHIFT;
> >  	hugeprot = pte_pgprot(pte);
> >  
> > +	get_clear_flush(mm, addr, ptep, pgsize, ncontig);
> > +
> >  	for (i = 0; i < ncontig; i++, ptep++, addr += pgsize, pfn += dpfn) {
> >  		pr_debug("%s: set pte %p to 0x%llx\n", __func__, ptep,
> >  			 pte_val(pfn_pte(pfn, hugeprot)));
> 
> Is there any risk of the huge pte being accessed (from user space on
> another CPU) in the short break-before-make window? Not that we can do
> much about it but just checking.

IIUC we're protected by the huge_pte_lock(.).

> 
> BTW, it seems a bit overkill to use ptep_get_and_clear() (via
> get_clear_flush) when we just want to zero the entries. Probably not
> much overhead though.
>

I think we need the TLB invalidate here to ensure there's zero possibility of
conflicting TLB entries being in play.
 
> > @@ -222,6 +256,7 @@ int huge_ptep_set_access_flags(struct vm_area_struct *vma,
> >  	int ncontig, i, changed = 0;
> >  	size_t pgsize = 0;
> >  	unsigned long pfn = pte_pfn(pte), dpfn;
> > +	pte_t orig_pte;
> >  	pgprot_t hugeprot;
> >  
> >  	if (!pte_cont(pte))
> > @@ -231,10 +266,12 @@ int huge_ptep_set_access_flags(struct vm_area_struct *vma,
> >  	dpfn = pgsize >> PAGE_SHIFT;
> >  	hugeprot = pte_pgprot(pte);
> >  
> > -	for (i = 0; i < ncontig; i++, ptep++, addr += pgsize, pfn += dpfn) {
> > -		changed |= ptep_set_access_flags(vma, addr, ptep,
> > -				pfn_pte(pfn, hugeprot), dirty);
> > -	}
> > +	orig_pte = get_clear_flush(vma->vm_mm, addr, ptep, pgsize, ncontig);
> > +	if (!pte_same(orig_pte, pte))
> > +		changed = 1;
> > +
> > +	for (i = 0; i < ncontig; i++, ptep++, addr += pgsize, pfn += dpfn)
> > +		set_pte_at(vma->vm_mm, addr, ptep, pfn_pte(pfn, hugeprot));
> >  
> >  	return changed;
> >  }
> 
> If hugeprot isn't dirty but orig_pte became dirty, it looks like we just
> drop such information from the new pte.

Ahhh... okay, I will have a think about this, thanks!

> 
> Same comment here about the window. huge_ptep_set_access_flags() is
> called on a present (huge) pte and we briefly make it invalid. Can the
> mm subsystem cope with a fault on another CPU here? Same for the
> huge_ptep_set_wrprotect() below.

I think the fault handler will wait for the huge_pte_lock(.).

> 
> > @@ -244,6 +281,9 @@ void huge_ptep_set_wrprotect(struct mm_struct *mm,
> >  {
> >  	int ncontig, i;
> >  	size_t pgsize;
> > +	pte_t pte = pte_wrprotect(huge_ptep_get(ptep)), orig_pte;
> > +	unsigned long pfn = pte_pfn(pte), dpfn;
> > +	pgprot_t hugeprot;
> >  
> >  	if (!pte_cont(*ptep)) {
> >  		ptep_set_wrprotect(mm, addr, ptep);
> > @@ -251,8 +291,15 @@ void huge_ptep_set_wrprotect(struct mm_struct *mm,
> >  	}
> >  
> >  	ncontig = find_num_contig(mm, addr, ptep, &pgsize);
> > -	for (i = 0; i < ncontig; i++, ptep++, addr += pgsize)
> > -		ptep_set_wrprotect(mm, addr, ptep);
> > +	dpfn = pgsize >> PAGE_SHIFT;
> > +
> > +	orig_pte = get_clear_flush(mm, addr, ptep, pgsize, ncontig);
> > +	if (pte_dirty(orig_pte))
> > +		pte = pte_mkdirty(pte);
> > +
> > +	hugeprot = pte_pgprot(pte);
> > +	for (i = 0; i < ncontig; i++, ptep++, addr += pgsize, pfn += dpfn)
> > +		set_pte_at(mm, addr, ptep, pfn_pte(pfn, hugeprot));
> >  }
> >  
> >  void huge_ptep_clear_flush(struct vm_area_struct *vma,
> 

Cheers, Catalin.
Punit Agrawal Aug. 9, 2017, 1:29 p.m. UTC | #3
Catalin Marinas <catalin.marinas@arm.com> writes:

> On Wed, Aug 02, 2017 at 10:48:59AM +0100, Punit Agrawal wrote:
>> diff --git a/arch/arm64/mm/hugetlbpage.c b/arch/arm64/mm/hugetlbpage.c
>> index 08deed7c71f0..f2c976464f39 100644
>> --- a/arch/arm64/mm/hugetlbpage.c
>> +++ b/arch/arm64/mm/hugetlbpage.c
>> @@ -68,6 +68,47 @@ static int find_num_contig(struct mm_struct *mm, unsigned long addr,
>>  	return CONT_PTES;
>>  }
>>  
>> +/*
>> + * Changing some bits of contiguous entries requires us to follow a
>> + * Break-Before-Make approach, breaking the whole contiguous set
>> + * before we can change any entries. See ARM DDI 0487A.k_iss10775,
>> + * "Misprogramming of the Contiguous bit", page D4-1762.
>> + *
>> + * This helper performs the break step.
>> + */
>> +static pte_t get_clear_flush(struct mm_struct *mm,
>> +			     unsigned long addr,
>> +			     pte_t *ptep,
>> +			     unsigned long pgsize,
>> +			     unsigned long ncontig)
>> +{
>> +	unsigned long i, saddr = addr;
>> +	struct vm_area_struct vma = { .vm_mm = mm };
>> +	pte_t orig_pte = huge_ptep_get(ptep);
>> +
>> +	/*
>> +	 * If we already have a faulting entry then we don't need
>> +	 * to break before make (there won't be a tlb entry cached).
>> +	 */
>> +	if (!pte_present(orig_pte))
>> +		return orig_pte;
>> +
>> +	for (i = 0; i < ncontig; i++, addr += pgsize, ptep++) {
>> +		pte_t pte = ptep_get_and_clear(mm, addr, ptep);
>> +
>> +		/*
>> +		 * If HW_AFDBM is enabled, then the HW could turn on
>> +		 * the dirty bit for any page in the set, so check
>> +		 * them all.  All hugetlb entries are already young.
>> +		 */
>> +		if (IS_ENABLED(CONFIG_ARM64_HW_AFDBM) && pte_dirty(pte))
>> +			orig_pte = pte_mkdirty(orig_pte);
>> +	}
>> +
>> +	flush_tlb_range(&vma, saddr, addr);
>> +	return orig_pte;
>> +}
>> +
>>  void set_huge_pte_at(struct mm_struct *mm, unsigned long addr,
>>  			    pte_t *ptep, pte_t pte)
>>  {
>> @@ -93,6 +134,8 @@ void set_huge_pte_at(struct mm_struct *mm, unsigned long addr,
>>  	dpfn = pgsize >> PAGE_SHIFT;
>>  	hugeprot = pte_pgprot(pte);
>>  
>> +	get_clear_flush(mm, addr, ptep, pgsize, ncontig);
>> +
>>  	for (i = 0; i < ncontig; i++, ptep++, addr += pgsize, pfn += dpfn) {
>>  		pr_debug("%s: set pte %p to 0x%llx\n", __func__, ptep,
>>  			 pte_val(pfn_pte(pfn, hugeprot)));
>
> Is there any risk of the huge pte being accessed (from user space on
> another CPU) in the short break-before-make window? Not that we can do
> much about it but just checking.

The calls to set_huge_pte_at() are protected by a page table lock. If
a fault is taken on another CPU, we'll end up running the following
call sequence

hugetlb_fault()
--> hugetlb_no_page()

which, after acquiring the page table lock, checks whether the pte is
still none and backs out of the fault if it isn't.
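
Roughly, the back-out in hugetlb_no_page() looks like this (a
paraphrase of the generic mm/hugetlb.c logic of this era, not a
verbatim quote):

	ptl = huge_pte_lock(h, mm, ptep);
	/* pte was (re)installed while we waited for the lock? */
	if (!huge_pte_none(huge_ptep_get(ptep)))
		goto backout;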

>
> BTW, it seems a bit overkill to use ptep_get_and_clear() (via
> get_clear_flush) when we just want to zero the entries. Probably not
> much overhead though.

We missed converting huge_ptep_clear_flush() to follow the
break-before-make requirement. I'll add a helper to zero out the
entries and flush the range, which can be used here and in
huge_ptep_clear_flush() as well.
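
Such a helper -- call it clear_flush(), name hypothetical -- might
look like the following (a sketch derived from get_clear_flush()
above; the version actually posted may differ):

static void clear_flush(struct mm_struct *mm,
			unsigned long addr,
			pte_t *ptep,
			unsigned long pgsize,
			unsigned long ncontig)
{
	struct vm_area_struct vma = { .vm_mm = mm };
	unsigned long i, saddr = addr;

	/* Zero the entries; no need to read back the old values. */
	for (i = 0; i < ncontig; i++, addr += pgsize, ptep++)
		pte_clear(mm, addr, ptep);

	/* Drop any cached (possibly conflicting) TLB entries. */
	flush_tlb_range(&vma, saddr, addr);
}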

>
>> @@ -222,6 +256,7 @@ int huge_ptep_set_access_flags(struct vm_area_struct *vma,
>>  	int ncontig, i, changed = 0;
>>  	size_t pgsize = 0;
>>  	unsigned long pfn = pte_pfn(pte), dpfn;
>> +	pte_t orig_pte;
>>  	pgprot_t hugeprot;
>>  
>>  	if (!pte_cont(pte))
>> @@ -231,10 +266,12 @@ int huge_ptep_set_access_flags(struct vm_area_struct *vma,
>>  	dpfn = pgsize >> PAGE_SHIFT;
>>  	hugeprot = pte_pgprot(pte);
>>  
>> -	for (i = 0; i < ncontig; i++, ptep++, addr += pgsize, pfn += dpfn) {
>> -		changed |= ptep_set_access_flags(vma, addr, ptep,
>> -				pfn_pte(pfn, hugeprot), dirty);
>> -	}
>> +	orig_pte = get_clear_flush(vma->vm_mm, addr, ptep, pgsize, ncontig);
>> +	if (!pte_same(orig_pte, pte))
>> +		changed = 1;
>> +
>> +	for (i = 0; i < ncontig; i++, ptep++, addr += pgsize, pfn += dpfn)
>> +		set_pte_at(vma->vm_mm, addr, ptep, pfn_pte(pfn, hugeprot));
>>  
>>  	return changed;
>>  }
>
> If hugeprot isn't dirty but orig_pte became dirty, it looks like we just
> drop such information from the new pte.

We can avoid this by deriving hugeprot from orig_pte instead of pte.
I'll update the patch to move setting hugeprot after the call to
get_clear_flush().
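
Concretely, the tail of huge_ptep_set_access_flags() could then look
something like this (a sketch of the proposed change, not the final
posted code):

	orig_pte = get_clear_flush(vma->vm_mm, addr, ptep, pgsize, ncontig);
	if (!pte_same(orig_pte, pte))
		changed = 1;

	/* Don't lose a dirty bit the hardware may have set meanwhile. */
	if (pte_dirty(orig_pte))
		pte = pte_mkdirty(pte);

	hugeprot = pte_pgprot(pte);
	for (i = 0; i < ncontig; i++, ptep++, addr += pgsize, pfn += dpfn)
		set_pte_at(vma->vm_mm, addr, ptep, pfn_pte(pfn, hugeprot));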

>
> Same comment here about the window. huge_ptep_set_access_flags() is
> called on a present (huge) pte and we briefly make it invalid. Can the
> mm subsystem cope with a fault on another CPU here? Same for the
> huge_ptep_set_wrprotect() below.

I've checked through the code and can confirm that callers to both
huge_ptep_set_access_flags() and huge_ptep_set_wrprotect() hold the page
table lock. So we should be safe here.

I also checked get_user_pages_fast() (based on an offline discussion)
and can confirm that there are checks for p*d_none(), in which case
the slow path is taken.
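
For reference, the fast GUP walk bails out roughly like this (a
paraphrase of the generic mm/gup.c fast path, with a hypothetical
pmdp local; not a verbatim quote):

	pmd_t pmd = READ_ONCE(*pmdp);

	if (pmd_none(pmd))
		return 0;	/* caller falls back to the slow path */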

I'll update the patches with the two changes discussed above and
re-post.

Thanks,
Punit

>
>> @@ -244,6 +281,9 @@ void huge_ptep_set_wrprotect(struct mm_struct *mm,
>>  {
>>  	int ncontig, i;
>>  	size_t pgsize;
>> +	pte_t pte = pte_wrprotect(huge_ptep_get(ptep)), orig_pte;
>> +	unsigned long pfn = pte_pfn(pte), dpfn;
>> +	pgprot_t hugeprot;
>>  
>>  	if (!pte_cont(*ptep)) {
>>  		ptep_set_wrprotect(mm, addr, ptep);
>> @@ -251,8 +291,15 @@ void huge_ptep_set_wrprotect(struct mm_struct *mm,
>>  	}
>>  
>>  	ncontig = find_num_contig(mm, addr, ptep, &pgsize);
>> -	for (i = 0; i < ncontig; i++, ptep++, addr += pgsize)
>> -		ptep_set_wrprotect(mm, addr, ptep);
>> +	dpfn = pgsize >> PAGE_SHIFT;
>> +
>> +	orig_pte = get_clear_flush(mm, addr, ptep, pgsize, ncontig);
>> +	if (pte_dirty(orig_pte))
>> +		pte = pte_mkdirty(pte);
>> +
>> +	hugeprot = pte_pgprot(pte);
>> +	for (i = 0; i < ncontig; i++, ptep++, addr += pgsize, pfn += dpfn)
>> +		set_pte_at(mm, addr, ptep, pfn_pte(pfn, hugeprot));
>>  }
>>  
>>  void huge_ptep_clear_flush(struct vm_area_struct *vma,

Patch

diff --git a/arch/arm64/mm/hugetlbpage.c b/arch/arm64/mm/hugetlbpage.c
index 08deed7c71f0..f2c976464f39 100644
--- a/arch/arm64/mm/hugetlbpage.c
+++ b/arch/arm64/mm/hugetlbpage.c
@@ -68,6 +68,47 @@ static int find_num_contig(struct mm_struct *mm, unsigned long addr,
 	return CONT_PTES;
 }
 
+/*
+ * Changing some bits of contiguous entries requires us to follow a
+ * Break-Before-Make approach, breaking the whole contiguous set
+ * before we can change any entries. See ARM DDI 0487A.k_iss10775,
+ * "Misprogramming of the Contiguous bit", page D4-1762.
+ *
+ * This helper performs the break step.
+ */
+static pte_t get_clear_flush(struct mm_struct *mm,
+			     unsigned long addr,
+			     pte_t *ptep,
+			     unsigned long pgsize,
+			     unsigned long ncontig)
+{
+	unsigned long i, saddr = addr;
+	struct vm_area_struct vma = { .vm_mm = mm };
+	pte_t orig_pte = huge_ptep_get(ptep);
+
+	/*
+	 * If we already have a faulting entry then we don't need
+	 * to break before make (there won't be a tlb entry cached).
+	 */
+	if (!pte_present(orig_pte))
+		return orig_pte;
+
+	for (i = 0; i < ncontig; i++, addr += pgsize, ptep++) {
+		pte_t pte = ptep_get_and_clear(mm, addr, ptep);
+
+		/*
+		 * If HW_AFDBM is enabled, then the HW could turn on
+		 * the dirty bit for any page in the set, so check
+		 * them all.  All hugetlb entries are already young.
+		 */
+		if (IS_ENABLED(CONFIG_ARM64_HW_AFDBM) && pte_dirty(pte))
+			orig_pte = pte_mkdirty(orig_pte);
+	}
+
+	flush_tlb_range(&vma, saddr, addr);
+	return orig_pte;
+}
+
 void set_huge_pte_at(struct mm_struct *mm, unsigned long addr,
 			    pte_t *ptep, pte_t pte)
 {
@@ -93,6 +134,8 @@ void set_huge_pte_at(struct mm_struct *mm, unsigned long addr,
 	dpfn = pgsize >> PAGE_SHIFT;
 	hugeprot = pte_pgprot(pte);
 
+	get_clear_flush(mm, addr, ptep, pgsize, ncontig);
+
 	for (i = 0; i < ncontig; i++, ptep++, addr += pgsize, pfn += dpfn) {
 		pr_debug("%s: set pte %p to 0x%llx\n", __func__, ptep,
 			 pte_val(pfn_pte(pfn, hugeprot)));
@@ -194,7 +237,7 @@ pte_t arch_make_huge_pte(pte_t entry, struct vm_area_struct *vma,
 pte_t huge_ptep_get_and_clear(struct mm_struct *mm,
 			      unsigned long addr, pte_t *ptep)
 {
-	int ncontig, i;
+	int ncontig;
 	size_t pgsize;
 	pte_t orig_pte = huge_ptep_get(ptep);
 
@@ -202,17 +245,8 @@ pte_t huge_ptep_get_and_clear(struct mm_struct *mm,
 		return ptep_get_and_clear(mm, addr, ptep);
 
 	ncontig = find_num_contig(mm, addr, ptep, &pgsize);
-	for (i = 0; i < ncontig; i++, addr += pgsize, ptep++) {
-		/*
-		 * If HW_AFDBM is enabled, then the HW could
-		 * turn on the dirty bit for any of the page
-		 * in the set, so check them all.
-		 */
-		if (pte_dirty(ptep_get_and_clear(mm, addr, ptep)))
-			orig_pte = pte_mkdirty(orig_pte);
-	}
 
-	return orig_pte;
+	return get_clear_flush(mm, addr, ptep, pgsize, ncontig);
 }
 
 int huge_ptep_set_access_flags(struct vm_area_struct *vma,
@@ -222,6 +256,7 @@ int huge_ptep_set_access_flags(struct vm_area_struct *vma,
 	int ncontig, i, changed = 0;
 	size_t pgsize = 0;
 	unsigned long pfn = pte_pfn(pte), dpfn;
+	pte_t orig_pte;
 	pgprot_t hugeprot;
 
 	if (!pte_cont(pte))
@@ -231,10 +266,12 @@ int huge_ptep_set_access_flags(struct vm_area_struct *vma,
 	dpfn = pgsize >> PAGE_SHIFT;
 	hugeprot = pte_pgprot(pte);
 
-	for (i = 0; i < ncontig; i++, ptep++, addr += pgsize, pfn += dpfn) {
-		changed |= ptep_set_access_flags(vma, addr, ptep,
-				pfn_pte(pfn, hugeprot), dirty);
-	}
+	orig_pte = get_clear_flush(vma->vm_mm, addr, ptep, pgsize, ncontig);
+	if (!pte_same(orig_pte, pte))
+		changed = 1;
+
+	for (i = 0; i < ncontig; i++, ptep++, addr += pgsize, pfn += dpfn)
+		set_pte_at(vma->vm_mm, addr, ptep, pfn_pte(pfn, hugeprot));
 
 	return changed;
 }
@@ -244,6 +281,9 @@ void huge_ptep_set_wrprotect(struct mm_struct *mm,
 {
 	int ncontig, i;
 	size_t pgsize;
+	pte_t pte = pte_wrprotect(huge_ptep_get(ptep)), orig_pte;
+	unsigned long pfn = pte_pfn(pte), dpfn;
+	pgprot_t hugeprot;
 
 	if (!pte_cont(*ptep)) {
 		ptep_set_wrprotect(mm, addr, ptep);
@@ -251,8 +291,15 @@ void huge_ptep_set_wrprotect(struct mm_struct *mm,
 	}
 
 	ncontig = find_num_contig(mm, addr, ptep, &pgsize);
-	for (i = 0; i < ncontig; i++, ptep++, addr += pgsize)
-		ptep_set_wrprotect(mm, addr, ptep);
+	dpfn = pgsize >> PAGE_SHIFT;
+
+	orig_pte = get_clear_flush(mm, addr, ptep, pgsize, ncontig);
+	if (pte_dirty(orig_pte))
+		pte = pte_mkdirty(pte);
+
+	hugeprot = pte_pgprot(pte);
+	for (i = 0; i < ncontig; i++, ptep++, addr += pgsize, pfn += dpfn)
+		set_pte_at(mm, addr, ptep, pfn_pte(pfn, hugeprot));
 }
 
 void huge_ptep_clear_flush(struct vm_area_struct *vma,