
[2/6] arm64: Convert pte handling from inline asm to using (cmp)xchg

Message ID 20170725135308.18173-3-catalin.marinas@arm.com (mailing list archive)
State New, archived

Commit Message

Catalin Marinas July 25, 2017, 1:53 p.m. UTC
With the support for hardware updates of the access and dirty states,
the following pte handling functions had to be implemented using
exclusives: __ptep_test_and_clear_young(), ptep_get_and_clear(),
ptep_set_wrprotect() and ptep_set_access_flags(). To take advantage of
the LSE atomic instructions and also make the code cleaner, convert
these pte functions to use the more generic cmpxchg()/xchg().

Cc: Will Deacon <will.deacon@arm.com>
Acked-by: Mark Rutland <mark.rutland@arm.com>
Acked-by: Steve Capper <steve.capper@arm.com>
Signed-off-by: Catalin Marinas <catalin.marinas@arm.com>
---
 arch/arm64/include/asm/pgtable.h | 67 +++++++++++++++++-----------------------
 arch/arm64/mm/fault.c            | 23 ++++++--------
 2 files changed, 39 insertions(+), 51 deletions(-)

Comments

Will Deacon Aug. 17, 2017, 12:59 p.m. UTC | #1
On Tue, Jul 25, 2017 at 02:53:04PM +0100, Catalin Marinas wrote:
> With the support for hardware updates of the access and dirty states,
> the following pte handling functions had to be implemented using
> exclusives: __ptep_test_and_clear_young(), ptep_get_and_clear(),
> ptep_set_wrprotect() and ptep_set_access_flags(). To take advantage of
> the LSE atomic instructions and also make the code cleaner, convert
> these pte functions to use the more generic cmpxchg()/xchg().
> 
> Cc: Will Deacon <will.deacon@arm.com>
> Acked-by: Mark Rutland <mark.rutland@arm.com>
> Acked-by: Steve Capper <steve.capper@arm.com>
> Signed-off-by: Catalin Marinas <catalin.marinas@arm.com>
> ---
>  arch/arm64/include/asm/pgtable.h | 67 +++++++++++++++++-----------------------
>  arch/arm64/mm/fault.c            | 23 ++++++--------
>  2 files changed, 39 insertions(+), 51 deletions(-)
> 
> diff --git a/arch/arm64/include/asm/pgtable.h b/arch/arm64/include/asm/pgtable.h
> index 6eae342ced6b..4580d5992d0c 100644
> --- a/arch/arm64/include/asm/pgtable.h
> +++ b/arch/arm64/include/asm/pgtable.h
> @@ -39,6 +39,7 @@
>  
>  #ifndef __ASSEMBLY__
>  
> +#include <asm/cmpxchg.h>
>  #include <asm/fixmap.h>
>  #include <linux/mmdebug.h>
>  
> @@ -173,6 +174,11 @@ static inline pte_t pte_clear_rdonly(pte_t pte)
>  	return clear_pte_bit(pte, __pgprot(PTE_RDONLY));
>  }
>  
> +static inline pte_t pte_set_rdonly(pte_t pte)
> +{
> +	return set_pte_bit(pte, __pgprot(PTE_RDONLY));
> +}
> +
>  static inline pte_t pte_mkpresent(pte_t pte)
>  {
>  	return set_pte_bit(pte, __pgprot(PTE_VALID));
> @@ -593,20 +599,15 @@ static inline int pmdp_set_access_flags(struct vm_area_struct *vma,
>  #define __HAVE_ARCH_PTEP_TEST_AND_CLEAR_YOUNG
>  static inline int __ptep_test_and_clear_young(pte_t *ptep)
>  {
> -	pteval_t pteval;
> -	unsigned int tmp, res;
> +	pte_t old_pte, pte;
>  
> -	asm volatile("//	__ptep_test_and_clear_young\n"
> -	"	prfm	pstl1strm, %2\n"
> -	"1:	ldxr	%0, %2\n"
> -	"	ubfx	%w3, %w0, %5, #1	// extract PTE_AF (young)\n"
> -	"	and	%0, %0, %4		// clear PTE_AF\n"
> -	"	stxr	%w1, %0, %2\n"
> -	"	cbnz	%w1, 1b\n"
> -	: "=&r" (pteval), "=&r" (tmp), "+Q" (pte_val(*ptep)), "=&r" (res)
> -	: "L" (~PTE_AF), "I" (ilog2(PTE_AF)));
> +	do {
> +		old_pte = READ_ONCE(*ptep);
> +		pte = pte_mkold(old_pte);
> +	} while (cmpxchg_relaxed(&pte_val(*ptep), pte_val(old_pte),
> +				 pte_val(pte)) != pte_val(old_pte));

Given that cmpxchg returns the value it read, can you avoid the READ_ONCE
on subsequent iterations of the loop? Same with the other cmpxchgs you've
added in this patch.
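
E.g. something like this (entirely untested, just to sketch the shape;
the names mirror your patch):

	pte_t old_pte, pte;

	pte = READ_ONCE(*ptep);
	do {
		old_pte = pte;
		pte = pte_mkold(pte);
		/* on failure, cmpxchg returns the value it observed */
		pte_val(pte) = cmpxchg_relaxed(&pte_val(*ptep),
					       pte_val(old_pte),
					       pte_val(pte));
	} while (pte_val(pte) != pte_val(old_pte));

	return pte_young(pte);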

> -	return res;
> +	return pte_young(old_pte);
>  }
>  
>  static inline int ptep_test_and_clear_young(struct vm_area_struct *vma,
> @@ -630,17 +631,7 @@ static inline int pmdp_test_and_clear_young(struct vm_area_struct *vma,
>  static inline pte_t ptep_get_and_clear(struct mm_struct *mm,
>  				       unsigned long address, pte_t *ptep)
>  {
> -	pteval_t old_pteval;
> -	unsigned int tmp;
> -
> -	asm volatile("//	ptep_get_and_clear\n"
> -	"	prfm	pstl1strm, %2\n"
> -	"1:	ldxr	%0, %2\n"
> -	"	stxr	%w1, xzr, %2\n"
> -	"	cbnz	%w1, 1b\n"
> -	: "=&r" (old_pteval), "=&r" (tmp), "+Q" (pte_val(*ptep)));
> -
> -	return __pte(old_pteval);
> +	return __pte(xchg_relaxed(&pte_val(*ptep), 0));
>  }
>  
>  #ifdef CONFIG_TRANSPARENT_HUGEPAGE
> @@ -659,21 +650,21 @@ static inline pmd_t pmdp_huge_get_and_clear(struct mm_struct *mm,
>  #define __HAVE_ARCH_PTEP_SET_WRPROTECT
>  static inline void ptep_set_wrprotect(struct mm_struct *mm, unsigned long address, pte_t *ptep)
>  {
> -	pteval_t pteval;
> -	unsigned long tmp;
> -
> -	asm volatile("//	ptep_set_wrprotect\n"
> -	"	prfm	pstl1strm, %2\n"
> -	"1:	ldxr	%0, %2\n"
> -	"	tst	%0, %4			// check for hw dirty (!PTE_RDONLY)\n"
> -	"	csel	%1, %3, xzr, eq		// set PTE_DIRTY|PTE_RDONLY if dirty\n"
> -	"	orr	%0, %0, %1		// if !dirty, PTE_RDONLY is already set\n"
> -	"	and	%0, %0, %5		// clear PTE_WRITE/PTE_DBM\n"
> -	"	stxr	%w1, %0, %2\n"
> -	"	cbnz	%w1, 1b\n"
> -	: "=&r" (pteval), "=&r" (tmp), "+Q" (pte_val(*ptep))
> -	: "r" (PTE_DIRTY|PTE_RDONLY), "L" (PTE_RDONLY), "L" (~PTE_WRITE)
> -	: "cc");
> +	pte_t old_pte, pte;
> +
> +	do {
> +		pte = old_pte = READ_ONCE(*ptep);
> +		/*
> +		 * If hardware-dirty (PTE_WRITE/DBM bit set and PTE_RDONLY
> +		 * clear), set the PTE_DIRTY and PTE_RDONLY bits.
> +		 */
> +		if (pte_hw_dirty(pte)) {
> +			pte = pte_mkdirty(pte);
> +			pte = pte_set_rdonly(pte);
> +		}
> +		pte = pte_wrprotect(pte);
> +	} while (cmpxchg_relaxed(&pte_val(*ptep), pte_val(old_pte),
> +				 pte_val(pte)) != pte_val(old_pte));
>  }
>  
>  #ifdef CONFIG_TRANSPARENT_HUGEPAGE
> diff --git a/arch/arm64/mm/fault.c b/arch/arm64/mm/fault.c
> index 2509e4fe6992..65ed66bb2828 100644
> --- a/arch/arm64/mm/fault.c
> +++ b/arch/arm64/mm/fault.c
> @@ -34,6 +34,7 @@
>  #include <linux/hugetlb.h>
>  
>  #include <asm/bug.h>
> +#include <asm/cmpxchg.h>
>  #include <asm/cpufeature.h>
>  #include <asm/exception.h>
>  #include <asm/debug-monitors.h>
> @@ -154,8 +155,7 @@ int ptep_set_access_flags(struct vm_area_struct *vma,
>  			  unsigned long address, pte_t *ptep,
>  			  pte_t entry, int dirty)
>  {
> -	pteval_t old_pteval;
> -	unsigned int tmp;
> +	pteval_t old_pteval, pteval;
>  
>  	if (pte_same(*ptep, entry))
>  		return 0;
> @@ -165,7 +165,7 @@ int ptep_set_access_flags(struct vm_area_struct *vma,
>  
>  	/* set PTE_RDONLY if actual read-only or clean PTE */
>  	if (!pte_write(entry) || !pte_sw_dirty(entry))
> -		pte_val(entry) |= PTE_RDONLY;
> +		entry = pte_set_rdonly(entry);
>  
>  	/*
>  	 * Setting the flags must be done atomically to avoid racing with the
> @@ -174,16 +174,13 @@ int ptep_set_access_flags(struct vm_area_struct *vma,
>  	 * (calculated as: a & b == ~(~a | ~b)).
>  	 */
>  	pte_val(entry) ^= PTE_RDONLY;
> -	asm volatile("//	ptep_set_access_flags\n"
> -	"	prfm	pstl1strm, %2\n"
> -	"1:	ldxr	%0, %2\n"
> -	"	eor	%0, %0, %3		// negate PTE_RDONLY in *ptep\n"
> -	"	orr	%0, %0, %4		// set flags\n"
> -	"	eor	%0, %0, %3		// negate final PTE_RDONLY\n"
> -	"	stxr	%w1, %0, %2\n"
> -	"	cbnz	%w1, 1b\n"
> -	: "=&r" (old_pteval), "=&r" (tmp), "+Q" (pte_val(*ptep))
> -	: "L" (PTE_RDONLY), "r" (pte_val(entry)));
> +	do {
> +		old_pteval = READ_ONCE(pte_val(*ptep));
> +		pteval = old_pteval ^ PTE_RDONLY;
> +		pteval |= pte_val(entry);
> +		pteval ^= PTE_RDONLY;
> +	} while (cmpxchg_relaxed(&pte_val(*ptep), old_pteval, pteval) !=
> +		 old_pteval);
>  
>  	flush_tlb_fix_spurious_fault(vma, address);
>  	return 1;

You haven't changed this in your patch, but how is the return value supposed
to interact with concurrent updates to the pte? Isn't the earlier pte_same
check racy?

Will
Catalin Marinas Aug. 18, 2017, 4:15 p.m. UTC | #2
On Thu, Aug 17, 2017 at 01:59:58PM +0100, Will Deacon wrote:
> On Tue, Jul 25, 2017 at 02:53:04PM +0100, Catalin Marinas wrote:
> > @@ -593,20 +599,15 @@ static inline int pmdp_set_access_flags(struct vm_area_struct *vma,
> >  #define __HAVE_ARCH_PTEP_TEST_AND_CLEAR_YOUNG
> >  static inline int __ptep_test_and_clear_young(pte_t *ptep)
> >  {
> > -	pteval_t pteval;
> > -	unsigned int tmp, res;
> > +	pte_t old_pte, pte;
> >  
> > -	asm volatile("//	__ptep_test_and_clear_young\n"
> > -	"	prfm	pstl1strm, %2\n"
> > -	"1:	ldxr	%0, %2\n"
> > -	"	ubfx	%w3, %w0, %5, #1	// extract PTE_AF (young)\n"
> > -	"	and	%0, %0, %4		// clear PTE_AF\n"
> > -	"	stxr	%w1, %0, %2\n"
> > -	"	cbnz	%w1, 1b\n"
> > -	: "=&r" (pteval), "=&r" (tmp), "+Q" (pte_val(*ptep)), "=&r" (res)
> > -	: "L" (~PTE_AF), "I" (ilog2(PTE_AF)));
> > +	do {
> > +		old_pte = READ_ONCE(*ptep);
> > +		pte = pte_mkold(old_pte);
> > +	} while (cmpxchg_relaxed(&pte_val(*ptep), pte_val(old_pte),
> > +				 pte_val(pte)) != pte_val(old_pte));
> 
> Given that cmpxchg returns the value it read, can you avoid the READ_ONCE
> on subsequent iterations of the loop? Same with the other cmpxchgs you've
> added in this patch.

I'll have a look.

> > diff --git a/arch/arm64/mm/fault.c b/arch/arm64/mm/fault.c
> > index 2509e4fe6992..65ed66bb2828 100644
> > --- a/arch/arm64/mm/fault.c
> > +++ b/arch/arm64/mm/fault.c
> > @@ -34,6 +34,7 @@
> >  #include <linux/hugetlb.h>
> >  
> >  #include <asm/bug.h>
> > +#include <asm/cmpxchg.h>
> >  #include <asm/cpufeature.h>
> >  #include <asm/exception.h>
> >  #include <asm/debug-monitors.h>
> > @@ -154,8 +155,7 @@ int ptep_set_access_flags(struct vm_area_struct *vma,
> >  			  unsigned long address, pte_t *ptep,
> >  			  pte_t entry, int dirty)
> >  {
> > -	pteval_t old_pteval;
> > -	unsigned int tmp;
> > +	pteval_t old_pteval, pteval;
> >  
> >  	if (pte_same(*ptep, entry))
> >  		return 0;
> > @@ -165,7 +165,7 @@ int ptep_set_access_flags(struct vm_area_struct *vma,
> >  
> >  	/* set PTE_RDONLY if actual read-only or clean PTE */
> >  	if (!pte_write(entry) || !pte_sw_dirty(entry))
> > -		pte_val(entry) |= PTE_RDONLY;
> > +		entry = pte_set_rdonly(entry);
> >  
> >  	/*
> >  	 * Setting the flags must be done atomically to avoid racing with the
> > @@ -174,16 +174,13 @@ int ptep_set_access_flags(struct vm_area_struct *vma,
> >  	 * (calculated as: a & b == ~(~a | ~b)).
> >  	 */
> >  	pte_val(entry) ^= PTE_RDONLY;
> > -	asm volatile("//	ptep_set_access_flags\n"
> > -	"	prfm	pstl1strm, %2\n"
> > -	"1:	ldxr	%0, %2\n"
> > -	"	eor	%0, %0, %3		// negate PTE_RDONLY in *ptep\n"
> > -	"	orr	%0, %0, %4		// set flags\n"
> > -	"	eor	%0, %0, %3		// negate final PTE_RDONLY\n"
> > -	"	stxr	%w1, %0, %2\n"
> > -	"	cbnz	%w1, 1b\n"
> > -	: "=&r" (old_pteval), "=&r" (tmp), "+Q" (pte_val(*ptep))
> > -	: "L" (PTE_RDONLY), "r" (pte_val(entry)));
> > +	do {
> > +		old_pteval = READ_ONCE(pte_val(*ptep));
> > +		pteval = old_pteval ^ PTE_RDONLY;
> > +		pteval |= pte_val(entry);
> > +		pteval ^= PTE_RDONLY;
> > +	} while (cmpxchg_relaxed(&pte_val(*ptep), old_pteval, pteval) !=
> > +		 old_pteval);
> >  
> >  	flush_tlb_fix_spurious_fault(vma, address);
> >  	return 1;
> 
> You haven't changed this in your patch, but how is the return value supposed
> to interact with concurrent updates to the pte? Isn't the earlier pte_same
> check racy?

The pte_same() check is a best effort, meant to return early when we are
not changing any permissions. The only scenario where a concurrent update
can trigger it is when the caller of ptep_set_access_flags() wants to set
the AF bit while the hardware has also set it; the old and new entries then
look the same and we exit early. There is no information loss.

The return value is irrelevant on arm64 since all the core code does with
it is call update_mmu_cache() when it returns 1.
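
For reference, the generic code does roughly (paraphrasing core mm, not
verbatim):

	if (ptep_set_access_flags(vma, address, ptep, entry, dirty))
		update_mmu_cache(vma, address, ptep);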

Patch

diff --git a/arch/arm64/include/asm/pgtable.h b/arch/arm64/include/asm/pgtable.h
index 6eae342ced6b..4580d5992d0c 100644
--- a/arch/arm64/include/asm/pgtable.h
+++ b/arch/arm64/include/asm/pgtable.h
@@ -39,6 +39,7 @@
 
 #ifndef __ASSEMBLY__
 
+#include <asm/cmpxchg.h>
 #include <asm/fixmap.h>
 #include <linux/mmdebug.h>
 
@@ -173,6 +174,11 @@ static inline pte_t pte_clear_rdonly(pte_t pte)
 	return clear_pte_bit(pte, __pgprot(PTE_RDONLY));
 }
 
+static inline pte_t pte_set_rdonly(pte_t pte)
+{
+	return set_pte_bit(pte, __pgprot(PTE_RDONLY));
+}
+
 static inline pte_t pte_mkpresent(pte_t pte)
 {
 	return set_pte_bit(pte, __pgprot(PTE_VALID));
@@ -593,20 +599,15 @@ static inline int pmdp_set_access_flags(struct vm_area_struct *vma,
 #define __HAVE_ARCH_PTEP_TEST_AND_CLEAR_YOUNG
 static inline int __ptep_test_and_clear_young(pte_t *ptep)
 {
-	pteval_t pteval;
-	unsigned int tmp, res;
+	pte_t old_pte, pte;
 
-	asm volatile("//	__ptep_test_and_clear_young\n"
-	"	prfm	pstl1strm, %2\n"
-	"1:	ldxr	%0, %2\n"
-	"	ubfx	%w3, %w0, %5, #1	// extract PTE_AF (young)\n"
-	"	and	%0, %0, %4		// clear PTE_AF\n"
-	"	stxr	%w1, %0, %2\n"
-	"	cbnz	%w1, 1b\n"
-	: "=&r" (pteval), "=&r" (tmp), "+Q" (pte_val(*ptep)), "=&r" (res)
-	: "L" (~PTE_AF), "I" (ilog2(PTE_AF)));
+	do {
+		old_pte = READ_ONCE(*ptep);
+		pte = pte_mkold(old_pte);
+	} while (cmpxchg_relaxed(&pte_val(*ptep), pte_val(old_pte),
+				 pte_val(pte)) != pte_val(old_pte));
 
-	return res;
+	return pte_young(old_pte);
 }
 
 static inline int ptep_test_and_clear_young(struct vm_area_struct *vma,
@@ -630,17 +631,7 @@ static inline int pmdp_test_and_clear_young(struct vm_area_struct *vma,
 static inline pte_t ptep_get_and_clear(struct mm_struct *mm,
 				       unsigned long address, pte_t *ptep)
 {
-	pteval_t old_pteval;
-	unsigned int tmp;
-
-	asm volatile("//	ptep_get_and_clear\n"
-	"	prfm	pstl1strm, %2\n"
-	"1:	ldxr	%0, %2\n"
-	"	stxr	%w1, xzr, %2\n"
-	"	cbnz	%w1, 1b\n"
-	: "=&r" (old_pteval), "=&r" (tmp), "+Q" (pte_val(*ptep)));
-
-	return __pte(old_pteval);
+	return __pte(xchg_relaxed(&pte_val(*ptep), 0));
 }
 
 #ifdef CONFIG_TRANSPARENT_HUGEPAGE
@@ -659,21 +650,21 @@ static inline pmd_t pmdp_huge_get_and_clear(struct mm_struct *mm,
 #define __HAVE_ARCH_PTEP_SET_WRPROTECT
 static inline void ptep_set_wrprotect(struct mm_struct *mm, unsigned long address, pte_t *ptep)
 {
-	pteval_t pteval;
-	unsigned long tmp;
-
-	asm volatile("//	ptep_set_wrprotect\n"
-	"	prfm	pstl1strm, %2\n"
-	"1:	ldxr	%0, %2\n"
-	"	tst	%0, %4			// check for hw dirty (!PTE_RDONLY)\n"
-	"	csel	%1, %3, xzr, eq		// set PTE_DIRTY|PTE_RDONLY if dirty\n"
-	"	orr	%0, %0, %1		// if !dirty, PTE_RDONLY is already set\n"
-	"	and	%0, %0, %5		// clear PTE_WRITE/PTE_DBM\n"
-	"	stxr	%w1, %0, %2\n"
-	"	cbnz	%w1, 1b\n"
-	: "=&r" (pteval), "=&r" (tmp), "+Q" (pte_val(*ptep))
-	: "r" (PTE_DIRTY|PTE_RDONLY), "L" (PTE_RDONLY), "L" (~PTE_WRITE)
-	: "cc");
+	pte_t old_pte, pte;
+
+	do {
+		pte = old_pte = READ_ONCE(*ptep);
+		/*
+		 * If hardware-dirty (PTE_WRITE/DBM bit set and PTE_RDONLY
+		 * clear), set the PTE_DIRTY and PTE_RDONLY bits.
+		 */
+		if (pte_hw_dirty(pte)) {
+			pte = pte_mkdirty(pte);
+			pte = pte_set_rdonly(pte);
+		}
+		pte = pte_wrprotect(pte);
+	} while (cmpxchg_relaxed(&pte_val(*ptep), pte_val(old_pte),
+				 pte_val(pte)) != pte_val(old_pte));
 }
 
 #ifdef CONFIG_TRANSPARENT_HUGEPAGE
diff --git a/arch/arm64/mm/fault.c b/arch/arm64/mm/fault.c
index 2509e4fe6992..65ed66bb2828 100644
--- a/arch/arm64/mm/fault.c
+++ b/arch/arm64/mm/fault.c
@@ -34,6 +34,7 @@
 #include <linux/hugetlb.h>
 
 #include <asm/bug.h>
+#include <asm/cmpxchg.h>
 #include <asm/cpufeature.h>
 #include <asm/exception.h>
 #include <asm/debug-monitors.h>
@@ -154,8 +155,7 @@ int ptep_set_access_flags(struct vm_area_struct *vma,
 			  unsigned long address, pte_t *ptep,
 			  pte_t entry, int dirty)
 {
-	pteval_t old_pteval;
-	unsigned int tmp;
+	pteval_t old_pteval, pteval;
 
 	if (pte_same(*ptep, entry))
 		return 0;
@@ -165,7 +165,7 @@ int ptep_set_access_flags(struct vm_area_struct *vma,
 
 	/* set PTE_RDONLY if actual read-only or clean PTE */
 	if (!pte_write(entry) || !pte_sw_dirty(entry))
-		pte_val(entry) |= PTE_RDONLY;
+		entry = pte_set_rdonly(entry);
 
 	/*
 	 * Setting the flags must be done atomically to avoid racing with the
@@ -174,16 +174,13 @@ int ptep_set_access_flags(struct vm_area_struct *vma,
 	 * (calculated as: a & b == ~(~a | ~b)).
 	 */
 	pte_val(entry) ^= PTE_RDONLY;
-	asm volatile("//	ptep_set_access_flags\n"
-	"	prfm	pstl1strm, %2\n"
-	"1:	ldxr	%0, %2\n"
-	"	eor	%0, %0, %3		// negate PTE_RDONLY in *ptep\n"
-	"	orr	%0, %0, %4		// set flags\n"
-	"	eor	%0, %0, %3		// negate final PTE_RDONLY\n"
-	"	stxr	%w1, %0, %2\n"
-	"	cbnz	%w1, 1b\n"
-	: "=&r" (old_pteval), "=&r" (tmp), "+Q" (pte_val(*ptep))
-	: "L" (PTE_RDONLY), "r" (pte_val(entry)));
+	do {
+		old_pteval = READ_ONCE(pte_val(*ptep));
+		pteval = old_pteval ^ PTE_RDONLY;
+		pteval |= pte_val(entry);
+		pteval ^= PTE_RDONLY;
+	} while (cmpxchg_relaxed(&pte_val(*ptep), old_pteval, pteval) !=
+		 old_pteval);
 
 	flush_tlb_fix_spurious_fault(vma, address);
 	return 1;
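
A note on the PTE_RDONLY double-XOR in ptep_set_access_flags() above: every
flag in 'entry' is OR-ed into the old value except PTE_RDONLY, which is
AND-ed via De Morgan (a & b == ~(~a | ~b)), the per-bit negation being the
XOR. This way a hardware DBM update that concurrently clears PTE_RDONLY
(marking the page dirty) can never be undone by this function. A standalone
illustration (bit positions as on arm64; the scenario values are made up):

	#include <assert.h>
	#include <stdint.h>

	#define PTE_RDONLY	(UINT64_C(1) << 7)
	#define PTE_AF		(UINT64_C(1) << 10)

	int main(void)
	{
		uint64_t old = PTE_RDONLY;	/* clean pte: PTE_RDONLY set */
		uint64_t entry = PTE_AF;	/* mark young, make writable */
		uint64_t newval;

		entry ^= PTE_RDONLY;		/* pre-negate, as above */
		newval = ((old ^ PTE_RDONLY) | entry) ^ PTE_RDONLY;

		assert(newval & PTE_AF);	/* other flags are OR-ed in */
		assert(!(newval & PTE_RDONLY));	/* PTE_RDONLY only ever AND-ed down */
		return 0;
	}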