[v2,2/2] arm64: Use PoU cache instr for I/D coherency

Message ID 1450184178-28257-3-git-send-email-ashoks@broadcom.com (mailing list archive)
State New, archived

Commit Message

Ashok Kumar Dec. 15, 2015, 12:56 p.m. UTC
In systems with three levels of cache (PoU at L1 and PoC at L3),
PoC cache flush instructions flush the L2 and L3 caches as well,
which could affect performance.
For cache flushes needed only for I and D coherency, flushing to the
PoU should suffice, so change all I and D coherency related cache
flushes to operate to the PoU.

Introduce a new __clean_dcache_area_pou API to clean the D-cache to
the PoU, and provide a common macro backing __flush_dcache_area and
__clean_dcache_area_pou.

Signed-off-by: Ashok Kumar <ashoks@broadcom.com>
---
 arch/arm64/include/asm/cacheflush.h |  1 +
 arch/arm64/mm/cache.S               | 47 ++++++++++++++++++++++++++++++-------
 arch/arm64/mm/flush.c               | 14 +++++++----
 3 files changed, 49 insertions(+), 13 deletions(-)

Comments

Mark Rutland Dec. 15, 2015, 1:45 p.m. UTC | #1
On Tue, Dec 15, 2015 at 04:56:18AM -0800, Ashok Kumar wrote:
> In systems with three levels of cache (PoU at L1 and PoC at L3),
> PoC cache flush instructions flush the L2 and L3 caches as well,
> which could affect performance.
> For cache flushes needed only for I and D coherency, flushing to the
> PoU should suffice, so change all I and D coherency related cache
> flushes to operate to the PoU.
> 
> Introduce a new __clean_dcache_area_pou API to clean the D-cache to
> the PoU, and provide a common macro backing __flush_dcache_area and
> __clean_dcache_area_pou.
> 
> Signed-off-by: Ashok Kumar <ashoks@broadcom.com>
> ---
>  arch/arm64/include/asm/cacheflush.h |  1 +
>  arch/arm64/mm/cache.S               | 47 ++++++++++++++++++++++++++++++-------
>  arch/arm64/mm/flush.c               | 14 +++++++----
>  3 files changed, 49 insertions(+), 13 deletions(-)
> 
> diff --git a/arch/arm64/include/asm/cacheflush.h b/arch/arm64/include/asm/cacheflush.h
> index c75b8d0..6a5ecbd 100644
> --- a/arch/arm64/include/asm/cacheflush.h
> +++ b/arch/arm64/include/asm/cacheflush.h
> @@ -68,6 +68,7 @@
>  extern void flush_cache_range(struct vm_area_struct *vma, unsigned long start, unsigned long end);
>  extern void flush_icache_range(unsigned long start, unsigned long end);
>  extern void __flush_dcache_area(void *addr, size_t len);
> +extern void __clean_dcache_area_pou(void *addr, size_t len);
>  extern long __flush_cache_user_range(unsigned long start, unsigned long end);
>  
>  static inline void flush_cache_mm(struct mm_struct *mm)
> diff --git a/arch/arm64/mm/cache.S b/arch/arm64/mm/cache.S
> index eb48d5d..1fdcb38 100644
> --- a/arch/arm64/mm/cache.S
> +++ b/arch/arm64/mm/cache.S
> @@ -79,28 +79,57 @@ ENDPROC(flush_icache_range)
>  ENDPROC(__flush_cache_user_range)
>  
>  /*
> + * Macro to flush/invalidate cache lines till PoU/PoC

To keep this short and not use the confusing "flush" terminology, I
think it would be better to just say:

	Macro to perform a data cache maintenance operation for the
	interval [kaddr, kaddr + size).

> + *
> + * 	op:		operation passed to dc instruction
> + * 	domain:		domain used in dsb instruction
> + * 	kaddr:		starting virtual address of the region
> + * 	size:		size of the region
> + * 	Corrupts: 	kaddr, size, tmp1, tmp2
> + */
> +	.macro dcache_by_line_op op, domain, kaddr, size, tmp1, tmp2
> +	dcache_line_size \tmp1, \tmp2
> +	add	\size, \kaddr, \size
> +	sub	\tmp2, \tmp1, #1
> +	bic	\kaddr, \kaddr, \tmp2
> +1:	dc	\op, \kaddr
> +	add	\kaddr, \kaddr, \tmp1
> +	cmp	\kaddr, \size
> +	b.lo	1b
> +	dsb	\domain
> +	.endm
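
For readers following the assembly: the loop rounds kaddr down to a
cache-line boundary and issues one dc operation per line, roughly the
following C sketch (dcache_line_size(), dc_op() and dsb() are
placeholders for the CTR_EL0.DminLine read, the dc instruction and the
barrier, respectively):

	unsigned long line = dcache_line_size();  /* smallest D-line, in bytes */
	unsigned long end  = kaddr + size;

	kaddr &= ~(line - 1);		/* round down to a line boundary */
	do {
		dc_op(kaddr);		/* dc <op>, kaddr */
		kaddr += line;
	} while (kaddr < end);
	dsb(domain);			/* complete the maintenance ops */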
> +
> +/*
>   *	__flush_dcache_area(kaddr, size)
>   *
>   *	Ensure that the data held in the page kaddr is written back to the
>   *	page in question.
> + *	Flush and invalidate D-cache lines belonging to
> + *	address <kaddr, kaddr+size> till PoC.
>   *

Similarly we can replace all 4 lines of description here with:

	Ensure that any D-cache lines for the interval [kaddr,
	kaddr+size) are cleaned and invalidated to the PoC.

>   *	- kaddr   - kernel address
>   *	- size    - size in question
>   */
>  ENTRY(__flush_dcache_area)
> -	dcache_line_size x2, x3
> -	add	x1, x0, x1
> -	sub	x3, x2, #1
> -	bic	x0, x0, x3
> -1:	dc	civac, x0			// clean & invalidate D line / unified line
> -	add	x0, x0, x2
> -	cmp	x0, x1
> -	b.lo	1b
> -	dsb	sy
> +	dcache_by_line_op civac, sy, x0, x1, x2, x3
>  	ret
>  ENDPROC(__flush_dcache_area)
>  
>  /*
> + *	__clean_dcache_area_pou(kaddr, size)
> + *
> + * 	Flush D-cache lines belonging to address
> + * 	<kaddr, kaddr+size> till PoU.

Similarly here:

	Ensure that any D-cache lines for the interval [kaddr,
	kaddr+size) are cleaned to the PoU.

> + *
> + *	- kaddr   - kernel address
> + *	- size    - size in question
> + */
> +ENTRY(__clean_dcache_area_pou)
> +	dcache_by_line_op cvau, ish, x0, x1, x2, x3
> +	ret
> +ENDPROC(__clean_dcache_area_pou)
> +
> +/*
>   *	__inval_cache_range(start, end)
>   *	- start   - start address of region
>   *	- end     - end address of region
> diff --git a/arch/arm64/mm/flush.c b/arch/arm64/mm/flush.c
> index c26b804..4d4d15e 100644
> --- a/arch/arm64/mm/flush.c
> +++ b/arch/arm64/mm/flush.c
> @@ -41,7 +41,7 @@ static void flush_ptrace_access(struct vm_area_struct *vma, struct page *page,
>  	if (vma->vm_flags & VM_EXEC) {
>  		unsigned long addr = (unsigned long)kaddr;
>  		if (icache_is_aliasing()) {
> -			__flush_dcache_area(kaddr, len);
> +			__clean_dcache_area_pou(kaddr, len);
>  			__flush_icache_all();
>  		} else {
>  			flush_icache_range(addr, addr + len);
> @@ -75,9 +75,15 @@ void __sync_icache_dcache(pte_t pte, unsigned long addr)
>  		return;
>  
>  	if (!test_and_set_bit(PG_dcache_clean, &page->flags)) {
> -		__flush_dcache_area(page_address(page),
> -				PAGE_SIZE << compound_order(page));
> -		__flush_icache_all();
> +		if (icache_is_aliasing()) {
> +			__clean_dcache_area_pou(page_address(page),
> +					    PAGE_SIZE << compound_order(page));
> +			__flush_icache_all();
> +		} else {
> +			flush_icache_range((unsigned long)page_address(page),
> +					   (unsigned long)page_address(page) +
> +					   (PAGE_SIZE << compound_order(page)));
> +		}

This now looks identical to the guts of flush_ptrace_access.

Let's extract that into a new helper to call in both cases, e.g.

	void sync_icache_aliases(void *kaddr, unsigned long len)
	{
		unsigned long addr = (unsigned long)kaddr;
		if (icache_is_aliasing()) {
			__clean_dcache_area_pou(kaddr, len);
			__flush_icache_all();
		} else {
			flush_icache_range(addr, addr + len);
		}
	}

That would make this call site a lot simpler:

	sync_icache_aliases(page_address(page),
			    PAGE_SIZE << compound_order(page))
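
flush_ptrace_access itself would then shrink to a single call (again a
sketch, built on the helper above):

	if (vma->vm_flags & VM_EXEC)
		sync_icache_aliases(kaddr, len);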

Other than that, this looks good to me.

With the changes suggested above:

Reviewed-by: Mark Rutland <mark.rutland@arm.com>

Thanks,
Mark.

Patch

diff --git a/arch/arm64/include/asm/cacheflush.h b/arch/arm64/include/asm/cacheflush.h
index c75b8d0..6a5ecbd 100644
--- a/arch/arm64/include/asm/cacheflush.h
+++ b/arch/arm64/include/asm/cacheflush.h
@@ -68,6 +68,7 @@ 
 extern void flush_cache_range(struct vm_area_struct *vma, unsigned long start, unsigned long end);
 extern void flush_icache_range(unsigned long start, unsigned long end);
 extern void __flush_dcache_area(void *addr, size_t len);
+extern void __clean_dcache_area_pou(void *addr, size_t len);
 extern long __flush_cache_user_range(unsigned long start, unsigned long end);
 
 static inline void flush_cache_mm(struct mm_struct *mm)
diff --git a/arch/arm64/mm/cache.S b/arch/arm64/mm/cache.S
index eb48d5d..1fdcb38 100644
--- a/arch/arm64/mm/cache.S
+++ b/arch/arm64/mm/cache.S
@@ -79,28 +79,57 @@  ENDPROC(flush_icache_range)
 ENDPROC(__flush_cache_user_range)
 
 /*
+ * Macro to flush/invalidate cache lines till PoU/PoC
+ *
+ * 	op:		operation passed to dc instruction
+ * 	domain:		domain used in dsb instruction
+ * 	kaddr:		starting virtual address of the region
+ * 	size:		size of the region
+ * 	Corrupts: 	kaddr, size, tmp1, tmp2
+ */
+	.macro dcache_by_line_op op, domain, kaddr, size, tmp1, tmp2
+	dcache_line_size \tmp1, \tmp2
+	add	\size, \kaddr, \size
+	sub	\tmp2, \tmp1, #1
+	bic	\kaddr, \kaddr, \tmp2
+1:	dc	\op, \kaddr
+	add	\kaddr, \kaddr, \tmp1
+	cmp	\kaddr, \size
+	b.lo	1b
+	dsb	\domain
+	.endm
+
+/*
  *	__flush_dcache_area(kaddr, size)
  *
  *	Ensure that the data held in the page kaddr is written back to the
  *	page in question.
+ *	Flush and invalidate D-cache lines belonging to
+ *	address <kaddr, kaddr+size> till PoC.
  *
  *	- kaddr   - kernel address
  *	- size    - size in question
  */
 ENTRY(__flush_dcache_area)
-	dcache_line_size x2, x3
-	add	x1, x0, x1
-	sub	x3, x2, #1
-	bic	x0, x0, x3
-1:	dc	civac, x0			// clean & invalidate D line / unified line
-	add	x0, x0, x2
-	cmp	x0, x1
-	b.lo	1b
-	dsb	sy
+	dcache_by_line_op civac, sy, x0, x1, x2, x3
 	ret
 ENDPROC(__flush_dcache_area)
 
 /*
+ *	__clean_dcache_area_pou(kaddr, size)
+ *
+ * 	Flush D-cache lines belonging to address
+ * 	<kaddr, kaddr+size> till PoU.
+ *
+ *	- kaddr   - kernel address
+ *	- size    - size in question
+ */
+ENTRY(__clean_dcache_area_pou)
+	dcache_by_line_op cvau, ish, x0, x1, x2, x3
+	ret
+ENDPROC(__clean_dcache_area_pou)
+
+/*
  *	__inval_cache_range(start, end)
  *	- start   - start address of region
  *	- end     - end address of region
diff --git a/arch/arm64/mm/flush.c b/arch/arm64/mm/flush.c
index c26b804..4d4d15e 100644
--- a/arch/arm64/mm/flush.c
+++ b/arch/arm64/mm/flush.c
@@ -41,7 +41,7 @@  static void flush_ptrace_access(struct vm_area_struct *vma, struct page *page,
 	if (vma->vm_flags & VM_EXEC) {
 		unsigned long addr = (unsigned long)kaddr;
 		if (icache_is_aliasing()) {
-			__flush_dcache_area(kaddr, len);
+			__clean_dcache_area_pou(kaddr, len);
 			__flush_icache_all();
 		} else {
 			flush_icache_range(addr, addr + len);
@@ -75,9 +75,15 @@  void __sync_icache_dcache(pte_t pte, unsigned long addr)
 		return;
 
 	if (!test_and_set_bit(PG_dcache_clean, &page->flags)) {
-		__flush_dcache_area(page_address(page),
-				PAGE_SIZE << compound_order(page));
-		__flush_icache_all();
+		if (icache_is_aliasing()) {
+			__clean_dcache_area_pou(page_address(page),
+					    PAGE_SIZE << compound_order(page));
+			__flush_icache_all();
+		} else {
+			flush_icache_range((unsigned long)page_address(page),
+					   (unsigned long)page_address(page) +
+					   (PAGE_SIZE << compound_order(page)));
+		}
 	} else if (icache_is_aivivt()) {
 		__flush_icache_all();
 	}