From patchwork Wed Jul 31 16:44:44 2013
Content-Type: text/plain; charset="utf-8"
MIME-Version: 1.0
Content-Transfer-Encoding: 7bit
X-Patchwork-Submitter: Santosh Shilimkar <santosh.shilimkar@ti.com>
X-Patchwork-Id: 2836478
Return-Path: 
 <linux-arm-kernel-bounces+patchwork-linux-arm=patchwork.kernel.org@lists.infradead.org>
X-Original-To: patchwork-linux-arm@patchwork.kernel.org
Delivered-To: patchwork-parsemail@patchwork2.web.kernel.org
Received: from mail.kernel.org (mail.kernel.org [198.145.19.201])
	by patchwork2.web.kernel.org (Postfix) with ESMTP id DC54AC0319
	for <patchwork-linux-arm@patchwork.kernel.org>;
	Wed, 31 Jul 2013 16:46:50 +0000 (UTC)
Received: from mail.kernel.org (localhost [127.0.0.1])
	by mail.kernel.org (Postfix) with ESMTP id 83FF52040C
	for <patchwork-linux-arm@patchwork.kernel.org>;
	Wed, 31 Jul 2013 16:46:49 +0000 (UTC)
Received: from casper.infradead.org (casper.infradead.org [85.118.1.10])
	(using TLSv1 with cipher DHE-RSA-AES256-SHA (256/256 bits))
	(No client certificate requested)
	by mail.kernel.org (Postfix) with ESMTPS id E9E4C2037E
	for <patchwork-linux-arm@patchwork.kernel.org>;
	Wed, 31 Jul 2013 16:46:47 +0000 (UTC)
Received: from merlin.infradead.org ([2001:4978:20e::2])
	by casper.infradead.org with esmtps (Exim 4.80.1 #2 (Red Hat Linux))
	id 1V4ZXZ-0001xL-Ge; Wed, 31 Jul 2013 16:46:21 +0000
Received: from localhost ([::1] helo=merlin.infradead.org)
	by merlin.infradead.org with esmtp (Exim 4.80.1 #2 (Red Hat Linux))
	id 1V4ZXN-0004ZD-Jk; Wed, 31 Jul 2013 16:46:09 +0000
Received: from devils.ext.ti.com ([198.47.26.153])
	by merlin.infradead.org with esmtps (Exim 4.80.1 #2 (Red Hat Linux))
	id 1V4ZWj-0004Ut-1e for linux-arm-kernel@lists.infradead.org;
	Wed, 31 Jul 2013 16:45:34 +0000
Received: from dlelxv90.itg.ti.com ([172.17.2.17])
	by devils.ext.ti.com (8.13.7/8.13.7) with ESMTP id r6VGj86p024884;
	Wed, 31 Jul 2013 11:45:08 -0500
Received: from DFLE73.ent.ti.com (dfle73.ent.ti.com [128.247.5.110])
	by dlelxv90.itg.ti.com (8.14.3/8.13.8) with ESMTP id r6VGj8xk024761;
	Wed, 31 Jul 2013 11:45:08 -0500
Received: from dlelxv22.itg.ti.com (172.17.1.197) by DFLE73.ent.ti.com
	(128.247.5.110) with Microsoft SMTP Server id 14.2.342.3;
	Wed, 31 Jul 2013 11:45:08 -0500
Received: from ula0393909.am.dhcp.ti.com (ula0393909.am.dhcp.ti.com
	[158.218.103.117])	by dlelxv22.itg.ti.com (8.13.8/8.13.8) with ESMTP
	id r6VGinbH018451;	Wed, 31 Jul 2013 11:45:08 -0500
From: Santosh Shilimkar <santosh.shilimkar@ti.com>
To: <linux-arm-kernel@lists.infradead.org>
Subject: [PATCH v2 4/6] ARM: mm: LPAE: Correct virt_to_phys patching for 64
	bit physical addresses
Date: Wed, 31 Jul 2013 12:44:44 -0400
Message-ID: <1375289086-5315-5-git-send-email-santosh.shilimkar@ti.com>
X-Mailer: git-send-email 1.7.9.5
In-Reply-To: <1375289086-5315-1-git-send-email-santosh.shilimkar@ti.com>
References: <1375289086-5315-1-git-send-email-santosh.shilimkar@ti.com>
MIME-Version: 1.0
X-CRM114-Version: 20100106-BlameMichelson ( TRE 0.8.0 (BSD) ) MR-646709E3 
X-CRM114-CacheID: sfid-20130731_124529_399117_3334A66C 
X-CRM114-Status: GOOD (  14.53  )
X-Spam-Score: -8.4 (--------)
Cc: Sricharan R <r.sricharan@ti.com>, Russell King <linux@arm.linux.org.uk>,
	Santosh Shilimkar <santosh.shilimkar@ti.com>,
	Nicolas Pitre <nico@linaro.org>
X-BeenThere: linux-arm-kernel@lists.infradead.org
X-Mailman-Version: 2.1.15
Precedence: list
List-Id: <linux-arm-kernel.lists.infradead.org>
List-Unsubscribe: 
 <http://lists.infradead.org/mailman/options/linux-arm-kernel>,
	<mailto:linux-arm-kernel-request@lists.infradead.org?subject=unsubscribe>
List-Archive: <http://lists.infradead.org/pipermail/linux-arm-kernel/>
List-Post: <mailto:linux-arm-kernel@lists.infradead.org>
List-Help: <mailto:linux-arm-kernel-request@lists.infradead.org?subject=help>
List-Subscribe: 
 <http://lists.infradead.org/mailman/listinfo/linux-arm-kernel>,
	<mailto:linux-arm-kernel-request@lists.infradead.org?subject=subscribe>
Sender: "linux-arm-kernel" <linux-arm-kernel-bounces@lists.infradead.org>
Errors-To: 
 linux-arm-kernel-bounces+patchwork-linux-arm=patchwork.kernel.org@lists.infradead.org
X-Spam-Status: No, score=-5.7 required=5.0 tests=BAYES_00, RCVD_IN_DNSWL_MED,
	RP_MATCHES_RCVD, UNPARSEABLE_RELAY autolearn=unavailable version=3.3.1
X-Spam-Checker-Version: SpamAssassin 3.3.1 (2010-03-16) on mail.kernel.org
X-Virus-Scanned: ClamAV using ClamSMTP

From: Sricharan R <r.sricharan@ti.com>

The current phys_to_virt patching mechanism does not work
for 64 bit physical addresses. Note that the constant used in add/sub
instructions is encoded into the last 8 bits of the opcode. So shift
the __pv_offset constant by 24 to get it into the correct place.

The v2p patching mechanism patches the higher 32 bits of the physical
address with a constant. While this is correct, on those platforms
where the lowmem addressable physical memory spans the 4GB boundary,
a carry bit can be produced as a result of the addition of the lower 32 bits.
This has to be taken into account and added into the upper 32 bits. The patched
__pv_offset and va are added in the lower 32 bits, where __pv_offset can be
in two's complement form when PA_START < VA_START, and that can result
in a false carry bit.

e.g. PA = 0x80000000 VA = 0xC0000000
__pv_offset = PA - VA = 0xC0000000 (2's complement)

So adding __pv_offset + VA should never result in a true overflow. In
order to differentiate a false carry from a true carry, __pv_offset is
extended to 64 bits, and its upper 32 bits will be 0xffffffff if __pv_offset
is in 2's complement form. We use this to insert 'mvn #0' instead of
'mov' while patching.

The above idea was suggested by Nicolas Pitre <nico@linaro.org> as
part of the review of the first version of the subject patch.

There is no corresponding change on the phys_to_virt() side, because
computations on the upper 32-bits would be discarded anyway.

Cc: Nicolas Pitre <nico@linaro.org>
Cc: Russell King <linux@arm.linux.org.uk>

Signed-off-by: Sricharan R <r.sricharan@ti.com>
Signed-off-by: Santosh Shilimkar <santosh.shilimkar@ti.com>
---
 arch/arm/include/asm/memory.h |   35 +++++++++++++++++++--
 arch/arm/kernel/armksyms.c    |    1 +
 arch/arm/kernel/head.S        |   68 +++++++++++++++++++++++++++++++----------
 3 files changed, 85 insertions(+), 19 deletions(-)

diff --git a/arch/arm/include/asm/memory.h b/arch/arm/include/asm/memory.h
index d9b96c65..abe879d 100644
--- a/arch/arm/include/asm/memory.h
+++ b/arch/arm/include/asm/memory.h
@@ -174,7 +174,9 @@
 #define __PV_BITS_31_24	0x81000000
 
 extern phys_addr_t (*arch_virt_to_idmap) (unsigned long x);
-extern unsigned long __pv_phys_offset;
+extern phys_addr_t __pv_phys_offset;
+extern phys_addr_t __pv_offset;
+
 #define PHYS_OFFSET __pv_phys_offset
 
 #define __pv_stub(from,to,instr,type)			\
@@ -186,10 +188,37 @@ extern unsigned long __pv_phys_offset;
 	: "=r" (to)					\
 	: "r" (from), "I" (type))
 
+#define __pv_stub_mov_hi(t)				\
+	__asm__ volatile("@ __pv_stub_mov\n"		\
+	"1:	mov	%R0, %1\n"			\
+	"	.pushsection .pv_table,\"a\"\n"		\
+	"	.long	1b\n"				\
+	"	.popsection\n"				\
+	: "=r" (t)					\
+	: "I" (__PV_BITS_31_24))
+
+#define __pv_add_carry_stub(x, y)			\
+	__asm__ volatile("@ __pv_add_carry_stub\n"	\
+	"1:	adds	%Q0, %1, %2\n"			\
+	"2:	adc	%R0, %R0, #0\n"			\
+	"	.pushsection .pv_table,\"a\"\n"		\
+	"	.long	1b\n"				\
+	"	.long	2b\n"				\
+	"	.popsection\n"				\
+	: "+r" (y)					\
+	: "r" (x), "I" (__PV_BITS_31_24)		\
+	: "cc")
+
 static inline phys_addr_t __virt_to_phys(unsigned long x)
 {
-	unsigned long t;
-	__pv_stub(x, t, "add", __PV_BITS_31_24);
+	phys_addr_t t = 0;
+
+	if (sizeof(phys_addr_t) == 4) {
+		__pv_stub(x, t, "add", __PV_BITS_31_24);
+	} else {
+		__pv_stub_mov_hi(t);
+		__pv_add_carry_stub(x, t);
+	}
 	return t;
 }
 
diff --git a/arch/arm/kernel/armksyms.c b/arch/arm/kernel/armksyms.c
index 60d3b73..1f031dd 100644
--- a/arch/arm/kernel/armksyms.c
+++ b/arch/arm/kernel/armksyms.c
@@ -155,4 +155,5 @@ EXPORT_SYMBOL(__gnu_mcount_nc);
 
 #ifdef CONFIG_ARM_PATCH_PHYS_VIRT
 EXPORT_SYMBOL(__pv_phys_offset);
+EXPORT_SYMBOL(__pv_offset);
 #endif
diff --git a/arch/arm/kernel/head.S b/arch/arm/kernel/head.S
index 9cf6063..aa3b0f7 100644
--- a/arch/arm/kernel/head.S
+++ b/arch/arm/kernel/head.S
@@ -545,17 +545,22 @@ ENDPROC(fixup_smp)
 	__HEAD
 __fixup_pv_table:
 	adr	r0, 1f
-	ldmia	r0, {r3-r5, r7}
+	ldmia	r0, {r3-r7}
+	cmp	r0, r3
+	mvn	ip, #0
 	sub	r3, r0, r3	@ PHYS_OFFSET - PAGE_OFFSET
 	add	r4, r4, r3	@ adjust table start address
 	add	r5, r5, r3	@ adjust table end address
-	add	r7, r7, r3	@ adjust __pv_phys_offset address
-	str	r8, [r7]	@ save computed PHYS_OFFSET to __pv_phys_offset
+	add	r6, r6, r3	@ adjust __pv_phys_offset address
+	add	r7, r7, r3	@ adjust __pv_offset address
+	str	r8, [r6]	@ save computed PHYS_OFFSET to __pv_phys_offset
+	strcc	ip, [r7, #4]	@ save to __pv_offset high bits
 	mov	r6, r3, lsr #24	@ constant for add/sub instructions
 	teq	r3, r6, lsl #24 @ must be 16MiB aligned
 THUMB(	it	ne		@ cross section branch )
 	bne	__error
-	str	r6, [r7, #4]	@ save to __pv_offset
+	lsl	r6, r6, #24
+	str	r6, [r7]	@ save to __pv_offset low bits
 	b	__fixup_a_pv_table
 ENDPROC(__fixup_pv_table)
 
@@ -564,6 +569,7 @@ ENDPROC(__fixup_pv_table)
 	.long	__pv_table_begin
 	.long	__pv_table_end
 2:	.long	__pv_phys_offset
+	.long	__pv_offset
 
 	.text
 __fixup_a_pv_table:
@@ -589,27 +595,53 @@ __fixup_a_pv_table:
 	bcc	1b
 	bx	lr
 #else
-	b	2f
+	adr	r0, 5f
+	b	4f
 1:	ldr	ip, [r7, r3]
-	bic	ip, ip, #0x000000ff
-	orr	ip, ip, r6	@ mask in offset bits 31-24
-	str	ip, [r7, r3]
-2:	cmp	r4, r5
+	lsr	r6, ip, #20		@ extract opcode
+	and	r6, r6, #0x3e
+	cmp	r6, #0x28		@ check for 'add' instruction
+	beq	2f
+	cmp	r6, #0x24		@ check for 'sub' instruction
+	beq	2f
+	cmp	r6, #0x2a		@ check for 'adc' instruction
+	beq	4f
+	ldr	r6, [r0]
+	add	r6, r6, r3
+	ldr	r6, [r6, #4]
+	mvn	r11, #0
+	cmp	r11, r6
+	and	ip, ip, #0xf000		@ Register encoded in inst
+	orrne	ip, ip, r6
+	ldreq	r6, [r0, #0x4]		@ mvn if _pv_offset high bits is 0xffffffff
+	ldrne	r6, [r0, #0x8]		@ mov otherwise
+	bic	r6, r6, #0xff
+	bic	r6, r6, #0xf00
+	orr	ip, ip, r6
+	b	3f
+2:	ldr	r6, [r0]
+	ldr	r6, [r6, r3]
+	bic	ip, ip, #0xff
+	orr	ip, ip, r6, lsr #24	@ mask in offset bits 31-24
+3:	str	ip, [r7, r3]
+4:	cmp	r4, r5
 	ldrcc	r7, [r4], #4	@ use branch for delay slot
 	bcc	1b
 	mov	pc, lr
 #endif
 ENDPROC(__fixup_a_pv_table)
 
+5:	.long __pv_offset
+	mvn	r0, #0
+	mov	r0, #0x81000000 @ For getting the correct 4 byte encoding
+
 ENTRY(fixup_pv_table)
-	stmfd	sp!, {r4 - r7, lr}
-	ldr	r2, 2f			@ get address of __pv_phys_offset
+	stmfd	sp!, {r0, r3 - r7, r11 - r12, lr}
 	mov	r3, #0			@ no offset
 	mov	r4, r0			@ r0 = table start
 	add	r5, r0, r1		@ r1 = table size
-	ldr	r6, [r2, #4]		@ get __pv_offset
 	bl	__fixup_a_pv_table
-	ldmfd	sp!, {r4 - r7, pc}
+	ldmfd	sp!, {r0, r3 - r7, r11 - r12, pc}
 ENDPROC(fixup_pv_table)
 
 	.align
@@ -619,10 +651,14 @@ ENDPROC(fixup_pv_table)
 	.globl	__pv_phys_offset
 	.type	__pv_phys_offset, %object
 __pv_phys_offset:
-	.long	0
-	.size	__pv_phys_offset, . - __pv_phys_offset
+	.quad	0
+
+	.data
+	.globl __pv_offset
+	.type __pv_offset, %object
 __pv_offset:
-	.long	0
+	.quad   0
+
 #endif
 
 #include "head-common.S"