
[v3,RESEND,05/17] ARM: LPAE: support 64-bit virt_to_phys patching

Message ID 1348242975-19184-6-git-send-email-cyril@ti.com (mailing list archive)
State New, archived

Commit Message

Cyril Chemparathy Sept. 21, 2012, 3:56 p.m. UTC
This patch adds support for 64-bit physical addresses in virt_to_phys()
patching.  This does not do real 64-bit add/sub, but instead patches in the
upper 32-bits of the phys_offset directly into the output of virt_to_phys.

There is no corresponding change on the phys_to_virt() side, because
computations on the upper 32-bits would be discarded anyway.

Signed-off-by: Cyril Chemparathy <cyril@ti.com>
---
 arch/arm/include/asm/memory.h |   38 ++++++++++++++++++++++++++++++++++++--
 arch/arm/kernel/head.S        |    4 ++++
 arch/arm/kernel/setup.c       |    2 +-
 3 files changed, 41 insertions(+), 3 deletions(-)
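
For reference, a minimal C sketch (not part of the patch) of what the
patched LPAE sequence effectively computes once __pv_offset and
__pv_phys_offset have been fixed up at boot:

    /*
     * Illustrative sketch only.  'lo' corresponds to the patched
     * "add %Q[to], %[from], %[imm]", 'hi' to "mov %R[to], %[imm]".
     */
    static inline phys_addr_t sketch_virt_to_phys(unsigned long x)
    {
        u32 lo = (u32)x + (u32)__pv_offset;
        u32 hi = (u32)(__pv_phys_offset >> 32);

        /*
         * No carry from lo into hi -- hence the assumption that all
         * lowmem physical addresses share the same upper 32 bits.
         */
        return ((phys_addr_t)hi << 32) | lo;
    }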

Comments

Nicolas Pitre Sept. 22, 2012, 3:24 p.m. UTC | #1
On Fri, 21 Sep 2012, Cyril Chemparathy wrote:

> This patch adds support for 64-bit physical addresses in virt_to_phys()
> patching.  This does not do real 64-bit add/sub, but instead patches in the
> upper 32-bits of the phys_offset directly into the output of virt_to_phys.
> 
> There is no corresponding change on the phys_to_virt() side, because
> computations on the upper 32-bits would be discarded anyway.
> 
> Signed-off-by: Cyril Chemparathy <cyril@ti.com>

Reviewed-by: Nicolas Pitre <nico@linaro.org>


> ---
>  arch/arm/include/asm/memory.h |   38 ++++++++++++++++++++++++++++++++++++--
>  arch/arm/kernel/head.S        |    4 ++++
>  arch/arm/kernel/setup.c       |    2 +-
>  3 files changed, 41 insertions(+), 3 deletions(-)
> 
> diff --git a/arch/arm/include/asm/memory.h b/arch/arm/include/asm/memory.h
> index 88ca206..f3e8f88 100644
> --- a/arch/arm/include/asm/memory.h
> +++ b/arch/arm/include/asm/memory.h
> @@ -154,13 +154,47 @@
>  #ifdef CONFIG_ARM_PATCH_PHYS_VIRT
>  
>  extern unsigned long	__pv_offset;
> -extern unsigned long	__pv_phys_offset;
> +extern phys_addr_t	__pv_phys_offset;
>  #define PHYS_OFFSET	__virt_to_phys(PAGE_OFFSET)
>  
>  static inline phys_addr_t __virt_to_phys(unsigned long x)
>  {
> -	unsigned long t;
> +	phys_addr_t t;
> +
> +#ifndef CONFIG_ARM_LPAE
>  	early_patch_imm8("add", t, x, __pv_offset, 0);
> +#else
> +	unsigned long __tmp;
> +
> +#ifndef __ARMEB__
> +#define PV_PHYS_HIGH	"(__pv_phys_offset + 4)"
> +#else
> +#define PV_PHYS_HIGH	"__pv_phys_offset"
> +#endif
> +
> +	early_patch_stub(
> +	/* type */		PATCH_IMM8,
> +	/* code */
> +		"ldr		%[tmp], =__pv_offset\n"
> +		"ldr		%[tmp], [%[tmp]]\n"
> +		"add		%Q[to], %[from], %[tmp]\n"
> +		"ldr		%[tmp], =" PV_PHYS_HIGH "\n"
> +		"ldr		%[tmp], [%[tmp]]\n"
> +		"mov		%R[to], %[tmp]\n",
> +	/* pad */		4,
> +	/* patch_data */
> +		".long		__pv_offset\n"
> +		"add		%Q[to], %[from], %[imm]\n"
> +		".long	"	PV_PHYS_HIGH "\n"
> +		"mov		%R[to], %[imm]\n",
> +	/* operands */
> +		: [to]	 "=r"	(t),
> +		  [tmp]	 "=&r"	(__tmp)
> +		: [from] "r"	(x),
> +		  [imm]	 "I"	(__IMM8),
> +			 "i"	(&__pv_offset),
> +			 "i"	(&__pv_phys_offset));
> +#endif
>  	return t;
>  }
>  
> diff --git a/arch/arm/kernel/head.S b/arch/arm/kernel/head.S
> index 69a3c09..61fb8df 100644
> --- a/arch/arm/kernel/head.S
> +++ b/arch/arm/kernel/head.S
> @@ -530,7 +530,11 @@ ENDPROC(__fixup_pv_offsets)
>  
>  	.align
>  1:	.long	.
> +#if defined(CONFIG_ARM_LPAE) && defined(__ARMEB__)
> +	.long	__pv_phys_offset + 4
> +#else
>  	.long	__pv_phys_offset
> +#endif
>  	.long	__pv_offset
>  	.long	PAGE_OFFSET
>  #endif
> diff --git a/arch/arm/kernel/setup.c b/arch/arm/kernel/setup.c
> index 59e0f57..edb4f42 100644
> --- a/arch/arm/kernel/setup.c
> +++ b/arch/arm/kernel/setup.c
> @@ -159,7 +159,7 @@ DEFINE_PER_CPU(struct cpuinfo_arm, cpu_data);
>   * The initializers here prevent these from landing in the BSS section.
>   */
>  unsigned long __pv_offset = 0xdeadbeef;
> -unsigned long __pv_phys_offset = 0xdeadbeef;
> +phys_addr_t   __pv_phys_offset = 0xdeadbeef;
>  EXPORT_SYMBOL(__pv_phys_offset);
>  
>  #endif
> -- 
> 1.7.9.5
>
Catalin Marinas Sept. 24, 2012, 3:13 p.m. UTC | #2
On Fri, Sep 21, 2012 at 04:56:03PM +0100, Cyril Chemparathy wrote:
> This patch adds support for 64-bit physical addresses in virt_to_phys()
> patching.  This does not do real 64-bit add/sub, but instead patches in the
> upper 32-bits of the phys_offset directly into the output of virt_to_phys.

So this assumes that for the kernel linear mapping, all the physical
addresses have the same upper 32 bits. That's a good optimisation, but I
haven't seen this check when calculating lowmem in sanity_check_meminfo.
Someone may build a platform with memory starting at 3GB and going across
the 4GB limit.
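
A hypothetical sketch of the kind of check being asked for here (not from
this series; helper name invented for illustration, assuming a 64-bit
phys_addr_t):

    /* True if a memory bank spans physical addresses whose upper 32 bits differ. */
    static bool spans_4g_boundary(phys_addr_t start, phys_addr_t size)
    {
        return (start >> 32) != ((start + size - 1) >> 32);
    }

    /* e.g. while walking the banks in sanity_check_meminfo(): */
    if (spans_4g_boundary(bank->start, bank->size))
        pr_crit("RAM bank crosses a 4GB boundary; v2p patching cannot handle it\n");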
Nicolas Pitre Sept. 24, 2012, 3:56 p.m. UTC | #3
On Mon, 24 Sep 2012, Catalin Marinas wrote:

> On Fri, Sep 21, 2012 at 04:56:03PM +0100, Cyril Chemparathy wrote:
> > This patch adds support for 64-bit physical addresses in virt_to_phys()
> > patching.  This does not do real 64-bit add/sub, but instead patches in the
> > upper 32-bits of the phys_offset directly into the output of virt_to_phys.
> 
> So this assumes that for the kernel linear mapping, all the physical
> addresses have the same upper 32-bit. That's a good optimisation but I
> haven't seen this check when calculating lowmem in sanity_check_meminfo.
> Someone may build platform with memory starting at 3GB and going across
> the 4GB limit.

Good point.  We better get an early warning if that happens.


Nicolas
Dave Martin Sept. 24, 2012, 4:31 p.m. UTC | #4
On Fri, Sep 21, 2012 at 11:56:03AM -0400, Cyril Chemparathy wrote:
> This patch adds support for 64-bit physical addresses in virt_to_phys()
> patching.  This does not do real 64-bit add/sub, but instead patches in the
> upper 32-bits of the phys_offset directly into the output of virt_to_phys.
> 
> There is no corresponding change on the phys_to_virt() side, because
> computations on the upper 32-bits would be discarded anyway.
> 
> Signed-off-by: Cyril Chemparathy <cyril@ti.com>
> ---
>  arch/arm/include/asm/memory.h |   38 ++++++++++++++++++++++++++++++++++++--
>  arch/arm/kernel/head.S        |    4 ++++
>  arch/arm/kernel/setup.c       |    2 +-
>  3 files changed, 41 insertions(+), 3 deletions(-)
> 
> diff --git a/arch/arm/include/asm/memory.h b/arch/arm/include/asm/memory.h
> index 88ca206..f3e8f88 100644
> --- a/arch/arm/include/asm/memory.h
> +++ b/arch/arm/include/asm/memory.h
> @@ -154,13 +154,47 @@
>  #ifdef CONFIG_ARM_PATCH_PHYS_VIRT
>  
>  extern unsigned long	__pv_offset;
> -extern unsigned long	__pv_phys_offset;
> +extern phys_addr_t	__pv_phys_offset;
>  #define PHYS_OFFSET	__virt_to_phys(PAGE_OFFSET)
>  
>  static inline phys_addr_t __virt_to_phys(unsigned long x)
>  {
> -	unsigned long t;
> +	phys_addr_t t;
> +
> +#ifndef CONFIG_ARM_LPAE
>  	early_patch_imm8("add", t, x, __pv_offset, 0);
> +#else
> +	unsigned long __tmp;
> +
> +#ifndef __ARMEB__
> +#define PV_PHYS_HIGH	"(__pv_phys_offset + 4)"
> +#else
> +#define PV_PHYS_HIGH	"__pv_phys_offset"
> +#endif
> +
> +	early_patch_stub(
> +	/* type */		PATCH_IMM8,
> +	/* code */
> +		"ldr		%[tmp], =__pv_offset\n"
> +		"ldr		%[tmp], [%[tmp]]\n"
> +		"add		%Q[to], %[from], %[tmp]\n"
> +		"ldr		%[tmp], =" PV_PHYS_HIGH "\n"
> +		"ldr		%[tmp], [%[tmp]]\n"
> +		"mov		%R[to], %[tmp]\n",
> +	/* pad */		4,
> +	/* patch_data */
> +		".long		__pv_offset\n"
> +		"add		%Q[to], %[from], %[imm]\n"
> +		".long	"	PV_PHYS_HIGH "\n"
> +		"mov		%R[to], %[imm]\n",
> +	/* operands */
> +		: [to]	 "=r"	(t),
> +		  [tmp]	 "=&r"	(__tmp)
> +		: [from] "r"	(x),
> +		  [imm]	 "I"	(__IMM8),
> +			 "i"	(&__pv_offset),
> +			 "i"	(&__pv_phys_offset));

So, the actual offset we can apply is:

__pv_phys_offset + __pv_offset

where:

 * the high 32 bits of the address being fixed up are assumed to be 0
   (true, because the kernel is initially always fixed up to an address
   range <4GB)

 * the low 32 bits of __pv_phys_offset are assumed to be 0 (?)

 * the full offset is of the form

        ([..0..]XX[..0..] << 32) | [..0..]YY[..0..] 

Is this intentional?  It seems like a rather weird constraint...  but
it may be sensible.  PAGE_OFFSET is probably 0xc0000000 or 0x80000000,
(so YY can handle that) and the actual RAM above 4GB will likely be
huge and aligned on some enormous boundary in such situations (so that
XX can handle that).

So long as the low RAM alias is not misaligned relative to the high alias
on a finer granularity than 16MB (so that YY = (PAGE_OFFSET +/- the
misalignment) is still a legal immediate), I guess there should not be a
problem.
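
The "legal immediate" constraint referred to here is the A32
data-processing immediate rule: an 8-bit value rotated right by an even
amount.  A minimal sketch of such a check (not from this series; name
invented for illustration):

    static bool is_arm_mod_imm(u32 val)
    {
        unsigned int rot;

        for (rot = 0; rot < 32; rot += 2) {
            /* rotating val left by 'rot' undoes a rotate-right encoding */
            u32 v = (val << rot) | (val >> ((32 - rot) & 31));

            if (v <= 0xff)
                return true;
        }
        return false;
    }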

[...]

Cheers
---Dave
Nicolas Pitre Sept. 24, 2012, 4:51 p.m. UTC | #5
On Mon, 24 Sep 2012, Dave Martin wrote:

> On Fri, Sep 21, 2012 at 11:56:03AM -0400, Cyril Chemparathy wrote:
> > This patch adds support for 64-bit physical addresses in virt_to_phys()
> > patching.  This does not do real 64-bit add/sub, but instead patches in the
> > upper 32-bits of the phys_offset directly into the output of virt_to_phys.
> > 
> > There is no corresponding change on the phys_to_virt() side, because
> > computations on the upper 32-bits would be discarded anyway.
> > 
> > Signed-off-by: Cyril Chemparathy <cyril@ti.com>
> > ---
> >  arch/arm/include/asm/memory.h |   38 ++++++++++++++++++++++++++++++++++++--
> >  arch/arm/kernel/head.S        |    4 ++++
> >  arch/arm/kernel/setup.c       |    2 +-
> >  3 files changed, 41 insertions(+), 3 deletions(-)
> > 
> > diff --git a/arch/arm/include/asm/memory.h b/arch/arm/include/asm/memory.h
> > index 88ca206..f3e8f88 100644
> > --- a/arch/arm/include/asm/memory.h
> > +++ b/arch/arm/include/asm/memory.h
> > @@ -154,13 +154,47 @@
> >  #ifdef CONFIG_ARM_PATCH_PHYS_VIRT
> >  
> >  extern unsigned long	__pv_offset;
> > -extern unsigned long	__pv_phys_offset;
> > +extern phys_addr_t	__pv_phys_offset;
> >  #define PHYS_OFFSET	__virt_to_phys(PAGE_OFFSET)
> >  
> >  static inline phys_addr_t __virt_to_phys(unsigned long x)
> >  {
> > -	unsigned long t;
> > +	phys_addr_t t;
> > +
> > +#ifndef CONFIG_ARM_LPAE
> >  	early_patch_imm8("add", t, x, __pv_offset, 0);
> > +#else
> > +	unsigned long __tmp;
> > +
> > +#ifndef __ARMEB__
> > +#define PV_PHYS_HIGH	"(__pv_phys_offset + 4)"
> > +#else
> > +#define PV_PHYS_HIGH	"__pv_phys_offset"
> > +#endif
> > +
> > +	early_patch_stub(
> > +	/* type */		PATCH_IMM8,
> > +	/* code */
> > +		"ldr		%[tmp], =__pv_offset\n"
> > +		"ldr		%[tmp], [%[tmp]]\n"
> > +		"add		%Q[to], %[from], %[tmp]\n"
> > +		"ldr		%[tmp], =" PV_PHYS_HIGH "\n"
> > +		"ldr		%[tmp], [%[tmp]]\n"
> > +		"mov		%R[to], %[tmp]\n",
> > +	/* pad */		4,
> > +	/* patch_data */
> > +		".long		__pv_offset\n"
> > +		"add		%Q[to], %[from], %[imm]\n"
> > +		".long	"	PV_PHYS_HIGH "\n"
> > +		"mov		%R[to], %[imm]\n",
> > +	/* operands */
> > +		: [to]	 "=r"	(t),
> > +		  [tmp]	 "=&r"	(__tmp)
> > +		: [from] "r"	(x),
> > +		  [imm]	 "I"	(__IMM8),
> > +			 "i"	(&__pv_offset),
> > +			 "i"	(&__pv_phys_offset));
> 
> So, the actual offset we can apply is:
> 
> __pv_phys_offset + __pv_offset
> 
> where:
> 
>  * the high 32 bits of the address being fixed up are assumed to be 0
>    (true, because the kernel is initially always fixed up to an address
>    range <4GB)

The fixed up address is a virtual address.  So yes, by definition it 
must be <4GB on ARM32.

>  * the low 32 bits of __pv_phys_offset are assumed to be 0 (?)

It is typically representable with a shifted 8 bit immediate but not 
necessarily 0, just like on platforms without LPAE.

>  * the full offset is of the form
> 
>         ([..0..]XX[..0..] << 32) | [..0..]YY[..0..] 
> 
> Is this intentional?  It seems like a rather weird constraint...  but
> it may be sensible.  PAGE_OFFSET is probably 0xc0000000 or 0x80000000,
> (so YY can handle that) and the actual RAM above 4GB will likely be
> huge and aligned on some enormous boundary in such situations (so that
> XX can handle that).
> 
> So long as the low RAM alias is not misaligned relative to the high alias
> on a finer granularity than 16MB (so that YY = (PAGE_OFFSET +/- the
> misalignment) is still a legal immediate), I guess there should not be a
> problem.

There are already similar constraints for the current 
ARM_PATCH_PHYS_VIRT code.  So nothing really new here.


Nicolas
Cyril Chemparathy Sept. 24, 2012, 8:59 p.m. UTC | #6
On 9/24/2012 11:56 AM, Nicolas Pitre wrote:
> On Mon, 24 Sep 2012, Catalin Marinas wrote:
>
>> On Fri, Sep 21, 2012 at 04:56:03PM +0100, Cyril Chemparathy wrote:
>>> This patch adds support for 64-bit physical addresses in virt_to_phys()
>>> patching.  This does not do real 64-bit add/sub, but instead patches in the
>>> upper 32-bits of the phys_offset directly into the output of virt_to_phys.
>>
>> So this assumes that for the kernel linear mapping, all the physical
>> addresses have the same upper 32-bit. That's a good optimisation but I
>> haven't seen this check when calculating lowmem in sanity_check_meminfo.
>> Someone may build platform with memory starting at 3GB and going across
>> the 4GB limit.
>
> Good point.  We better get an early warning if that happens.
>

Thanks.

I'm thinking of splitting the bank at the 32-bit boundary in such an 
event, assuming that the remaining memory should be usable as highmem.
Nicolas Pitre Sept. 24, 2012, 9:20 p.m. UTC | #7
On Mon, 24 Sep 2012, Cyril Chemparathy wrote:

> On 9/24/2012 11:56 AM, Nicolas Pitre wrote:
> > On Mon, 24 Sep 2012, Catalin Marinas wrote:
> > 
> > > On Fri, Sep 21, 2012 at 04:56:03PM +0100, Cyril Chemparathy wrote:
> > > > This patch adds support for 64-bit physical addresses in virt_to_phys()
> > > > patching.  This does not do real 64-bit add/sub, but instead patches in
> > > > the
> > > > upper 32-bits of the phys_offset directly into the output of
> > > > virt_to_phys.
> > > 
> > > So this assumes that for the kernel linear mapping, all the physical
> > > addresses have the same upper 32-bit. That's a good optimisation but I
> > > haven't seen this check when calculating lowmem in sanity_check_meminfo.
> > > Someone may build platform with memory starting at 3GB and going across
> > > the 4GB limit.
> > 
> > Good point.  We better get an early warning if that happens.
> > 
> 
> Thanks.
> 
> I'm thinking of splitting the bank at the 32-bit boundary in such an event,
> assuming that the remaining memory should be usable as highmem.

No.  That's not the point here.

Let's suppose a system with 1GB of physical RAM starting at 0xe0000000.

In this case there is no need for highmem.  However the v2p patching 
code in the LPAE case assumes the high bits of a physical address are 
always the same which wouldn't be the case in this hypothetical example.

We want to make sure the kernel won't boot if a system with physical RAM 
crossing the 4GB address mark is encountered.


Nicolas
Catalin Marinas Sept. 24, 2012, 9:52 p.m. UTC | #8
On 24 September 2012 22:20, Nicolas Pitre <nicolas.pitre@linaro.org> wrote:
> On Mon, 24 Sep 2012, Cyril Chemparathy wrote:
>
>> On 9/24/2012 11:56 AM, Nicolas Pitre wrote:
>> > On Mon, 24 Sep 2012, Catalin Marinas wrote:
>> >
>> > > On Fri, Sep 21, 2012 at 04:56:03PM +0100, Cyril Chemparathy wrote:
>> > > > This patch adds support for 64-bit physical addresses in virt_to_phys()
>> > > > patching.  This does not do real 64-bit add/sub, but instead patches in
>> > > > the
>> > > > upper 32-bits of the phys_offset directly into the output of
>> > > > virt_to_phys.
>> > >
>> > > So this assumes that for the kernel linear mapping, all the physical
>> > > addresses have the same upper 32-bit. That's a good optimisation but I
>> > > haven't seen this check when calculating lowmem in sanity_check_meminfo.
>> > > Someone may build platform with memory starting at 3GB and going across
>> > > the 4GB limit.
>> >
>> > Good point.  We better get an early warning if that happens.
>> >
>>
>> Thanks.
>>
>> I'm thinking of splitting the bank at the 32-bit boundary in such an event,
>> assuming that the remaining memory should be usable as highmem.
>
> No.  That's not the point here.
>
> Let's suppose a system with 1GB of physical RAM starting at 0xe0000000.
>
> In this case there is no need for highmem.  However the v2p patching
> code in the LPAE case assumes the high bits of a physical address are
> always the same which wouldn't be the case in this hypothetical example.
>
> We want to make sure the kernel won't boot if a system with physical RAM
> crossing the 4GB address mark is encountered.

I think that's too restrictive. The case with 1 or 2GB below the 4GB limit
and the rest of the RAM above is a valid one. For such configurations
we just limit the lowmem to the memory within 4GB and leave the rest
as highmem. Another example is with RAM between 6GB and 10GB.

So we can consider as lowmem all the addresses with the same upper
32 bits as the start of the first memory block.

Of course, if the lowmem turns out to be very small because of a weird
address, the kernel will probably fail shortly anyway but I don't
think we'll have such configurations.
Cyril Chemparathy Sept. 24, 2012, 9:53 p.m. UTC | #9
On 9/24/2012 5:20 PM, Nicolas Pitre wrote:
> On Mon, 24 Sep 2012, Cyril Chemparathy wrote:
>
>> On 9/24/2012 11:56 AM, Nicolas Pitre wrote:
>>> On Mon, 24 Sep 2012, Catalin Marinas wrote:
>>>
>>>> On Fri, Sep 21, 2012 at 04:56:03PM +0100, Cyril Chemparathy wrote:
>>>>> This patch adds support for 64-bit physical addresses in virt_to_phys()
>>>>> patching.  This does not do real 64-bit add/sub, but instead patches in
>>>>> the
>>>>> upper 32-bits of the phys_offset directly into the output of
>>>>> virt_to_phys.
>>>>
>>>> So this assumes that for the kernel linear mapping, all the physical
>>>> addresses have the same upper 32-bit. That's a good optimisation but I
>>>> haven't seen this check when calculating lowmem in sanity_check_meminfo.
>>>> Someone may build platform with memory starting at 3GB and going across
>>>> the 4GB limit.
>>>
>>> Good point.  We better get an early warning if that happens.
>>>
>>
>> Thanks.
>>
>> I'm thinking of splitting the bank at the 32-bit boundary in such an event,
>> assuming that the remaining memory should be usable as highmem.
>
> No.  That's not the point here.
>
> Let's suppose a system with 1GB of physical RAM starting at 0xe0000000.
>
> In this case there is no need for highmem.  However the v2p patching
> code in the LPAE case assumes the high bits of a physical address are
> always the same which wouldn't be the case in this hypothetical example.
>
> We want to make sure the kernel won't boot if a system with physical RAM
> crossing the 4GB address mark is encountered.
>

Why brick the box if memory crosses over the 4G mark?

Since this constraint (constant upper 32 bits of PA) applies only to
lowmem, why couldn't this hypothetical system run with 256M of lowmem,
and the remaining 768M used as highmem (if enabled)?

IOW, why can't we treat this constraint just like the vmalloc limit 
constraint in sanity_check_meminfo(), by splitting the bank at the limit?
Russell King - ARM Linux Sept. 24, 2012, 10:06 p.m. UTC | #10
On Mon, Sep 24, 2012 at 04:59:50PM -0400, Cyril Chemparathy wrote:
> I'm thinking of splitting the bank at the 32-bit boundary in such an  
> event, assuming that the remaining memory should be usable as highmem.

I'd say, don't make it any more complicated than it has to be.  Don't
overdesign, especially in this area, and from what I read in this
thread, that's the danger here.

For the time being, we have two classes of systems: those where they
have a decent amount of RAM located below the 4GB mark, and yours which
have all their RAM above the 4GB mark - and again a decent size
contained within any naturally aligned 4GB chunk of the address space.

So, at the moment, making the restriction that the top 32 bits of the
address space are constant _is_ the right thing to do.

And given that, I'd suggest that we reduce the size of these changes.
We don't need to go and rewrite the P2V patching stuff - we can merely
extend it.

The p->v translation can work just fine by ignoring the top 32-bits and
adding the offset, as is done today.

The v->p translation can also work in the reverse way, except that we
need to set the high order bits.  That can be done with a mov or movw
instruction, which the v:p patching code _can_ recognise and do the
appropriate thing.

And let's not forget that the range over which the v:p translation has
to work is just the lowmem only, which on most normal configurations
is no more than 1GB.  Though that can be resolved if the high-order
bits need to change by doing this in the v->p translation:

	movw	%hi, #0xVWXY		@ fixup
	adds	%lo, %lo, #offset	@ fixup
	adc	%hi, %hi, #0

_if_ we need to.
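
A hypothetical sketch (not from this series) of how a fixup loop could
patch the imm16 field of the A32 MOVW proposed above; a real
implementation would also need the Thumb-2 encoding and I-cache
maintenance for the patched word:

    /* A32 MOVW: cond | 0011 0000 | imm4 | Rd | imm12, with imm16 = imm4:imm12 */
    static void patch_movw_imm16(u32 *insn, u16 imm16)
    {
        u32 val = *insn;

        val &= 0xfff0f000;                        /* clear imm4 [19:16], imm12 [11:0] */
        val |= ((u32)(imm16 >> 12) & 0xf) << 16;  /* imm4  = imm16[15:12] */
        val |= imm16 & 0xfff;                     /* imm12 = imm16[11:0]  */
        *insn = val;
    }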
Nicolas Pitre Sept. 24, 2012, 10:32 p.m. UTC | #11
On Mon, 24 Sep 2012, Catalin Marinas wrote:

> On 24 September 2012 22:20, Nicolas Pitre <nicolas.pitre@linaro.org> wrote:
> > On Mon, 24 Sep 2012, Cyril Chemparathy wrote:
> >
> >> On 9/24/2012 11:56 AM, Nicolas Pitre wrote:
> >> > On Mon, 24 Sep 2012, Catalin Marinas wrote:
> >> >
> >> > > On Fri, Sep 21, 2012 at 04:56:03PM +0100, Cyril Chemparathy wrote:
> >> > > > This patch adds support for 64-bit physical addresses in virt_to_phys()
> >> > > > patching.  This does not do real 64-bit add/sub, but instead patches in
> >> > > > the
> >> > > > upper 32-bits of the phys_offset directly into the output of
> >> > > > virt_to_phys.
> >> > >
> >> > > So this assumes that for the kernel linear mapping, all the physical
> >> > > addresses have the same upper 32-bit. That's a good optimisation but I
> >> > > haven't seen this check when calculating lowmem in sanity_check_meminfo.
> >> > > Someone may build platform with memory starting at 3GB and going across
> >> > > the 4GB limit.
> >> >
> >> > Good point.  We better get an early warning if that happens.
> >> >
> >>
> >> Thanks.
> >>
> >> I'm thinking of splitting the bank at the 32-bit boundary in such an event,
> >> assuming that the remaining memory should be usable as highmem.
> >
> > No.  That's not the point here.
> >
> > Let's suppose a system with 1GB of physical RAM starting at 0xe0000000.
> >
> > In this case there is no need for highmem.  However the v2p patching
> > code in the LPAE case assumes the high bits of a physical address are
> > always the same which wouldn't be the case in this hypothetical example.
> >
> > We want to make sure the kernel won't boot if a system with physical RAM
> > crossing the 4GB address mark is encountered.
> 
> I think that's too restrictive. The case with 1 or 2GB below 4GB limit
> and the rest of the RAM above is a valid one.

Valid: sure.  Likely: probably not.  That would complicate address 
decoding in hardware for little gain.

> For such configurations
> we just limit the lowmem to the memory within 4GB and leave the rest
> as highmem.

We don't want to limit lowmem.  The lowmem is very precious memory and 
we want to maximize its size.  In that case it is probably best to 
implement a real patchable 64-bit addition rather than artificially 
restricting the lowmem size.


Nicolas
Russell King - ARM Linux Sept. 24, 2012, 10:40 p.m. UTC | #12
On Mon, Sep 24, 2012 at 06:32:22PM -0400, Nicolas Pitre wrote:
> We don't want to limit lowmem.  The lowmem is very precious memory and 
> we want to maximize its size.  In that case it is probably best to 
> implement a real patchable 64-bit addition rather than artificially 
> restricting the lowmem size.

You don't need to.  You can solve the V->P translation like this:

        movw    %hi, #0xVWXY            @ fixup
        adds    %lo, %lo, #offset       @ fixup
        adc     %hi, %hi, #0

which is probably the simplest way to do the fixup - keep the existing
fixup code, and add support for that movw instruction.  And that will
work across any 4GB boundary just fine (we won't have more than 4GB of
virtual address space on a 32-bit CPU anyway, so we only have to worry
about one carry.)

And the P->V translation is truncating anyway, so that is just:

	sub	%lo, %lo, #offset

and nothing more.
Cyril Chemparathy Sept. 24, 2012, 10:53 p.m. UTC | #13
On 9/24/2012 6:40 PM, Russell King - ARM Linux wrote:
> On Mon, Sep 24, 2012 at 06:32:22PM -0400, Nicolas Pitre wrote:
>> We don't want to limit lowmem.  The lowmem is very precious memory and
>> we want to maximize its size.  In that case it is probably best to
>> implement a real patchable 64-bit addition rather than artificially
>> restricting the lowmem size.
>
> You don't need to.  You can solve the V->P translation like this:
>
>          movw    %hi, #0xVWXY            @ fixup
>          adds    %lo, %lo, #offset       @ fixup
>          adc     %hi, %hi, #0
>
> which is probably the simplest way to do the fixup - keep the existing
> fixup code, and add support for that movw instruction.  And that will
> work across any 4GB boundary just fine (we won't have more than 4GB of
> virtual address space on a 32-bit CPU anyway, so we only have to worry
> about one carry.)
>
> And the P->V translation is truncating anyway, so that is just:
>
> 	sub	%lo, %lo, #offset
>
> and nothing more.
>

The existing fixup code runs too early - before we get a chance to 
switch over from the boot-time sub-4G alias to the real physical address 
space.  Readability aside, part of the reasoning behind the C 
re-implementation was to defer the patch application until a later point 
in the boot process.

That said, we could patch once at boot, and then re-patch at switch 
over, I guess.  Is that what you're recommending?
Nicolas Pitre Sept. 24, 2012, 10:55 p.m. UTC | #14
On Mon, 24 Sep 2012, Russell King - ARM Linux wrote:

> On Mon, Sep 24, 2012 at 06:32:22PM -0400, Nicolas Pitre wrote:
> > We don't want to limit lowmem.  The lowmem is very precious memory and 
> > we want to maximize its size.  In that case it is probably best to 
> > implement a real patchable 64-bit addition rather than artificially 
> > restricting the lowmem size.
> 
> You don't need to.  You can solve the V->P translation like this:
> 
>         movw    %hi, #0xVWXY            @ fixup
>         adds    %lo, %lo, #offset       @ fixup
>         adc     %hi, %hi, #0

That's exactly what I mean when I say a real 64-bit addition.  Despite 
one of the arguments being a 32 bit value, the overflow needs to be 
carried to the high part.

> which is probably the simplest way to do the fixup - keep the existing
> fixup code, and add support for that movw instruction.  And that will
> work across any 4GB boundary just fine (we won't have more than 4GB of
> virtual address space on a 32-bit CPU anyway, so we only have to worry
> about one carry.)

Exactly.

> And the P->V translation is truncating anyway, so that is just:
> 
> 	sub	%lo, %lo, #offset
> 
> and nothing more.

Yes, that's what the posted code does already.


Nicolas
Nicolas Pitre Sept. 24, 2012, 11:03 p.m. UTC | #15
On Mon, 24 Sep 2012, Cyril Chemparathy wrote:

> On 9/24/2012 6:40 PM, Russell King - ARM Linux wrote:
> > On Mon, Sep 24, 2012 at 06:32:22PM -0400, Nicolas Pitre wrote:
> > > We don't want to limit lowmem.  The lowmem is very precious memory and
> > > we want to maximize its size.  In that case it is probably best to
> > > implement a real patchable 64-bit addition rather than artificially
> > > restricting the lowmem size.
> > 
> > You don't need to.  You can solve the V->P translation like this:
> > 
> >          movw    %hi, #0xVWXY            @ fixup
> >          adds    %lo, %lo, #offset       @ fixup
> >          adc     %hi, %hi, #0
> > 
> > which is probably the simplest way to do the fixup - keep the existing
> > fixup code, and add support for that movw instruction.  And that will
> > work across any 4GB boundary just fine (we won't have more than 4GB of
> > virtual address space on a 32-bit CPU anyway, so we only have to worry
> > about one carry.)
> > 
> > And the P->V translation is truncating anyway, so that is just:
> > 
> > 	sub	%lo, %lo, #offset
> > 
> > and nothing more.
> > 
> 
> The existing fixup code runs too early - before we get a chance to switch over
> from the boot-time sub-4G alias to the real physical address space.
> Readability aside, part of the reasoning behind the C re-implementation was to
> defer the patch application until a later point in the boot process.
> 
> That said, we could patch once at boot, and then re-patch at switch over, I
> guess.  Is that what you're recommending?

I don't think that would be wise.  Let's keep only one patching pass 
with only one implementation.

It is true that the C version is more complex, mainly due to the fact 
that the instruction fixup code is more comprehensive.  However, it is 
certainly easier to maintain in the long run, and much easier to debug.

The approach where a suboptimal stub is used until the actual patching 
takes place is pretty nice.  That helps keep the boot code small and 
simple.


Nicolas
Russell King - ARM Linux Sept. 24, 2012, 11:08 p.m. UTC | #16
On Mon, Sep 24, 2012 at 06:53:10PM -0400, Cyril Chemparathy wrote:
> The existing fixup code runs too early - before we get a chance to  
> switch over from the boot-time sub-4G alias to the real physical address  
> space.  Readability aside, part of the reasoning behind the C  
> re-implementation was to defer the patch application until a later point  
> in the boot process.
>
> That said, we could patch once at boot, and then re-patch at switch  
> over, I guess.  Is that what you're recommending?

That would solve the problem, and probably in the least disruptive
manner too, since the existing solution will continue to work, and
we'd only need to fixup the movw instructions if we're building in
support for the rare 'high-lowmem' situation.

We don't even need any different module marking - the 'high-lowmem'
modules don't need their movw instructions fixed.
Dave Martin Sept. 25, 2012, 12:55 p.m. UTC | #17
On Mon, Sep 24, 2012 at 06:55:25PM -0400, Nicolas Pitre wrote:
> On Mon, 24 Sep 2012, Russell King - ARM Linux wrote:
> 
> > On Mon, Sep 24, 2012 at 06:32:22PM -0400, Nicolas Pitre wrote:
> > > We don't want to limit lowmem.  The lowmem is very precious memory and 
> > > we want to maximize its size.  In that case it is probably best to 
> > > implement a real patchable 64-bit addition rather than artificially 
> > > restricting the lowmem size.
> > 
> > You don't need to.  You can solve the V->P translation like this:
> > 
> >         movw    %hi, #0xVWXY            @ fixup
> >         adds    %lo, %lo, #offset       @ fixup
> >         adc     %hi, %hi, #0
> 
> That's exactly what I mean when I say a real 64-bit addition.  Despite 
> one of the arguments being a 32 bit value, the overflow needs to be 
> carried to the high part.
> 
> > which is probably the simplest way to do the fixup - keep the existing
> > fixup code, and add support for that movw instruction.  And that will
> > work across any 4GB boundary just fine (we won't have more than 4GB of
> > virtual address space on a 32-bit CPU anyway, so we only have to worry
> > about one carry.)
> 
> Exactly.
> 
> > And the P->V translation is truncating anyway, so that is just:
> > 
> > 	sub	%lo, %lo, #offset
> > 
> > and nothing more.
> 
> Yes, that's what the posted code does already.

Taking a step back, this is all feeling very complex.  Do we actually need
to solve the P2V patching problem for this platform?


For 32-bit platforms, especially pre-v6, small TLBs and caches, lack of
branch prediction etc. may make the performance boost from P2V patching
more significant, but I have some doubts about whether it is really
worth it for more heavyweight v7+LPAE platforms.

Would it be worth trying a simple macro implementation that reads a
global variable for this case, and investigating the performance impact?

For realistic workloads running on newer CPUs, I suspect that any costs
from this may disappear into the noise...
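
A minimal sketch of the non-patched alternative being suggested, assuming
a single 64-bit offset variable set once at boot (__lpae_phys_offset is a
made-up name, not from this series):

    extern phys_addr_t __lpae_phys_offset;    /* PHYS_OFFSET, set at boot */

    static inline phys_addr_t __virt_to_phys(unsigned long x)
    {
        return __lpae_phys_offset + (x - PAGE_OFFSET);
    }

    static inline unsigned long __phys_to_virt(phys_addr_t x)
    {
        return (unsigned long)(x - __lpae_phys_offset) + PAGE_OFFSET;
    }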

Cheers
---Dave
Catalin Marinas Sept. 25, 2012, 1:53 p.m. UTC | #18
On Mon, Sep 24, 2012 at 11:32:22PM +0100, Nicolas Pitre wrote:
> On Mon, 24 Sep 2012, Catalin Marinas wrote:
> 
> > On 24 September 2012 22:20, Nicolas Pitre <nicolas.pitre@linaro.org> wrote:
> > > On Mon, 24 Sep 2012, Cyril Chemparathy wrote:
> > >
> > >> On 9/24/2012 11:56 AM, Nicolas Pitre wrote:
> > >> > On Mon, 24 Sep 2012, Catalin Marinas wrote:
> > >> >
> > >> > > On Fri, Sep 21, 2012 at 04:56:03PM +0100, Cyril Chemparathy wrote:
> > >> > > > This patch adds support for 64-bit physical addresses in virt_to_phys()
> > >> > > > patching.  This does not do real 64-bit add/sub, but instead patches in
> > >> > > > the
> > >> > > > upper 32-bits of the phys_offset directly into the output of
> > >> > > > virt_to_phys.
> > >> > >
> > >> > > So this assumes that for the kernel linear mapping, all the physical
> > >> > > addresses have the same upper 32-bit. That's a good optimisation but I
> > >> > > haven't seen this check when calculating lowmem in sanity_check_meminfo.
> > >> > > Someone may build platform with memory starting at 3GB and going across
> > >> > > the 4GB limit.
> > >> >
> > >> > Good point.  We better get an early warning if that happens.
> > >> >
> > >>
> > >> Thanks.
> > >>
> > >> I'm thinking of splitting the bank at the 32-bit boundary in such an event,
> > >> assuming that the remaining memory should be usable as highmem.
> > >
> > > No.  That's not the point here.
> > >
> > > Let's suppose a system with 1GB of physical RAM starting at 0xe0000000.
> > >
> > > In this case there is no need for highmem.  However the v2p patching
> > > code in the LPAE case assumes the high bits of a physical address are
> > > always the same which wouldn't be the case in this hypothetical example.
> > >
> > > We want to make sure the kernel won't boot if a system with physical RAM
> > > crossing the 4GB address mark is encountered.
> > 
> > I think that's too restrictive. The case with 1 or 2GB below 4GB limit
> > and the rest of the RAM above is a valid one.
> 
> Valid: sure.  Likely: probably not.  That would complicate address 
> decoding in hardware for little gain.

You could have a block of 4GB RAM at 0x100000000 but the top 2GB aliased
at 0x80000000. It is also possible that the top 2GB are not visible to
software directly but only via the 0x80000000 alias.

But with Russell's proposal for full 64-bit address patching, no
limitations are needed.

Patch

diff --git a/arch/arm/include/asm/memory.h b/arch/arm/include/asm/memory.h
index 88ca206..f3e8f88 100644
--- a/arch/arm/include/asm/memory.h
+++ b/arch/arm/include/asm/memory.h
@@ -154,13 +154,47 @@ 
 #ifdef CONFIG_ARM_PATCH_PHYS_VIRT
 
 extern unsigned long	__pv_offset;
-extern unsigned long	__pv_phys_offset;
+extern phys_addr_t	__pv_phys_offset;
 #define PHYS_OFFSET	__virt_to_phys(PAGE_OFFSET)
 
 static inline phys_addr_t __virt_to_phys(unsigned long x)
 {
-	unsigned long t;
+	phys_addr_t t;
+
+#ifndef CONFIG_ARM_LPAE
 	early_patch_imm8("add", t, x, __pv_offset, 0);
+#else
+	unsigned long __tmp;
+
+#ifndef __ARMEB__
+#define PV_PHYS_HIGH	"(__pv_phys_offset + 4)"
+#else
+#define PV_PHYS_HIGH	"__pv_phys_offset"
+#endif
+
+	early_patch_stub(
+	/* type */		PATCH_IMM8,
+	/* code */
+		"ldr		%[tmp], =__pv_offset\n"
+		"ldr		%[tmp], [%[tmp]]\n"
+		"add		%Q[to], %[from], %[tmp]\n"
+		"ldr		%[tmp], =" PV_PHYS_HIGH "\n"
+		"ldr		%[tmp], [%[tmp]]\n"
+		"mov		%R[to], %[tmp]\n",
+	/* pad */		4,
+	/* patch_data */
+		".long		__pv_offset\n"
+		"add		%Q[to], %[from], %[imm]\n"
+		".long	"	PV_PHYS_HIGH "\n"
+		"mov		%R[to], %[imm]\n",
+	/* operands */
+		: [to]	 "=r"	(t),
+		  [tmp]	 "=&r"	(__tmp)
+		: [from] "r"	(x),
+		  [imm]	 "I"	(__IMM8),
+			 "i"	(&__pv_offset),
+			 "i"	(&__pv_phys_offset));
+#endif
 	return t;
 }
 
diff --git a/arch/arm/kernel/head.S b/arch/arm/kernel/head.S
index 69a3c09..61fb8df 100644
--- a/arch/arm/kernel/head.S
+++ b/arch/arm/kernel/head.S
@@ -530,7 +530,11 @@  ENDPROC(__fixup_pv_offsets)
 
 	.align
 1:	.long	.
+#if defined(CONFIG_ARM_LPAE) && defined(__ARMEB__)
+	.long	__pv_phys_offset + 4
+#else
 	.long	__pv_phys_offset
+#endif
 	.long	__pv_offset
 	.long	PAGE_OFFSET
 #endif
diff --git a/arch/arm/kernel/setup.c b/arch/arm/kernel/setup.c
index 59e0f57..edb4f42 100644
--- a/arch/arm/kernel/setup.c
+++ b/arch/arm/kernel/setup.c
@@ -159,7 +159,7 @@  DEFINE_PER_CPU(struct cpuinfo_arm, cpu_data);
  * The initializers here prevent these from landing in the BSS section.
  */
 unsigned long __pv_offset = 0xdeadbeef;
-unsigned long __pv_phys_offset = 0xdeadbeef;
+phys_addr_t   __pv_phys_offset = 0xdeadbeef;
 EXPORT_SYMBOL(__pv_phys_offset);
 
 #endif