diff mbox series

[kvm-unit-tests,v3,15/27] arm/arm64: mmu_disable: Clean and invalidate before disabling

Message ID 20220630100324.3153655-16-nikos.nikoleris@arm.com (mailing list archive)
State New, archived
Headers show
Series EFI and ACPI support for arm64 | expand

Commit Message

Nikos Nikoleris June 30, 2022, 10:03 a.m. UTC
From: Andrew Jones <drjones@redhat.com>

The commit message of commit 410b3bf09e76 ("arm/arm64: Perform dcache
clean + invalidate after turning MMU off") justifies cleaning and
invalidating the dcache after disabling the MMU by saying it's nice
not to rely on the current page tables and that it should still work
(per the spec), as long as there's an identity map in the current
tables. Doing the invalidation after also somewhat helped with
reenabling the MMU without seeing stale data, but the real problem
with reenabling was because the cache needs to be disabled with
the MMU, but it wasn't.

Since we have to trust/validate that the current page tables have an
identity map anyway, then there's no harm in doing the clean
and invalidate first (it feels a little better to do so, anyway,
considering the cache maintenance instructions take virtual
addresses). Then, also disable the cache with the MMU to avoid
problems when reenabling. We invalidate the Icache and disable
that too for good measure. And, a final TLB invalidation ensures
we're crystal clean when we return from asm_mmu_disable().

Cc: Alexandru Elisei <alexandru.elisei@arm.com>
Signed-off-by: Andrew Jones <drjones@redhat.com>
Signed-off-by: Nikos Nikoleris <nikos.nikoleris@arm.com>
---
 arm/cstart.S   | 28 +++++++++++++++++++++-------
 arm/cstart64.S | 21 ++++++++++++++++-----
 2 files changed, 37 insertions(+), 12 deletions(-)

Comments

Alexandru Elisei June 30, 2022, 10:20 a.m. UTC | #1
Hi,

On Thu, Jun 30, 2022 at 11:03:12AM +0100, Nikos Nikoleris wrote:
> From: Andrew Jones <drjones@redhat.com>
> 
> The commit message of commit 410b3bf09e76 ("arm/arm64: Perform dcache
> clean + invalidate after turning MMU off") justifies cleaning and
> invalidating the dcache after disabling the MMU by saying it's nice
> not to rely on the current page tables and that it should still work
> (per the spec), as long as there's an identity map in the current
> tables. Doing the invalidation after also somewhat helped with
> reenabling the MMU without seeing stale data, but the real problem
> with reenabling was because the cache needs to be disabled with
> the MMU, but it wasn't.
> 
> Since we have to trust/validate that the current page tables have an
> identity map anyway, then there's no harm in doing the clean
> and invalidate first (it feels a little better to do so, anyway,
> considering the cache maintenance instructions take virtual
> addresses). Then, also disable the cache with the MMU to avoid
> problems when reenabling. We invalidate the Icache and disable
> that too for good measure. And, a final TLB invalidation ensures
> we're crystal clean when we return from asm_mmu_disable().

I'll point you to my previous reply [1] to this exact patch which explains
why it's incorrect and is only papering over another problem.

[1] https://lore.kernel.org/all/Yn5Z6Kyj62cUNgRN@monolith.localdoman/

Thanks,
Alex

> 
> Cc: Alexandru Elisei <alexandru.elisei@arm.com>
> Signed-off-by: Andrew Jones <drjones@redhat.com>
> Signed-off-by: Nikos Nikoleris <nikos.nikoleris@arm.com>
> ---
>  arm/cstart.S   | 28 +++++++++++++++++++++-------
>  arm/cstart64.S | 21 ++++++++++++++++-----
>  2 files changed, 37 insertions(+), 12 deletions(-)
> 
> diff --git a/arm/cstart.S b/arm/cstart.S
> index 7036e67..dc324c5 100644
> --- a/arm/cstart.S
> +++ b/arm/cstart.S
> @@ -179,6 +179,7 @@ halt:
>  .globl asm_mmu_enable
>  asm_mmu_enable:
>  	/* TLBIALL */
> +	mov	r2, #0
>  	mcr	p15, 0, r2, c8, c7, 0
>  	dsb	nsh
>  
> @@ -211,12 +212,7 @@ asm_mmu_enable:
>  
>  .globl asm_mmu_disable
>  asm_mmu_disable:
> -	/* SCTLR */
> -	mrc	p15, 0, r0, c1, c0, 0
> -	bic	r0, #CR_M
> -	mcr	p15, 0, r0, c1, c0, 0
> -	isb
> -
> +	/* Clean + invalidate the entire memory */
>  	ldr	r0, =__phys_offset
>  	ldr	r0, [r0]
>  	ldr	r1, =__phys_end
> @@ -224,7 +220,25 @@ asm_mmu_disable:
>  	sub	r1, r1, r0
>  	dcache_by_line_op dccimvac, sy, r0, r1, r2, r3
>  
> -	mov     pc, lr
> +	/* Invalidate Icache */
> +	mov	r0, #0
> +	mcr	p15, 0, r0, c7, c5, 0
> +	isb
> +
> +	/*  Disable cache, Icache and MMU */
> +	mrc	p15, 0, r0, c1, c0, 0
> +	bic	r0, #CR_C
> +	bic	r0, #CR_I
> +	bic	r0, #CR_M
> +	mcr	p15, 0, r0, c1, c0, 0
> +	isb
> +
> +	/* Invalidate TLB */
> +	mov	r0, #0
> +	mcr	p15, 0, r0, c8, c7, 0
> +	dsb	nsh
> +
> +	mov	pc, lr
>  
>  /*
>   * Vectors
> diff --git a/arm/cstart64.S b/arm/cstart64.S
> index e4ab7d0..390feb9 100644
> --- a/arm/cstart64.S
> +++ b/arm/cstart64.S
> @@ -246,11 +246,6 @@ asm_mmu_enable:
>  
>  .globl asm_mmu_disable
>  asm_mmu_disable:
> -	mrs	x0, sctlr_el1
> -	bic	x0, x0, SCTLR_EL1_M
> -	msr	sctlr_el1, x0
> -	isb
> -
>  	/* Clean + invalidate the entire memory */
>  	adrp	x0, __phys_offset
>  	ldr	x0, [x0, :lo12:__phys_offset]
> @@ -259,6 +254,22 @@ asm_mmu_disable:
>  	sub	x1, x1, x0
>  	dcache_by_line_op civac, sy, x0, x1, x2, x3
>  
> +	/* Invalidate Icache */
> +	ic	iallu
> +	isb
> +
> +	/* Disable cache, Icache and MMU */
> +	mrs	x0, sctlr_el1
> +	bic	x0, x0, SCTLR_EL1_C
> +	bic	x0, x0, SCTLR_EL1_I
> +	bic	x0, x0, SCTLR_EL1_M
> +	msr	sctlr_el1, x0
> +	isb
> +
> +	/* Invalidate TLB */
> +	tlbi	vmalle1
> +	dsb	nsh
> +
>  	ret
>  
>  /*
> -- 
> 2.25.1
>
Nikos Nikoleris June 30, 2022, 11:08 a.m. UTC | #2
Hi Alex,

On 30/06/2022 11:20, Alexandru Elisei wrote:
> Hi,
> 
> On Thu, Jun 30, 2022 at 11:03:12AM +0100, Nikos Nikoleris wrote:
>> From: Andrew Jones <drjones@redhat.com>
>>
>> The commit message of commit 410b3bf09e76 ("arm/arm64: Perform dcache
>> clean + invalidate after turning MMU off") justifies cleaning and
>> invalidating the dcache after disabling the MMU by saying it's nice
>> not to rely on the current page tables and that it should still work
>> (per the spec), as long as there's an identity map in the current
>> tables. Doing the invalidation after also somewhat helped with
>> reenabling the MMU without seeing stale data, but the real problem
>> with reenabling was because the cache needs to be disabled with
>> the MMU, but it wasn't.
>>
>> Since we have to trust/validate that the current page tables have an
>> identity map anyway, then there's no harm in doing the clean
>> and invalidate first (it feels a little better to do so, anyway,
>> considering the cache maintenance instructions take virtual
>> addresses). Then, also disable the cache with the MMU to avoid
>> problems when reenabling. We invalidate the Icache and disable
>> that too for good measure. And, a final TLB invalidation ensures
>> we're crystal clean when we return from asm_mmu_disable().
> 
> I'll point you to my previous reply [1] to this exact patch which explains
> why it's incorrect and is only papering over another problem.
> 
> [1] https://lore.kernel.org/all/Yn5Z6Kyj62cUNgRN@monolith.localdoman/
> 

Apologies, I didn't mean to ignore your feedback on this. There was a 
parallel discussion in [2] which I thought makes the problem more concrete.

This is Drew's patch; as soon as he confirms he's also happy with the 
change you suggested in the patch description, I am happy to make it.

Generally, a test will start off with the MMU enabled. At this point, we 
access code, use and modify data (EfiLoaderData, EfiLoaderCode). Any of 
the two regions could be mapped as any type of memory (I need to have 
another look to confirm if it's Normal Memory). Then we want to take 
over control of the page tables and for that reason we have to switch 
off the MMU. And any access to code or data will be with Device-nGnRnE 
as you pointed out. If we don't clean and invalidate, instructions and 
data might be in the cache and we will be mixing memory attributes, 
won't we?

[2]: 
https://lore.kernel.org/all/6c5a3ef7-3742-c4e9-5a94-c702a5b3ebca@arm.com/

Thanks,

Nikos

> Thanks,
> Alex
> 
>>
>> Cc: Alexandru Elisei <alexandru.elisei@arm.com>
>> Signed-off-by: Andrew Jones <drjones@redhat.com>
>> Signed-off-by: Nikos Nikoleris <nikos.nikoleris@arm.com>
>> ---
>>   arm/cstart.S   | 28 +++++++++++++++++++++-------
>>   arm/cstart64.S | 21 ++++++++++++++++-----
>>   2 files changed, 37 insertions(+), 12 deletions(-)
>>
>> diff --git a/arm/cstart.S b/arm/cstart.S
>> index 7036e67..dc324c5 100644
>> --- a/arm/cstart.S
>> +++ b/arm/cstart.S
>> @@ -179,6 +179,7 @@ halt:
>>   .globl asm_mmu_enable
>>   asm_mmu_enable:
>>   	/* TLBIALL */
>> +	mov	r2, #0
>>   	mcr	p15, 0, r2, c8, c7, 0
>>   	dsb	nsh
>>   
>> @@ -211,12 +212,7 @@ asm_mmu_enable:
>>   
>>   .globl asm_mmu_disable
>>   asm_mmu_disable:
>> -	/* SCTLR */
>> -	mrc	p15, 0, r0, c1, c0, 0
>> -	bic	r0, #CR_M
>> -	mcr	p15, 0, r0, c1, c0, 0
>> -	isb
>> -
>> +	/* Clean + invalidate the entire memory */
>>   	ldr	r0, =__phys_offset
>>   	ldr	r0, [r0]
>>   	ldr	r1, =__phys_end
>> @@ -224,7 +220,25 @@ asm_mmu_disable:
>>   	sub	r1, r1, r0
>>   	dcache_by_line_op dccimvac, sy, r0, r1, r2, r3
>>   
>> -	mov     pc, lr
>> +	/* Invalidate Icache */
>> +	mov	r0, #0
>> +	mcr	p15, 0, r0, c7, c5, 0
>> +	isb
>> +
>> +	/*  Disable cache, Icache and MMU */
>> +	mrc	p15, 0, r0, c1, c0, 0
>> +	bic	r0, #CR_C
>> +	bic	r0, #CR_I
>> +	bic	r0, #CR_M
>> +	mcr	p15, 0, r0, c1, c0, 0
>> +	isb
>> +
>> +	/* Invalidate TLB */
>> +	mov	r0, #0
>> +	mcr	p15, 0, r0, c8, c7, 0
>> +	dsb	nsh
>> +
>> +	mov	pc, lr
>>   
>>   /*
>>    * Vectors
>> diff --git a/arm/cstart64.S b/arm/cstart64.S
>> index e4ab7d0..390feb9 100644
>> --- a/arm/cstart64.S
>> +++ b/arm/cstart64.S
>> @@ -246,11 +246,6 @@ asm_mmu_enable:
>>   
>>   .globl asm_mmu_disable
>>   asm_mmu_disable:
>> -	mrs	x0, sctlr_el1
>> -	bic	x0, x0, SCTLR_EL1_M
>> -	msr	sctlr_el1, x0
>> -	isb
>> -
>>   	/* Clean + invalidate the entire memory */
>>   	adrp	x0, __phys_offset
>>   	ldr	x0, [x0, :lo12:__phys_offset]
>> @@ -259,6 +254,22 @@ asm_mmu_disable:
>>   	sub	x1, x1, x0
>>   	dcache_by_line_op civac, sy, x0, x1, x2, x3
>>   
>> +	/* Invalidate Icache */
>> +	ic	iallu
>> +	isb
>> +
>> +	/* Disable cache, Icache and MMU */
>> +	mrs	x0, sctlr_el1
>> +	bic	x0, x0, SCTLR_EL1_C
>> +	bic	x0, x0, SCTLR_EL1_I
>> +	bic	x0, x0, SCTLR_EL1_M
>> +	msr	sctlr_el1, x0
>> +	isb
>> +
>> +	/* Invalidate TLB */
>> +	tlbi	vmalle1
>> +	dsb	nsh
>> +
>>   	ret
>>   
>>   /*
>> -- 
>> 2.25.1
>>
Alexandru Elisei June 30, 2022, 11:24 a.m. UTC | #3
Hi,

On Thu, Jun 30, 2022 at 12:08:41PM +0100, Nikos Nikoleris wrote:
> Hi Alex,
> 
> On 30/06/2022 11:20, Alexandru Elisei wrote:
> > Hi,
> > 
> > On Thu, Jun 30, 2022 at 11:03:12AM +0100, Nikos Nikoleris wrote:
> > > From: Andrew Jones <drjones@redhat.com>
> > > 
> > > The commit message of commit 410b3bf09e76 ("arm/arm64: Perform dcache
> > > clean + invalidate after turning MMU off") justifies cleaning and
> > > invalidating the dcache after disabling the MMU by saying it's nice
> > > not to rely on the current page tables and that it should still work
> > > (per the spec), as long as there's an identity map in the current
> > > tables. Doing the invalidation after also somewhat helped with
> > > reenabling the MMU without seeing stale data, but the real problem
> > > with reenabling was because the cache needs to be disabled with
> > > the MMU, but it wasn't.
> > > 
> > > Since we have to trust/validate that the current page tables have an
> > > identity map anyway, then there's no harm in doing the clean
> > > and invalidate first (it feels a little better to do so, anyway,
> > > considering the cache maintenance instructions take virtual
> > > addresses). Then, also disable the cache with the MMU to avoid
> > > problems when reenabling. We invalidate the Icache and disable
> > > that too for good measure. And, a final TLB invalidation ensures
> > > we're crystal clean when we return from asm_mmu_disable().
> > 
> > I'll point you to my previous reply [1] to this exact patch which explains
> > why it's incorrect and is only papering over another problem.
> > 
> > [1] https://lore.kernel.org/all/Yn5Z6Kyj62cUNgRN@monolith.localdoman/
> > 
> 
> Apologies, I didn't mean to ignore your feedback on this. There was a
> parallel discussion in [2] which I thought makes the problem more concrete.

No problem, I figured as much :).

> 
> This is Drew's patch as soon as he confirms he's also happy with the change
> you suggested in the patch description I am happy to make it.
> 
> Generally, a test will start off with the MMU enabled. At this point, we
> access code, use and modify data (EfiLoaderData, EfiLoaderCode). Any of the
> two regions could be mapped as any type of memory (I need to have another
> look to confirm if it's Normal Memory). Then we want to take over control of
> the page tables and for that reason we have to switch off the MMU. And any
> access to code or data will be with Device-nGnRnE as you pointed out. If we
> don't clean and invalidate, instructions and data might be in the cache and
> we will be mixing memory attributes, won't we?

I missed that comment, sorry. I've replied to that comment made in v2,
here, in this iteration, in patch #19 ("arm/arm64: Add a setup sequence for
systems that boot through EFI").

This is the second time you've mentioned mixed memory attributes, so I'm
going to reiterate the question I asked in patch #19: what do you mean by
"mixing memory attributes" and what is wrong with it? Because it looks to
me like you're saying that you cannot access data written with the MMU on
when the MMU is off (and I assume the other way around, you cannot access
data written with the MMU off when the MMU is on).

Thanks,
Alex
Nikos Nikoleris June 30, 2022, 3:16 p.m. UTC | #4
Hi Alex,

On 30/06/2022 12:24, Alexandru Elisei wrote:
> Hi,
> 
> On Thu, Jun 30, 2022 at 12:08:41PM +0100, Nikos Nikoleris wrote:
>> Hi Alex,
>>
>> On 30/06/2022 11:20, Alexandru Elisei wrote:
>>> Hi,
>>>
>>> On Thu, Jun 30, 2022 at 11:03:12AM +0100, Nikos Nikoleris wrote:
>>>> From: Andrew Jones <drjones@redhat.com>
>>>>
>>>> The commit message of commit 410b3bf09e76 ("arm/arm64: Perform dcache
>>>> clean + invalidate after turning MMU off") justifies cleaning and
>>>> invalidating the dcache after disabling the MMU by saying it's nice
>>>> not to rely on the current page tables and that it should still work
>>>> (per the spec), as long as there's an identity map in the current
>>>> tables. Doing the invalidation after also somewhat helped with
>>>> reenabling the MMU without seeing stale data, but the real problem
>>>> with reenabling was because the cache needs to be disabled with
>>>> the MMU, but it wasn't.
>>>>
>>>> Since we have to trust/validate that the current page tables have an
>>>> identity map anyway, then there's no harm in doing the clean
>>>> and invalidate first (it feels a little better to do so, anyway,
>>>> considering the cache maintenance instructions take virtual
>>>> addresses). Then, also disable the cache with the MMU to avoid
>>>> problems when reenabling. We invalidate the Icache and disable
>>>> that too for good measure. And, a final TLB invalidation ensures
>>>> we're crystal clean when we return from asm_mmu_disable().
>>>
>>> I'll point you to my previous reply [1] to this exact patch which explains
>>> why it's incorrect and is only papering over another problem.
>>>
>>> [1] https://lore.kernel.org/all/Yn5Z6Kyj62cUNgRN@monolith.localdoman/
>>>
>>
>> Apologies, I didn't mean to ignore your feedback on this. There was a
>> parallel discussion in [2] which I thought makes the problem more concrete.
> 
> No problem, I figured as much :).
> 
>>
>> This is Drew's patch as soon as he confirms he's also happy with the change
>> you suggested in the patch description I am happy to make it.
>>
>> Generally, a test will start off with the MMU enabled. At this point, we
>> access code, use and modify data (EfiLoaderData, EfiLoaderCode). Any of the
>> two regions could be mapped as any type of memory (I need to have another
>> look to confirm if it's Normal Memory). Then we want to take over control of
>> the page tables and for that reason we have to switch off the MMU. And any
>> access to code or data will be with Device-nGnRnE as you pointed out. If we
>> don't clean and invalidate, instructions and data might be in the cache and
>> we will be mixing memory attributes, won't we?
> 
> I missed that comment, sorry. I've replied to that comment made in v2,
> here, in this ieration, in patch #19 ("arm/arm64: Add a setup sequence for
> systems that boot through EFI").
> 
> This is the second time you've mentioned mixed memory attributes, so I'm
> going to reiterate the question I asked in patch #19: what do you mean by
> "mixing memory attributes" and what is wrong with it? Because it looks to
> me like you're saying that you cannot access data written with the MMU on
> when the MMU is off (and I assume the other way around, you cannot data
> written with the MMU off when the MMU is on).
> 

What I mean by mixing memory attributes is illustrated by the following 
example.

Take a memory location x, for which the page table entry maps to a 
physical location as Normal, Inner-Shareable, Inner-writeback and 
Outer-writeback. If we access it when the MMU is on and subsequently when 
the MMU is off (treated as Device-nGnRnE), then we have two accesses 
with mismatched memory attributes to the same location. There is a whole 
section in the Arm ARM on why this needs to be avoided (B2.8 Mismatched 
memory attributes) but the result is "a loss of the uniprocessor 
semantics, ordering, or coherency". As I understand, the solution to 
this is:

"If the mismatched attributes for a Location mean that multiple 
cacheable accesses to the Location might be made with different 
shareability attributes, then uniprocessor semantics, ordering, and 
coherency are guaranteed only if:
• Software running on a PE cleans and invalidates a Location from cache 
before and after each read or write to that Location by that PE.
• A DMB barrier with scope that covers the full shareability of the 
accesses is placed between any accesses to the same memory Location that 
use different attributes."

So unless UEFI maps all memory as Device-nGnRnE we have to do something. 
I will try to find out more about UEFI's page tables.

Thanks,

Nikos


> Thanks,
> Alex
Alexandru Elisei June 30, 2022, 3:57 p.m. UTC | #5
Hi,

On Thu, Jun 30, 2022 at 04:16:09PM +0100, Nikos Nikoleris wrote:
> Hi Alex,
> 
> On 30/06/2022 12:24, Alexandru Elisei wrote:
> > Hi,
> > 
> > On Thu, Jun 30, 2022 at 12:08:41PM +0100, Nikos Nikoleris wrote:
> > > Hi Alex,
> > > 
> > > On 30/06/2022 11:20, Alexandru Elisei wrote:
> > > > Hi,
> > > > 
> > > > On Thu, Jun 30, 2022 at 11:03:12AM +0100, Nikos Nikoleris wrote:
> > > > > From: Andrew Jones <drjones@redhat.com>
> > > > > 
> > > > > The commit message of commit 410b3bf09e76 ("arm/arm64: Perform dcache
> > > > > clean + invalidate after turning MMU off") justifies cleaning and
> > > > > invalidating the dcache after disabling the MMU by saying it's nice
> > > > > not to rely on the current page tables and that it should still work
> > > > > (per the spec), as long as there's an identity map in the current
> > > > > tables. Doing the invalidation after also somewhat helped with
> > > > > reenabling the MMU without seeing stale data, but the real problem
> > > > > with reenabling was because the cache needs to be disabled with
> > > > > the MMU, but it wasn't.
> > > > > 
> > > > > Since we have to trust/validate that the current page tables have an
> > > > > identity map anyway, then there's no harm in doing the clean
> > > > > and invalidate first (it feels a little better to do so, anyway,
> > > > > considering the cache maintenance instructions take virtual
> > > > > addresses). Then, also disable the cache with the MMU to avoid
> > > > > problems when reenabling. We invalidate the Icache and disable
> > > > > that too for good measure. And, a final TLB invalidation ensures
> > > > > we're crystal clean when we return from asm_mmu_disable().
> > > > 
> > > > I'll point you to my previous reply [1] to this exact patch which explains
> > > > why it's incorrect and is only papering over another problem.
> > > > 
> > > > [1] https://lore.kernel.org/all/Yn5Z6Kyj62cUNgRN@monolith.localdoman/
> > > > 
> > > 
> > > Apologies, I didn't mean to ignore your feedback on this. There was a
> > > parallel discussion in [2] which I thought makes the problem more concrete.
> > 
> > No problem, I figured as much :).
> > 
> > > 
> > > This is Drew's patch as soon as he confirms he's also happy with the change
> > > you suggested in the patch description I am happy to make it.
> > > 
> > > Generally, a test will start off with the MMU enabled. At this point, we
> > > access code, use and modify data (EfiLoaderData, EfiLoaderCode). Any of the
> > > two regions could be mapped as any type of memory (I need to have another
> > > look to confirm if it's Normal Memory). Then we want to take over control of
> > > the page tables and for that reason we have to switch off the MMU. And any
> > > access to code or data will be with Device-nGnRnE as you pointed out. If we
> > > don't clean and invalidate, instructions and data might be in the cache and
> > > we will be mixing memory attributes, won't we?
> > 
> > I missed that comment, sorry. I've replied to that comment made in v2,
> > here, in this ieration, in patch #19 ("arm/arm64: Add a setup sequence for
> > systems that boot through EFI").
> > 
> > This is the second time you've mentioned mixed memory attributes, so I'm
> > going to reiterate the question I asked in patch #19: what do you mean by
> > "mixing memory attributes" and what is wrong with it? Because it looks to
> > me like you're saying that you cannot access data written with the MMU on
> > when the MMU is off (and I assume the other way around, you cannot data
> > written with the MMU off when the MMU is on).
> > 
> 
> What I mean by mixing memory attributes is illustrated by the following
> example.
> 
> Take a memory location x, for which the page table entry maps to a physical
> location as Normal, Inner-Shareable, Inner-writeback and Outer-writeback. If
> we access it when the MMU is on and subquently when the MMU is off (treated
> as Device-nGnRnE), then we have two accesses with mismatched memory
> attributes to the same location. There is a whole section in the Arm ARM on
> why this needs to be avoided (B2.8 Mismatched memory attributes) but the
> result is "a loss of the uniprocessor semantics, ordering, or coherency". As
> I understand, the solution to this is:
> 
> "If the mismatched attributes for a Location mean that multiple cacheable
> accesses to the Location might be made with different shareability
> attributes, then uniprocessor semantics, ordering, and coherency are
> guaranteed only if:
> • Software running on a PE cleans and invalidates a Location from cache
> before and after each read or write to that Location by that PE.
> • A DMB barrier with scope that covers the full shareability of the accesses
> is placed between any accesses to the same memory Location that use
> different attributes."

Ok, so this is about *mismatched* memory attributes. I searched the Arm ARM
for the string "mixed" and nothing relevant came up.

Device-whatever memory is outer shareable and kvm-unit-tests maps memory as
inner shareable, so that matches the "different shareability attributes"
part of the paragraph.

But I would like to point out that there is only one type of cacheable
access that is being performed, when the MMU is on. When the MMU is off,
the access is not cacheable. So there are two types of accesses being
performed:

- cacheable + inner-shareable (MMU on)
- non-cacheable + outer-shareable (MMU off)

It looks to me like the paragraph doesn't apply to our case, because there
are no "multiple cacheable accesses [..] made with different shareability
attributes". Do you agree?

> 
> So unless UEFI maps all memory as Device-nGnRnE we have to do something. I
> will try to find out more about UEFI's page tables.

That's important to know, especially regarding the text section of the
image. If UEFI doesn't clean it to PoC, kvm-unit-tests must do it in order
to execute correctly with the MMU off.

Thanks,
Alex

> 
> Thanks,
> 
> Nikos
> 
> 
> > Thanks,
> > Alex
Andrew Jones July 1, 2022, 9:12 a.m. UTC | #6
On Thu, Jun 30, 2022 at 04:57:39PM +0100, Alexandru Elisei wrote:
> Hi,
> 
> On Thu, Jun 30, 2022 at 04:16:09PM +0100, Nikos Nikoleris wrote:
> > Hi Alex,
> > 
> > On 30/06/2022 12:24, Alexandru Elisei wrote:
> > > Hi,
> > > 
> > > On Thu, Jun 30, 2022 at 12:08:41PM +0100, Nikos Nikoleris wrote:
> > > > Hi Alex,
> > > > 
> > > > On 30/06/2022 11:20, Alexandru Elisei wrote:
> > > > > Hi,
> > > > > 
> > > > > On Thu, Jun 30, 2022 at 11:03:12AM +0100, Nikos Nikoleris wrote:
> > > > > > From: Andrew Jones <drjones@redhat.com>
> > > > > > 
> > > > > > The commit message of commit 410b3bf09e76 ("arm/arm64: Perform dcache
> > > > > > clean + invalidate after turning MMU off") justifies cleaning and
> > > > > > invalidating the dcache after disabling the MMU by saying it's nice
> > > > > > not to rely on the current page tables and that it should still work
> > > > > > (per the spec), as long as there's an identity map in the current
> > > > > > tables. Doing the invalidation after also somewhat helped with
> > > > > > reenabling the MMU without seeing stale data, but the real problem
> > > > > > with reenabling was because the cache needs to be disabled with
> > > > > > the MMU, but it wasn't.
> > > > > > 
> > > > > > Since we have to trust/validate that the current page tables have an
> > > > > > identity map anyway, then there's no harm in doing the clean
> > > > > > and invalidate first (it feels a little better to do so, anyway,
> > > > > > considering the cache maintenance instructions take virtual
> > > > > > addresses). Then, also disable the cache with the MMU to avoid
> > > > > > problems when reenabling. We invalidate the Icache and disable
> > > > > > that too for good measure. And, a final TLB invalidation ensures
> > > > > > we're crystal clean when we return from asm_mmu_disable().
> > > > > 
> > > > > I'll point you to my previous reply [1] to this exact patch which explains
> > > > > why it's incorrect and is only papering over another problem.
> > > > > 
> > > > > [1] https://lore.kernel.org/all/Yn5Z6Kyj62cUNgRN@monolith.localdoman/
> > > > > 
> > > > 
> > > > Apologies, I didn't mean to ignore your feedback on this. There was a
> > > > parallel discussion in [2] which I thought makes the problem more concrete.
> > > 
> > > No problem, I figured as much :).
> > > 
> > > > 
> > > > This is Drew's patch as soon as he confirms he's also happy with the change
> > > > you suggested in the patch description I am happy to make it.
> > > > 
> > > > Generally, a test will start off with the MMU enabled. At this point, we
> > > > access code, use and modify data (EfiLoaderData, EfiLoaderCode). Any of the
> > > > two regions could be mapped as any type of memory (I need to have another
> > > > look to confirm if it's Normal Memory). Then we want to take over control of
> > > > the page tables and for that reason we have to switch off the MMU. And any
> > > > access to code or data will be with Device-nGnRnE as you pointed out. If we
> > > > don't clean and invalidate, instructions and data might be in the cache and
> > > > we will be mixing memory attributes, won't we?
> > > 
> > > I missed that comment, sorry. I've replied to that comment made in v2,
> > > here, in this ieration, in patch #19 ("arm/arm64: Add a setup sequence for
> > > systems that boot through EFI").
> > > 
> > > This is the second time you've mentioned mixed memory attributes, so I'm
> > > going to reiterate the question I asked in patch #19: what do you mean by
> > > "mixing memory attributes" and what is wrong with it? Because it looks to
> > > me like you're saying that you cannot access data written with the MMU on
> > > when the MMU is off (and I assume the other way around, you cannot data
> > > written with the MMU off when the MMU is on).
> > > 
> > 
> > What I mean by mixing memory attributes is illustrated by the following
> > example.
> > 
> > Take a memory location x, for which the page table entry maps to a physical
> > location as Normal, Inner-Shareable, Inner-writeback and Outer-writeback. If
> > we access it when the MMU is on and subquently when the MMU is off (treated
> > as Device-nGnRnE), then we have two accesses with mismatched memory
> > attributes to the same location. There is a whole section in the Arm ARM on
> > why this needs to be avoided (B2.8 Mismatched memory attributes) but the
> > result is "a loss of the uniprocessor semantics, ordering, or coherency". As
> > I understand, the solution to this is:
> > 
> > "If the mismatched attributes for a Location mean that multiple cacheable
> > accesses to the Location might be made with different shareability
> > attributes, then uniprocessor semantics, ordering, and coherency are
> > guaranteed only if:
> > • Software running on a PE cleans and invalidates a Location from cache
> > before and after each read or write to that Location by that PE.
> > • A DMB barrier with scope that covers the full shareability of the accesses
> > is placed between any accesses to the same memory Location that use
> > different attributes."
> 
> Ok, so this is about *mismatched* memory attributes. I searched the Arm ARM
> for the string "mixed" and nothing relevant came up.
> 
> Device-whatever memory is outer shareable and kvm-unit-tests maps memory as
> inner shareable, so that matches the "different shareability attributes"
> part of the paragraph.
> 
> But I would like to point out that there is only one type of cacheable
> access that is being performed, when the MMU is on. When the MMU is off,
> the access is not cacheable. So there are two types of accesses being
> performed:
> 
> - cacheable + inner-shareable (MMU on)
> - non-cacheable + outer-shareable (MMU off)
> 
> It looks to me like the paragraph doesn't apply to our case, because there
> are no "multiple cacheable accesses [..] made with different shareability
> attributes". Do you agree?
> 
> > 
> > So unless UEFI maps all memory as Device-nGnRnE we have to do something. I
> > will try to find out more about UEFI's page tables.
> 
> That's important to know, especially regarding the text section of the
> image. If UEFI doesnt' clean it to PoC, kvm-unit-tests must do it in order
> to execute correctly with the MMU off.
>

Hi Alex and Nikos,

Indeed my experiments on bare-metal made this change necessary. I'm happy
to see this discussion, though, as this patch could be tweaked or at least
the commit message improved in order to better explain what's going on and
why the changes are necessary. IOW, I have no problem with this patch
being dropped and replaced by one of you with something that "makes
more sense" as long as the outcome (coherent execution on bare-metal)
still works.

Thanks,
drew
Alexandru Elisei July 1, 2022, 10:24 a.m. UTC | #7
Hi,

On Fri, Jul 01, 2022 at 11:12:14AM +0200, Andrew Jones wrote:
> On Thu, Jun 30, 2022 at 04:57:39PM +0100, Alexandru Elisei wrote:
> > Hi,
> > 
> > On Thu, Jun 30, 2022 at 04:16:09PM +0100, Nikos Nikoleris wrote:
> > > Hi Alex,
> > > 
> > > On 30/06/2022 12:24, Alexandru Elisei wrote:
> > > > Hi,
> > > > 
> > > > On Thu, Jun 30, 2022 at 12:08:41PM +0100, Nikos Nikoleris wrote:
> > > > > Hi Alex,
> > > > > 
> > > > > On 30/06/2022 11:20, Alexandru Elisei wrote:
> > > > > > Hi,
> > > > > > 
> > > > > > On Thu, Jun 30, 2022 at 11:03:12AM +0100, Nikos Nikoleris wrote:
> > > > > > > From: Andrew Jones <drjones@redhat.com>
> > > > > > > 
> > > > > > > The commit message of commit 410b3bf09e76 ("arm/arm64: Perform dcache
> > > > > > > clean + invalidate after turning MMU off") justifies cleaning and
> > > > > > > invalidating the dcache after disabling the MMU by saying it's nice
> > > > > > > not to rely on the current page tables and that it should still work
> > > > > > > (per the spec), as long as there's an identity map in the current
> > > > > > > tables. Doing the invalidation after also somewhat helped with
> > > > > > > reenabling the MMU without seeing stale data, but the real problem
> > > > > > > with reenabling was because the cache needs to be disabled with
> > > > > > > the MMU, but it wasn't.
> > > > > > > 
> > > > > > > Since we have to trust/validate that the current page tables have an
> > > > > > > identity map anyway, then there's no harm in doing the clean
> > > > > > > and invalidate first (it feels a little better to do so, anyway,
> > > > > > > considering the cache maintenance instructions take virtual
> > > > > > > addresses). Then, also disable the cache with the MMU to avoid
> > > > > > > problems when reenabling. We invalidate the Icache and disable
> > > > > > > that too for good measure. And, a final TLB invalidation ensures
> > > > > > > we're crystal clean when we return from asm_mmu_disable().
> > > > > > 
> > > > > > I'll point you to my previous reply [1] to this exact patch which explains
> > > > > > why it's incorrect and is only papering over another problem.
> > > > > > 
> > > > > > [1] https://lore.kernel.org/all/Yn5Z6Kyj62cUNgRN@monolith.localdoman/
> > > > > > 
> > > > > 
> > > > > Apologies, I didn't mean to ignore your feedback on this. There was a
> > > > > parallel discussion in [2] which I thought makes the problem more concrete.
> > > > 
> > > > No problem, I figured as much :).
> > > > 
> > > > > 
> > > > > This is Drew's patch as soon as he confirms he's also happy with the change
> > > > > you suggested in the patch description I am happy to make it.
> > > > > 
> > > > > Generally, a test will start off with the MMU enabled. At this point, we
> > > > > access code, use and modify data (EfiLoaderData, EfiLoaderCode). Any of the
> > > > > two regions could be mapped as any type of memory (I need to have another
> > > > > look to confirm if it's Normal Memory). Then we want to take over control of
> > > > > the page tables and for that reason we have to switch off the MMU. And any
> > > > > access to code or data will be with Device-nGnRnE as you pointed out. If we
> > > > > don't clean and invalidate, instructions and data might be in the cache and
> > > > > we will be mixing memory attributes, won't we?
> > > > 
> > > > I missed that comment, sorry. I've replied to that comment made in v2,
> > > > here, in this iteration, in patch #19 ("arm/arm64: Add a setup sequence for
> > > > systems that boot through EFI").
> > > > 
> > > > This is the second time you've mentioned mixed memory attributes, so I'm
> > > > going to reiterate the question I asked in patch #19: what do you mean by
> > > > "mixing memory attributes" and what is wrong with it? Because it looks to
> > > > me like you're saying that you cannot access data written with the MMU on
> > > > when the MMU is off (and I assume the other way around, you cannot access
> > > > data written with the MMU off when the MMU is on).
> > > > 
> > > 
> > > What I mean by mixing memory attributes is illustrated by the following
> > > example.
> > > 
> > > Take a memory location x, for which the page table entry maps to a physical
> > > location as Normal, Inner-Shareable, Inner-writeback and Outer-writeback. If
> > > we access it when the MMU is on and subsequently when the MMU is off (treated
> > > as Device-nGnRnE), then we have two accesses with mismatched memory
> > > attributes to the same location. There is a whole section in the Arm ARM on
> > > why this needs to be avoided (B2.8 Mismatched memory attributes) but the
> > > result is "a loss of the uniprocessor semantics, ordering, or coherency". As
> > > I understand, the solution to this is:
> > > 
> > > "If the mismatched attributes for a Location mean that multiple cacheable
> > > accesses to the Location might be made with different shareability
> > > attributes, then uniprocessor semantics, ordering, and coherency are
> > > guaranteed only if:
> > > • Software running on a PE cleans and invalidates a Location from cache
> > > before and after each read or write to that Location by that PE.
> > > • A DMB barrier with scope that covers the full shareability of the accesses
> > > is placed between any accesses to the same memory Location that use
> > > different attributes."
> > 
> > Ok, so this is about *mismatched* memory attributes. I searched the Arm ARM
> > for the string "mixed" and nothing relevant came up.
> > 
> > Device-whatever memory is outer shareable and kvm-unit-tests maps memory as
> > inner shareable, so that matches the "different shareability attributes"
> > part of the paragraph.
> > 
> > But I would like to point out that there is only one type of cacheable
> > access that is being performed, when the MMU is on. When the MMU is off,
> > the access is not cacheable. So there are two types of accesses being
> > performed:
> > 
> > - cacheable + inner-shareable (MMU on)
> > - non-cacheable + outer-shareable (MMU off)
> > 
> > It looks to me like the paragraph doesn't apply to our case, because there
> > are no "multiple cacheable accesses [..] made with different shareability
> > attributes". Do you agree?
> > 
> > > 
> > > So unless UEFI maps all memory as Device-nGnRnE we have to do something. I
> > > will try to find out more about UEFI's page tables.
> > 
> > That's important to know, especially regarding the text section of the
> > image. If UEFI doesn't clean it to PoC, kvm-unit-tests must do it in order
> > to execute correctly with the MMU off.
> >
> 
> Hi Alex and Nikos,
> 
> Indeed my experiments on bare-metal made this change necessary. I'm happy
> to see this discussion, though, as this patch could be tweaked or at least
> the commit message improved in order to better explain what's going on and
> why the changes are necessary. IOW, I have no problem with this patch

If you fix the commit message to be architecturally correct then you will
come to the conclusion that the patch is architecturally incorrect because
while it fixes the problem you were seeing, it breaks asm_mmu_disable in
all other cases.

The problem you were seeing according to my investigation was this:

__phys_offset and __phys_end are written with the MMU on and the most up to
date value is in the cache. When the MMU is turned off, the value that
asm_mmu_disable reads is the stale value from main memory and it will not
clean + invalidate all the memory, which is what we want. This assumes that
UEFI cleaned the image to PoC, otherwise, that will need to be cleaned too
by kvm-unit-tests before turning off the MMU.

This was explained before, both on your original UEFI support series on
github [1], and on this list.

As for why it breaks asm_mmu_disable for all other cases:

The purpose of the clean in asm_mmu_disable is for the CPU to sync the
caches with main memory when the MMU is turned off (to propagate the most
up-to-date value from the cache to main memory); the purpose of the
invalidate is to make sure that the CPU reads from main memory instead of
the cache once the MMU is turned back on - if the cache line is still
valid, the CPU will read the values written *before* the MMU was turned
off, not the values written *after* the MMU was turned off.

If you do the dcache clean + invalidate *before* turning the MMU off, the
CPU can speculate a read and allocate a new cache line before the MMU is
turned off, which would make the invalidate useless. Speculation is
prohibited with the MMU off, that's why the invalidate must be done with
the MMU off.

Because of this reason I believe the patch is incorrect.

> being dropped and replaced by one of you with something that "makes
> more sense" as long as the outcome (coherent execution on bare-metal)
> still works.

Hmm... maybe an experiment will work. I propose the following:

1. Revert this patch.
2. Apply this diff on top of the series:

diff --git a/lib/arm/setup.c b/lib/arm/setup.c
index 30d04d0eb100..913f4088d96c 100644
--- a/lib/arm/setup.c
+++ b/lib/arm/setup.c
@@ -374,6 +374,11 @@ static efi_status_t efi_mem_init(efi_bootinfo_t *efi_bootinfo)
                }
        }
        __phys_end &= PHYS_MASK;
+
+       asm volatile("dc cvau, %0\n" :: "r" (&__phys_offset) : "memory");
+       asm volatile("dc cvau, %0\n" :: "r" (&__phys_end) : "memory");
+       dsb(sy);
+
        asm_mmu_disable();

        if (free_mem_pages == 0)

This is the solution, based on an architectural explanation of what we were
observing, that I proposed on your github branch, a solution that you've
tested with the result:

"I tested at least 10 times (lost count) with a build where "arm/arm64:
mmu_disable: Clean and invalidate before disabling" was reverted from the
target-efi branch and your hack was applied. It worked every time."

[1] https://github.com/rhdrjones/kvm-unit-tests/commit/fc58684bc47b7d07d75098fdfddb6083e9b12104#commitcomment-44222926

Thanks,
Alex
Andrew Jones July 1, 2022, 10:36 a.m. UTC | #8
On Thu, Jun 30, 2022 at 04:16:09PM +0100, Nikos Nikoleris wrote:
...
> So unless UEFI maps all memory as Device-nGnRnE we have to do something. I
> will try to find out more about UEFI's page tables.
>

UEFI will map memory as normal-cacheable. You may find section 2.3.6 of
the UEFI spec informative.

Thanks,
drew
Andrew Jones July 1, 2022, 11:16 a.m. UTC | #9
On Fri, Jul 01, 2022 at 11:24:44AM +0100, Alexandru Elisei wrote:
...
> > being dropped and replaced by one of you with something that "makes
> > more sense" as long as the outcome (coherent execution on bare-metal)
> > still works.
> 
> Hmm... maybe an experiment will work. I propose the following:
> 
> 1. Revert this patch.
> 2. Apply this diff on top of the series:
> 
> diff --git a/lib/arm/setup.c b/lib/arm/setup.c
> index 30d04d0eb100..913f4088d96c 100644
> --- a/lib/arm/setup.c
> +++ b/lib/arm/setup.c
> @@ -374,6 +374,11 @@ static efi_status_t efi_mem_init(efi_bootinfo_t *efi_bootinfo)
>                 }
>         }
>         __phys_end &= PHYS_MASK;
> +
> +       asm volatile("dc cvau, %0\n" :: "r" (&__phys_offset) : "memory");
> +       asm volatile("dc cvau, %0\n" :: "r" (&__phys_end) : "memory");
> +       dsb(sy);
> +
>         asm_mmu_disable();
> 
>         if (free_mem_pages == 0)
> 
> This is the solution, based on an architectural explanation of what we were
> observing, that I proposed on your github branch, a solution that you've
> tested with the result:
> 
> "I tested at least 10 times (lost count) with a build where "arm/arm64:
> mmu_disable: Clean and invalidate before disabling" was reverted from the
> target-efi branch and your hack was applied. It worked every time."
> 
> [1] https://github.com/rhdrjones/kvm-unit-tests/commit/fc58684bc47b7d07d75098fdfddb6083e9b12104#commitcomment-44222926
>

Hi Alex,

Thanks for digging that back up. I had lost track of it. The last comment
is you saying that you'll send a proper patch. Did you send one that got
lost? If not, would you like to send one now that Nikos can incorporate?

Thanks,
drew
Nikos Nikoleris July 1, 2022, 11:34 a.m. UTC | #10
On 01/07/2022 11:24, Alexandru Elisei wrote:
> Hi,
> 
> On Fri, Jul 01, 2022 at 11:12:14AM +0200, Andrew Jones wrote:
>> On Thu, Jun 30, 2022 at 04:57:39PM +0100, Alexandru Elisei wrote:
>>> Hi,
>>>
>>> On Thu, Jun 30, 2022 at 04:16:09PM +0100, Nikos Nikoleris wrote:
>>>> Hi Alex,
>>>>
>>>> On 30/06/2022 12:24, Alexandru Elisei wrote:
>>>>> Hi,
>>>>>
>>>>> On Thu, Jun 30, 2022 at 12:08:41PM +0100, Nikos Nikoleris wrote:
>>>>>> Hi Alex,
>>>>>>
>>>>>> On 30/06/2022 11:20, Alexandru Elisei wrote:
>>>>>>> Hi,
>>>>>>>
>>>>>>> On Thu, Jun 30, 2022 at 11:03:12AM +0100, Nikos Nikoleris wrote:
>>>>>>>> From: Andrew Jones <drjones@redhat.com>
>>>>>>>>
>>>>>>>> The commit message of commit 410b3bf09e76 ("arm/arm64: Perform dcache
>>>>>>>> clean + invalidate after turning MMU off") justifies cleaning and
>>>>>>>> invalidating the dcache after disabling the MMU by saying it's nice
>>>>>>>> not to rely on the current page tables and that it should still work
>>>>>>>> (per the spec), as long as there's an identity map in the current
>>>>>>>> tables. Doing the invalidation after also somewhat helped with
>>>>>>>> reenabling the MMU without seeing stale data, but the real problem
>>>>>>>> with reenabling was because the cache needs to be disabled with
>>>>>>>> the MMU, but it wasn't.
>>>>>>>>
>>>>>>>> Since we have to trust/validate that the current page tables have an
>>>>>>>> identity map anyway, then there's no harm in doing the clean
>>>>>>>> and invalidate first (it feels a little better to do so, anyway,
>>>>>>>> considering the cache maintenance instructions take virtual
>>>>>>>> addresses). Then, also disable the cache with the MMU to avoid
>>>>>>>> problems when reenabling. We invalidate the Icache and disable
>>>>>>>> that too for good measure. And, a final TLB invalidation ensures
>>>>>>>> we're crystal clean when we return from asm_mmu_disable().
>>>>>>>
>>>>>>> I'll point you to my previous reply [1] to this exact patch which explains
>>>>>>> why it's incorrect and is only papering over another problem.
>>>>>>>
>>>>>>> [1] https://lore.kernel.org/all/Yn5Z6Kyj62cUNgRN@monolith.localdoman/
>>>>>>>
>>>>>>
>>>>>> Apologies, I didn't mean to ignore your feedback on this. There was a
>>>>>> parallel discussion in [2] which I thought makes the problem more concrete.
>>>>>
>>>>> No problem, I figured as much :).
>>>>>
>>>>>>
>>>>>> This is Drew's patch as soon as he confirms he's also happy with the change
>>>>>> you suggested in the patch description I am happy to make it.
>>>>>>
>>>>>> Generally, a test will start off with the MMU enabled. At this point, we
>>>>>> access code, use and modify data (EfiLoaderData, EfiLoaderCode). Any of the
>>>>>> two regions could be mapped as any type of memory (I need to have another
>>>>>> look to confirm if it's Normal Memory). Then we want to take over control of
>>>>>> the page tables and for that reason we have to switch off the MMU. And any
>>>>>> access to code or data will be with Device-nGnRnE as you pointed out. If we
>>>>>> don't clean and invalidate, instructions and data might be in the cache and
>>>>>> we will be mixing memory attributes, won't we?
>>>>>
>>>>> I missed that comment, sorry. I've replied to that comment made in v2,
>>>>> here, in this iteration, in patch #19 ("arm/arm64: Add a setup sequence for
>>>>> systems that boot through EFI").
>>>>>
>>>>> This is the second time you've mentioned mixed memory attributes, so I'm
>>>>> going to reiterate the question I asked in patch #19: what do you mean by
>>>>> "mixing memory attributes" and what is wrong with it? Because it looks to
>>>>> me like you're saying that you cannot access data written with the MMU on
>>>>> when the MMU is off (and I assume the other way around, you cannot access
>>>>> data written with the MMU off when the MMU is on).
>>>>>
>>>>
>>>> What I mean by mixing memory attributes is illustrated by the following
>>>> example.
>>>>
>>>> Take a memory location x, for which the page table entry maps to a physical
>>>> location as Normal, Inner-Shareable, Inner-writeback and Outer-writeback. If
>>>> we access it when the MMU is on and subsequently when the MMU is off (treated
>>>> as Device-nGnRnE), then we have two accesses with mismatched memory
>>>> attributes to the same location. There is a whole section in the Arm ARM on
>>>> why this needs to be avoided (B2.8 Mismatched memory attributes) but the
>>>> result is "a loss of the uniprocessor semantics, ordering, or coherency". As
>>>> I understand, the solution to this is:
>>>>
>>>> "If the mismatched attributes for a Location mean that multiple cacheable
>>>> accesses to the Location might be made with different shareability
>>>> attributes, then uniprocessor semantics, ordering, and coherency are
>>>> guaranteed only if:
>>>> • Software running on a PE cleans and invalidates a Location from cache
>>>> before and after each read or write to that Location by that PE.
>>>> • A DMB barrier with scope that covers the full shareability of the accesses
>>>> is placed between any accesses to the same memory Location that use
>>>> different attributes."
>>>
>>> Ok, so this is about *mismatched* memory attributes. I searched the Arm ARM
>>> for the string "mixed" and nothing relevant came up.
>>>
>>> Device-whatever memory is outer shareable and kvm-unit-tests maps memory as
>>> inner shareable, so that matches the "different shareability attributes"
>>> part of the paragraph.
>>>
>>> But I would like to point out that there is only one type of cacheable
>>> access that is being performed, when the MMU is on. When the MMU is off,
>>> the access is not cacheable. So there are two types of accesses being
>>> performed:
>>>
>>> - cacheable + inner-shareable (MMU on)
>>> - non-cacheable + outer-shareable (MMU off)
>>>
>>> It looks to me like the paragraph doesn't apply to our case, because there
>>> are no "multiple cacheable accesses [..] made with different shareability
>>> attributes". Do you agree?
>>>

No, I think the rules on mismatched memory attributes still apply.

"Physical memory locations are accessed with mismatched attributes if 
all accesses to the location do not use a common definition of all of 
the following attributes of that location:
• Memory type: Device-nGnRnE, Device-nGnRE, Device-nGRE, Device-GRE or 
Normal."

When we're switching off the MMU, we're changing the type of memory for 
many of the efi regions and therefore the rules on mismatched memory 
attributes still apply.

>>>>
>>>> So unless UEFI maps all memory as Device-nGnRnE we have to do something. I
>>>> will try to find out more about UEFI's page tables.
>>>
>>> That's important to know, especially regarding the text section of the
> > > image. If UEFI doesn't clean it to PoC, kvm-unit-tests must do it in order
>>> to execute correctly with the MMU off.
>>>

Drew confirmed that UEFI page tables map most memory as Normal, Cacheable.

>>
>> Hi Alex and Nikos,
>>
>> Indeed my experiments on bare-metal made this change necessary. I'm happy
>> to see this discussion, though, as this patch could be tweaked or at least
>> the commit message improved in order to better explain what's going on and
>> why the changes are necessary. IOW, I have no problem with this patch
> 
> If you fix the commit message to be architecturally correct then you will
> come to the conclusion that the patch is architecturally incorrect because
> while it fixes the problem you were seeing, it breaks asm_mmu_disable in
> all other cases.
> 
> The problem you were seeing according to my investigation was this:
> 
> __phys_offset and __phys_end are written with the MMU on and the most up to
> date value is in the cache. When the MMU is turned off, the value that
> asm_mmu_disable reads is the stale value from main memory and it will not
> clean + invalidate all the memory, which is what we want. This assumes that
> UEFI cleaned the image to PoC, otherwise, that will need to be cleaned too
> by kvm-unit-tests before turning off the MMU.

The clean and/or invalidate instructions are also affected by the Memory 
type attributes so any operation after we switch off the MMU will be 
affected. Having said that, we might be fine:

"For Device memory and Normal memory that is Inner Non-cacheable, Outer 
Non-cacheable, these instructions must affect the caches of all PEs in 
the Outer Shareable shareability domain of the PE on which the 
instruction is operating."

> 
> This was explained before, both on your original UEFI support series on
> github [1], and on this list.
> 
> As for why it breaks asm_mmu_disable for all other cases:
> 
> The purpose of the clean in asm_mmu_disable is for the CPU to sync the
> caches with main memory when the MMU is turned off (to propagate the most
> up-to-date value from the cache to main memory); the purpose of the
> invalidate is to make sure that the CPU reads from main memory instead of
> the cache once the MMU is turned back on - if the cache line is still
> valid, the CPU will read the values written *before* the MMU was turned
> off, not the values written *after* the MMU was turned off.
> 
> If you do the dcache clean + invalidate *before* turning the MMU off, the
> CPU can speculate a read and allocate a new cache line before the MMU is
> turned off, which would make the invalidate useless. Speculation is
> prohibited with the MMU off, that's why the invalidate must be done with
> the MMU off.
> 
> Because of this reason I believe the patch is incorrect.
> 

Given the effect of the DC instructions for Device Memory maybe keeping 
the invalidation after switching the MMU off seems to be the right 
approach. We only need to clean anything we use during the clean and 
invalidate as you suggest.

>> being dropped and replaced by one of you with something that "makes
>> more sense" as long as the outcome (coherent execution on bare-metal)
>> still works.
> 
> Hmm... maybe an experiment will work. I propose the following:
> 
> 1. Revert this patch.
> 2. Apply this diff on top of the series:
> 
> diff --git a/lib/arm/setup.c b/lib/arm/setup.c
> index 30d04d0eb100..913f4088d96c 100644
> --- a/lib/arm/setup.c
> +++ b/lib/arm/setup.c
> @@ -374,6 +374,11 @@ static efi_status_t efi_mem_init(efi_bootinfo_t *efi_bootinfo)
>                  }
>          }
>          __phys_end &= PHYS_MASK;
> +
> +       asm volatile("dc cvau, %0\n" :: "r" (&__phys_offset) : "memory");
> +       asm volatile("dc cvau, %0\n" :: "r" (&__phys_end) : "memory");

These need to be dc cvac.

> +       dsb(sy);
> +
>          asm_mmu_disable();
> 
>          if (free_mem_pages == 0)
> 
> This is the solution, based on an architectural explanation of what we were
> observing, that I proposed on your github branch, a solution that you've
> tested with the result:
> 
> "I tested at least 10 times (lost count) with a build where "arm/arm64:
> mmu_disable: Clean and invalidate before disabling" was reverted from the
> target-efi branch and your hack was applied. It worked every time."
>

FWIW, I don't think running 10 times on one machine shows much about the 
architectural correctness of either solution.

Thanks,

Nikos

> [1] https://github.com/rhdrjones/kvm-unit-tests/commit/fc58684bc47b7d07d75098fdfddb6083e9b12104#commitcomment-44222926
> 
> Thanks,
> Alex
Alexandru Elisei July 1, 2022, 2:39 p.m. UTC | #11
Hi,

On Fri, Jul 01, 2022 at 12:34:43PM +0100, Nikos Nikoleris wrote:
> On 01/07/2022 11:24, Alexandru Elisei wrote:
> > Hi,
> > 
> > On Fri, Jul 01, 2022 at 11:12:14AM +0200, Andrew Jones wrote:
> > > On Thu, Jun 30, 2022 at 04:57:39PM +0100, Alexandru Elisei wrote:
> > > > Hi,
> > > > 
> > > > On Thu, Jun 30, 2022 at 04:16:09PM +0100, Nikos Nikoleris wrote:
> > > > > Hi Alex,
> > > > > 
> > > > > On 30/06/2022 12:24, Alexandru Elisei wrote:
> > > > > > Hi,
> > > > > > 
> > > > > > On Thu, Jun 30, 2022 at 12:08:41PM +0100, Nikos Nikoleris wrote:
> > > > > > > Hi Alex,
> > > > > > > 
> > > > > > > On 30/06/2022 11:20, Alexandru Elisei wrote:
> > > > > > > > Hi,
> > > > > > > > 
> > > > > > > > On Thu, Jun 30, 2022 at 11:03:12AM +0100, Nikos Nikoleris wrote:
> > > > > > > > > From: Andrew Jones <drjones@redhat.com>
> > > > > > > > > 
> > > > > > > > > The commit message of commit 410b3bf09e76 ("arm/arm64: Perform dcache
> > > > > > > > > clean + invalidate after turning MMU off") justifies cleaning and
> > > > > > > > > invalidating the dcache after disabling the MMU by saying it's nice
> > > > > > > > > not to rely on the current page tables and that it should still work
> > > > > > > > > (per the spec), as long as there's an identity map in the current
> > > > > > > > > tables. Doing the invalidation after also somewhat helped with
> > > > > > > > > reenabling the MMU without seeing stale data, but the real problem
> > > > > > > > > with reenabling was because the cache needs to be disabled with
> > > > > > > > > the MMU, but it wasn't.
> > > > > > > > > 
> > > > > > > > > Since we have to trust/validate that the current page tables have an
> > > > > > > > > identity map anyway, then there's no harm in doing the clean
> > > > > > > > > and invalidate first (it feels a little better to do so, anyway,
> > > > > > > > > considering the cache maintenance instructions take virtual
> > > > > > > > > addresses). Then, also disable the cache with the MMU to avoid
> > > > > > > > > problems when reenabling. We invalidate the Icache and disable
> > > > > > > > > that too for good measure. And, a final TLB invalidation ensures
> > > > > > > > > we're crystal clean when we return from asm_mmu_disable().
> > > > > > > > 
> > > > > > > > I'll point you to my previous reply [1] to this exact patch which explains
> > > > > > > > why it's incorrect and is only papering over another problem.
> > > > > > > > 
> > > > > > > > [1] https://lore.kernel.org/all/Yn5Z6Kyj62cUNgRN@monolith.localdoman/
> > > > > > > > 
> > > > > > > 
> > > > > > > Apologies, I didn't mean to ignore your feedback on this. There was a
> > > > > > > parallel discussion in [2] which I thought makes the problem more concrete.
> > > > > > 
> > > > > > No problem, I figured as much :).
> > > > > > 
> > > > > > > 
> > > > > > > This is Drew's patch as soon as he confirms he's also happy with the change
> > > > > > > you suggested in the patch description I am happy to make it.
> > > > > > > 
> > > > > > > Generally, a test will start off with the MMU enabled. At this point, we
> > > > > > > access code, use and modify data (EfiLoaderData, EfiLoaderCode). Any of the
> > > > > > > two regions could be mapped as any type of memory (I need to have another
> > > > > > > look to confirm if it's Normal Memory). Then we want to take over control of
> > > > > > > the page tables and for that reason we have to switch off the MMU. And any
> > > > > > > access to code or data will be with Device-nGnRnE as you pointed out. If we
> > > > > > > don't clean and invalidate, instructions and data might be in the cache and
> > > > > > > we will be mixing memory attributes, won't we?
> > > > > > 
> > > > > > I missed that comment, sorry. I've replied to that comment made in v2,
> > > > > > here, in this iteration, in patch #19 ("arm/arm64: Add a setup sequence for
> > > > > > systems that boot through EFI").
> > > > > > 
> > > > > > This is the second time you've mentioned mixed memory attributes, so I'm
> > > > > > going to reiterate the question I asked in patch #19: what do you mean by
> > > > > > "mixing memory attributes" and what is wrong with it? Because it looks to
> > > > > > me like you're saying that you cannot access data written with the MMU on
> > > > > > when the MMU is off (and I assume the other way around, you cannot access
> > > > > > data written with the MMU off when the MMU is on).
> > > > > > 
> > > > > 
> > > > > What I mean by mixing memory attributes is illustrated by the following
> > > > > example.
> > > > > 
> > > > > Take a memory location x, for which the page table entry maps to a physical
> > > > > location as Normal, Inner-Shareable, Inner-writeback and Outer-writeback. If
> > > > > we access it when the MMU is on and subsequently when the MMU is off (treated
> > > > > as Device-nGnRnE), then we have two accesses with mismatched memory
> > > > > attributes to the same location. There is a whole section in the Arm ARM on
> > > > > why this needs to be avoided (B2.8 Mismatched memory attributes) but the
> > > > > result is "a loss of the uniprocessor semantics, ordering, or coherency". As
> > > > > I understand, the solution to this is:
> > > > > 
> > > > > "If the mismatched attributes for a Location mean that multiple cacheable
> > > > > accesses to the Location might be made with different shareability
> > > > > attributes, then uniprocessor semantics, ordering, and coherency are
> > > > > guaranteed only if:
> > > > > • Software running on a PE cleans and invalidates a Location from cache
> > > > > before and after each read or write to that Location by that PE.
> > > > > • A DMB barrier with scope that covers the full shareability of the accesses
> > > > > is placed between any accesses to the same memory Location that use
> > > > > different attributes."
> > > > 
> > > > Ok, so this is about *mismatched* memory attributes. I searched the Arm ARM
> > > > for the string "mixed" and nothing relevant came up.
> > > > 
> > > > Device-whatever memory is outer shareable and kvm-unit-tests maps memory as
> > > > inner shareable, so that matches the "different shareability attributes"
> > > > part of the paragraph.
> > > > 
> > > > But I would like to point out that there is only one type of cacheable
> > > > access that is being performed, when the MMU is on. When the MMU is off,
> > > > the access is not cacheable. So there are two types of accesses being
> > > > performed:
> > > > 
> > > > - cacheable + inner-shareable (MMU on)
> > > > - non-cacheable + outer-shareable (MMU off)
> > > > 
> > > > It looks to me like the paragraph doesn't apply to our case, because there
> > > > are no "multiple cacheable accesses [..] made with different shareability
> > > > attributes". Do you agree?
> > > > 
> 
> No, I think the rules on mismatched memory attributes still apply.
> 
> "Physical memory locations are accessed with mismatched attributes if all
> accesses to the location do not use a common definition of all of the
> following attributes of that location:
> • Memory type: Device-nGnRnE, Device-nGnRE, Device-nGRE, Device-GRE or
> Normal."
> 
> When we're switching off the MMU, we're changing the type of memory for many
> of the efi regions and therefore the rules on mismatched memory attributes
> still apply.

Ok, you've successfully argued that every memory access done with the MMU
off (or on) is done with mismatched memory attributes with regard to
accesses done with the MMU on (or, respectively, off).

Taking a step back, what is the problem that this patch is supposed to
solve and how is this patch solving it, taking into account mismatched
memory attributes?

> 
> > > > > 
> > > > > So unless UEFI maps all memory as Device-nGnRnE we have to do something. I
> > > > > will try to find out more about UEFI's page tables.
> > > > 
> > > > That's important to know, especially regarding the text section of the
> > > > image. If UEFI doesnt' clean it to PoC, kvm-unit-tests must do it in order
> > > > to execute correctly with the MMU off.
> > > > 
> 
> Drew confirmed that UEFI page tables map most memory as Normal, Cacheable.

Saw that, thanks!

> 
> > > 
> > > Hi Alex and Nikos,
> > > 
> > > Indeed my experiments on bare-metal made this change necessary. I'm happy
> > > to see this discussion, though, as this patch could be tweaked or at least
> > > the commit message improved in order to better explain what's going on and
> > > why the changes are necessary. IOW, I have no problem with this patch
> > 
> > If you fix the commit message to be architecturally correct then you will
> > come to the conclusion that the patch is architecturally incorrect because
> > while it fixes the problem you were seeing, it breaks asm_mmu_disable in
> > all other cases.
> > 
> > The problem you were seeing according to my investigation was this:
> > 
> > __phys_offset and __phys_end are written with the MMU on and the most up to
> > date value is in the cache. When the MMU is turned off, the value that
> > asm_mmu_disable reads is the stale value from main memory and it will not
> > clean + invalidate all the memory, which is what we want. This assumes that
> > UEFI cleaned the image to PoC, otherwise, that will need to be cleaned too
> > by kvm-unit-tests before turning off the MMU.
> 
> The clean and/or invalidate instructions are also affected by the Memory
> type attributes so any operation after we switch off the MMU will be
> affected. Having said that, we might be fine:
> 
> "For Device memory and Normal memory that is Inner Non-cacheable, Outer
> Non-cacheable, these instructions must affect the caches of all PEs in the
> Outer Shareable shareability domain of the PE on which the instruction is
> operating."
> 
> > 
> > This was explained before, both on your original UEFI support series on
> > github [1], and on this list.
> > 
> > As for why it breaks asm_mmu_disable for all other cases:
> > 
> > The purpose of the clean in asm_mmu_disable is for the CPU to sync the
> > caches with main memory when the MMU is turned off (to propagate the most
> > up-to-date value from the cache to main memory); the purpose of the
> > invalidate is to make sure that the CPU reads from main memory instead of
> > the cache once the MMU is turned back on - if the cache line is still
> > valid, the CPU wll read the values written *before* the MMU was turned
> > off, not the values written *after* the MMU was turned off.
> > 
> > If you do the dcache clean + invalidate *before* turning the MMU off, the
> > CPU can speculate a read and allocate a new cache line before the MMU is
> > turned off, which would make the invalidate useless. Speculation is
> > prohibited with the MMU off, that's why the invalidate must be done with
> > the MMU off.
> > 
> > Because of this reason I believe the patch is incorrect.
> > 
> 
> Given the effect of the DC instructions for Device Memory maybe keeping the
> invalidation after switching the MMU off seems to be the right approach. We
> only need to clean anything we use during the clean and invalidate as you
> suggest.
> 
> > > being dropped and replaced by one of you with something that "makes
> > > more sense" as long as the outcome (coherent execution on bare-metal)
> > > still works.
> > 
> > Hmm... maybe an experiment will work. I propose the following:
> > 
> > 1. Revert this patch.
> > 2. Apply this diff on top of the series:
> > 
> > diff --git a/lib/arm/setup.c b/lib/arm/setup.c
> > index 30d04d0eb100..913f4088d96c 100644
> > --- a/lib/arm/setup.c
> > +++ b/lib/arm/setup.c
> > @@ -374,6 +374,11 @@ static efi_status_t efi_mem_init(efi_bootinfo_t *efi_bootinfo)
> >                  }
> >          }
> >          __phys_end &= PHYS_MASK;
> > +
> > +       asm volatile("dc cvau, %0\n" :: "r" (&__phys_offset) : "memory");
> > +       asm volatile("dc cvau, %0\n" :: "r" (&__phys_end) : "memory");
> 
> These need to be dc cvac.

Yes, it needs to be cleaned to PoC, not PoU, my bad.

Thanks,
Alex

> 
> > +       dsb(sy);
> > +
> >          asm_mmu_disable();
> > 
> >          if (free_mem_pages == 0)
> > 
> > This is the solution, based on an architectural explanation of what we were
> > observing, that I proposed on your github branch, a solution that you've
> > tested with the result:
> > 
> > "I tested at least 10 times (lost count) with a build where "arm/arm64:
> > mmu_disable: Clean and invalidate before disabling" was reverted from the
> > target-efi branch and your hack was applied. It worked every time."
> > 
> 
> FWIW, I don't think running 10 times on one machine shows much about the
> architectural correctness of either solution.
> 
> Thanks,
> 
> Nikos
> 
> > [1] https://github.com/rhdrjones/kvm-unit-tests/commit/fc58684bc47b7d07d75098fdfddb6083e9b12104#commitcomment-44222926
> > 
> > Thanks,
> > Alex
Alexandru Elisei July 11, 2022, 2:23 p.m. UTC | #12
Hi Drew,

Sorry, I lost track of this thread.

On Fri, Jul 01, 2022 at 01:16:27PM +0200, Andrew Jones wrote:
> On Fri, Jul 01, 2022 at 11:24:44AM +0100, Alexandru Elisei wrote:
> ...
> > > being dropped and replaced by one of you with something that "makes
> > > more sense" as long as the outcome (coherent execution on bare-metal)
> > > still works.
> > 
> > Hmm... maybe an experiment will work. I propose the following:
> > 
> > 1. Revert this patch.
> > 2. Apply this diff on top of the series:
> > 
> > diff --git a/lib/arm/setup.c b/lib/arm/setup.c
> > index 30d04d0eb100..913f4088d96c 100644
> > --- a/lib/arm/setup.c
> > +++ b/lib/arm/setup.c
> > @@ -374,6 +374,11 @@ static efi_status_t efi_mem_init(efi_bootinfo_t *efi_bootinfo)
> >                 }
> >         }
> >         __phys_end &= PHYS_MASK;
> > +
> > +       asm volatile("dc cvau, %0\n" :: "r" (&__phys_offset) : "memory");
> > +       asm volatile("dc cvau, %0\n" :: "r" (&__phys_end) : "memory");
> > +       dsb(sy);
> > +
> >         asm_mmu_disable();
> > 
> >         if (free_mem_pages == 0)
> > 
> > This is the solution, based on an architectural explanation of what we were
> > observing, that I proposed on your github branch, a solution that you've
> > tested with the result:
> > 
> > "I tested at least 10 times (lost count) with a build where "arm/arm64:
> > mmu_disable: Clean and invalidate before disabling" was reverted from the
> > target-efi branch and your hack was applied. It worked every time."
> > 
> > [1] https://github.com/rhdrjones/kvm-unit-tests/commit/fc58684bc47b7d07d75098fdfddb6083e9b12104#commitcomment-44222926
> >
> 
> Hi Alex,
> 
> Thanks for digging that back up. I had lost track of it. The last comment
> is you saying that you'll send a proper patch. Did you send one that got
> lost? If not, would you like to send one now that Nikos can incorporate?

The "proper patch" that I was referring to was to skip cache maintenance
when the MMU is already off. Based on the ongoing thread with Nikos, we
might have to rethink asm_mmu_disable, so I'm waiting for a conclusion
before I make any changes to asm_mmu_disable.

Thanks,
Alex

> 
> Thanks,
> drew
diff mbox series

Patch

diff --git a/arm/cstart.S b/arm/cstart.S
index 7036e67..dc324c5 100644
--- a/arm/cstart.S
+++ b/arm/cstart.S
@@ -179,6 +179,7 @@  halt:
 .globl asm_mmu_enable
 asm_mmu_enable:
 	/* TLBIALL */
+	mov	r2, #0
 	mcr	p15, 0, r2, c8, c7, 0
 	dsb	nsh
 
@@ -211,12 +212,7 @@  asm_mmu_enable:
 
 .globl asm_mmu_disable
 asm_mmu_disable:
-	/* SCTLR */
-	mrc	p15, 0, r0, c1, c0, 0
-	bic	r0, #CR_M
-	mcr	p15, 0, r0, c1, c0, 0
-	isb
-
+	/* Clean + invalidate the entire memory */
 	ldr	r0, =__phys_offset
 	ldr	r0, [r0]
 	ldr	r1, =__phys_end
@@ -224,7 +220,25 @@  asm_mmu_disable:
 	sub	r1, r1, r0
 	dcache_by_line_op dccimvac, sy, r0, r1, r2, r3
 
-	mov     pc, lr
+	/* Invalidate Icache */
+	mov	r0, #0
+	mcr	p15, 0, r0, c7, c5, 0
+	isb
+
+	/*  Disable cache, Icache and MMU */
+	mrc	p15, 0, r0, c1, c0, 0
+	bic	r0, #CR_C
+	bic	r0, #CR_I
+	bic	r0, #CR_M
+	mcr	p15, 0, r0, c1, c0, 0
+	isb
+
+	/* Invalidate TLB */
+	mov	r0, #0
+	mcr	p15, 0, r0, c8, c7, 0
+	dsb	nsh
+
+	mov	pc, lr
 
 /*
  * Vectors
diff --git a/arm/cstart64.S b/arm/cstart64.S
index e4ab7d0..390feb9 100644
--- a/arm/cstart64.S
+++ b/arm/cstart64.S
@@ -246,11 +246,6 @@  asm_mmu_enable:
 
 .globl asm_mmu_disable
 asm_mmu_disable:
-	mrs	x0, sctlr_el1
-	bic	x0, x0, SCTLR_EL1_M
-	msr	sctlr_el1, x0
-	isb
-
 	/* Clean + invalidate the entire memory */
 	adrp	x0, __phys_offset
 	ldr	x0, [x0, :lo12:__phys_offset]
@@ -259,6 +254,22 @@  asm_mmu_disable:
 	sub	x1, x1, x0
 	dcache_by_line_op civac, sy, x0, x1, x2, x3
 
+	/* Invalidate Icache */
+	ic	iallu
+	isb
+
+	/* Disable cache, Icache and MMU */
+	mrs	x0, sctlr_el1
+	bic	x0, x0, SCTLR_EL1_C
+	bic	x0, x0, SCTLR_EL1_I
+	bic	x0, x0, SCTLR_EL1_M
+	msr	sctlr_el1, x0
+	isb
+
+	/* Invalidate TLB */
+	tlbi	vmalle1
+	dsb	nsh
+
 	ret
 
 /*