diff mbox series

[1/4] arm64: Restrict ARM64_ERRATUM_1188873 mitigation to AArch32

Message ID 20190408160216.223871-2-marc.zyngier@arm.com (mailing list archive)
State New, archived
Headers show
Series arm64: Rework handling of erratum 1188873 | expand

Commit Message

Marc Zyngier April 8, 2019, 4:02 p.m. UTC
We currently deal with ARM64_ERRATUM_1188873 by always trapping EL0
accesses for both instruction sets. Although nothing wrong comes out
of that, people trying to squeeze the last drop of performance from
buggy HW find this over the top. Oh well.

Let's change the mitigation by flipping the counter enable bit
on return to userspace. Non-broken HW gets an extra branch on
the fast path, which is hopefully not the end of the world.
The arch timer workaround it also removed.

Signed-off-by: Marc Zyngier <marc.zyngier@arm.com>
---
 arch/arm64/kernel/entry.S            | 23 +++++++++++++++++++++++
 drivers/clocksource/arm_arch_timer.c | 15 ---------------
 2 files changed, 23 insertions(+), 15 deletions(-)

Comments

Robin Murphy April 8, 2019, 6:34 p.m. UTC | #1
Hi Marc,

On 08/04/2019 17:02, Marc Zyngier wrote:
> We currently deal with ARM64_ERRATUM_1188873 by always trapping EL0
> accesses for both instruction sets. Although nothing wrong comes out
> of that, people trying to squeeze the last drop of performance from
> buggy HW find this over the top. Oh well.
> 
> Let's change the mitigation by flipping the counter enable bit
> on return to userspace. Non-broken HW gets an extra branch on
> the fast path, which is hopefully not the end of the world.
> The arch timer workaround it also removed.
> 
> Signed-off-by: Marc Zyngier <marc.zyngier@arm.com>
> ---
>   arch/arm64/kernel/entry.S            | 23 +++++++++++++++++++++++
>   drivers/clocksource/arm_arch_timer.c | 15 ---------------
>   2 files changed, 23 insertions(+), 15 deletions(-)
> 
> diff --git a/arch/arm64/kernel/entry.S b/arch/arm64/kernel/entry.S
> index c50a7a75f2e0..e0aed21ab402 100644
> --- a/arch/arm64/kernel/entry.S
> +++ b/arch/arm64/kernel/entry.S
> @@ -336,6 +336,29 @@ alternative_if ARM64_WORKAROUND_845719
>   alternative_else_nop_endif
>   #endif
>   3:
> +#ifdef CONFIG_ARM64_ERRATUM_1188873
> +alternative_if_not ARM64_WORKAROUND_1188873
> +	b	1f
> +alternative_else_nop_endif
> +
> +	ubfx	x0, x22, #4, #1			// Extract PSR_MODE32
> +	eor	x0, x0, #1			// Negate it
> +alternative_if_not ARM64_HAS_VIRT_HOST_EXTN
> +	mrs	x1, cntkctl_el1
> +alternative_else
> +	mrs	x1, cnthctl_el2
> +alternative_endif
> +	ubfx	x2, x1, #1, #1			// Extract EL0VCTEN
> +	cmp	x2, x0
> +	b.eq	1f				// Matches, nothing to do
> +	bfi	x1, x0, #1, #1			// Move EL0VCTEN in place
> +alternative_if_not ARM64_HAS_VIRT_HOST_EXTN
> +	msr	cntkctl_el1, x1
> +alternative_else
> +	msr	cnthctl_el2, x1
> +alternative_endif

Unless I've gone horribly wrong in my reasoning somewhere, I reckon this 
could be a fair bit shorter...


alternative_if_not ARM64_HAS_VIRT_HOST_EXTN
	mrs	x1, cntkctl_el1
alternative_else
	mrs	x1, cnthctl_el2
alternative_endif
	eon	x0, x1, x22, lsr #3
	eor	x1, x1, #1
	tbz	x0, #1, 1f
alternative_if_not ARM64_HAS_VIRT_HOST_EXTN
	msr	cntkctl_el1, x1
alternative_else
	msr	cnthctl_el2, x1
alternative_endif


...although whether that's a good idea at all probably depends mostly on 
how many comments you think it needs :)

Robin.

> +1:
> +#endif
>   	apply_ssbd 0, x0, x1
>   	.endif
>   
> diff --git a/drivers/clocksource/arm_arch_timer.c b/drivers/clocksource/arm_arch_timer.c
> index 5fcccc467868..27acc9eb0f7c 100644
> --- a/drivers/clocksource/arm_arch_timer.c
> +++ b/drivers/clocksource/arm_arch_timer.c
> @@ -339,13 +339,6 @@ static u64 notrace arm64_858921_read_cntvct_el0(void)
>   }
>   #endif
>   
> -#ifdef CONFIG_ARM64_ERRATUM_1188873
> -static u64 notrace arm64_1188873_read_cntvct_el0(void)
> -{
> -	return read_sysreg(cntvct_el0);
> -}
> -#endif
> -
>   #ifdef CONFIG_SUN50I_ERRATUM_UNKNOWN1
>   /*
>    * The low bits of the counter registers are indeterminate while bit 10 or
> @@ -476,14 +469,6 @@ static const struct arch_timer_erratum_workaround ool_workarounds[] = {
>   		.read_cntvct_el0 = arm64_858921_read_cntvct_el0,
>   	},
>   #endif
> -#ifdef CONFIG_ARM64_ERRATUM_1188873
> -	{
> -		.match_type = ate_match_local_cap_id,
> -		.id = (void *)ARM64_WORKAROUND_1188873,
> -		.desc = "ARM erratum 1188873",
> -		.read_cntvct_el0 = arm64_1188873_read_cntvct_el0,
> -	},
> -#endif
>   #ifdef CONFIG_SUN50I_ERRATUM_UNKNOWN1
>   	{
>   		.match_type = ate_match_dt,
>
Marc Zyngier April 9, 2019, 9:18 a.m. UTC | #2
On 08/04/2019 19:34, Robin Murphy wrote:
> Hi Marc,
> 
> On 08/04/2019 17:02, Marc Zyngier wrote:
>> We currently deal with ARM64_ERRATUM_1188873 by always trapping EL0
>> accesses for both instruction sets. Although nothing wrong comes out
>> of that, people trying to squeeze the last drop of performance from
>> buggy HW find this over the top. Oh well.
>>
>> Let's change the mitigation by flipping the counter enable bit
>> on return to userspace. Non-broken HW gets an extra branch on
>> the fast path, which is hopefully not the end of the world.
>> The arch timer workaround it also removed.
>>
>> Signed-off-by: Marc Zyngier <marc.zyngier@arm.com>
>> ---
>>   arch/arm64/kernel/entry.S            | 23 +++++++++++++++++++++++
>>   drivers/clocksource/arm_arch_timer.c | 15 ---------------
>>   2 files changed, 23 insertions(+), 15 deletions(-)
>>
>> diff --git a/arch/arm64/kernel/entry.S b/arch/arm64/kernel/entry.S
>> index c50a7a75f2e0..e0aed21ab402 100644
>> --- a/arch/arm64/kernel/entry.S
>> +++ b/arch/arm64/kernel/entry.S
>> @@ -336,6 +336,29 @@ alternative_if ARM64_WORKAROUND_845719
>>   alternative_else_nop_endif
>>   #endif
>>   3:
>> +#ifdef CONFIG_ARM64_ERRATUM_1188873
>> +alternative_if_not ARM64_WORKAROUND_1188873
>> +	b	1f
>> +alternative_else_nop_endif
>> +
>> +	ubfx	x0, x22, #4, #1			// Extract PSR_MODE32
>> +	eor	x0, x0, #1			// Negate it
>> +alternative_if_not ARM64_HAS_VIRT_HOST_EXTN
>> +	mrs	x1, cntkctl_el1
>> +alternative_else
>> +	mrs	x1, cnthctl_el2
>> +alternative_endif
>> +	ubfx	x2, x1, #1, #1			// Extract EL0VCTEN
>> +	cmp	x2, x0
>> +	b.eq	1f				// Matches, nothing to do
>> +	bfi	x1, x0, #1, #1			// Move EL0VCTEN in place
>> +alternative_if_not ARM64_HAS_VIRT_HOST_EXTN
>> +	msr	cntkctl_el1, x1
>> +alternative_else
>> +	msr	cnthctl_el2, x1
>> +alternative_endif
> 
> Unless I've gone horribly wrong in my reasoning somewhere, I reckon this 
> could be a fair bit shorter...
> 
> 
> alternative_if_not ARM64_HAS_VIRT_HOST_EXTN
> 	mrs	x1, cntkctl_el1
> alternative_else
> 	mrs	x1, cnthctl_el2
> alternative_endif
> 	eon	x0, x1, x22, lsr #3
> 	eor	x1, x1, #1
> 	tbz	x0, #1, 1f
> alternative_if_not ARM64_HAS_VIRT_HOST_EXTN
> 	msr	cntkctl_el1, x1
> alternative_else
> 	msr	cnthctl_el2, x1
> alternative_endif
> 
> 
> ...although whether that's a good idea at all probably depends mostly on 
> how many comments you think it needs :)

Neat! Totally impossible to decipher, and thus perfect! ;-) The way I
understand it that pstate.m32 and cntkctl_el1.el0vcten must always have
opposite values. If they don't, we flip el0vcten.

We could move the eor after the tbz, I think. And we can also get rid of
the VHE alternative, as cntkctl_el1 is a valid accessor for cnthctl_el2
in that case.

Shal we settle for this?

+#ifdef CONFIG_ARM64_ERRATUM_1188873
+alternative_if_not ARM64_WORKAROUND_1188873
+       b       1f
+alternative_else_nop_endif
+
+       // if (!x22.mode32 != cntkctl_el1.el0vcten)
+       //         cntkctl_el1.el0vcten = !cntkctl_el1.el0vcten
+
+       mrs     x1, cntkctl_el1
+
+       eon     x0, x1, x22, lsr #3
+       tbz     x0, #1, 1f
+
+       eor     x1, x1, #1
+       msr     cntkctl_el1, x1
+1:
+#endif

	M.
Robin Murphy April 9, 2019, 9:49 a.m. UTC | #3
On 09/04/2019 10:18, Marc Zyngier wrote:
> On 08/04/2019 19:34, Robin Murphy wrote:
>> Hi Marc,
>>
>> On 08/04/2019 17:02, Marc Zyngier wrote:
>>> We currently deal with ARM64_ERRATUM_1188873 by always trapping EL0
>>> accesses for both instruction sets. Although nothing wrong comes out
>>> of that, people trying to squeeze the last drop of performance from
>>> buggy HW find this over the top. Oh well.
>>>
>>> Let's change the mitigation by flipping the counter enable bit
>>> on return to userspace. Non-broken HW gets an extra branch on
>>> the fast path, which is hopefully not the end of the world.
>>> The arch timer workaround it also removed.
>>>
>>> Signed-off-by: Marc Zyngier <marc.zyngier@arm.com>
>>> ---
>>>    arch/arm64/kernel/entry.S            | 23 +++++++++++++++++++++++
>>>    drivers/clocksource/arm_arch_timer.c | 15 ---------------
>>>    2 files changed, 23 insertions(+), 15 deletions(-)
>>>
>>> diff --git a/arch/arm64/kernel/entry.S b/arch/arm64/kernel/entry.S
>>> index c50a7a75f2e0..e0aed21ab402 100644
>>> --- a/arch/arm64/kernel/entry.S
>>> +++ b/arch/arm64/kernel/entry.S
>>> @@ -336,6 +336,29 @@ alternative_if ARM64_WORKAROUND_845719
>>>    alternative_else_nop_endif
>>>    #endif
>>>    3:
>>> +#ifdef CONFIG_ARM64_ERRATUM_1188873
>>> +alternative_if_not ARM64_WORKAROUND_1188873
>>> +	b	1f
>>> +alternative_else_nop_endif
>>> +
>>> +	ubfx	x0, x22, #4, #1			// Extract PSR_MODE32
>>> +	eor	x0, x0, #1			// Negate it
>>> +alternative_if_not ARM64_HAS_VIRT_HOST_EXTN
>>> +	mrs	x1, cntkctl_el1
>>> +alternative_else
>>> +	mrs	x1, cnthctl_el2
>>> +alternative_endif
>>> +	ubfx	x2, x1, #1, #1			// Extract EL0VCTEN
>>> +	cmp	x2, x0
>>> +	b.eq	1f				// Matches, nothing to do
>>> +	bfi	x1, x0, #1, #1			// Move EL0VCTEN in place
>>> +alternative_if_not ARM64_HAS_VIRT_HOST_EXTN
>>> +	msr	cntkctl_el1, x1
>>> +alternative_else
>>> +	msr	cnthctl_el2, x1
>>> +alternative_endif
>>
>> Unless I've gone horribly wrong in my reasoning somewhere, I reckon this
>> could be a fair bit shorter...
>>
>>
>> alternative_if_not ARM64_HAS_VIRT_HOST_EXTN
>> 	mrs	x1, cntkctl_el1
>> alternative_else
>> 	mrs	x1, cnthctl_el2
>> alternative_endif
>> 	eon	x0, x1, x22, lsr #3
>> 	eor	x1, x1, #1
>> 	tbz	x0, #1, 1f
>> alternative_if_not ARM64_HAS_VIRT_HOST_EXTN
>> 	msr	cntkctl_el1, x1
>> alternative_else
>> 	msr	cnthctl_el2, x1
>> alternative_endif
>>
>>
>> ...although whether that's a good idea at all probably depends mostly on
>> how many comments you think it needs :)
> 
> Neat! Totally impossible to decipher, and thus perfect! ;-) The way I
> understand it that pstate.m32 and cntkctl_el1.el0vcten must always have
> opposite values. If they don't, we flip el0vcten.

Yup, that's the trick!

> We could move the eor after the tbz, I think. And we can also get rid of
> the VHE alternative, as cntkctl_el1 is a valid accessor for cnthctl_el2
> in that case.

Indeed I initially wrote the logic TBZ-first, but then decided to flip 
them in the spirit of "trying to squeeze the last drop of performance" 
:) Reason being that on most of our microarchitectures a 
shifted-register operand tends to incur +1 cycle of result latency, so 
in theory we can slip in the EOR for free while waiting on the x0 
dependency. I may have got a bit carried away there, since on reflection 
this is of course only going to be run by the affected CPUs which should 
be big and OoO enough to make that entirely irrelevant, so dialling it 
back a bit for the sake of a little clarity seems a sensible thing to do.

> Shal we settle for this?
> 
> +#ifdef CONFIG_ARM64_ERRATUM_1188873
> +alternative_if_not ARM64_WORKAROUND_1188873
> +       b       1f
> +alternative_else_nop_endif
> +
> +       // if (!x22.mode32 != cntkctl_el1.el0vcten)
> +       //         cntkctl_el1.el0vcten = !cntkctl_el1.el0vcten
> +
> +       mrs     x1, cntkctl_el1
> +
> +       eon     x0, x1, x22, lsr #3
> +       tbz     x0, #1, 1f
> +
> +       eor     x1, x1, #1
> +       msr     cntkctl_el1, x1
> +1:
> +#endif

Nice, that looks about as tidy as it'll ever get!

Cheers,
Robin.
Will Deacon April 12, 2019, 1:17 p.m. UTC | #4
Hi Marc,

On Mon, Apr 08, 2019 at 05:02:13PM +0100, Marc Zyngier wrote:
> We currently deal with ARM64_ERRATUM_1188873 by always trapping EL0
> accesses for both instruction sets. Although nothing wrong comes out
> of that, people trying to squeeze the last drop of performance from
> buggy HW find this over the top. Oh well.
> 
> Let's change the mitigation by flipping the counter enable bit
> on return to userspace. Non-broken HW gets an extra branch on
> the fast path, which is hopefully not the end of the world.
> The arch timer workaround it also removed.
> 
> Signed-off-by: Marc Zyngier <marc.zyngier@arm.com>
> ---
>  arch/arm64/kernel/entry.S            | 23 +++++++++++++++++++++++
>  drivers/clocksource/arm_arch_timer.c | 15 ---------------
>  2 files changed, 23 insertions(+), 15 deletions(-)
> 
> diff --git a/arch/arm64/kernel/entry.S b/arch/arm64/kernel/entry.S
> index c50a7a75f2e0..e0aed21ab402 100644
> --- a/arch/arm64/kernel/entry.S
> +++ b/arch/arm64/kernel/entry.S
> @@ -336,6 +336,29 @@ alternative_if ARM64_WORKAROUND_845719
>  alternative_else_nop_endif
>  #endif
>  3:
> +#ifdef CONFIG_ARM64_ERRATUM_1188873
> +alternative_if_not ARM64_WORKAROUND_1188873
> +	b	1f
> +alternative_else_nop_endif
> +
> +	ubfx	x0, x22, #4, #1			// Extract PSR_MODE32
> +	eor	x0, x0, #1			// Negate it
> +alternative_if_not ARM64_HAS_VIRT_HOST_EXTN
> +	mrs	x1, cntkctl_el1
> +alternative_else
> +	mrs	x1, cnthctl_el2
> +alternative_endif
> +	ubfx	x2, x1, #1, #1			// Extract EL0VCTEN
> +	cmp	x2, x0

Aren't the flags live at this point (they indicate native vs compat)?
Maybe you can use that instead of re-extracting PSR_MODE32.

> +	b.eq	1f				// Matches, nothing to do
> +	bfi	x1, x0, #1, #1			// Move EL0VCTEN in place

ARCH_TIMER_USR_VCT_ACCESS_EN

> +alternative_if_not ARM64_HAS_VIRT_HOST_EXTN
> +	msr	cntkctl_el1, x1
> +alternative_else
> +	msr	cnthctl_el2, x1
> +alternative_endif
> +1:

Sorry to be a pain, but could you use label '4:' here and update the others
in this macro, please?

> +#endif
>  	apply_ssbd 0, x0, x1
>  	.endif
>  
> diff --git a/drivers/clocksource/arm_arch_timer.c b/drivers/clocksource/arm_arch_timer.c
> index 5fcccc467868..27acc9eb0f7c 100644
> --- a/drivers/clocksource/arm_arch_timer.c
> +++ b/drivers/clocksource/arm_arch_timer.c

I probably need an Ack from Thomas or Daniel on these parts, so I can take
the series via arm64.

Will
Marc Zyngier April 15, 2019, 8:15 a.m. UTC | #5
On 12/04/2019 14:17, Will Deacon wrote:
> Hi Marc,
> 
> On Mon, Apr 08, 2019 at 05:02:13PM +0100, Marc Zyngier wrote:
>> We currently deal with ARM64_ERRATUM_1188873 by always trapping EL0
>> accesses for both instruction sets. Although nothing wrong comes out
>> of that, people trying to squeeze the last drop of performance from
>> buggy HW find this over the top. Oh well.
>>
>> Let's change the mitigation by flipping the counter enable bit
>> on return to userspace. Non-broken HW gets an extra branch on
>> the fast path, which is hopefully not the end of the world.
>> The arch timer workaround it also removed.
>>
>> Signed-off-by: Marc Zyngier <marc.zyngier@arm.com>
>> ---
>>  arch/arm64/kernel/entry.S            | 23 +++++++++++++++++++++++
>>  drivers/clocksource/arm_arch_timer.c | 15 ---------------
>>  2 files changed, 23 insertions(+), 15 deletions(-)
>>
>> diff --git a/arch/arm64/kernel/entry.S b/arch/arm64/kernel/entry.S
>> index c50a7a75f2e0..e0aed21ab402 100644
>> --- a/arch/arm64/kernel/entry.S
>> +++ b/arch/arm64/kernel/entry.S
>> @@ -336,6 +336,29 @@ alternative_if ARM64_WORKAROUND_845719
>>  alternative_else_nop_endif
>>  #endif
>>  3:
>> +#ifdef CONFIG_ARM64_ERRATUM_1188873
>> +alternative_if_not ARM64_WORKAROUND_1188873
>> +	b	1f
>> +alternative_else_nop_endif
>> +
>> +	ubfx	x0, x22, #4, #1			// Extract PSR_MODE32
>> +	eor	x0, x0, #1			// Negate it
>> +alternative_if_not ARM64_HAS_VIRT_HOST_EXTN
>> +	mrs	x1, cntkctl_el1
>> +alternative_else
>> +	mrs	x1, cnthctl_el2
>> +alternative_endif
>> +	ubfx	x2, x1, #1, #1			// Extract EL0VCTEN
>> +	cmp	x2, x0
> 
> Aren't the flags live at this point (they indicate native vs compat)?
> Maybe you can use that instead of re-extracting PSR_MODE32.

Robin and I have been reworking this, see other emails in the same
thread, and the code looks pretty different now (and not breaking things
anymore).

> 
>> +	b.eq	1f				// Matches, nothing to do
>> +	bfi	x1, x0, #1, #1			// Move EL0VCTEN in place
> 
> ARCH_TIMER_USR_VCT_ACCESS_EN
> 
>> +alternative_if_not ARM64_HAS_VIRT_HOST_EXTN
>> +	msr	cntkctl_el1, x1
>> +alternative_else
>> +	msr	cnthctl_el2, x1
>> +alternative_endif
>> +1:
> 
> Sorry to be a pain, but could you use label '4:' here and update the others
> in this macro, please?

Sure, no problem.

> 
>> +#endif
>>  	apply_ssbd 0, x0, x1
>>  	.endif
>>  
>> diff --git a/drivers/clocksource/arm_arch_timer.c b/drivers/clocksource/arm_arch_timer.c
>> index 5fcccc467868..27acc9eb0f7c 100644
>> --- a/drivers/clocksource/arm_arch_timer.c
>> +++ b/drivers/clocksource/arm_arch_timer.c
> 
> I probably need an Ack from Thomas or Daniel on these parts, so I can take
> the series via arm64.

Daniel is on Cc, and hopefully v2 will be OK.

Thanks,

	M.
Daniel Lezcano April 15, 2019, 8:26 a.m. UTC | #6
On 15/04/2019 10:15, Marc Zyngier wrote:
> On 12/04/2019 14:17, Will Deacon wrote:
>> Hi Marc,
>>
>> On Mon, Apr 08, 2019 at 05:02:13PM +0100, Marc Zyngier wrote:
>>> We currently deal with ARM64_ERRATUM_1188873 by always trapping EL0
>>> accesses for both instruction sets. Although nothing wrong comes out
>>> of that, people trying to squeeze the last drop of performance from
>>> buggy HW find this over the top. Oh well.
>>>
>>> Let's change the mitigation by flipping the counter enable bit
>>> on return to userspace. Non-broken HW gets an extra branch on
>>> the fast path, which is hopefully not the end of the world.
>>> The arch timer workaround it also removed.
>>>
>>> Signed-off-by: Marc Zyngier <marc.zyngier@arm.com>

Hi Marc,

for the arm_arch_timer.c part:

Acked-by: Daniel Lezcano <daniel.lezcano@linaro.org>


>>> ---
>>>  arch/arm64/kernel/entry.S            | 23 +++++++++++++++++++++++
>>>  drivers/clocksource/arm_arch_timer.c | 15 ---------------
>>>  2 files changed, 23 insertions(+), 15 deletions(-)
>>>
>>> diff --git a/arch/arm64/kernel/entry.S b/arch/arm64/kernel/entry.S
>>> index c50a7a75f2e0..e0aed21ab402 100644
>>> --- a/arch/arm64/kernel/entry.S
>>> +++ b/arch/arm64/kernel/entry.S
>>> @@ -336,6 +336,29 @@ alternative_if ARM64_WORKAROUND_845719
>>>  alternative_else_nop_endif
>>>  #endif
>>>  3:
>>> +#ifdef CONFIG_ARM64_ERRATUM_1188873
>>> +alternative_if_not ARM64_WORKAROUND_1188873
>>> +	b	1f
>>> +alternative_else_nop_endif
>>> +
>>> +	ubfx	x0, x22, #4, #1			// Extract PSR_MODE32
>>> +	eor	x0, x0, #1			// Negate it
>>> +alternative_if_not ARM64_HAS_VIRT_HOST_EXTN
>>> +	mrs	x1, cntkctl_el1
>>> +alternative_else
>>> +	mrs	x1, cnthctl_el2
>>> +alternative_endif
>>> +	ubfx	x2, x1, #1, #1			// Extract EL0VCTEN
>>> +	cmp	x2, x0
>>
>> Aren't the flags live at this point (they indicate native vs compat)?
>> Maybe you can use that instead of re-extracting PSR_MODE32.
> 
> Robin and I have been reworking this, see other emails in the same
> thread, and the code looks pretty different now (and not breaking things
> anymore).
> 
>>
>>> +	b.eq	1f				// Matches, nothing to do
>>> +	bfi	x1, x0, #1, #1			// Move EL0VCTEN in place
>>
>> ARCH_TIMER_USR_VCT_ACCESS_EN
>>
>>> +alternative_if_not ARM64_HAS_VIRT_HOST_EXTN
>>> +	msr	cntkctl_el1, x1
>>> +alternative_else
>>> +	msr	cnthctl_el2, x1
>>> +alternative_endif
>>> +1:
>>
>> Sorry to be a pain, but could you use label '4:' here and update the others
>> in this macro, please?
> 
> Sure, no problem.
> 
>>
>>> +#endif
>>>  	apply_ssbd 0, x0, x1
>>>  	.endif
>>>  
>>> diff --git a/drivers/clocksource/arm_arch_timer.c b/drivers/clocksource/arm_arch_timer.c
>>> index 5fcccc467868..27acc9eb0f7c 100644
>>> --- a/drivers/clocksource/arm_arch_timer.c
>>> +++ b/drivers/clocksource/arm_arch_timer.c
>>
>> I probably need an Ack from Thomas or Daniel on these parts, so I can take
>> the series via arm64.
> 
> Daniel is on Cc, and hopefully v2 will be OK.
diff mbox series

Patch

diff --git a/arch/arm64/kernel/entry.S b/arch/arm64/kernel/entry.S
index c50a7a75f2e0..e0aed21ab402 100644
--- a/arch/arm64/kernel/entry.S
+++ b/arch/arm64/kernel/entry.S
@@ -336,6 +336,29 @@  alternative_if ARM64_WORKAROUND_845719
 alternative_else_nop_endif
 #endif
 3:
+#ifdef CONFIG_ARM64_ERRATUM_1188873
+alternative_if_not ARM64_WORKAROUND_1188873
+	b	1f
+alternative_else_nop_endif
+
+	ubfx	x0, x22, #4, #1			// Extract PSR_MODE32
+	eor	x0, x0, #1			// Negate it
+alternative_if_not ARM64_HAS_VIRT_HOST_EXTN
+	mrs	x1, cntkctl_el1
+alternative_else
+	mrs	x1, cnthctl_el2
+alternative_endif
+	ubfx	x2, x1, #1, #1			// Extract EL0VCTEN
+	cmp	x2, x0
+	b.eq	1f				// Matches, nothing to do
+	bfi	x1, x0, #1, #1			// Move EL0VCTEN in place
+alternative_if_not ARM64_HAS_VIRT_HOST_EXTN
+	msr	cntkctl_el1, x1
+alternative_else
+	msr	cnthctl_el2, x1
+alternative_endif
+1:
+#endif
 	apply_ssbd 0, x0, x1
 	.endif
 
diff --git a/drivers/clocksource/arm_arch_timer.c b/drivers/clocksource/arm_arch_timer.c
index 5fcccc467868..27acc9eb0f7c 100644
--- a/drivers/clocksource/arm_arch_timer.c
+++ b/drivers/clocksource/arm_arch_timer.c
@@ -339,13 +339,6 @@  static u64 notrace arm64_858921_read_cntvct_el0(void)
 }
 #endif
 
-#ifdef CONFIG_ARM64_ERRATUM_1188873
-static u64 notrace arm64_1188873_read_cntvct_el0(void)
-{
-	return read_sysreg(cntvct_el0);
-}
-#endif
-
 #ifdef CONFIG_SUN50I_ERRATUM_UNKNOWN1
 /*
  * The low bits of the counter registers are indeterminate while bit 10 or
@@ -476,14 +469,6 @@  static const struct arch_timer_erratum_workaround ool_workarounds[] = {
 		.read_cntvct_el0 = arm64_858921_read_cntvct_el0,
 	},
 #endif
-#ifdef CONFIG_ARM64_ERRATUM_1188873
-	{
-		.match_type = ate_match_local_cap_id,
-		.id = (void *)ARM64_WORKAROUND_1188873,
-		.desc = "ARM erratum 1188873",
-		.read_cntvct_el0 = arm64_1188873_read_cntvct_el0,
-	},
-#endif
 #ifdef CONFIG_SUN50I_ERRATUM_UNKNOWN1
 	{
 		.match_type = ate_match_dt,