diff mbox series

[v4,06/10] arm64: cpufeature: Detect HCR_EL2.NV1 being RES0

Message ID 20240122181344.258974-7-maz@kernel.org (mailing list archive)
State New, archived
Headers show
Series arm64: Add support for FEAT_E2H0, or lack thereof | expand

Commit Message

Marc Zyngier Jan. 22, 2024, 6:13 p.m. UTC
A variant of FEAT_E2H0 not being implemented exists in the form of
HCR_EL2.E2H being RES1 *and* HCR_EL2.NV1 being RES0 (indicating that
only VHE is supported on the host and nested guests).

Add the necessary infrastructure for this new CPU capability.

Reviewed-by: Suzuki K Poulose <suzuki.poulose@arm.com>
Signed-off-by: Marc Zyngier <maz@kernel.org>
---
 arch/arm64/kernel/cpufeature.c | 12 ++++++++++++
 arch/arm64/tools/cpucaps       |  1 +
 2 files changed, 13 insertions(+)

Comments

Catalin Marinas Feb. 8, 2024, 12:26 p.m. UTC | #1
On Mon, Jan 22, 2024 at 06:13:40PM +0000, Marc Zyngier wrote:
> A variant of FEAT_E2H0 not being implemented exists in the form of
> HCR_EL2.E2H being RES1 *and* HCR_EL2.NV1 being RES0 (indicating that
> only VHE is supported on the host and nested guests).
> 
> Add the necessary infrastructure for this new CPU capability.
> 
> Reviewed-by: Suzuki K Poulose <suzuki.poulose@arm.com>
> Signed-off-by: Marc Zyngier <maz@kernel.org>

Reviewed-by: Catalin Marinas <catalin.marinas@arm.com>
Catalin Marinas Feb. 8, 2024, 12:27 p.m. UTC | #2
On Mon, Jan 22, 2024 at 06:13:40PM +0000, Marc Zyngier wrote:
> A variant of FEAT_E2H0 not being implemented exists in the form of
> HCR_EL2.E2H being RES1 *and* HCR_EL2.NV1 being RES0 (indicating that
> only VHE is supported on the host and nested guests).
> 
> Add the necessary infrastructure for this new CPU capability.
> 
> Reviewed-by: Suzuki K Poulose <suzuki.poulose@arm.com>
> Signed-off-by: Marc Zyngier <maz@kernel.org>

Reviewed-by: Catalin Marinas <catalin.marinas@arm.com>
Marek Szyprowski Feb. 12, 2024, 12:48 p.m. UTC | #3
Dead All,

On 22.01.2024 19:13, Marc Zyngier wrote:
> A variant of FEAT_E2H0 not being implemented exists in the form of
> HCR_EL2.E2H being RES1 *and* HCR_EL2.NV1 being RES0 (indicating that
> only VHE is supported on the host and nested guests).
>
> Add the necessary infrastructure for this new CPU capability.
>
> Reviewed-by: Suzuki K Poulose <suzuki.poulose@arm.com>
> Signed-off-by: Marc Zyngier <maz@kernel.org>
> ---

This patch landed recently in linux-next as commit da9af5071b25 ("arm64: 
cpufeature: Detect HCR_EL2.NV1 being RES0"). I found that it causes a 
following regression in the CPU hot-plug operation:

# for i in /sys/devices/system/cpu/cpu[1-9]; do echo 1 >$i/online; done
------------[ cut here ]------------
kernel BUG at arch/arm64/kernel/cpufeature.c:1468!
Internal error: Oops - BUG: 00000000f2000800 [#1] PREEMPT SMP
Modules linked in: dw_hdmi_cec dw_hdmi_i2s_audio crct10dif_ce 
snd_soc_simple_card rockchip_saradc industrialio_triggered_buffer 
hantro_vpu kfifo_buf snd_soc_simple_card_utils rockchip_thermal 
phy_rockchip_naneng_combphy display_connector gpio_ir_recv v4l2_vp9 
v4l2_h264 v4l2_mem2mem dwmac_rk videobuf2_dma_contig stmmac_platform 
videobuf2_memops videobuf2_v4l2 stmmac rockchipdrm videodev 
snd_soc_rockchip_i2s_tdm snd_soc_rk817 pcs_xpcs panfrost rtc_rk808 
rk805_pwrkey analogix_dp rk817_charger spi_rockchip_sfc dw_mipi_dsi 
videobuf2_common dw_hdmi drm_shmem_helper mc gpu_sched 
drm_display_helper ahci_dwc ip_tables x_tables ipv6
CPU: 1 PID: 0 Comm: swapper/1 Not tainted 6.8.0-rc1+ #14563
Hardware name: Hardkernel ODROID-M1 (DT)
pstate: 800001c9 (Nzcv dAIF -PAN -UAO -TCO -DIT -SSBS BTYPE=--)
pc : __read_sysreg_by_encoding+0x38c/0x390
lr : read_scoped_sysreg+0x4c/0x70
...
Call trace:
  __read_sysreg_by_encoding+0x38c/0x390
  read_scoped_sysreg+0x4c/0x70
  has_nv1+0x18/0x48
  verify_local_cpu_caps+0x54/0x124
  check_local_cpu_capabilities+0x28/0x208
  secondary_start_kernel+0xb0/0x154
  __secondary_switched+0xb8/0xbc
Code: d53802f3 17ffff3b d5380253 17ffff39 (d4210000)
---[ end trace 0000000000000000 ]---
Kernel panic - not syncing: Attempted to kill the idle task!
SMP: stopping secondary CPUs
Kernel Offset: disabled
CPU features: 0x1,80000000,80050295,2100721b
Memory Limit: none
---[ end Kernel panic - not syncing: Attempted to kill the idle task! ]---

It looks that some additional checks are needed for the CPU hot-plug case.

>   arch/arm64/kernel/cpufeature.c | 12 ++++++++++++
>   arch/arm64/tools/cpucaps       |  1 +
>   2 files changed, 13 insertions(+)
>
> diff --git a/arch/arm64/kernel/cpufeature.c b/arch/arm64/kernel/cpufeature.c
> index ad3753fbdcb1..91249d20883b 100644
> --- a/arch/arm64/kernel/cpufeature.c
> +++ b/arch/arm64/kernel/cpufeature.c
> @@ -1794,6 +1794,11 @@ static bool unmap_kernel_at_el0(const struct arm64_cpu_capabilities *entry,
>   	return !meltdown_safe;
>   }
>   
> +static bool has_nv1(const struct arm64_cpu_capabilities *entry, int scope)
> +{
> +	return !has_cpuid_feature(entry, scope);
> +}
> +
>   #if defined(ID_AA64MMFR0_EL1_TGRAN_LPA2) && defined(ID_AA64MMFR0_EL1_TGRAN_2_SUPPORTED_LPA2)
>   static bool has_lpa2_at_stage1(u64 mmfr0)
>   {
> @@ -2794,6 +2799,13 @@ static const struct arm64_cpu_capabilities arm64_features[] = {
>   		.type = ARM64_CPUCAP_SYSTEM_FEATURE,
>   		.matches = has_lpa2,
>   	},
> +	{
> +		.desc = "NV1",
> +		.capability = ARM64_HAS_HCR_NV1,
> +		.type = ARM64_CPUCAP_SYSTEM_FEATURE,
> +		.matches = has_nv1,
> +		ARM64_CPUID_FIELDS_NEG(ID_AA64MMFR4_EL1, E2H0, NI_NV1)
> +	},
>   	{},
>   };
>   
> diff --git a/arch/arm64/tools/cpucaps b/arch/arm64/tools/cpucaps
> index b912b1409fc0..65090dd34641 100644
> --- a/arch/arm64/tools/cpucaps
> +++ b/arch/arm64/tools/cpucaps
> @@ -35,6 +35,7 @@ HAS_GENERIC_AUTH_IMP_DEF
>   HAS_GIC_CPUIF_SYSREGS
>   HAS_GIC_PRIO_MASKING
>   HAS_GIC_PRIO_RELAXED_SYNC
> +HAS_HCR_NV1
>   HAS_HCX
>   HAS_LDAPR
>   HAS_LPA2

Best regards
Marc Zyngier Feb. 12, 2024, 2 p.m. UTC | #4
On Mon, 12 Feb 2024 12:48:35 +0000,
Marek Szyprowski <m.szyprowski@samsung.com> wrote:
> 
> Dead All,
> 
> On 22.01.2024 19:13, Marc Zyngier wrote:
> > A variant of FEAT_E2H0 not being implemented exists in the form of
> > HCR_EL2.E2H being RES1 *and* HCR_EL2.NV1 being RES0 (indicating that
> > only VHE is supported on the host and nested guests).
> >
> > Add the necessary infrastructure for this new CPU capability.
> >
> > Reviewed-by: Suzuki K Poulose <suzuki.poulose@arm.com>
> > Signed-off-by: Marc Zyngier <maz@kernel.org>
> > ---
> 
> This patch landed recently in linux-next as commit da9af5071b25 ("arm64: 
> cpufeature: Detect HCR_EL2.NV1 being RES0"). I found that it causes a 
> following regression in the CPU hot-plug operation:
> 
> # for i in /sys/devices/system/cpu/cpu[1-9]; do echo 1 >$i/online; done
> ------------[ cut here ]------------
> kernel BUG at arch/arm64/kernel/cpufeature.c:1468!
> Internal error: Oops - BUG: 00000000f2000800 [#1] PREEMPT SMP
> Modules linked in: dw_hdmi_cec dw_hdmi_i2s_audio crct10dif_ce 
> snd_soc_simple_card rockchip_saradc industrialio_triggered_buffer 
> hantro_vpu kfifo_buf snd_soc_simple_card_utils rockchip_thermal 
> phy_rockchip_naneng_combphy display_connector gpio_ir_recv v4l2_vp9 
> v4l2_h264 v4l2_mem2mem dwmac_rk videobuf2_dma_contig stmmac_platform 
> videobuf2_memops videobuf2_v4l2 stmmac rockchipdrm videodev 
> snd_soc_rockchip_i2s_tdm snd_soc_rk817 pcs_xpcs panfrost rtc_rk808 
> rk805_pwrkey analogix_dp rk817_charger spi_rockchip_sfc dw_mipi_dsi 
> videobuf2_common dw_hdmi drm_shmem_helper mc gpu_sched 
> drm_display_helper ahci_dwc ip_tables x_tables ipv6
> CPU: 1 PID: 0 Comm: swapper/1 Not tainted 6.8.0-rc1+ #14563
> Hardware name: Hardkernel ODROID-M1 (DT)
> pstate: 800001c9 (Nzcv dAIF -PAN -UAO -TCO -DIT -SSBS BTYPE=--)
> pc : __read_sysreg_by_encoding+0x38c/0x390
> lr : read_scoped_sysreg+0x4c/0x70
> ...
> Call trace:
>   __read_sysreg_by_encoding+0x38c/0x390
>   read_scoped_sysreg+0x4c/0x70
>   has_nv1+0x18/0x48
>   verify_local_cpu_caps+0x54/0x124
>   check_local_cpu_capabilities+0x28/0x208
>   secondary_start_kernel+0xb0/0x154
>   __secondary_switched+0xb8/0xbc
> Code: d53802f3 17ffff3b d5380253 17ffff39 (d4210000)
> ---[ end trace 0000000000000000 ]---
> Kernel panic - not syncing: Attempted to kill the idle task!
> SMP: stopping secondary CPUs
> Kernel Offset: disabled
> CPU features: 0x1,80000000,80050295,2100721b
> Memory Limit: none
> ---[ end Kernel panic - not syncing: Attempted to kill the idle task! ]---
> 
> It looks that some additional checks are needed for the CPU hot-plug case.

Nah, that's much simpler.

I simply missed a spot where ID_AA64MMFR4_EL1 must be handled (it is
getting annoying that we have more than a single place where these
"read all the ID registers" are handled).

Can you please give the following a go? If that works for you (it does
for me here), I'll post an actual fix (plus another fix for another
buglet I just noticed).

Thanks,

	M.

diff --git a/arch/arm64/kernel/cpufeature.c b/arch/arm64/kernel/cpufeature.c
index 0f29ac43c7a2..2f8958f27e9e 100644
--- a/arch/arm64/kernel/cpufeature.c
+++ b/arch/arm64/kernel/cpufeature.c
@@ -1456,6 +1456,7 @@ u64 __read_sysreg_by_encoding(u32 sys_id)
 	read_sysreg_case(SYS_ID_AA64MMFR1_EL1);
 	read_sysreg_case(SYS_ID_AA64MMFR2_EL1);
 	read_sysreg_case(SYS_ID_AA64MMFR3_EL1);
+	read_sysreg_case(SYS_ID_AA64MMFR4_EL1);
 	read_sysreg_case(SYS_ID_AA64ISAR0_EL1);
 	read_sysreg_case(SYS_ID_AA64ISAR1_EL1);
 	read_sysreg_case(SYS_ID_AA64ISAR2_EL1);
Marek Szyprowski Feb. 12, 2024, 2:21 p.m. UTC | #5
On 12.02.2024 15:00, Marc Zyngier wrote:
> On Mon, 12 Feb 2024 12:48:35 +0000,
> Marek Szyprowski <m.szyprowski@samsung.com> wrote:
>> On 22.01.2024 19:13, Marc Zyngier wrote:
>>> A variant of FEAT_E2H0 not being implemented exists in the form of
>>> HCR_EL2.E2H being RES1 *and* HCR_EL2.NV1 being RES0 (indicating that
>>> only VHE is supported on the host and nested guests).
>>>
>>> Add the necessary infrastructure for this new CPU capability.
>>>
>>> Reviewed-by: Suzuki K Poulose <suzuki.poulose@arm.com>
>>> Signed-off-by: Marc Zyngier <maz@kernel.org>
>>> ---
>> This patch landed recently in linux-next as commit da9af5071b25 ("arm64:
>> cpufeature: Detect HCR_EL2.NV1 being RES0"). I found that it causes a
>> following regression in the CPU hot-plug operation:
>>
>> # for i in /sys/devices/system/cpu/cpu[1-9]; do echo 1 >$i/online; done
>> ------------[ cut here ]------------
>> kernel BUG at arch/arm64/kernel/cpufeature.c:1468!
>> Internal error: Oops - BUG: 00000000f2000800 [#1] PREEMPT SMP
>> Modules linked in: dw_hdmi_cec dw_hdmi_i2s_audio crct10dif_ce
>> snd_soc_simple_card rockchip_saradc industrialio_triggered_buffer
>> hantro_vpu kfifo_buf snd_soc_simple_card_utils rockchip_thermal
>> phy_rockchip_naneng_combphy display_connector gpio_ir_recv v4l2_vp9
>> v4l2_h264 v4l2_mem2mem dwmac_rk videobuf2_dma_contig stmmac_platform
>> videobuf2_memops videobuf2_v4l2 stmmac rockchipdrm videodev
>> snd_soc_rockchip_i2s_tdm snd_soc_rk817 pcs_xpcs panfrost rtc_rk808
>> rk805_pwrkey analogix_dp rk817_charger spi_rockchip_sfc dw_mipi_dsi
>> videobuf2_common dw_hdmi drm_shmem_helper mc gpu_sched
>> drm_display_helper ahci_dwc ip_tables x_tables ipv6
>> CPU: 1 PID: 0 Comm: swapper/1 Not tainted 6.8.0-rc1+ #14563
>> Hardware name: Hardkernel ODROID-M1 (DT)
>> pstate: 800001c9 (Nzcv dAIF -PAN -UAO -TCO -DIT -SSBS BTYPE=--)
>> pc : __read_sysreg_by_encoding+0x38c/0x390
>> lr : read_scoped_sysreg+0x4c/0x70
>> ...
>> Call trace:
>>    __read_sysreg_by_encoding+0x38c/0x390
>>    read_scoped_sysreg+0x4c/0x70
>>    has_nv1+0x18/0x48
>>    verify_local_cpu_caps+0x54/0x124
>>    check_local_cpu_capabilities+0x28/0x208
>>    secondary_start_kernel+0xb0/0x154
>>    __secondary_switched+0xb8/0xbc
>> Code: d53802f3 17ffff3b d5380253 17ffff39 (d4210000)
>> ---[ end trace 0000000000000000 ]---
>> Kernel panic - not syncing: Attempted to kill the idle task!
>> SMP: stopping secondary CPUs
>> Kernel Offset: disabled
>> CPU features: 0x1,80000000,80050295,2100721b
>> Memory Limit: none
>> ---[ end Kernel panic - not syncing: Attempted to kill the idle task! ]---
>>
>> It looks that some additional checks are needed for the CPU hot-plug case.
> Nah, that's much simpler.
>
> I simply missed a spot where ID_AA64MMFR4_EL1 must be handled (it is
> getting annoying that we have more than a single place where these
> "read all the ID registers" are handled).
>
> Can you please give the following a go? If that works for you (it does
> for me here), I'll post an actual fix (plus another fix for another
> buglet I just noticed).

Works fine now.

Reported-by: Marek Szyprowski <m.szyprowski@samsung.com>
Tested-by: Marek Szyprowski <m.szyprowski@samsung.com>
> Thanks,
>
> 	M.
>
> diff --git a/arch/arm64/kernel/cpufeature.c b/arch/arm64/kernel/cpufeature.c
> index 0f29ac43c7a2..2f8958f27e9e 100644
> --- a/arch/arm64/kernel/cpufeature.c
> +++ b/arch/arm64/kernel/cpufeature.c
> @@ -1456,6 +1456,7 @@ u64 __read_sysreg_by_encoding(u32 sys_id)
>   	read_sysreg_case(SYS_ID_AA64MMFR1_EL1);
>   	read_sysreg_case(SYS_ID_AA64MMFR2_EL1);
>   	read_sysreg_case(SYS_ID_AA64MMFR3_EL1);
> +	read_sysreg_case(SYS_ID_AA64MMFR4_EL1);
>   	read_sysreg_case(SYS_ID_AA64ISAR0_EL1);
>   	read_sysreg_case(SYS_ID_AA64ISAR1_EL1);
>   	read_sysreg_case(SYS_ID_AA64ISAR2_EL1);
>
Best regards
diff mbox series

Patch

diff --git a/arch/arm64/kernel/cpufeature.c b/arch/arm64/kernel/cpufeature.c
index ad3753fbdcb1..91249d20883b 100644
--- a/arch/arm64/kernel/cpufeature.c
+++ b/arch/arm64/kernel/cpufeature.c
@@ -1794,6 +1794,11 @@  static bool unmap_kernel_at_el0(const struct arm64_cpu_capabilities *entry,
 	return !meltdown_safe;
 }
 
+static bool has_nv1(const struct arm64_cpu_capabilities *entry, int scope)
+{
+	return !has_cpuid_feature(entry, scope);
+}
+
 #if defined(ID_AA64MMFR0_EL1_TGRAN_LPA2) && defined(ID_AA64MMFR0_EL1_TGRAN_2_SUPPORTED_LPA2)
 static bool has_lpa2_at_stage1(u64 mmfr0)
 {
@@ -2794,6 +2799,13 @@  static const struct arm64_cpu_capabilities arm64_features[] = {
 		.type = ARM64_CPUCAP_SYSTEM_FEATURE,
 		.matches = has_lpa2,
 	},
+	{
+		.desc = "NV1",
+		.capability = ARM64_HAS_HCR_NV1,
+		.type = ARM64_CPUCAP_SYSTEM_FEATURE,
+		.matches = has_nv1,
+		ARM64_CPUID_FIELDS_NEG(ID_AA64MMFR4_EL1, E2H0, NI_NV1)
+	},
 	{},
 };
 
diff --git a/arch/arm64/tools/cpucaps b/arch/arm64/tools/cpucaps
index b912b1409fc0..65090dd34641 100644
--- a/arch/arm64/tools/cpucaps
+++ b/arch/arm64/tools/cpucaps
@@ -35,6 +35,7 @@  HAS_GENERIC_AUTH_IMP_DEF
 HAS_GIC_CPUIF_SYSREGS
 HAS_GIC_PRIO_MASKING
 HAS_GIC_PRIO_RELAXED_SYNC
+HAS_HCR_NV1
 HAS_HCX
 HAS_LDAPR
 HAS_LPA2