diff mbox

[v2,1/6] arm64: cpufeature: Allow early detect of specific features

Message ID 1516190084-18978-2-git-send-email-julien.thierry@arm.com (mailing list archive)
State New, archived
Headers show

Commit Message

Julien Thierry Jan. 17, 2018, 11:54 a.m. UTC
From: Daniel Thompson <daniel.thompson@linaro.org>

Currently it is not possible to detect features of the boot CPU
until the other CPUs have been brought up.

This prevents us from reacting to features of the boot CPU until
fairly late in the boot process. To solve this we allow a subset
of features (that are likely to be common to all clusters) to be
detected based on the boot CPU alone.

Signed-off-by: Daniel Thompson <daniel.thompson@linaro.org>
[julien.thierry@arm.com: check non-boot cpu missing early features, avoid
			 duplicates between early features and normal
			 features]
Signed-off-by: Julien Thierry <julien.thierry@arm.com>
Cc: Catalin Marinas <catalin.marinas@arm.com>
Cc: Will Deacon <will.deacon@arm.com>
Cc: Suzuki K Poulose <suzuki.poulose@arm.com>
---
 arch/arm64/kernel/cpufeature.c | 69 ++++++++++++++++++++++++++++--------------
 1 file changed, 47 insertions(+), 22 deletions(-)

--
1.9.1

Comments

Suzuki K Poulose Jan. 22, 2018, 12:05 p.m. UTC | #1
On 17/01/18 11:54, Julien Thierry wrote:
> From: Daniel Thompson <daniel.thompson@linaro.org>
> 
> Currently it is not possible to detect features of the boot CPU
> until the other CPUs have been brought up.
> 
> This prevents us from reacting to features of the boot CPU until
> fairly late in the boot process. To solve this we allow a subset
> of features (that are likely to be common to all clusters) to be
> detected based on the boot CPU alone.
> 
> Signed-off-by: Daniel Thompson <daniel.thompson@linaro.org>
> [julien.thierry@arm.com: check non-boot cpu missing early features, avoid
> 			 duplicates between early features and normal
> 			 features]
> Signed-off-by: Julien Thierry <julien.thierry@arm.com>
> Cc: Catalin Marinas <catalin.marinas@arm.com>
> Cc: Will Deacon <will.deacon@arm.com>
> Cc: Suzuki K Poulose <suzuki.poulose@arm.com>
> ---
>   arch/arm64/kernel/cpufeature.c | 69 ++++++++++++++++++++++++++++--------------
>   1 file changed, 47 insertions(+), 22 deletions(-)
> 
> diff --git a/arch/arm64/kernel/cpufeature.c b/arch/arm64/kernel/cpufeature.c
> index a73a592..6698404 100644
> --- a/arch/arm64/kernel/cpufeature.c
> +++ b/arch/arm64/kernel/cpufeature.c
> @@ -52,6 +52,8 @@
>   DECLARE_BITMAP(cpu_hwcaps, ARM64_NCAPS);
>   EXPORT_SYMBOL(cpu_hwcaps);
> 
> +static void __init setup_early_feature_capabilities(void);
> +
>   /*
>    * Flag to indicate if we have computed the system wide
>    * capabilities based on the boot time active CPUs. This
> @@ -542,6 +544,8 @@ void __init init_cpu_features(struct cpuinfo_arm64 *info)
>   		init_cpu_ftr_reg(SYS_ZCR_EL1, info->reg_zcr);
>   		sve_init_vq_map();
>   	}
> +
> +	setup_early_feature_capabilities();
>   }
> 
>   static void update_cpu_ftr_reg(struct arm64_ftr_reg *reg, u64 new)
> @@ -846,7 +850,7 @@ static bool has_no_fpsimd(const struct arm64_cpu_capabilities *entry, int __unus
>   					ID_AA64PFR0_FP_SHIFT) < 0;
>   }
> 
> -static const struct arm64_cpu_capabilities arm64_features[] = {
> +static const struct arm64_cpu_capabilities arm64_early_features[] = {
>   	{
>   		.desc = "GIC system register CPU interface",
>   		.capability = ARM64_HAS_SYSREG_GIC_CPUIF,
> @@ -857,6 +861,10 @@ static bool has_no_fpsimd(const struct arm64_cpu_capabilities *entry, int __unus
>   		.sign = FTR_UNSIGNED,
>   		.min_field_value = 1,
>   	},
> +	{}
> +};
> +


Julien,

One potential problem with this is that we don't have a way
to make this work on a "theoretical" system with and without
GIC system reg interface. i.e, if we don't have the CONFIG
enabled for using ICC system regs for IRQ flags, the kernel
could still panic. I understand this is not a "normal" configuration
but, may be we could make the panic option based on whether
we actually use the system regs early enough ?

Btw, I am rewriting the capabilities infrastructure to allow per-cap
control on how it should be treated. I might add an EARLY scope for
caps which could cover this and may be VHE.

Suzuki
Julien Thierry Jan. 22, 2018, 12:21 p.m. UTC | #2
On 22/01/18 12:05, Suzuki K Poulose wrote:
> On 17/01/18 11:54, Julien Thierry wrote:
>> From: Daniel Thompson <daniel.thompson@linaro.org>
>>
>> Currently it is not possible to detect features of the boot CPU
>> until the other CPUs have been brought up.
>>
>> This prevents us from reacting to features of the boot CPU until
>> fairly late in the boot process. To solve this we allow a subset
>> of features (that are likely to be common to all clusters) to be
>> detected based on the boot CPU alone.
>>
>> Signed-off-by: Daniel Thompson <daniel.thompson@linaro.org>
>> [julien.thierry@arm.com: check non-boot cpu missing early features, avoid
>>              duplicates between early features and normal
>>              features]
>> Signed-off-by: Julien Thierry <julien.thierry@arm.com>
>> Cc: Catalin Marinas <catalin.marinas@arm.com>
>> Cc: Will Deacon <will.deacon@arm.com>
>> Cc: Suzuki K Poulose <suzuki.poulose@arm.com>
>> ---
>>   arch/arm64/kernel/cpufeature.c | 69 
>> ++++++++++++++++++++++++++++--------------
>>   1 file changed, 47 insertions(+), 22 deletions(-)
>>
>> diff --git a/arch/arm64/kernel/cpufeature.c 
>> b/arch/arm64/kernel/cpufeature.c
>> index a73a592..6698404 100644
>> --- a/arch/arm64/kernel/cpufeature.c
>> +++ b/arch/arm64/kernel/cpufeature.c
>> @@ -52,6 +52,8 @@
>>   DECLARE_BITMAP(cpu_hwcaps, ARM64_NCAPS);
>>   EXPORT_SYMBOL(cpu_hwcaps);
>>
>> +static void __init setup_early_feature_capabilities(void);
>> +
>>   /*
>>    * Flag to indicate if we have computed the system wide
>>    * capabilities based on the boot time active CPUs. This
>> @@ -542,6 +544,8 @@ void __init init_cpu_features(struct cpuinfo_arm64 
>> *info)
>>           init_cpu_ftr_reg(SYS_ZCR_EL1, info->reg_zcr);
>>           sve_init_vq_map();
>>       }
>> +
>> +    setup_early_feature_capabilities();
>>   }
>>
>>   static void update_cpu_ftr_reg(struct arm64_ftr_reg *reg, u64 new)
>> @@ -846,7 +850,7 @@ static bool has_no_fpsimd(const struct 
>> arm64_cpu_capabilities *entry, int __unus
>>                       ID_AA64PFR0_FP_SHIFT) < 0;
>>   }
>>
>> -static const struct arm64_cpu_capabilities arm64_features[] = {
>> +static const struct arm64_cpu_capabilities arm64_early_features[] = {
>>       {
>>           .desc = "GIC system register CPU interface",
>>           .capability = ARM64_HAS_SYSREG_GIC_CPUIF,
>> @@ -857,6 +861,10 @@ static bool has_no_fpsimd(const struct 
>> arm64_cpu_capabilities *entry, int __unus
>>           .sign = FTR_UNSIGNED,
>>           .min_field_value = 1,
>>       },
>> +    {}
>> +};
>> +
> 
> 
> Julien,
> 
> One potential problem with this is that we don't have a way
> to make this work on a "theoretical" system with and without
> GIC system reg interface. i.e, if we don't have the CONFIG
> enabled for using ICC system regs for IRQ flags, the kernel
> could still panic. I understand this is not a "normal" configuration
> but, may be we could make the panic option based on whether
> we actually use the system regs early enough ?
> 

I see, however I'm not sure what happens in the GIC drivers if we have a 
CPU running with a GICv3 and other CPUs with something else... But of 
course this is not technically limited by the arm64 capabilities handling.

What behaviour would you be looking for? A way to prevent the CPU to be 
brought up instead of panicking?

> Btw, I am rewriting the capabilities infrastructure to allow per-cap
> control on how it should be treated. I might add an EARLY scope for
> caps which could cover this and may be VHE.

Thanks,
Daniel Thompson Jan. 22, 2018, 1:38 p.m. UTC | #3
On Mon, Jan 22, 2018 at 12:21:55PM +0000, Julien Thierry wrote:
> On 22/01/18 12:05, Suzuki K Poulose wrote:
> > On 17/01/18 11:54, Julien Thierry wrote:
> > > From: Daniel Thompson <daniel.thompson@linaro.org>
> > > 
> > > Currently it is not possible to detect features of the boot CPU
> > > until the other CPUs have been brought up.
> > > 
> > > This prevents us from reacting to features of the boot CPU until
> > > fairly late in the boot process. To solve this we allow a subset
> > > of features (that are likely to be common to all clusters) to be
> > > detected based on the boot CPU alone.
> > > 
> > > Signed-off-by: Daniel Thompson <daniel.thompson@linaro.org>
> > > [julien.thierry@arm.com: check non-boot cpu missing early features, avoid
> > >              duplicates between early features and normal
> > >              features]
> > > Signed-off-by: Julien Thierry <julien.thierry@arm.com>
> > > Cc: Catalin Marinas <catalin.marinas@arm.com>
> > > Cc: Will Deacon <will.deacon@arm.com>
> > > Cc: Suzuki K Poulose <suzuki.poulose@arm.com>
> > > ---
> > >   arch/arm64/kernel/cpufeature.c | 69
> > > ++++++++++++++++++++++++++++--------------
> > >   1 file changed, 47 insertions(+), 22 deletions(-)
> > > 
> > > diff --git a/arch/arm64/kernel/cpufeature.c
> > > b/arch/arm64/kernel/cpufeature.c
> > > index a73a592..6698404 100644
> > > --- a/arch/arm64/kernel/cpufeature.c
> > > +++ b/arch/arm64/kernel/cpufeature.c
> > > @@ -52,6 +52,8 @@
> > >   DECLARE_BITMAP(cpu_hwcaps, ARM64_NCAPS);
> > >   EXPORT_SYMBOL(cpu_hwcaps);
> > > 
> > > +static void __init setup_early_feature_capabilities(void);
> > > +
> > >   /*
> > >    * Flag to indicate if we have computed the system wide
> > >    * capabilities based on the boot time active CPUs. This
> > > @@ -542,6 +544,8 @@ void __init init_cpu_features(struct
> > > cpuinfo_arm64 *info)
> > >           init_cpu_ftr_reg(SYS_ZCR_EL1, info->reg_zcr);
> > >           sve_init_vq_map();
> > >       }
> > > +
> > > +    setup_early_feature_capabilities();
> > >   }
> > > 
> > >   static void update_cpu_ftr_reg(struct arm64_ftr_reg *reg, u64 new)
> > > @@ -846,7 +850,7 @@ static bool has_no_fpsimd(const struct
> > > arm64_cpu_capabilities *entry, int __unus
> > >                       ID_AA64PFR0_FP_SHIFT) < 0;
> > >   }
> > > 
> > > -static const struct arm64_cpu_capabilities arm64_features[] = {
> > > +static const struct arm64_cpu_capabilities arm64_early_features[] = {
> > >       {
> > >           .desc = "GIC system register CPU interface",
> > >           .capability = ARM64_HAS_SYSREG_GIC_CPUIF,
> > > @@ -857,6 +861,10 @@ static bool has_no_fpsimd(const struct
> > > arm64_cpu_capabilities *entry, int __unus
> > >           .sign = FTR_UNSIGNED,
> > >           .min_field_value = 1,
> > >       },
> > > +    {}
> > > +};
> > > +
> > 
> > 
> > Julien,
> > 
> > One potential problem with this is that we don't have a way
> > to make this work on a "theoretical" system with and without
> > GIC system reg interface. i.e, if we don't have the CONFIG
> > enabled for using ICC system regs for IRQ flags, the kernel
> > could still panic. I understand this is not a "normal" configuration
> > but, may be we could make the panic option based on whether
> > we actually use the system regs early enough ?
> > 
> 
> I see, however I'm not sure what happens in the GIC drivers if we have a CPU
> running with a GICv3 and other CPUs with something else... But of course
> this is not technically limited by the arm64 capabilities handling.

Shouldn't each CPU be sharing the same GIC anyway? It so its not some
have GICv3+ and some have GICv2. The theoretical system described above 
*has* a GICv3+ but some participants in the cluster are not able to 
talk to it as like a co-processor.

The ARM ARM is a little vague about whether, if a GIC implements a
system register interface, then a core must provide access to it. Even
so, first question is whether such a system is architecture compliant?


Daniel.


> What behaviour would you be looking for? A way to prevent the CPU to be
> brought up instead of panicking?
> 
> > Btw, I am rewriting the capabilities infrastructure to allow per-cap
> > control on how it should be treated. I might add an EARLY scope for
> > caps which could cover this and may be VHE.
> 
> Thanks,
> 
> -- 
> Julien Thierry
Marc Zyngier Jan. 22, 2018, 1:57 p.m. UTC | #4
On 22/01/18 13:38, Daniel Thompson wrote:
> On Mon, Jan 22, 2018 at 12:21:55PM +0000, Julien Thierry wrote:
>> On 22/01/18 12:05, Suzuki K Poulose wrote:
>>> On 17/01/18 11:54, Julien Thierry wrote:
>>>> From: Daniel Thompson <daniel.thompson@linaro.org>
>>>>
>>>> Currently it is not possible to detect features of the boot CPU
>>>> until the other CPUs have been brought up.
>>>>
>>>> This prevents us from reacting to features of the boot CPU until
>>>> fairly late in the boot process. To solve this we allow a subset
>>>> of features (that are likely to be common to all clusters) to be
>>>> detected based on the boot CPU alone.
>>>>
>>>> Signed-off-by: Daniel Thompson <daniel.thompson@linaro.org>
>>>> [julien.thierry@arm.com: check non-boot cpu missing early features, avoid
>>>>              duplicates between early features and normal
>>>>              features]
>>>> Signed-off-by: Julien Thierry <julien.thierry@arm.com>
>>>> Cc: Catalin Marinas <catalin.marinas@arm.com>
>>>> Cc: Will Deacon <will.deacon@arm.com>
>>>> Cc: Suzuki K Poulose <suzuki.poulose@arm.com>
>>>> ---
>>>>   arch/arm64/kernel/cpufeature.c | 69
>>>> ++++++++++++++++++++++++++++--------------
>>>>   1 file changed, 47 insertions(+), 22 deletions(-)
>>>>
>>>> diff --git a/arch/arm64/kernel/cpufeature.c
>>>> b/arch/arm64/kernel/cpufeature.c
>>>> index a73a592..6698404 100644
>>>> --- a/arch/arm64/kernel/cpufeature.c
>>>> +++ b/arch/arm64/kernel/cpufeature.c
>>>> @@ -52,6 +52,8 @@
>>>>   DECLARE_BITMAP(cpu_hwcaps, ARM64_NCAPS);
>>>>   EXPORT_SYMBOL(cpu_hwcaps);
>>>>
>>>> +static void __init setup_early_feature_capabilities(void);
>>>> +
>>>>   /*
>>>>    * Flag to indicate if we have computed the system wide
>>>>    * capabilities based on the boot time active CPUs. This
>>>> @@ -542,6 +544,8 @@ void __init init_cpu_features(struct
>>>> cpuinfo_arm64 *info)
>>>>           init_cpu_ftr_reg(SYS_ZCR_EL1, info->reg_zcr);
>>>>           sve_init_vq_map();
>>>>       }
>>>> +
>>>> +    setup_early_feature_capabilities();
>>>>   }
>>>>
>>>>   static void update_cpu_ftr_reg(struct arm64_ftr_reg *reg, u64 new)
>>>> @@ -846,7 +850,7 @@ static bool has_no_fpsimd(const struct
>>>> arm64_cpu_capabilities *entry, int __unus
>>>>                       ID_AA64PFR0_FP_SHIFT) < 0;
>>>>   }
>>>>
>>>> -static const struct arm64_cpu_capabilities arm64_features[] = {
>>>> +static const struct arm64_cpu_capabilities arm64_early_features[] = {
>>>>       {
>>>>           .desc = "GIC system register CPU interface",
>>>>           .capability = ARM64_HAS_SYSREG_GIC_CPUIF,
>>>> @@ -857,6 +861,10 @@ static bool has_no_fpsimd(const struct
>>>> arm64_cpu_capabilities *entry, int __unus
>>>>           .sign = FTR_UNSIGNED,
>>>>           .min_field_value = 1,
>>>>       },
>>>> +    {}
>>>> +};
>>>> +
>>>
>>>
>>> Julien,
>>>
>>> One potential problem with this is that we don't have a way
>>> to make this work on a "theoretical" system with and without
>>> GIC system reg interface. i.e, if we don't have the CONFIG
>>> enabled for using ICC system regs for IRQ flags, the kernel
>>> could still panic. I understand this is not a "normal" configuration
>>> but, may be we could make the panic option based on whether
>>> we actually use the system regs early enough ?
>>>
>>
>> I see, however I'm not sure what happens in the GIC drivers if we have a CPU
>> running with a GICv3 and other CPUs with something else... But of course
>> this is not technically limited by the arm64 capabilities handling.
> 
> Shouldn't each CPU be sharing the same GIC anyway? It so its not some
> have GICv3+ and some have GICv2. The theoretical system described above 
> *has* a GICv3+ but some participants in the cluster are not able to 
> talk to it as like a co-processor.

There is some level of confusion between the GIC CPU interface (which is
really in the CPU) and the GIC itself. You can easily end-up in a
situation where you do have the HW, but it is configured in a way that
prevents you from using it. Case in point: GICv3 with GICv2
compatibility used in virtualization.

> The ARM ARM is a little vague about whether, if a GIC implements a
> system register interface, then a core must provide access to it. Even
> so, first question is whether such a system is architecture compliant?

Again, it is not the GIC that implements the system registers. And no,
these system registers are not required to be accessible (see
ICC_SRE_EL2.Enable == 0 for example).

So I believe there is value in checking those as early as possible, and
set the expectations accordingly (such as in [1] and [2]).

Thanks,

	M.

[1]
https://git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git/tree/drivers/irqchip/irq-gic-v3.c#n536
[2]
https://git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git/tree/arch/arm64/kernel/cpufeature.c#n798
Julien Thierry Jan. 22, 2018, 2:14 p.m. UTC | #5
On 22/01/18 13:57, Marc Zyngier wrote:
> On 22/01/18 13:38, Daniel Thompson wrote:
>> On Mon, Jan 22, 2018 at 12:21:55PM +0000, Julien Thierry wrote:
>>> On 22/01/18 12:05, Suzuki K Poulose wrote:
>>>> On 17/01/18 11:54, Julien Thierry wrote:
>>>>> From: Daniel Thompson <daniel.thompson@linaro.org>
>>>>>
>>>>> Currently it is not possible to detect features of the boot CPU
>>>>> until the other CPUs have been brought up.
>>>>>
>>>>> This prevents us from reacting to features of the boot CPU until
>>>>> fairly late in the boot process. To solve this we allow a subset
>>>>> of features (that are likely to be common to all clusters) to be
>>>>> detected based on the boot CPU alone.
>>>>>
>>>>> Signed-off-by: Daniel Thompson <daniel.thompson@linaro.org>
>>>>> [julien.thierry@arm.com: check non-boot cpu missing early features, avoid
>>>>>               duplicates between early features and normal
>>>>>               features]
>>>>> Signed-off-by: Julien Thierry <julien.thierry@arm.com>
>>>>> Cc: Catalin Marinas <catalin.marinas@arm.com>
>>>>> Cc: Will Deacon <will.deacon@arm.com>
>>>>> Cc: Suzuki K Poulose <suzuki.poulose@arm.com>
>>>>> ---
>>>>>    arch/arm64/kernel/cpufeature.c | 69
>>>>> ++++++++++++++++++++++++++++--------------
>>>>>    1 file changed, 47 insertions(+), 22 deletions(-)
>>>>>
>>>>> diff --git a/arch/arm64/kernel/cpufeature.c
>>>>> b/arch/arm64/kernel/cpufeature.c
>>>>> index a73a592..6698404 100644
>>>>> --- a/arch/arm64/kernel/cpufeature.c
>>>>> +++ b/arch/arm64/kernel/cpufeature.c
>>>>> @@ -52,6 +52,8 @@
>>>>>    DECLARE_BITMAP(cpu_hwcaps, ARM64_NCAPS);
>>>>>    EXPORT_SYMBOL(cpu_hwcaps);
>>>>>
>>>>> +static void __init setup_early_feature_capabilities(void);
>>>>> +
>>>>>    /*
>>>>>     * Flag to indicate if we have computed the system wide
>>>>>     * capabilities based on the boot time active CPUs. This
>>>>> @@ -542,6 +544,8 @@ void __init init_cpu_features(struct
>>>>> cpuinfo_arm64 *info)
>>>>>            init_cpu_ftr_reg(SYS_ZCR_EL1, info->reg_zcr);
>>>>>            sve_init_vq_map();
>>>>>        }
>>>>> +
>>>>> +    setup_early_feature_capabilities();
>>>>>    }
>>>>>
>>>>>    static void update_cpu_ftr_reg(struct arm64_ftr_reg *reg, u64 new)
>>>>> @@ -846,7 +850,7 @@ static bool has_no_fpsimd(const struct
>>>>> arm64_cpu_capabilities *entry, int __unus
>>>>>                        ID_AA64PFR0_FP_SHIFT) < 0;
>>>>>    }
>>>>>
>>>>> -static const struct arm64_cpu_capabilities arm64_features[] = {
>>>>> +static const struct arm64_cpu_capabilities arm64_early_features[] = {
>>>>>        {
>>>>>            .desc = "GIC system register CPU interface",
>>>>>            .capability = ARM64_HAS_SYSREG_GIC_CPUIF,
>>>>> @@ -857,6 +861,10 @@ static bool has_no_fpsimd(const struct
>>>>> arm64_cpu_capabilities *entry, int __unus
>>>>>            .sign = FTR_UNSIGNED,
>>>>>            .min_field_value = 1,
>>>>>        },
>>>>> +    {}
>>>>> +};
>>>>> +
>>>>
>>>>
>>>> Julien,
>>>>
>>>> One potential problem with this is that we don't have a way
>>>> to make this work on a "theoretical" system with and without
>>>> GIC system reg interface. i.e, if we don't have the CONFIG
>>>> enabled for using ICC system regs for IRQ flags, the kernel
>>>> could still panic. I understand this is not a "normal" configuration
>>>> but, may be we could make the panic option based on whether
>>>> we actually use the system regs early enough ?
>>>>
>>>
>>> I see, however I'm not sure what happens in the GIC drivers if we have a CPU
>>> running with a GICv3 and other CPUs with something else... But of course
>>> this is not technically limited by the arm64 capabilities handling.
>>
>> Shouldn't each CPU be sharing the same GIC anyway? It so its not some
>> have GICv3+ and some have GICv2. The theoretical system described above
>> *has* a GICv3+ but some participants in the cluster are not able to
>> talk to it as like a co-processor.
> 
> There is some level of confusion between the GIC CPU interface (which is
> really in the CPU) and the GIC itself. You can easily end-up in a
> situation where you do have the HW, but it is configured in a way that
> prevents you from using it. Case in point: GICv3 with GICv2
> compatibility used in virtualization.
> 
>> The ARM ARM is a little vague about whether, if a GIC implements a
>> system register interface, then a core must provide access to it. Even
>> so, first question is whether such a system is architecture compliant?
> 
> Again, it is not the GIC that implements the system registers. And no,
> these system registers are not required to be accessible (see
> ICC_SRE_EL2.Enable == 0 for example).
> 
> So I believe there is value in checking those as early as possible, and
> set the expectations accordingly (such as in [1] and [2]).
> 

So in the end, if we boot on a CPU that can access ICC_CPUIF, it looks 
like we'll prevent bringing up the CPUs that cannot access the 
ICC_CPUIF, and if we boot on a CPU that cannot access ICC_CPUIF, 
everything that gets brought up afterwards will be run on GICv2 
compatibility mode?
We never run different GIC driver on different CPUs, right?


In the patch, check_early_cpu_features panics when features don't match, 
but nothing really prevents us to use cpu_die_early instead.

Would that solve the issue Suzuki?

Cheers,
Marc Zyngier Jan. 22, 2018, 2:20 p.m. UTC | #6
On 22/01/18 14:14, Julien Thierry wrote:
> 
> 
> On 22/01/18 13:57, Marc Zyngier wrote:
>> On 22/01/18 13:38, Daniel Thompson wrote:
>>> On Mon, Jan 22, 2018 at 12:21:55PM +0000, Julien Thierry wrote:
>>>> On 22/01/18 12:05, Suzuki K Poulose wrote:
>>>>> On 17/01/18 11:54, Julien Thierry wrote:
>>>>>> From: Daniel Thompson <daniel.thompson@linaro.org>
>>>>>>
>>>>>> Currently it is not possible to detect features of the boot CPU
>>>>>> until the other CPUs have been brought up.
>>>>>>
>>>>>> This prevents us from reacting to features of the boot CPU until
>>>>>> fairly late in the boot process. To solve this we allow a subset
>>>>>> of features (that are likely to be common to all clusters) to be
>>>>>> detected based on the boot CPU alone.
>>>>>>
>>>>>> Signed-off-by: Daniel Thompson <daniel.thompson@linaro.org>
>>>>>> [julien.thierry@arm.com: check non-boot cpu missing early features, avoid
>>>>>>               duplicates between early features and normal
>>>>>>               features]
>>>>>> Signed-off-by: Julien Thierry <julien.thierry@arm.com>
>>>>>> Cc: Catalin Marinas <catalin.marinas@arm.com>
>>>>>> Cc: Will Deacon <will.deacon@arm.com>
>>>>>> Cc: Suzuki K Poulose <suzuki.poulose@arm.com>
>>>>>> ---
>>>>>>    arch/arm64/kernel/cpufeature.c | 69
>>>>>> ++++++++++++++++++++++++++++--------------
>>>>>>    1 file changed, 47 insertions(+), 22 deletions(-)
>>>>>>
>>>>>> diff --git a/arch/arm64/kernel/cpufeature.c
>>>>>> b/arch/arm64/kernel/cpufeature.c
>>>>>> index a73a592..6698404 100644
>>>>>> --- a/arch/arm64/kernel/cpufeature.c
>>>>>> +++ b/arch/arm64/kernel/cpufeature.c
>>>>>> @@ -52,6 +52,8 @@
>>>>>>    DECLARE_BITMAP(cpu_hwcaps, ARM64_NCAPS);
>>>>>>    EXPORT_SYMBOL(cpu_hwcaps);
>>>>>>
>>>>>> +static void __init setup_early_feature_capabilities(void);
>>>>>> +
>>>>>>    /*
>>>>>>     * Flag to indicate if we have computed the system wide
>>>>>>     * capabilities based on the boot time active CPUs. This
>>>>>> @@ -542,6 +544,8 @@ void __init init_cpu_features(struct
>>>>>> cpuinfo_arm64 *info)
>>>>>>            init_cpu_ftr_reg(SYS_ZCR_EL1, info->reg_zcr);
>>>>>>            sve_init_vq_map();
>>>>>>        }
>>>>>> +
>>>>>> +    setup_early_feature_capabilities();
>>>>>>    }
>>>>>>
>>>>>>    static void update_cpu_ftr_reg(struct arm64_ftr_reg *reg, u64 new)
>>>>>> @@ -846,7 +850,7 @@ static bool has_no_fpsimd(const struct
>>>>>> arm64_cpu_capabilities *entry, int __unus
>>>>>>                        ID_AA64PFR0_FP_SHIFT) < 0;
>>>>>>    }
>>>>>>
>>>>>> -static const struct arm64_cpu_capabilities arm64_features[] = {
>>>>>> +static const struct arm64_cpu_capabilities arm64_early_features[] = {
>>>>>>        {
>>>>>>            .desc = "GIC system register CPU interface",
>>>>>>            .capability = ARM64_HAS_SYSREG_GIC_CPUIF,
>>>>>> @@ -857,6 +861,10 @@ static bool has_no_fpsimd(const struct
>>>>>> arm64_cpu_capabilities *entry, int __unus
>>>>>>            .sign = FTR_UNSIGNED,
>>>>>>            .min_field_value = 1,
>>>>>>        },
>>>>>> +    {}
>>>>>> +};
>>>>>> +
>>>>>
>>>>>
>>>>> Julien,
>>>>>
>>>>> One potential problem with this is that we don't have a way
>>>>> to make this work on a "theoretical" system with and without
>>>>> GIC system reg interface. i.e, if we don't have the CONFIG
>>>>> enabled for using ICC system regs for IRQ flags, the kernel
>>>>> could still panic. I understand this is not a "normal" configuration
>>>>> but, may be we could make the panic option based on whether
>>>>> we actually use the system regs early enough ?
>>>>>
>>>>
>>>> I see, however I'm not sure what happens in the GIC drivers if we have a CPU
>>>> running with a GICv3 and other CPUs with something else... But of course
>>>> this is not technically limited by the arm64 capabilities handling.
>>>
>>> Shouldn't each CPU be sharing the same GIC anyway? It so its not some
>>> have GICv3+ and some have GICv2. The theoretical system described above
>>> *has* a GICv3+ but some participants in the cluster are not able to
>>> talk to it as like a co-processor.
>>
>> There is some level of confusion between the GIC CPU interface (which is
>> really in the CPU) and the GIC itself. You can easily end-up in a
>> situation where you do have the HW, but it is configured in a way that
>> prevents you from using it. Case in point: GICv3 with GICv2
>> compatibility used in virtualization.
>>
>>> The ARM ARM is a little vague about whether, if a GIC implements a
>>> system register interface, then a core must provide access to it. Even
>>> so, first question is whether such a system is architecture compliant?
>>
>> Again, it is not the GIC that implements the system registers. And no,
>> these system registers are not required to be accessible (see
>> ICC_SRE_EL2.Enable == 0 for example).
>>
>> So I believe there is value in checking those as early as possible, and
>> set the expectations accordingly (such as in [1] and [2]).
>>
> 
> So in the end, if we boot on a CPU that can access ICC_CPUIF, it looks 
> like we'll prevent bringing up the CPUs that cannot access the 
> ICC_CPUIF, 

Correct.

> and if we boot on a CPU that cannot access ICC_CPUIF, 
> everything that gets brought up afterwards will be run on GICv2 
> compatibility mode?

Probably not, as I assume the firmware still gives you the description
of a GICv3, so things will grind to a halt at that point.

> We never run different GIC driver on different CPUs, right?

We don't. And please stop giving people horrible ideas! ;-)

Thanks,

	M.

> In the patch, check_early_cpu_features panics when features don't match, 
> but nothing really prevents us to use cpu_die_early instead.
> 
> Would that solve the issue Suzuki?
> 
> Cheers,
>
Suzuki K Poulose Jan. 22, 2018, 2:45 p.m. UTC | #7
On 22/01/18 12:21, Julien Thierry wrote:
> 
> 
> On 22/01/18 12:05, Suzuki K Poulose wrote:
>> On 17/01/18 11:54, Julien Thierry wrote:
>>> From: Daniel Thompson <daniel.thompson@linaro.org>
>>>
>>> Currently it is not possible to detect features of the boot CPU
>>> until the other CPUs have been brought up.
>>>
>>> This prevents us from reacting to features of the boot CPU until
>>> fairly late in the boot process. To solve this we allow a subset
>>> of features (that are likely to be common to all clusters) to be
>>> detected based on the boot CPU alone.
>>>
>>> Signed-off-by: Daniel Thompson <daniel.thompson@linaro.org>
>>> [julien.thierry@arm.com: check non-boot cpu missing early features, avoid
>>>              duplicates between early features and normal
>>>              features]
>>> Signed-off-by: Julien Thierry <julien.thierry@arm.com>
>>> Cc: Catalin Marinas <catalin.marinas@arm.com>
>>> Cc: Will Deacon <will.deacon@arm.com>
>>> Cc: Suzuki K Poulose <suzuki.poulose@arm.com>
>>> ---
>>>   arch/arm64/kernel/cpufeature.c | 69 ++++++++++++++++++++++++++++--------------
>>>   1 file changed, 47 insertions(+), 22 deletions(-)
>>>
>>> diff --git a/arch/arm64/kernel/cpufeature.c b/arch/arm64/kernel/cpufeature.c
>>> index a73a592..6698404 100644
>>> --- a/arch/arm64/kernel/cpufeature.c
>>> +++ b/arch/arm64/kernel/cpufeature.c
>>> @@ -52,6 +52,8 @@
>>>   DECLARE_BITMAP(cpu_hwcaps, ARM64_NCAPS);
>>>   EXPORT_SYMBOL(cpu_hwcaps);
>>>
>>> +static void __init setup_early_feature_capabilities(void);
>>> +
>>>   /*
>>>    * Flag to indicate if we have computed the system wide
>>>    * capabilities based on the boot time active CPUs. This
>>> @@ -542,6 +544,8 @@ void __init init_cpu_features(struct cpuinfo_arm64 *info)
>>>           init_cpu_ftr_reg(SYS_ZCR_EL1, info->reg_zcr);
>>>           sve_init_vq_map();
>>>       }
>>> +
>>> +    setup_early_feature_capabilities();
>>>   }
>>>
>>>   static void update_cpu_ftr_reg(struct arm64_ftr_reg *reg, u64 new)
>>> @@ -846,7 +850,7 @@ static bool has_no_fpsimd(const struct arm64_cpu_capabilities *entry, int __unus
>>>                       ID_AA64PFR0_FP_SHIFT) < 0;
>>>   }
>>>
>>> -static const struct arm64_cpu_capabilities arm64_features[] = {
>>> +static const struct arm64_cpu_capabilities arm64_early_features[] = {
>>>       {
>>>           .desc = "GIC system register CPU interface",
>>>           .capability = ARM64_HAS_SYSREG_GIC_CPUIF,
>>> @@ -857,6 +861,10 @@ static bool has_no_fpsimd(const struct arm64_cpu_capabilities *entry, int __unus
>>>           .sign = FTR_UNSIGNED,
>>>           .min_field_value = 1,
>>>       },
>>> +    {}
>>> +};
>>> +
>>
>>
>> Julien,
>>
>> One potential problem with this is that we don't have a way
>> to make this work on a "theoretical" system with and without
>> GIC system reg interface. i.e, if we don't have the CONFIG
>> enabled for using ICC system regs for IRQ flags, the kernel
>> could still panic. I understand this is not a "normal" configuration
>> but, may be we could make the panic option based on whether
>> we actually use the system regs early enough ?
>>
> 
> I see, however I'm not sure what happens in the GIC drivers if we have a CPU running with a GICv3 and other CPUs with something else... But of course this is not technically limited by the arm64 capabilities handling.
> 
> What behaviour would you be looking for? A way to prevent the CPU to be brought up instead of panicking?
> 

If we have the CONFIG enabled for using system regs, we can continue
to panic the system. Otherwise, we should ignore the mismatch early,
as we don't use the system register access unless all boot time active
CPUs have it.

In a nutshell, this is an early feature only if the CONFIG is enabled,
otherwise should fall back to the normal behavior.

Cheers
Suzuki
Julien Thierry Jan. 22, 2018, 3:01 p.m. UTC | #8
On 22/01/18 14:45, Suzuki K Poulose wrote:
> On 22/01/18 12:21, Julien Thierry wrote:
>>
>>
>> On 22/01/18 12:05, Suzuki K Poulose wrote:
>>> On 17/01/18 11:54, Julien Thierry wrote:
>>>> From: Daniel Thompson <daniel.thompson@linaro.org>
>>>>
>>>> Currently it is not possible to detect features of the boot CPU
>>>> until the other CPUs have been brought up.
>>>>
>>>> This prevents us from reacting to features of the boot CPU until
>>>> fairly late in the boot process. To solve this we allow a subset
>>>> of features (that are likely to be common to all clusters) to be
>>>> detected based on the boot CPU alone.
>>>>
>>>> Signed-off-by: Daniel Thompson <daniel.thompson@linaro.org>
>>>> [julien.thierry@arm.com: check non-boot cpu missing early features, 
>>>> avoid
>>>>              duplicates between early features and normal
>>>>              features]
>>>> Signed-off-by: Julien Thierry <julien.thierry@arm.com>
>>>> Cc: Catalin Marinas <catalin.marinas@arm.com>
>>>> Cc: Will Deacon <will.deacon@arm.com>
>>>> Cc: Suzuki K Poulose <suzuki.poulose@arm.com>
>>>> ---
>>>>   arch/arm64/kernel/cpufeature.c | 69 
>>>> ++++++++++++++++++++++++++++--------------
>>>>   1 file changed, 47 insertions(+), 22 deletions(-)
>>>>
>>>> diff --git a/arch/arm64/kernel/cpufeature.c 
>>>> b/arch/arm64/kernel/cpufeature.c
>>>> index a73a592..6698404 100644
>>>> --- a/arch/arm64/kernel/cpufeature.c
>>>> +++ b/arch/arm64/kernel/cpufeature.c
>>>> @@ -52,6 +52,8 @@
>>>>   DECLARE_BITMAP(cpu_hwcaps, ARM64_NCAPS);
>>>>   EXPORT_SYMBOL(cpu_hwcaps);
>>>>
>>>> +static void __init setup_early_feature_capabilities(void);
>>>> +
>>>>   /*
>>>>    * Flag to indicate if we have computed the system wide
>>>>    * capabilities based on the boot time active CPUs. This
>>>> @@ -542,6 +544,8 @@ void __init init_cpu_features(struct 
>>>> cpuinfo_arm64 *info)
>>>>           init_cpu_ftr_reg(SYS_ZCR_EL1, info->reg_zcr);
>>>>           sve_init_vq_map();
>>>>       }
>>>> +
>>>> +    setup_early_feature_capabilities();
>>>>   }
>>>>
>>>>   static void update_cpu_ftr_reg(struct arm64_ftr_reg *reg, u64 new)
>>>> @@ -846,7 +850,7 @@ static bool has_no_fpsimd(const struct 
>>>> arm64_cpu_capabilities *entry, int __unus
>>>>                       ID_AA64PFR0_FP_SHIFT) < 0;
>>>>   }
>>>>
>>>> -static const struct arm64_cpu_capabilities arm64_features[] = {
>>>> +static const struct arm64_cpu_capabilities arm64_early_features[] = {
>>>>       {
>>>>           .desc = "GIC system register CPU interface",
>>>>           .capability = ARM64_HAS_SYSREG_GIC_CPUIF,
>>>> @@ -857,6 +861,10 @@ static bool has_no_fpsimd(const struct 
>>>> arm64_cpu_capabilities *entry, int __unus
>>>>           .sign = FTR_UNSIGNED,
>>>>           .min_field_value = 1,
>>>>       },
>>>> +    {}
>>>> +};
>>>> +
>>>
>>>
>>> Julien,
>>>
>>> One potential problem with this is that we don't have a way
>>> to make this work on a "theoretical" system with and without
>>> GIC system reg interface. i.e, if we don't have the CONFIG
>>> enabled for using ICC system regs for IRQ flags, the kernel
>>> could still panic. I understand this is not a "normal" configuration
>>> but, may be we could make the panic option based on whether
>>> we actually use the system regs early enough ?
>>>
>>
>> I see, however I'm not sure what happens in the GIC drivers if we have 
>> a CPU running with a GICv3 and other CPUs with something else... But 
>> of course this is not technically limited by the arm64 capabilities 
>> handling.
>>
>> What behaviour would you be looking for? A way to prevent the CPU to 
>> be brought up instead of panicking?
>>
> 
> If we have the CONFIG enabled for using system regs, we can continue
> to panic the system. Otherwise, we should ignore the mismatch early,
> as we don't use the system register access unless all boot time active
> CPUs have it.
> 

Hmmm, we use the CPUIF (if available) in the first CPU pretty much as 
soon as we re-enable interrupts in the GICv3 driver, which is way before 
the other CPUs are brought up.

other CPUs get to die_early().

> In a nutshell, this is an early feature only if the CONFIG is enabled,
> otherwise should fall back to the normal behavior.
> 

Maybe we should just not panic and let the mismatching CPUs die.
It's a system wide feature and linux will try to make the other CPUs 
match the boot CPU's config anyway.
Suzuki K Poulose Jan. 22, 2018, 3:13 p.m. UTC | #9
On 22/01/18 15:01, Julien Thierry wrote:
> 
> 
> On 22/01/18 14:45, Suzuki K Poulose wrote:
>> On 22/01/18 12:21, Julien Thierry wrote:
>>>
>>>
>>> On 22/01/18 12:05, Suzuki K Poulose wrote:
>>>> On 17/01/18 11:54, Julien Thierry wrote:
>>>>> From: Daniel Thompson <daniel.thompson@linaro.org>
>>>>>
>>>>> Currently it is not possible to detect features of the boot CPU
>>>>> until the other CPUs have been brought up.
>>>>>
>>>>> This prevents us from reacting to features of the boot CPU until
>>>>> fairly late in the boot process. To solve this we allow a subset
>>>>> of features (that are likely to be common to all clusters) to be
>>>>> detected based on the boot CPU alone.
>>>>>
>>>>> Signed-off-by: Daniel Thompson <daniel.thompson@linaro.org>
>>>>> [julien.thierry@arm.com: check non-boot cpu missing early features, avoid
>>>>>              duplicates between early features and normal
>>>>>              features]
>>>>> Signed-off-by: Julien Thierry <julien.thierry@arm.com>
>>>>> Cc: Catalin Marinas <catalin.marinas@arm.com>
>>>>> Cc: Will Deacon <will.deacon@arm.com>
>>>>> Cc: Suzuki K Poulose <suzuki.poulose@arm.com>
>>>>> ---
>>>>>   arch/arm64/kernel/cpufeature.c | 69 ++++++++++++++++++++++++++++--------------
>>>>>   1 file changed, 47 insertions(+), 22 deletions(-)
>>>>>
>>>>> diff --git a/arch/arm64/kernel/cpufeature.c b/arch/arm64/kernel/cpufeature.c
>>>>> index a73a592..6698404 100644
>>>>> --- a/arch/arm64/kernel/cpufeature.c
>>>>> +++ b/arch/arm64/kernel/cpufeature.c
>>>>> @@ -52,6 +52,8 @@
>>>>>   DECLARE_BITMAP(cpu_hwcaps, ARM64_NCAPS);
>>>>>   EXPORT_SYMBOL(cpu_hwcaps);
>>>>>
>>>>> +static void __init setup_early_feature_capabilities(void);
>>>>> +
>>>>>   /*
>>>>>    * Flag to indicate if we have computed the system wide
>>>>>    * capabilities based on the boot time active CPUs. This
>>>>> @@ -542,6 +544,8 @@ void __init init_cpu_features(struct cpuinfo_arm64 *info)
>>>>>           init_cpu_ftr_reg(SYS_ZCR_EL1, info->reg_zcr);
>>>>>           sve_init_vq_map();
>>>>>       }
>>>>> +
>>>>> +    setup_early_feature_capabilities();
>>>>>   }
>>>>>
>>>>>   static void update_cpu_ftr_reg(struct arm64_ftr_reg *reg, u64 new)
>>>>> @@ -846,7 +850,7 @@ static bool has_no_fpsimd(const struct arm64_cpu_capabilities *entry, int __unus
>>>>>                       ID_AA64PFR0_FP_SHIFT) < 0;
>>>>>   }
>>>>>
>>>>> -static const struct arm64_cpu_capabilities arm64_features[] = {
>>>>> +static const struct arm64_cpu_capabilities arm64_early_features[] = {
>>>>>       {
>>>>>           .desc = "GIC system register CPU interface",
>>>>>           .capability = ARM64_HAS_SYSREG_GIC_CPUIF,
>>>>> @@ -857,6 +861,10 @@ static bool has_no_fpsimd(const struct arm64_cpu_capabilities *entry, int __unus
>>>>>           .sign = FTR_UNSIGNED,
>>>>>           .min_field_value = 1,
>>>>>       },
>>>>> +    {}
>>>>> +};
>>>>> +
>>>>
>>>>
>>>> Julien,
>>>>
>>>> One potential problem with this is that we don't have a way
>>>> to make this work on a "theoretical" system with and without
>>>> GIC system reg interface. i.e, if we don't have the CONFIG
>>>> enabled for using ICC system regs for IRQ flags, the kernel
>>>> could still panic. I understand this is not a "normal" configuration
>>>> but, may be we could make the panic option based on whether
>>>> we actually use the system regs early enough ?
>>>>
>>>
>>> I see, however I'm not sure what happens in the GIC drivers if we have a CPU running with a GICv3 and other CPUs with something else... But of course this is not technically limited by the arm64 capabilities handling.
>>>
>>> What behaviour would you be looking for? A way to prevent the CPU to be brought up instead of panicking?
>>>
>>
>> If we have the CONFIG enabled for using system regs, we can continue
>> to panic the system. Otherwise, we should ignore the mismatch early,
>> as we don't use the system register access unless all boot time active
>> CPUs have it.
>>
> 
> Hmmm, we use the CPUIF (if available) in the first CPU pretty much as soon as we re-enable interrupts in the GICv3 driver, which is way before the other CPUs are brought up.

Isn't this CPUIF access an alternative, patched only when CPUIF feature
enabled ? (which is done only after all the allowed SMP CPUs are brought up )
> 
> other CPUs get to die_early().

Really ? I thought only late CPUs are sent to die_early().

> 
>> In a nutshell, this is an early feature only if the CONFIG is enabled,
>> otherwise should fall back to the normal behavior.
>>
> 
> Maybe we should just not panic and let the mismatching CPUs die.
> It's a system wide feature and linux will try to make the other CPUs match the boot CPU's config anyway.
> 

Suzuki
Julien Thierry Jan. 22, 2018, 3:23 p.m. UTC | #10
On 22/01/18 15:13, Suzuki K Poulose wrote:
> On 22/01/18 15:01, Julien Thierry wrote:
>>
>>
>> On 22/01/18 14:45, Suzuki K Poulose wrote:
>>> On 22/01/18 12:21, Julien Thierry wrote:
>>>>
>>>>
>>>> On 22/01/18 12:05, Suzuki K Poulose wrote:
>>>>> On 17/01/18 11:54, Julien Thierry wrote:
>>>>>> From: Daniel Thompson <daniel.thompson@linaro.org>
>>>>>>
>>>>>> Currently it is not possible to detect features of the boot CPU
>>>>>> until the other CPUs have been brought up.
>>>>>>
>>>>>> This prevents us from reacting to features of the boot CPU until
>>>>>> fairly late in the boot process. To solve this we allow a subset
>>>>>> of features (that are likely to be common to all clusters) to be
>>>>>> detected based on the boot CPU alone.
>>>>>>
>>>>>> Signed-off-by: Daniel Thompson <daniel.thompson@linaro.org>
>>>>>> [julien.thierry@arm.com: check non-boot cpu missing early 
>>>>>> features, avoid
>>>>>>              duplicates between early features and normal
>>>>>>              features]
>>>>>> Signed-off-by: Julien Thierry <julien.thierry@arm.com>
>>>>>> Cc: Catalin Marinas <catalin.marinas@arm.com>
>>>>>> Cc: Will Deacon <will.deacon@arm.com>
>>>>>> Cc: Suzuki K Poulose <suzuki.poulose@arm.com>
>>>>>> ---
>>>>>>   arch/arm64/kernel/cpufeature.c | 69 
>>>>>> ++++++++++++++++++++++++++++--------------
>>>>>>   1 file changed, 47 insertions(+), 22 deletions(-)
>>>>>>
>>>>>> diff --git a/arch/arm64/kernel/cpufeature.c 
>>>>>> b/arch/arm64/kernel/cpufeature.c
>>>>>> index a73a592..6698404 100644
>>>>>> --- a/arch/arm64/kernel/cpufeature.c
>>>>>> +++ b/arch/arm64/kernel/cpufeature.c
>>>>>> @@ -52,6 +52,8 @@
>>>>>>   DECLARE_BITMAP(cpu_hwcaps, ARM64_NCAPS);
>>>>>>   EXPORT_SYMBOL(cpu_hwcaps);
>>>>>>
>>>>>> +static void __init setup_early_feature_capabilities(void);
>>>>>> +
>>>>>>   /*
>>>>>>    * Flag to indicate if we have computed the system wide
>>>>>>    * capabilities based on the boot time active CPUs. This
>>>>>> @@ -542,6 +544,8 @@ void __init init_cpu_features(struct 
>>>>>> cpuinfo_arm64 *info)
>>>>>>           init_cpu_ftr_reg(SYS_ZCR_EL1, info->reg_zcr);
>>>>>>           sve_init_vq_map();
>>>>>>       }
>>>>>> +
>>>>>> +    setup_early_feature_capabilities();
>>>>>>   }
>>>>>>
>>>>>>   static void update_cpu_ftr_reg(struct arm64_ftr_reg *reg, u64 new)
>>>>>> @@ -846,7 +850,7 @@ static bool has_no_fpsimd(const struct 
>>>>>> arm64_cpu_capabilities *entry, int __unus
>>>>>>                       ID_AA64PFR0_FP_SHIFT) < 0;
>>>>>>   }
>>>>>>
>>>>>> -static const struct arm64_cpu_capabilities arm64_features[] = {
>>>>>> +static const struct arm64_cpu_capabilities arm64_early_features[] 
>>>>>> = {
>>>>>>       {
>>>>>>           .desc = "GIC system register CPU interface",
>>>>>>           .capability = ARM64_HAS_SYSREG_GIC_CPUIF,
>>>>>> @@ -857,6 +861,10 @@ static bool has_no_fpsimd(const struct 
>>>>>> arm64_cpu_capabilities *entry, int __unus
>>>>>>           .sign = FTR_UNSIGNED,
>>>>>>           .min_field_value = 1,
>>>>>>       },
>>>>>> +    {}
>>>>>> +};
>>>>>> +
>>>>>
>>>>>
>>>>> Julien,
>>>>>
>>>>> One potential problem with this is that we don't have a way
>>>>> to make this work on a "theoretical" system with and without
>>>>> GIC system reg interface. i.e, if we don't have the CONFIG
>>>>> enabled for using ICC system regs for IRQ flags, the kernel
>>>>> could still panic. I understand this is not a "normal" configuration
>>>>> but, may be we could make the panic option based on whether
>>>>> we actually use the system regs early enough ?
>>>>>
>>>>
>>>> I see, however I'm not sure what happens in the GIC drivers if we 
>>>> have a CPU running with a GICv3 and other CPUs with something 
>>>> else... But of course this is not technically limited by the arm64 
>>>> capabilities handling.
>>>>
>>>> What behaviour would you be looking for? A way to prevent the CPU to 
>>>> be brought up instead of panicking?
>>>>
>>>
>>> If we have the CONFIG enabled for using system regs, we can continue
>>> to panic the system. Otherwise, we should ignore the mismatch early,
>>> as we don't use the system register access unless all boot time active
>>> CPUs have it.
>>>
>>
>> Hmmm, we use the CPUIF (if available) in the first CPU pretty much as 
>> soon as we re-enable interrupts in the GICv3 driver, which is way 
>> before the other CPUs are brought up.
> 
> Isn't this CPUIF access an alternative, patched only when CPUIF feature
> enabled ? (which is done only after all the allowed SMP CPUs are brought 
> up )

The GICv3 doesn't rely on the alternatives, most of the operations are 
done via the CPUIF (ack IRQ, eoi, send sgi, etc ...).

So once GICv3 has been successfully probed and interrupts enabled, CPUIF 
might get used by the GICv3 driver.

>>
>> other CPUs get to die_early().
> 
> Really ? I thought only late CPUs are sent to die_early().

Hmmm, I might be wrong here but that was my understanding of the call to 
verify_local_cpu_features in verify_local_cpu_capabilities.

>>
>>> In a nutshell, this is an early feature only if the CONFIG is enabled,
>>> otherwise should fall back to the normal behavior.
>>>
>>
>> Maybe we should just not panic and let the mismatching CPUs die.
>> It's a system wide feature and linux will try to make the other CPUs 
>> match the boot CPU's config anyway.
>>
> 
> Suzuki
Suzuki K Poulose Jan. 22, 2018, 3:34 p.m. UTC | #11
On 22/01/18 15:23, Julien Thierry wrote:
> 
> 
> On 22/01/18 15:13, Suzuki K Poulose wrote:
>> On 22/01/18 15:01, Julien Thierry wrote:
>>>
>>>
>>> On 22/01/18 14:45, Suzuki K Poulose wrote:
>>>> On 22/01/18 12:21, Julien Thierry wrote:
>>>>>
>>>>>
>>>>> On 22/01/18 12:05, Suzuki K Poulose wrote:
>>>>>> On 17/01/18 11:54, Julien Thierry wrote:
>>>>>>> From: Daniel Thompson <daniel.thompson@linaro.org>

>>>>>> Julien,
>>>>>>
>>>>>> One potential problem with this is that we don't have a way
>>>>>> to make this work on a "theoretical" system with and without
>>>>>> GIC system reg interface. i.e, if we don't have the CONFIG
>>>>>> enabled for using ICC system regs for IRQ flags, the kernel
>>>>>> could still panic. I understand this is not a "normal" configuration
>>>>>> but, may be we could make the panic option based on whether
>>>>>> we actually use the system regs early enough ?
>>>>>>
>>>>>
>>>>> I see, however I'm not sure what happens in the GIC drivers if we have a CPU running with a GICv3 and other CPUs with something else... But of course this is not technically limited by the arm64 capabilities handling.
>>>>>
>>>>> What behaviour would you be looking for? A way to prevent the CPU to be brought up instead of panicking?
>>>>>
>>>>
>>>> If we have the CONFIG enabled for using system regs, we can continue
>>>> to panic the system. Otherwise, we should ignore the mismatch early,
>>>> as we don't use the system register access unless all boot time active
>>>> CPUs have it.
>>>>
>>>
>>> Hmmm, we use the CPUIF (if available) in the first CPU pretty much as soon as we re-enable interrupts in the GICv3 driver, which is way before the other CPUs are brought up.
>>
>> Isn't this CPUIF access an alternative, patched only when CPUIF feature
>> enabled ? (which is done only after all the allowed SMP CPUs are brought up )
> 
> The GICv3 doesn't rely on the alternatives, most of the operations are done via the CPUIF (ack IRQ, eoi, send sgi, etc ...).
> 
> So once GICv3 has been successfully probed and interrupts enabled, CPUIF might get used by the GICv3 driver.
> 

Aha, OK. I am sorry. I was thinking that the ARM64_HAS_SYSREG_GIC_CPUIF was used just for that.
In that case, I think you are not breaking any current behavior, so thats fine.

>>>
>>> other CPUs get to die_early().
>>
>> Really ? I thought only late CPUs are sent to die_early().
> 
> Hmmm, I might be wrong here but that was my understanding of the call to verify_local_cpu_features in verify_local_cpu_capabilities.
> 

The verify_local_cpu_features() is invoked only if the CPU is brought up late from userspace,
after we have finalised the system wide capabilities.

Sorry for the noise.

Suzuki
diff mbox

Patch

diff --git a/arch/arm64/kernel/cpufeature.c b/arch/arm64/kernel/cpufeature.c
index a73a592..6698404 100644
--- a/arch/arm64/kernel/cpufeature.c
+++ b/arch/arm64/kernel/cpufeature.c
@@ -52,6 +52,8 @@ 
 DECLARE_BITMAP(cpu_hwcaps, ARM64_NCAPS);
 EXPORT_SYMBOL(cpu_hwcaps);

+static void __init setup_early_feature_capabilities(void);
+
 /*
  * Flag to indicate if we have computed the system wide
  * capabilities based on the boot time active CPUs. This
@@ -542,6 +544,8 @@  void __init init_cpu_features(struct cpuinfo_arm64 *info)
 		init_cpu_ftr_reg(SYS_ZCR_EL1, info->reg_zcr);
 		sve_init_vq_map();
 	}
+
+	setup_early_feature_capabilities();
 }

 static void update_cpu_ftr_reg(struct arm64_ftr_reg *reg, u64 new)
@@ -846,7 +850,7 @@  static bool has_no_fpsimd(const struct arm64_cpu_capabilities *entry, int __unus
 					ID_AA64PFR0_FP_SHIFT) < 0;
 }

-static const struct arm64_cpu_capabilities arm64_features[] = {
+static const struct arm64_cpu_capabilities arm64_early_features[] = {
 	{
 		.desc = "GIC system register CPU interface",
 		.capability = ARM64_HAS_SYSREG_GIC_CPUIF,
@@ -857,6 +861,10 @@  static bool has_no_fpsimd(const struct arm64_cpu_capabilities *entry, int __unus
 		.sign = FTR_UNSIGNED,
 		.min_field_value = 1,
 	},
+	{}
+};
+
+static const struct arm64_cpu_capabilities arm64_features[] = {
 #ifdef CONFIG_ARM64_PAN
 	{
 		.desc = "Privileged Access Never",
@@ -1111,6 +1119,29 @@  void __init enable_cpu_capabilities(const struct arm64_cpu_capabilities *caps)
 	}
 }

+/* Returns false on a capability mismatch */
+static bool
+verify_local_cpu_features(const struct arm64_cpu_capabilities *caps)
+{
+	for (; caps->matches; caps++) {
+		if (!cpus_have_cap(caps->capability))
+			continue;
+		/*
+		 * If the new CPU misses an advertised feature, we cannot
+		 * proceed further, park the cpu.
+		 */
+		if (!caps->matches(caps, SCOPE_LOCAL_CPU)) {
+			pr_crit("CPU%d: missing feature: %s\n",
+					smp_processor_id(), caps->desc);
+			return false;
+		}
+		if (caps->enable)
+			caps->enable(NULL);
+	}
+
+	return true;
+}
+
 /*
  * Check for CPU features that are used in early boot
  * based on the Boot CPU value.
@@ -1119,6 +1150,9 @@  static void check_early_cpu_features(void)
 {
 	verify_cpu_run_el();
 	verify_cpu_asid_bits();
+
+	if (!verify_local_cpu_features(arm64_early_features))
+		cpu_panic_kernel();
 }

 static void
@@ -1133,26 +1167,6 @@  static void check_early_cpu_features(void)
 		}
 }

-static void
-verify_local_cpu_features(const struct arm64_cpu_capabilities *caps)
-{
-	for (; caps->matches; caps++) {
-		if (!cpus_have_cap(caps->capability))
-			continue;
-		/*
-		 * If the new CPU misses an advertised feature, we cannot proceed
-		 * further, park the cpu.
-		 */
-		if (!caps->matches(caps, SCOPE_LOCAL_CPU)) {
-			pr_crit("CPU%d: missing feature: %s\n",
-					smp_processor_id(), caps->desc);
-			cpu_die_early();
-		}
-		if (caps->enable)
-			caps->enable(NULL);
-	}
-}
-
 static void verify_sve_features(void)
 {
 	u64 safe_zcr = read_sanitised_ftr_reg(SYS_ZCR_EL1);
@@ -1181,7 +1195,10 @@  static void verify_sve_features(void)
 static void verify_local_cpu_capabilities(void)
 {
 	verify_local_cpu_errata_workarounds();
-	verify_local_cpu_features(arm64_features);
+
+	if (!verify_local_cpu_features(arm64_features))
+		cpu_die_early();
+
 	verify_local_elf_hwcaps(arm64_elf_hwcaps);

 	if (system_supports_32bit_el0())
@@ -1211,6 +1228,13 @@  void check_local_cpu_capabilities(void)
 		verify_local_cpu_capabilities();
 }

+static void __init setup_early_feature_capabilities(void)
+{
+	update_cpu_capabilities(arm64_early_features,
+				"early detected feature:");
+	enable_cpu_capabilities(arm64_early_features);
+}
+
 static void __init setup_feature_capabilities(void)
 {
 	update_cpu_capabilities(arm64_features, "detected feature:");
@@ -1249,6 +1273,7 @@  static bool __this_cpu_has_cap(const struct arm64_cpu_capabilities *cap_array,
 bool this_cpu_has_cap(unsigned int cap)
 {
 	return (__this_cpu_has_cap(arm64_features, cap) ||
+		__this_cpu_has_cap(arm64_early_features, cap) ||
 		__this_cpu_has_cap(arm64_errata, cap));
 }