diff mbox

[v2,2/6] arm64: alternative: Apply alternatives early in boot process

Message ID 1516190084-18978-3-git-send-email-julien.thierry@arm.com (mailing list archive)
State New, archived
Headers show

Commit Message

Julien Thierry Jan. 17, 2018, 11:54 a.m. UTC
From: Daniel Thompson <daniel.thompson@linaro.org>

Currently alternatives are applied very late in the boot process (and
a long time after we enable scheduling). Some alternative sequences,
such as those that alter the way CPU context is stored, must be applied
much earlier in the boot sequence.

Introduce apply_alternatives_early() to allow some alternatives to be
applied immediately after we detect the CPU features of the boot CPU.

Signed-off-by: Daniel Thompson <daniel.thompson@linaro.org>
Signed-off-by: Julien Thierry <julien.thierry@arm.com>
Cc: Catalin Marinas <catalin.marinas@arm.com>
Cc: Will Deacon <will.deacon@arm.com>
---
 arch/arm64/include/asm/alternative.h |  1 +
 arch/arm64/kernel/alternative.c      | 39 +++++++++++++++++++++++++++++++++---
 arch/arm64/kernel/smp.c              |  6 ++++++
 3 files changed, 43 insertions(+), 3 deletions(-)

--
1.9.1

Comments

Julien Thierry May 4, 2018, 10:06 a.m. UTC | #1
Hi,

In order to prepare the v3 of this patchset, I'd like people's opinion 
on what this patch does. More below.

On 17/01/18 11:54, Julien Thierry wrote:
> From: Daniel Thompson <daniel.thompson@linaro.org>
> 
> Currently alternatives are applied very late in the boot process (and
> a long time after we enable scheduling). Some alternative sequences,
> such as those that alter the way CPU context is stored, must be applied
> much earlier in the boot sequence.
> 
> Introduce apply_alternatives_early() to allow some alternatives to be
> applied immediately after we detect the CPU features of the boot CPU.
> 
> Signed-off-by: Daniel Thompson <daniel.thompson@linaro.org>
> Signed-off-by: Julien Thierry <julien.thierry@arm.com>
> Cc: Catalin Marinas <catalin.marinas@arm.com>
> Cc: Will Deacon <will.deacon@arm.com>
> ---
>   arch/arm64/include/asm/alternative.h |  1 +
>   arch/arm64/kernel/alternative.c      | 39 +++++++++++++++++++++++++++++++++---
>   arch/arm64/kernel/smp.c              |  6 ++++++
>   3 files changed, 43 insertions(+), 3 deletions(-)
> 
> diff --git a/arch/arm64/include/asm/alternative.h b/arch/arm64/include/asm/alternative.h
> index 4a85c69..1fc1cdb 100644
> --- a/arch/arm64/include/asm/alternative.h
> +++ b/arch/arm64/include/asm/alternative.h
> @@ -20,6 +20,7 @@ struct alt_instr {
>   	u8  alt_len;		/* size of new instruction(s), <= orig_len */
>   };
> 
> +void __init apply_alternatives_early(void);
>   void __init apply_alternatives_all(void);
>   void apply_alternatives(void *start, size_t length);
> 
> diff --git a/arch/arm64/kernel/alternative.c b/arch/arm64/kernel/alternative.c
> index 6dd0a3a3..78051d4 100644
> --- a/arch/arm64/kernel/alternative.c
> +++ b/arch/arm64/kernel/alternative.c
> @@ -28,6 +28,18 @@
>   #include <asm/sections.h>
>   #include <linux/stop_machine.h>
> 
> +/*
> + * early-apply features are detected using only the boot CPU and checked on
> + * secondary CPUs startup, even then,
> + * These early-apply features should only include features where we must
> + * patch the kernel very early in the boot process.
> + *
> + * Note that the cpufeature logic *must* be made aware of early-apply
> + * features to ensure they are reported as enabled without waiting
> + * for other CPUs to boot.
> + */
> +#define EARLY_APPLY_FEATURE_MASK BIT(ARM64_HAS_SYSREG_GIC_CPUIF)
> +

Following the change in the cpufeature infrastructure, 
ARM64_HAS_SYSREG_GIC_CPUIF will have the scope 
ARM64_CPUCAP_SCOPE_BOOT_CPU in order to be checked early in the boot 
process.

Now, regarding the early application of alternative, I am wondering 
whether we can apply all the alternatives associated with SCOPE_BOOT 
features that *do not* have a cpu_enable callback.

Otherwise we can keep the macro to list individually each feature that 
is patchable at boot time as the current patch does (or put this info in 
a flag within the arm64_cpu_capabilities structure).

Any thoughts or preferences on this?

Thanks,

>   #define __ALT_PTR(a,f)		((void *)&(a)->f + (a)->f)
>   #define ALT_ORIG_PTR(a)		__ALT_PTR(a, orig_offset)
>   #define ALT_REPL_PTR(a)		__ALT_PTR(a, alt_offset)
> @@ -105,7 +117,8 @@ static u32 get_alt_insn(struct alt_instr *alt, __le32 *insnptr, __le32 *altinsnp
>   	return insn;
>   }
> 
> -static void __apply_alternatives(void *alt_region, bool use_linear_alias)
> +static void __apply_alternatives(void *alt_region,  bool use_linear_alias,
> +				 unsigned long feature_mask)
>   {
>   	struct alt_instr *alt;
>   	struct alt_region *region = alt_region;
> @@ -115,6 +128,9 @@ static void __apply_alternatives(void *alt_region, bool use_linear_alias)
>   		u32 insn;
>   		int i, nr_inst;
> 
> +		if ((BIT(alt->cpufeature) & feature_mask) == 0)
> +			continue;
> +
>   		if (!cpus_have_cap(alt->cpufeature))
>   			continue;
> 
> @@ -138,6 +154,21 @@ static void __apply_alternatives(void *alt_region, bool use_linear_alias)
>   }
> 
>   /*
> + * This is called very early in the boot process (directly after we run
> + * a feature detect on the boot CPU). No need to worry about other CPUs
> + * here.
> + */
> +void apply_alternatives_early(void)
> +{
> +	struct alt_region region = {
> +		.begin	= (struct alt_instr *)__alt_instructions,
> +		.end	= (struct alt_instr *)__alt_instructions_end,
> +	};
> +
> +	__apply_alternatives(&region, true, EARLY_APPLY_FEATURE_MASK);
> +}
> +
> +/*
>    * We might be patching the stop_machine state machine, so implement a
>    * really simple polling protocol here.
>    */
> @@ -156,7 +187,9 @@ static int __apply_alternatives_multi_stop(void *unused)
>   		isb();
>   	} else {
>   		BUG_ON(patched);
> -		__apply_alternatives(&region, true);
> +
> +		__apply_alternatives(&region, true, ~EARLY_APPLY_FEATURE_MASK);
> +
>   		/* Barriers provided by the cache flushing */
>   		WRITE_ONCE(patched, 1);
>   	}
> @@ -177,5 +210,5 @@ void apply_alternatives(void *start, size_t length)
>   		.end	= start + length,
>   	};
> 
> -	__apply_alternatives(&region, false);
> +	__apply_alternatives(&region, false, -1);
>   }
> diff --git a/arch/arm64/kernel/smp.c b/arch/arm64/kernel/smp.c
> index 551eb07..37361b5 100644
> --- a/arch/arm64/kernel/smp.c
> +++ b/arch/arm64/kernel/smp.c
> @@ -453,6 +453,12 @@ void __init smp_prepare_boot_cpu(void)
>   	 * cpuinfo_store_boot_cpu() above.
>   	 */
>   	update_cpu_errata_workarounds();
> +	/*
> +	 * We now know enough about the boot CPU to apply the
> +	 * alternatives that cannot wait until interrupt handling
> +	 * and/or scheduling is enabled.
> +	 */
> +	apply_alternatives_early();
>   }
> 
>   static u64 __init of_get_cpu_mpidr(struct device_node *dn)
> --
> 1.9.1
>
Daniel Thompson May 9, 2018, 2:27 p.m. UTC | #2
On Fri, May 04, 2018 at 11:06:56AM +0100, Julien Thierry wrote:
> Hi,
> 
> In order to prepare the v3 of this patchset, I'd like people's opinion on
> what this patch does. More below.
> 
> On 17/01/18 11:54, Julien Thierry wrote:
> > From: Daniel Thompson <daniel.thompson@linaro.org>
> > 
> > Currently alternatives are applied very late in the boot process (and
> > a long time after we enable scheduling). Some alternative sequences,
> > such as those that alter the way CPU context is stored, must be applied
> > much earlier in the boot sequence.
> > 
> > Introduce apply_alternatives_early() to allow some alternatives to be
> > applied immediately after we detect the CPU features of the boot CPU.
> > 
> > Signed-off-by: Daniel Thompson <daniel.thompson@linaro.org>
> > Signed-off-by: Julien Thierry <julien.thierry@arm.com>
> > Cc: Catalin Marinas <catalin.marinas@arm.com>
> > Cc: Will Deacon <will.deacon@arm.com>
> > ---
> >   arch/arm64/include/asm/alternative.h |  1 +
> >   arch/arm64/kernel/alternative.c      | 39 +++++++++++++++++++++++++++++++++---
> >   arch/arm64/kernel/smp.c              |  6 ++++++
> >   3 files changed, 43 insertions(+), 3 deletions(-)
> > 
> > diff --git a/arch/arm64/include/asm/alternative.h b/arch/arm64/include/asm/alternative.h
> > index 4a85c69..1fc1cdb 100644
> > --- a/arch/arm64/include/asm/alternative.h
> > +++ b/arch/arm64/include/asm/alternative.h
> > @@ -20,6 +20,7 @@ struct alt_instr {
> >   	u8  alt_len;		/* size of new instruction(s), <= orig_len */
> >   };
> > 
> > +void __init apply_alternatives_early(void);
> >   void __init apply_alternatives_all(void);
> >   void apply_alternatives(void *start, size_t length);
> > 
> > diff --git a/arch/arm64/kernel/alternative.c b/arch/arm64/kernel/alternative.c
> > index 6dd0a3a3..78051d4 100644
> > --- a/arch/arm64/kernel/alternative.c
> > +++ b/arch/arm64/kernel/alternative.c
> > @@ -28,6 +28,18 @@
> >   #include <asm/sections.h>
> >   #include <linux/stop_machine.h>
> > 
> > +/*
> > + * early-apply features are detected using only the boot CPU and checked on
> > + * secondary CPUs startup, even then,
> > + * These early-apply features should only include features where we must
> > + * patch the kernel very early in the boot process.
> > + *
> > + * Note that the cpufeature logic *must* be made aware of early-apply
> > + * features to ensure they are reported as enabled without waiting
> > + * for other CPUs to boot.
> > + */
> > +#define EARLY_APPLY_FEATURE_MASK BIT(ARM64_HAS_SYSREG_GIC_CPUIF)
> > +
> 
> Following the change in the cpufeature infrastructure,
> ARM64_HAS_SYSREG_GIC_CPUIF will have the scope ARM64_CPUCAP_SCOPE_BOOT_CPU
> in order to be checked early in the boot process.
> 
> Now, regarding the early application of alternative, I am wondering whether
> we can apply all the alternatives associated with SCOPE_BOOT features that
> *do not* have a cpu_enable callback.
> 
> Otherwise we can keep the macro to list individually each feature that is
> patchable at boot time as the current patch does (or put this info in a flag
> within the arm64_cpu_capabilities structure).
> 
> Any thoughts or preferences on this?

If I understand ARM64_CPUCAP_SCOPE_BOOT_CPU correctly it certainly seems
safe to apply the alternatives early (it means that a CPU that 
contradicts a CSCOPE_BOOT_CPU won't be allowed to join the system,
right?).

It also makes the system to apply errata fixes more powerful: maybe a
future errata must be applied before we commence threading.

This I have a preference for striping this out and relying on
SCOPE_BOOT_CPU instead. It's a weak preference though since I haven't
studied exactly what errate fixes this will bring into the scope of
early boot.

I don't think you'll regret changing it. This patch has always been
a *total* PITA to rebase so aligning it better with upstream will make
it easier to nurse the patch set until the if-and-when point it hits
the upstream.


Daniel.




> Thanks,
> 
> >   #define __ALT_PTR(a,f)		((void *)&(a)->f + (a)->f)
> >   #define ALT_ORIG_PTR(a)		__ALT_PTR(a, orig_offset)
> >   #define ALT_REPL_PTR(a)		__ALT_PTR(a, alt_offset)
> > @@ -105,7 +117,8 @@ static u32 get_alt_insn(struct alt_instr *alt, __le32 *insnptr, __le32 *altinsnp
> >   	return insn;
> >   }
> > 
> > -static void __apply_alternatives(void *alt_region, bool use_linear_alias)
> > +static void __apply_alternatives(void *alt_region,  bool use_linear_alias,
> > +				 unsigned long feature_mask)
> >   {
> >   	struct alt_instr *alt;
> >   	struct alt_region *region = alt_region;
> > @@ -115,6 +128,9 @@ static void __apply_alternatives(void *alt_region, bool use_linear_alias)
> >   		u32 insn;
> >   		int i, nr_inst;
> > 
> > +		if ((BIT(alt->cpufeature) & feature_mask) == 0)
> > +			continue;
> > +
> >   		if (!cpus_have_cap(alt->cpufeature))
> >   			continue;
> > 
> > @@ -138,6 +154,21 @@ static void __apply_alternatives(void *alt_region, bool use_linear_alias)
> >   }
> > 
> >   /*
> > + * This is called very early in the boot process (directly after we run
> > + * a feature detect on the boot CPU). No need to worry about other CPUs
> > + * here.
> > + */
> > +void apply_alternatives_early(void)
> > +{
> > +	struct alt_region region = {
> > +		.begin	= (struct alt_instr *)__alt_instructions,
> > +		.end	= (struct alt_instr *)__alt_instructions_end,
> > +	};
> > +
> > +	__apply_alternatives(&region, true, EARLY_APPLY_FEATURE_MASK);
> > +}
> > +
> > +/*
> >    * We might be patching the stop_machine state machine, so implement a
> >    * really simple polling protocol here.
> >    */
> > @@ -156,7 +187,9 @@ static int __apply_alternatives_multi_stop(void *unused)
> >   		isb();
> >   	} else {
> >   		BUG_ON(patched);
> > -		__apply_alternatives(&region, true);
> > +
> > +		__apply_alternatives(&region, true, ~EARLY_APPLY_FEATURE_MASK);
> > +
> >   		/* Barriers provided by the cache flushing */
> >   		WRITE_ONCE(patched, 1);
> >   	}
> > @@ -177,5 +210,5 @@ void apply_alternatives(void *start, size_t length)
> >   		.end	= start + length,
> >   	};
> > 
> > -	__apply_alternatives(&region, false);
> > +	__apply_alternatives(&region, false, -1);
> >   }
> > diff --git a/arch/arm64/kernel/smp.c b/arch/arm64/kernel/smp.c
> > index 551eb07..37361b5 100644
> > --- a/arch/arm64/kernel/smp.c
> > +++ b/arch/arm64/kernel/smp.c
> > @@ -453,6 +453,12 @@ void __init smp_prepare_boot_cpu(void)
> >   	 * cpuinfo_store_boot_cpu() above.
> >   	 */
> >   	update_cpu_errata_workarounds();
> > +	/*
> > +	 * We now know enough about the boot CPU to apply the
> > +	 * alternatives that cannot wait until interrupt handling
> > +	 * and/or scheduling is enabled.
> > +	 */
> > +	apply_alternatives_early();
> >   }
> > 
> >   static u64 __init of_get_cpu_mpidr(struct device_node *dn)
> > --
> > 1.9.1
> > 
> 
> -- 
> Julien Thierry
Suzuki K Poulose May 9, 2018, 9:52 p.m. UTC | #3
On 05/04/2018 11:06 AM, Julien Thierry wrote:
> Hi,
> 
> In order to prepare the v3 of this patchset, I'd like people's opinion 
> on what this patch does. More below.
> 
> On 17/01/18 11:54, Julien Thierry wrote:
>> From: Daniel Thompson <daniel.thompson@linaro.org>
>>
>> Currently alternatives are applied very late in the boot process (and
>> a long time after we enable scheduling). Some alternative sequences,
>> such as those that alter the way CPU context is stored, must be applied
>> much earlier in the boot sequence.

>> +/*
>> + * early-apply features are detected using only the boot CPU and 
>> checked on
>> + * secondary CPUs startup, even then,
>> + * These early-apply features should only include features where we must
>> + * patch the kernel very early in the boot process.
>> + *
>> + * Note that the cpufeature logic *must* be made aware of early-apply
>> + * features to ensure they are reported as enabled without waiting
>> + * for other CPUs to boot.
>> + */
>> +#define EARLY_APPLY_FEATURE_MASK BIT(ARM64_HAS_SYSREG_GIC_CPUIF)
>> +
> 
> Following the change in the cpufeature infrastructure, 
> ARM64_HAS_SYSREG_GIC_CPUIF will have the scope 
> ARM64_CPUCAP_SCOPE_BOOT_CPU in order to be checked early in the boot 
> process.

Thats correct.

> 
> Now, regarding the early application of alternative, I am wondering 
> whether we can apply all the alternatives associated with SCOPE_BOOT 
> features that *do not* have a cpu_enable callback.
> 

I don't understand why would you skip the ones that have a "cpu_enable" 
callback. Could you explain this a bit ? Ideally you should be able to
apply the alternatives for features with the SCOPE_BOOT, provided the
cpu_enable() callback is written properly.


> Otherwise we can keep the macro to list individually each feature that 
> is patchable at boot time as the current patch does (or put this info in 
> a flag within the arm64_cpu_capabilities structure)

You may be able to build up the mask of *available* capabilities with 
SCOPE_BOOT at boot time by playing some trick in the 
setup_boot_cpu_capabilities(), rather than embedding it in the 
capabilities (and then parsing the entire table(s)) or manually keeping
track of the capabilities by having a separate mask.

Suzuki

> 
> Any thoughts or preferences on this?
> 
> Thanks,
> 
>>   #define __ALT_PTR(a,f)        ((void *)&(a)->f + (a)->f)
>>   #define ALT_ORIG_PTR(a)        __ALT_PTR(a, orig_offset)
>>   #define ALT_REPL_PTR(a)        __ALT_PTR(a, alt_offset)
>> @@ -105,7 +117,8 @@ static u32 get_alt_insn(struct alt_instr *alt, 
>> __le32 *insnptr, __le32 *altinsnp
>>       return insn;
>>   }
>>
>> -static void __apply_alternatives(void *alt_region, bool 
>> use_linear_alias)
>> +static void __apply_alternatives(void *alt_region,  bool 
>> use_linear_alias,
>> +                 unsigned long feature_mask)
>>   {
>>       struct alt_instr *alt;
>>       struct alt_region *region = alt_region;
>> @@ -115,6 +128,9 @@ static void __apply_alternatives(void *alt_region, 
>> bool use_linear_alias)
>>           u32 insn;
>>           int i, nr_inst;
>>
>> +        if ((BIT(alt->cpufeature) & feature_mask) == 0)
>> +            continue;
>> +
>>           if (!cpus_have_cap(alt->cpufeature))
>>               continue;
>>
>> @@ -138,6 +154,21 @@ static void __apply_alternatives(void 
>> *alt_region, bool use_linear_alias)
>>   }
>>
>>   /*
>> + * This is called very early in the boot process (directly after we run
>> + * a feature detect on the boot CPU). No need to worry about other CPUs
>> + * here.
>> + */
>> +void apply_alternatives_early(void)
>> +{
>> +    struct alt_region region = {
>> +        .begin    = (struct alt_instr *)__alt_instructions,
>> +        .end    = (struct alt_instr *)__alt_instructions_end,
>> +    };
>> +
>> +    __apply_alternatives(&region, true, EARLY_APPLY_FEATURE_MASK);
>> +}
>> +
>> +/*
>>    * We might be patching the stop_machine state machine, so implement a
>>    * really simple polling protocol here.
>>    */
>> @@ -156,7 +187,9 @@ static int __apply_alternatives_multi_stop(void 
>> *unused)
>>           isb();
>>       } else {
>>           BUG_ON(patched);
>> -        __apply_alternatives(&region, true);
>> +
>> +        __apply_alternatives(&region, true, ~EARLY_APPLY_FEATURE_MASK);
>> +
>>           /* Barriers provided by the cache flushing */
>>           WRITE_ONCE(patched, 1);
>>       }
>> @@ -177,5 +210,5 @@ void apply_alternatives(void *start, size_t length)
>>           .end    = start + length,
>>       };
>>
>> -    __apply_alternatives(&region, false);
>> +    __apply_alternatives(&region, false, -1);
>>   }
>> diff --git a/arch/arm64/kernel/smp.c b/arch/arm64/kernel/smp.c
>> index 551eb07..37361b5 100644
>> --- a/arch/arm64/kernel/smp.c
>> +++ b/arch/arm64/kernel/smp.c
>> @@ -453,6 +453,12 @@ void __init smp_prepare_boot_cpu(void)
>>        * cpuinfo_store_boot_cpu() above.
>>        */
>>       update_cpu_errata_workarounds();
>> +    /*
>> +     * We now know enough about the boot CPU to apply the
>> +     * alternatives that cannot wait until interrupt handling
>> +     * and/or scheduling is enabled.
>> +     */
>> +    apply_alternatives_early();
>>   }
>>
>>   static u64 __init of_get_cpu_mpidr(struct device_node *dn)
>> -- 
>> 1.9.1
>>
>
Julien Thierry May 11, 2018, 8:12 a.m. UTC | #4
On 09/05/18 22:52, Suzuki K Poulose wrote:
> On 05/04/2018 11:06 AM, Julien Thierry wrote:
>> Hi,
>>
>> In order to prepare the v3 of this patchset, I'd like people's opinion 
>> on what this patch does. More below.
>>
>> On 17/01/18 11:54, Julien Thierry wrote:
>>> From: Daniel Thompson <daniel.thompson@linaro.org>
>>>
>>> Currently alternatives are applied very late in the boot process (and
>>> a long time after we enable scheduling). Some alternative sequences,
>>> such as those that alter the way CPU context is stored, must be applied
>>> much earlier in the boot sequence.
> 
>>> +/*
>>> + * early-apply features are detected using only the boot CPU and 
>>> checked on
>>> + * secondary CPUs startup, even then,
>>> + * These early-apply features should only include features where we 
>>> must
>>> + * patch the kernel very early in the boot process.
>>> + *
>>> + * Note that the cpufeature logic *must* be made aware of early-apply
>>> + * features to ensure they are reported as enabled without waiting
>>> + * for other CPUs to boot.
>>> + */
>>> +#define EARLY_APPLY_FEATURE_MASK BIT(ARM64_HAS_SYSREG_GIC_CPUIF)
>>> +
>>
>> Following the change in the cpufeature infrastructure, 
>> ARM64_HAS_SYSREG_GIC_CPUIF will have the scope 
>> ARM64_CPUCAP_SCOPE_BOOT_CPU in order to be checked early in the boot 
>> process.
> 
> Thats correct.
> 
>>
>> Now, regarding the early application of alternative, I am wondering 
>> whether we can apply all the alternatives associated with SCOPE_BOOT 
>> features that *do not* have a cpu_enable callback.
>>
> 
> I don't understand why would you skip the ones that have a "cpu_enable" 
> callback. Could you explain this a bit ? Ideally you should be able to
> apply the alternatives for features with the SCOPE_BOOT, provided the
> cpu_enable() callback is written properly.
> 

In my mind the "cpu_enable" callback is the setup a cpu should perform 
before using the feature (i.e. the code getting patched in by the 
alternative). So I was worried about the code getting patched by the 
boot cpu and then have the secondary cpus ending up executing patched 
code before the cpu_enable for the corresponding feature gets called.
Or is there a requirement for secondary cpu startup code to be free of 
alternative code?

> 
>> Otherwise we can keep the macro to list individually each feature that 
>> is patchable at boot time as the current patch does (or put this info 
>> in a flag within the arm64_cpu_capabilities structure)
> 
> You may be able to build up the mask of *available* capabilities with 
> SCOPE_BOOT at boot time by playing some trick in the 
> setup_boot_cpu_capabilities(), rather than embedding it in the 
> capabilities (and then parsing the entire table(s)) or manually keeping
> track of the capabilities by having a separate mask.
> 

Yes, I like that idea.

Thanks,

> Suzuki
> 
>>
>> Any thoughts or preferences on this?
>>
>> Thanks,
>>
>>>   #define __ALT_PTR(a,f)        ((void *)&(a)->f + (a)->f)
>>>   #define ALT_ORIG_PTR(a)        __ALT_PTR(a, orig_offset)
>>>   #define ALT_REPL_PTR(a)        __ALT_PTR(a, alt_offset)
>>> @@ -105,7 +117,8 @@ static u32 get_alt_insn(struct alt_instr *alt, 
>>> __le32 *insnptr, __le32 *altinsnp
>>>       return insn;
>>>   }
>>>
>>> -static void __apply_alternatives(void *alt_region, bool 
>>> use_linear_alias)
>>> +static void __apply_alternatives(void *alt_region,  bool 
>>> use_linear_alias,
>>> +                 unsigned long feature_mask)
>>>   {
>>>       struct alt_instr *alt;
>>>       struct alt_region *region = alt_region;
>>> @@ -115,6 +128,9 @@ static void __apply_alternatives(void 
>>> *alt_region, bool use_linear_alias)
>>>           u32 insn;
>>>           int i, nr_inst;
>>>
>>> +        if ((BIT(alt->cpufeature) & feature_mask) == 0)
>>> +            continue;
>>> +
>>>           if (!cpus_have_cap(alt->cpufeature))
>>>               continue;
>>>
>>> @@ -138,6 +154,21 @@ static void __apply_alternatives(void 
>>> *alt_region, bool use_linear_alias)
>>>   }
>>>
>>>   /*
>>> + * This is called very early in the boot process (directly after we run
>>> + * a feature detect on the boot CPU). No need to worry about other CPUs
>>> + * here.
>>> + */
>>> +void apply_alternatives_early(void)
>>> +{
>>> +    struct alt_region region = {
>>> +        .begin    = (struct alt_instr *)__alt_instructions,
>>> +        .end    = (struct alt_instr *)__alt_instructions_end,
>>> +    };
>>> +
>>> +    __apply_alternatives(&region, true, EARLY_APPLY_FEATURE_MASK);
>>> +}
>>> +
>>> +/*
>>>    * We might be patching the stop_machine state machine, so implement a
>>>    * really simple polling protocol here.
>>>    */
>>> @@ -156,7 +187,9 @@ static int __apply_alternatives_multi_stop(void 
>>> *unused)
>>>           isb();
>>>       } else {
>>>           BUG_ON(patched);
>>> -        __apply_alternatives(&region, true);
>>> +
>>> +        __apply_alternatives(&region, true, ~EARLY_APPLY_FEATURE_MASK);
>>> +
>>>           /* Barriers provided by the cache flushing */
>>>           WRITE_ONCE(patched, 1);
>>>       }
>>> @@ -177,5 +210,5 @@ void apply_alternatives(void *start, size_t length)
>>>           .end    = start + length,
>>>       };
>>>
>>> -    __apply_alternatives(&region, false);
>>> +    __apply_alternatives(&region, false, -1);
>>>   }
>>> diff --git a/arch/arm64/kernel/smp.c b/arch/arm64/kernel/smp.c
>>> index 551eb07..37361b5 100644
>>> --- a/arch/arm64/kernel/smp.c
>>> +++ b/arch/arm64/kernel/smp.c
>>> @@ -453,6 +453,12 @@ void __init smp_prepare_boot_cpu(void)
>>>        * cpuinfo_store_boot_cpu() above.
>>>        */
>>>       update_cpu_errata_workarounds();
>>> +    /*
>>> +     * We now know enough about the boot CPU to apply the
>>> +     * alternatives that cannot wait until interrupt handling
>>> +     * and/or scheduling is enabled.
>>> +     */
>>> +    apply_alternatives_early();
>>>   }
>>>
>>>   static u64 __init of_get_cpu_mpidr(struct device_node *dn)
>>> -- 
>>> 1.9.1
>>>
>>
>
Suzuki K Poulose May 11, 2018, 9:19 a.m. UTC | #5
On 11/05/18 09:12, Julien Thierry wrote:
> 
> 
> On 09/05/18 22:52, Suzuki K Poulose wrote:
>> On 05/04/2018 11:06 AM, Julien Thierry wrote:
>>> Hi,
>>>
>>> In order to prepare the v3 of this patchset, I'd like people's opinion on what this patch does. More below.
>>>
>>> On 17/01/18 11:54, Julien Thierry wrote:
>>>> From: Daniel Thompson <daniel.thompson@linaro.org>
>>>>
>>>> Currently alternatives are applied very late in the boot process (and
>>>> a long time after we enable scheduling). Some alternative sequences,
>>>> such as those that alter the way CPU context is stored, must be applied
>>>> much earlier in the boot sequence.
>>
>>>> +/*
>>>> + * early-apply features are detected using only the boot CPU and checked on
>>>> + * secondary CPUs startup, even then,
>>>> + * These early-apply features should only include features where we must
>>>> + * patch the kernel very early in the boot process.
>>>> + *
>>>> + * Note that the cpufeature logic *must* be made aware of early-apply
>>>> + * features to ensure they are reported as enabled without waiting
>>>> + * for other CPUs to boot.
>>>> + */
>>>> +#define EARLY_APPLY_FEATURE_MASK BIT(ARM64_HAS_SYSREG_GIC_CPUIF)
>>>> +
>>>
>>> Following the change in the cpufeature infrastructure, ARM64_HAS_SYSREG_GIC_CPUIF will have the scope ARM64_CPUCAP_SCOPE_BOOT_CPU in order to be checked early in the boot process.
>>
>> Thats correct.
>>
>>>
>>> Now, regarding the early application of alternative, I am wondering whether we can apply all the alternatives associated with SCOPE_BOOT features that *do not* have a cpu_enable callback.
>>>
>>
>> I don't understand why would you skip the ones that have a "cpu_enable" callback. Could you explain this a bit ? Ideally you should be able to
>> apply the alternatives for features with the SCOPE_BOOT, provided the
>> cpu_enable() callback is written properly.
>>
> 
> In my mind the "cpu_enable" callback is the setup a cpu should perform before using the feature (i.e. the code getting patched in by the alternative). So I was worried about the code getting patched by the boot cpu and then have the secondary cpus ending up executing patched code before the cpu_enable for the corresponding feature gets called.
> Or is there a requirement for secondary cpu startup code to be free of alternative code?

There are no imposed restrictions. It is upto the capability to decide
what can be done in cpu_enable() and what can be patched. So, if you
make sure the patched code can be safely executed by secondary it is
fine. May be you could even patch in some code in the early boot up, to
do what you do in "cpu_enable()" for the secondary to safely execute
the patched code.

Anyway, if the secondary CPUs don't have the feature you are going to
panic the system. So I don't think there is a big difference in the
outcome if there is a mismatch, except for a clean message about the
conflict.


Cheers
Suzuki
diff mbox

Patch

diff --git a/arch/arm64/include/asm/alternative.h b/arch/arm64/include/asm/alternative.h
index 4a85c69..1fc1cdb 100644
--- a/arch/arm64/include/asm/alternative.h
+++ b/arch/arm64/include/asm/alternative.h
@@ -20,6 +20,7 @@  struct alt_instr {
 	u8  alt_len;		/* size of new instruction(s), <= orig_len */
 };

+void __init apply_alternatives_early(void);
 void __init apply_alternatives_all(void);
 void apply_alternatives(void *start, size_t length);

diff --git a/arch/arm64/kernel/alternative.c b/arch/arm64/kernel/alternative.c
index 6dd0a3a3..78051d4 100644
--- a/arch/arm64/kernel/alternative.c
+++ b/arch/arm64/kernel/alternative.c
@@ -28,6 +28,18 @@ 
 #include <asm/sections.h>
 #include <linux/stop_machine.h>

+/*
+ * early-apply features are detected using only the boot CPU and checked on
+ * secondary CPUs startup, even then,
+ * These early-apply features should only include features where we must
+ * patch the kernel very early in the boot process.
+ *
+ * Note that the cpufeature logic *must* be made aware of early-apply
+ * features to ensure they are reported as enabled without waiting
+ * for other CPUs to boot.
+ */
+#define EARLY_APPLY_FEATURE_MASK BIT(ARM64_HAS_SYSREG_GIC_CPUIF)
+
 #define __ALT_PTR(a,f)		((void *)&(a)->f + (a)->f)
 #define ALT_ORIG_PTR(a)		__ALT_PTR(a, orig_offset)
 #define ALT_REPL_PTR(a)		__ALT_PTR(a, alt_offset)
@@ -105,7 +117,8 @@  static u32 get_alt_insn(struct alt_instr *alt, __le32 *insnptr, __le32 *altinsnp
 	return insn;
 }

-static void __apply_alternatives(void *alt_region, bool use_linear_alias)
+static void __apply_alternatives(void *alt_region,  bool use_linear_alias,
+				 unsigned long feature_mask)
 {
 	struct alt_instr *alt;
 	struct alt_region *region = alt_region;
@@ -115,6 +128,9 @@  static void __apply_alternatives(void *alt_region, bool use_linear_alias)
 		u32 insn;
 		int i, nr_inst;

+		if ((BIT(alt->cpufeature) & feature_mask) == 0)
+			continue;
+
 		if (!cpus_have_cap(alt->cpufeature))
 			continue;

@@ -138,6 +154,21 @@  static void __apply_alternatives(void *alt_region, bool use_linear_alias)
 }

 /*
+ * This is called very early in the boot process (directly after we run
+ * a feature detect on the boot CPU). No need to worry about other CPUs
+ * here.
+ */
+void apply_alternatives_early(void)
+{
+	struct alt_region region = {
+		.begin	= (struct alt_instr *)__alt_instructions,
+		.end	= (struct alt_instr *)__alt_instructions_end,
+	};
+
+	__apply_alternatives(&region, true, EARLY_APPLY_FEATURE_MASK);
+}
+
+/*
  * We might be patching the stop_machine state machine, so implement a
  * really simple polling protocol here.
  */
@@ -156,7 +187,9 @@  static int __apply_alternatives_multi_stop(void *unused)
 		isb();
 	} else {
 		BUG_ON(patched);
-		__apply_alternatives(&region, true);
+
+		__apply_alternatives(&region, true, ~EARLY_APPLY_FEATURE_MASK);
+
 		/* Barriers provided by the cache flushing */
 		WRITE_ONCE(patched, 1);
 	}
@@ -177,5 +210,5 @@  void apply_alternatives(void *start, size_t length)
 		.end	= start + length,
 	};

-	__apply_alternatives(&region, false);
+	__apply_alternatives(&region, false, -1);
 }
diff --git a/arch/arm64/kernel/smp.c b/arch/arm64/kernel/smp.c
index 551eb07..37361b5 100644
--- a/arch/arm64/kernel/smp.c
+++ b/arch/arm64/kernel/smp.c
@@ -453,6 +453,12 @@  void __init smp_prepare_boot_cpu(void)
 	 * cpuinfo_store_boot_cpu() above.
 	 */
 	update_cpu_errata_workarounds();
+	/*
+	 * We now know enough about the boot CPU to apply the
+	 * alternatives that cannot wait until interrupt handling
+	 * and/or scheduling is enabled.
+	 */
+	apply_alternatives_early();
 }

 static u64 __init of_get_cpu_mpidr(struct device_node *dn)