Message ID | 20240729142421.137283-4-stewart.hildebrand@amd.com (mailing list archive) |
---|---|
State | New |
Headers | show |
Series | Stack checking on Arm | expand |
On 29.07.2024 16:24, Stewart Hildebrand wrote: > RFC: Should we put this under its own config option instead of reusing > CONFIG_DEBUG? I'd say yes, with it merely defaulting to DEBUG. And then have this in generic code rather than ... > --- a/xen/arch/arm/arch.mk > +++ b/xen/arch/arm/arch.mk > @@ -12,6 +12,7 @@ CFLAGS-$(CONFIG_ARM_32) += -mno-unaligned-access > CFLAGS-$(CONFIG_ARM_64) += -mcpu=generic > CFLAGS-$(CONFIG_ARM_64) += -mgeneral-regs-only # No fp registers etc > $(call cc-option-add,CFLAGS-$(CONFIG_ARM_64),CC,-mno-outline-atomics) > +CFLAGS-$(CONFIG_DEBUG) += -finstrument-functions ... here, with arches simply indicating whether the Kconfig is usable (i.e. arch code had been suitably enabled). Jan
Hi Stewart, On 29/07/2024 15:24, Stewart Hildebrand wrote: > Use the -finstrument-functions option to check that the stack pointer is Is the feature supported by both clang and GCC? If so, which from versions? From README, this is what we currently support. - For ARM 32-bit: - GCC 4.9 or later - GNU Binutils 2.24 or later - For ARM 64-bit: - GCC 5.1 or later - GNU Binutils 2.24 or later We don't mention Clang, but I would expect a clang from 4-5 years should still be Arm (not cross-compile as we never fully added the support). > valid upon each function entry. Only enable it for debug builds due to > the overhead introduced. How much overhead is it? We use debug builds during the dev cycle, so we need to make sure this is not noticeable. In any case, it would be better if this new option is under its own kconfig options. We can separately decide whether it is on or off by default when CONFIG_DEBUG=y. > > Use per-cpu variables to store the stack base and nesting level. The > nesting level is used to avoid invoking __cyg_profile_func_enter > recursively. > > A check is added for whether per-cpu data has been initialized. Since > TPIDR_EL2 resets to an unknown value, initialize it to an invalid value. > > Signed-off-by: Stewart Hildebrand <stewart.hildebrand@amd.com> > --- > RFC: I only tested this on arm64. I still need to test with arm32. > > RFC: Should we put this under its own config option instead of reusing > CONFIG_DEBUG? See above. > > RFC: Is there a better value for INVALID_PER_CPU_OFFSET? We can't use 0 > because 0 is a valid value for get_per_cpu_offset(). I would consider to use ~0 because this is very unlikely to be used as an offset (at least on arm64). > --- > xen/arch/arm/arch.mk | 1 + > xen/arch/arm/arm64/head.S | 4 +++ > xen/arch/arm/domain.c | 3 +++ > xen/arch/arm/include/asm/page.h | 2 ++ > xen/arch/arm/include/asm/traps.h | 8 ++++++ > xen/arch/arm/setup.c | 4 +++ > xen/arch/arm/smpboot.c | 3 +++ > xen/arch/arm/traps.c | 45 ++++++++++++++++++++++++++++++++ > 8 files changed, 70 insertions(+) > > diff --git a/xen/arch/arm/arch.mk b/xen/arch/arm/arch.mk > index 022dcda19224..c39cb65d183d 100644 > --- a/xen/arch/arm/arch.mk > +++ b/xen/arch/arm/arch.mk > @@ -12,6 +12,7 @@ CFLAGS-$(CONFIG_ARM_32) += -mno-unaligned-access > CFLAGS-$(CONFIG_ARM_64) += -mcpu=generic > CFLAGS-$(CONFIG_ARM_64) += -mgeneral-regs-only # No fp registers etc > $(call cc-option-add,CFLAGS-$(CONFIG_ARM_64),CC,-mno-outline-atomics) > +CFLAGS-$(CONFIG_DEBUG) += -finstrument-functions > > ifneq ($(filter command line environment,$(origin CONFIG_EARLY_PRINTK)),) > $(error You must use 'make menuconfig' to enable/disable early printk now) > diff --git a/xen/arch/arm/arm64/head.S b/xen/arch/arm/arm64/head.S > index 2fa07dc3a04b..4a332b9cbc7c 100644 > --- a/xen/arch/arm/arm64/head.S > +++ b/xen/arch/arm/arm64/head.S > @@ -399,6 +399,10 @@ FUNC_LOCAL(cpu_init) > * than SP_EL0. > */ > msr spsel, #1 > + I would add a comment explaining what we are doing. > + ldr x0, =INVALID_PER_CPU_OFFSET > + msr tpidr_el2, x0 > + > ret > END(cpu_init) > > diff --git a/xen/arch/arm/domain.c b/xen/arch/arm/domain.c > index 7cfcefd27944..93ebe6e5f8af 100644 > --- a/xen/arch/arm/domain.c > +++ b/xen/arch/arm/domain.c > @@ -26,6 +26,7 @@ > #include <asm/procinfo.h> > #include <asm/regs.h> > #include <asm/tee/tee.h> > +#include <asm/traps.h> > #include <asm/vfp.h> > #include <asm/vgic.h> > #include <asm/vtimer.h> > @@ -328,6 +329,8 @@ void context_switch(struct vcpu *prev, struct vcpu *next) > > set_current(next); > > + stack_set(next->arch.stack); > + > prev = __context_switch(prev, next); > > schedule_tail(prev); > diff --git a/xen/arch/arm/include/asm/page.h b/xen/arch/arm/include/asm/page.h > index 69f817d1e68a..6b5aaf1eb3ff 100644 > --- a/xen/arch/arm/include/asm/page.h > +++ b/xen/arch/arm/include/asm/page.h > @@ -7,6 +7,8 @@ > #include <asm/lpae.h> > #include <asm/sysregs.h> > > +#define INVALID_PER_CPU_OFFSET _AC(0x1, UL) > + > /* Shareability values for the LPAE entries */ > #define LPAE_SH_NON_SHAREABLE 0x0 > #define LPAE_SH_UNPREDICTALE 0x1 > diff --git a/xen/arch/arm/include/asm/traps.h b/xen/arch/arm/include/asm/traps.h > index 9a60dbf70e5b..13e6997c2643 100644 > --- a/xen/arch/arm/include/asm/traps.h > +++ b/xen/arch/arm/include/asm/traps.h > @@ -118,6 +118,14 @@ static inline register_t sign_extend(const struct hsr_dabt dabt, register_t r) > > void finalize_instr_emulation(const struct instr_details *instr); > > +#ifdef CONFIG_DEBUG > +void stack_check_init(void); > +void stack_set(unsigned char *base); > +#else > +static inline void stack_check_init(void) { } > +static inline void stack_set(unsigned char *base) { } > +#endif > + > #endif /* __ASM_ARM_TRAPS__ */ > /* > * Local variables: > diff --git a/xen/arch/arm/setup.c b/xen/arch/arm/setup.c > index 0c2fdaceaf21..07d07feff602 100644 > --- a/xen/arch/arm/setup.c > +++ b/xen/arch/arm/setup.c > @@ -47,6 +47,7 @@ > #include <asm/setup.h> > #include <xsm/xsm.h> > #include <asm/acpi.h> > +#include <asm/traps.h> > > struct bootinfo __initdata bootinfo = BOOTINFO_INIT; > > @@ -733,6 +734,8 @@ void asmlinkage __init start_xen(unsigned long boot_phys_offset, > percpu_init_areas(); > set_processor_id(0); /* needed early, for smp_processor_id() */ > > + stack_check_init(); > + > /* Initialize traps early allow us to get backtrace when an error occurred */ > init_traps(); > > @@ -924,6 +927,7 @@ void asmlinkage __init start_xen(unsigned long boot_phys_offset, > * since the static one we're running on is about to be freed. */ > memcpy(idle_vcpu[0]->arch.cpu_info, get_cpu_info(), > sizeof(struct cpu_info)); > + stack_set(idle_vcpu[0]->arch.stack); > switch_stack_and_jump(idle_vcpu[0]->arch.cpu_info, init_done); > } > > diff --git a/xen/arch/arm/smpboot.c b/xen/arch/arm/smpboot.c > index 04e363088d60..1c689f2caed7 100644 > --- a/xen/arch/arm/smpboot.c > +++ b/xen/arch/arm/smpboot.c > @@ -30,6 +30,7 @@ > #include <asm/psci.h> > #include <asm/acpi.h> > #include <asm/tee/tee.h> > +#include <asm/traps.h> > > /* Override macros from asm/page.h to make them work with mfn_t */ > #undef virt_to_mfn > @@ -329,6 +330,8 @@ void asmlinkage start_secondary(void) > > set_processor_id(cpuid); > > + stack_check_init(); > + > identify_cpu(¤t_cpu_data); > processor_setup(); > > diff --git a/xen/arch/arm/traps.c b/xen/arch/arm/traps.c > index aac6c599f878..b4890eec7ec4 100644 > --- a/xen/arch/arm/traps.c > +++ b/xen/arch/arm/traps.c > @@ -2325,6 +2325,51 @@ void asmlinkage leave_hypervisor_to_guest(void) > arm_smccc_1_1_smc(ARM_SMCCC_ARCH_WORKAROUND_2_FID, 0, NULL); > } > > +#ifdef CONFIG_DEBUG Loooking at the code below. Aside the stack pointer part, nothing seems specific to Arm. So maybe this could be moved to common code? > +DEFINE_PER_CPU(unsigned int, stack_check_nesting); > +DEFINE_PER_CPU(unsigned char *, stack_base); I think this could be "const unsigned char *" as the stack should not be modified directly. > + > +void __attribute__((no_instrument_function)) stack_set(unsigned char *base) > +{ > + this_cpu(stack_base) = base; > +} > + > +void __init __attribute__((no_instrument_function)) stack_check_init(void) > +{ > + this_cpu(stack_check_nesting) = 0; > + stack_set(init_data.stack); > +} > + > +__attribute__((no_instrument_function)) > +void __cyg_profile_func_enter(void *this_fn, void *call_site) > +{ > + unsigned char *sp; > + > + if ( get_per_cpu_offset() == INVALID_PER_CPU_OFFSET ) > + return; > + > + asm volatile ("mov %0, sp" : "=r" (sp) ); > + > + if ( sp < this_cpu(stack_base) || > + sp > (this_cpu(stack_base) + STACK_SIZE) ) The top of the stack is used to store struct cpu_info. So you want to substract its size (see arch_vcpu_create()). > + { > + if ( this_cpu(stack_check_nesting) ) > + return; > + > + this_cpu(stack_check_nesting)++; Looking at the use, it only seems to be used as "print/panic once". So maybe use a bool to avoid any overflow. > + printk("CPU %d stack pointer out of bounds (sp %#lx, stack base %#lx)\n", > + smp_processor_id(), (uintptr_t)sp, > + (uintptr_t)this_cpu(stack_base)); > + BUG(); I would consider to call panic(). But is it safe to call any of this if we blew the stack? IOW, should we have a buffer? Cheers,
Hi again, On 29/07/2024 15:24, Stewart Hildebrand wrote: > Use the -finstrument-functions option to check that the stack pointer is > valid upon each function entry. Only enable it for debug builds due to > the overhead introduced. > > Use per-cpu variables to store the stack base and nesting level. The > nesting level is used to avoid invoking __cyg_profile_func_enter > recursively. > > A check is added for whether per-cpu data has been initialized. Since > TPIDR_EL2 resets to an unknown value, initialize it to an invalid value. > > Signed-off-by: Stewart Hildebrand <stewart.hildebrand@amd.com> > --- > RFC: I only tested this on arm64. I still need to test with arm32. > > RFC: Should we put this under its own config option instead of reusing > CONFIG_DEBUG? > > RFC: Is there a better value for INVALID_PER_CPU_OFFSET? We can't use 0 > because 0 is a valid value for get_per_cpu_offset(). > --- > xen/arch/arm/arch.mk | 1 + > xen/arch/arm/arm64/head.S | 4 +++ > xen/arch/arm/domain.c | 3 +++ > xen/arch/arm/include/asm/page.h | 2 ++ > xen/arch/arm/include/asm/traps.h | 8 ++++++ > xen/arch/arm/setup.c | 4 +++ > xen/arch/arm/smpboot.c | 3 +++ > xen/arch/arm/traps.c | 45 ++++++++++++++++++++++++++++++++ > 8 files changed, 70 insertions(+) > > diff --git a/xen/arch/arm/arch.mk b/xen/arch/arm/arch.mk > index 022dcda19224..c39cb65d183d 100644 > --- a/xen/arch/arm/arch.mk > +++ b/xen/arch/arm/arch.mk > @@ -12,6 +12,7 @@ CFLAGS-$(CONFIG_ARM_32) += -mno-unaligned-access > CFLAGS-$(CONFIG_ARM_64) += -mcpu=generic > CFLAGS-$(CONFIG_ARM_64) += -mgeneral-regs-only # No fp registers etc > $(call cc-option-add,CFLAGS-$(CONFIG_ARM_64),CC,-mno-outline-atomics) > +CFLAGS-$(CONFIG_DEBUG) += -finstrument-functions > > ifneq ($(filter command line environment,$(origin CONFIG_EARLY_PRINTK)),) > $(error You must use 'make menuconfig' to enable/disable early printk now) > diff --git a/xen/arch/arm/arm64/head.S b/xen/arch/arm/arm64/head.S > index 2fa07dc3a04b..4a332b9cbc7c 100644 > --- a/xen/arch/arm/arm64/head.S > +++ b/xen/arch/arm/arm64/head.S > @@ -399,6 +399,10 @@ FUNC_LOCAL(cpu_init) > * than SP_EL0. > */ > msr spsel, #1 > + > + ldr x0, =INVALID_PER_CPU_OFFSET > + msr tpidr_el2, x0 > + > ret > END(cpu_init) > > diff --git a/xen/arch/arm/domain.c b/xen/arch/arm/domain.c > index 7cfcefd27944..93ebe6e5f8af 100644 > --- a/xen/arch/arm/domain.c > +++ b/xen/arch/arm/domain.c > @@ -26,6 +26,7 @@ > #include <asm/procinfo.h> > #include <asm/regs.h> > #include <asm/tee/tee.h> > +#include <asm/traps.h> > #include <asm/vfp.h> > #include <asm/vgic.h> > #include <asm/vtimer.h> > @@ -328,6 +329,8 @@ void context_switch(struct vcpu *prev, struct vcpu *next) > > set_current(next); > > + stack_set(next->arch.stack); > + > prev = __context_switch(prev, next); > > schedule_tail(prev); > diff --git a/xen/arch/arm/include/asm/page.h b/xen/arch/arm/include/asm/page.h > index 69f817d1e68a..6b5aaf1eb3ff 100644 > --- a/xen/arch/arm/include/asm/page.h > +++ b/xen/arch/arm/include/asm/page.h > @@ -7,6 +7,8 @@ > #include <asm/lpae.h> > #include <asm/sysregs.h> > > +#define INVALID_PER_CPU_OFFSET _AC(0x1, UL) > + > /* Shareability values for the LPAE entries */ > #define LPAE_SH_NON_SHAREABLE 0x0 > #define LPAE_SH_UNPREDICTALE 0x1 > diff --git a/xen/arch/arm/include/asm/traps.h b/xen/arch/arm/include/asm/traps.h > index 9a60dbf70e5b..13e6997c2643 100644 > --- a/xen/arch/arm/include/asm/traps.h > +++ b/xen/arch/arm/include/asm/traps.h > @@ -118,6 +118,14 @@ static inline register_t sign_extend(const struct hsr_dabt dabt, register_t r) > > void finalize_instr_emulation(const struct instr_details *instr); > > +#ifdef CONFIG_DEBUG > +void stack_check_init(void); > +void stack_set(unsigned char *base); > +#else > +static inline void stack_check_init(void) { } > +static inline void stack_set(unsigned char *base) { } > +#endif > + > #endif /* __ASM_ARM_TRAPS__ */ > /* > * Local variables: > diff --git a/xen/arch/arm/setup.c b/xen/arch/arm/setup.c > index 0c2fdaceaf21..07d07feff602 100644 > --- a/xen/arch/arm/setup.c > +++ b/xen/arch/arm/setup.c > @@ -47,6 +47,7 @@ > #include <asm/setup.h> > #include <xsm/xsm.h> > #include <asm/acpi.h> > +#include <asm/traps.h> > > struct bootinfo __initdata bootinfo = BOOTINFO_INIT; > > @@ -733,6 +734,8 @@ void asmlinkage __init start_xen(unsigned long boot_phys_offset, > percpu_init_areas(); > set_processor_id(0); /* needed early, for smp_processor_id() */ > > + stack_check_init(); > + > /* Initialize traps early allow us to get backtrace when an error occurred */ > init_traps(); > > @@ -924,6 +927,7 @@ void asmlinkage __init start_xen(unsigned long boot_phys_offset, > * since the static one we're running on is about to be freed. */ > memcpy(idle_vcpu[0]->arch.cpu_info, get_cpu_info(), > sizeof(struct cpu_info)); > + stack_set(idle_vcpu[0]->arch.stack); > switch_stack_and_jump(idle_vcpu[0]->arch.cpu_info, init_done); > } > > diff --git a/xen/arch/arm/smpboot.c b/xen/arch/arm/smpboot.c > index 04e363088d60..1c689f2caed7 100644 > --- a/xen/arch/arm/smpboot.c > +++ b/xen/arch/arm/smpboot.c > @@ -30,6 +30,7 @@ > #include <asm/psci.h> > #include <asm/acpi.h> > #include <asm/tee/tee.h> > +#include <asm/traps.h> > > /* Override macros from asm/page.h to make them work with mfn_t */ > #undef virt_to_mfn > @@ -329,6 +330,8 @@ void asmlinkage start_secondary(void) > > set_processor_id(cpuid); > > + stack_check_init(); > + > identify_cpu(¤t_cpu_data); > processor_setup(); > > diff --git a/xen/arch/arm/traps.c b/xen/arch/arm/traps.c > index aac6c599f878..b4890eec7ec4 100644 > --- a/xen/arch/arm/traps.c > +++ b/xen/arch/arm/traps.c > @@ -2325,6 +2325,51 @@ void asmlinkage leave_hypervisor_to_guest(void) > arm_smccc_1_1_smc(ARM_SMCCC_ARCH_WORKAROUND_2_FID, 0, NULL); > } > > +#ifdef CONFIG_DEBUG > +DEFINE_PER_CPU(unsigned int, stack_check_nesting); > +DEFINE_PER_CPU(unsigned char *, stack_base); While looking at the code, I just realized that this should be equivalent to current->arch.base. So do we actually need stack_base? Cheers,
On 7/29/24 14:50, Julien Grall wrote: > Hi again, > > On 29/07/2024 15:24, Stewart Hildebrand wrote: >> diff --git a/xen/arch/arm/traps.c b/xen/arch/arm/traps.c >> index aac6c599f878..b4890eec7ec4 100644 >> --- a/xen/arch/arm/traps.c >> +++ b/xen/arch/arm/traps.c >> @@ -2325,6 +2325,51 @@ void asmlinkage leave_hypervisor_to_guest(void) >> arm_smccc_1_1_smc(ARM_SMCCC_ARCH_WORKAROUND_2_FID, 0, NULL); >> } >> +#ifdef CONFIG_DEBUG >> +DEFINE_PER_CPU(unsigned int, stack_check_nesting); >> +DEFINE_PER_CPU(unsigned char *, stack_base); > > While looking at the code, I just realized that this should be equivalent to current->arch.base. current->arch.stack That's true only after the idle vcpu stacks have been allocated. > So do we actually need stack_base? This is a way to enable stack instrumentation much earlier during boot when we are still using the static boot stack.
Hi, On 29/07/2024 20:40, Stewart Hildebrand wrote: > On 7/29/24 14:50, Julien Grall wrote: >> Hi again, >> >> On 29/07/2024 15:24, Stewart Hildebrand wrote: >>> diff --git a/xen/arch/arm/traps.c b/xen/arch/arm/traps.c >>> index aac6c599f878..b4890eec7ec4 100644 >>> --- a/xen/arch/arm/traps.c >>> +++ b/xen/arch/arm/traps.c >>> @@ -2325,6 +2325,51 @@ void asmlinkage leave_hypervisor_to_guest(void) >>> arm_smccc_1_1_smc(ARM_SMCCC_ARCH_WORKAROUND_2_FID, 0, NULL); >>> } >>> +#ifdef CONFIG_DEBUG >>> +DEFINE_PER_CPU(unsigned int, stack_check_nesting); >>> +DEFINE_PER_CPU(unsigned char *, stack_base); >> >> While looking at the code, I just realized that this should be equivalent to current->arch.base. > > current->arch.stack > > That's true only after the idle vcpu stacks have been allocated. > >> So do we actually need stack_base? > > This is a way to enable stack instrumentation much earlier during boot > when we are still using the static boot stack. Sure. But we are only partially checking the static boot stack. I am not entirely sure if there is any value for that because at that point the stack would be barely used. Anyway, this is only for debug build so far, so I am ok to keep it. Cheers,
On 29.07.2024 20:36, Julien Grall wrote: > On 29/07/2024 15:24, Stewart Hildebrand wrote: >> Use the -finstrument-functions option to check that the stack pointer is > > Is the feature supported by both clang and GCC? If so, which from versions? > > From README, this is what we currently support. > > - For ARM 32-bit: > - GCC 4.9 or later > - GNU Binutils 2.24 or later > - For ARM 64-bit: > - GCC 5.1 or later > - GNU Binutils 2.24 or later > > We don't mention Clang, but I would expect a clang from 4-5 years should > still be Arm (not cross-compile as we never fully added the support). I was wondering the same, but already on patch 1 and with other architectures also in mind, as there's common (XSM) code gaining respective attributes. For gcc I checked that even 4.1 supports that attribute, so I expect we're fine here (and double checking with godbolt, Clang 3.0.0 also supports the option). Jan
On 7/29/24 14:36, Julien Grall wrote: > Hi Stewart, > > On 29/07/2024 15:24, Stewart Hildebrand wrote: >> Use the -finstrument-functions option to check that the stack pointer is > > Is the feature supported by both clang and GCC? Yes, I tested both. > If so, which from versions? gcc since at least 1998, not sure what version specifically. clang since v2.8 (2010) > > From README, this is what we currently support. > > - For ARM 32-bit: > - GCC 4.9 or later > - GNU Binutils 2.24 or later > - For ARM 64-bit: > - GCC 5.1 or later > - GNU Binutils 2.24 or later > > We don't mention Clang, but I would expect a clang from 4-5 years should still be Arm (not cross-compile as we never fully added the support). There is a way cross compile with clang today, but still using gnu linker and such. Here's my clang build rune: make -j $(nproc) \ clang=y \ CC="clang --target=aarch64-none-linux-gnu -march=armv8a+nocrypto" \ CXX="clang++ --target=aarch64-none-linux-gnu -march=armv8a+nocrypto" \ HOSTCC=clang \ HOSTCXX=clang++ \ XEN_TARGET_ARCH=arm64 \ CROSS_COMPILE=aarch64-none-linux-gnu- \ dist-xen > > >> valid upon each function entry. Only enable it for debug builds due to >> the overhead introduced. > > How much overhead is it? We use debug builds during the dev cycle, so we need to make sure this is not noticeable. Xen binary size without instrumentation: 1.3M Xen binary size with instrumentation: 1.9M Subjectively, the Xen boot time on ZCU102 hardware seems comparable. On qemu-system-aarch64, unfortunately, I'm seeing about a 7x slowdown in Xen boot time. It seems page allocation operations are particularly affected. > > In any case, it would be better if this new option is under its own kconfig options. We can separately decide whether it is on or off by default when CONFIG_DEBUG=y. Will do. Given the significant overhead when running on qemu, I think I must reluctantly suggest that it be off by default. For CI runs with real hardware, we could turn it on. > >> >> Use per-cpu variables to store the stack base and nesting level. The >> nesting level is used to avoid invoking __cyg_profile_func_enter >> recursively. >> >> A check is added for whether per-cpu data has been initialized. Since >> TPIDR_EL2 resets to an unknown value, initialize it to an invalid value. >> >> Signed-off-by: Stewart Hildebrand <stewart.hildebrand@amd.com> >> --- >> RFC: I only tested this on arm64. I still need to test with arm32. >> >> RFC: Should we put this under its own config option instead of reusing >> CONFIG_DEBUG? > > See above. > >> >> RFC: Is there a better value for INVALID_PER_CPU_OFFSET? We can't use 0 >> because 0 is a valid value for get_per_cpu_offset(). > > I would consider to use ~0 because this is very unlikely to be used as an offset (at least on arm64). Yep, ~0 works, will do. > >> --- >> xen/arch/arm/arch.mk | 1 + >> xen/arch/arm/arm64/head.S | 4 +++ >> xen/arch/arm/domain.c | 3 +++ >> xen/arch/arm/include/asm/page.h | 2 ++ >> xen/arch/arm/include/asm/traps.h | 8 ++++++ >> xen/arch/arm/setup.c | 4 +++ >> xen/arch/arm/smpboot.c | 3 +++ >> xen/arch/arm/traps.c | 45 ++++++++++++++++++++++++++++++++ >> 8 files changed, 70 insertions(+) >> >> diff --git a/xen/arch/arm/arch.mk b/xen/arch/arm/arch.mk >> index 022dcda19224..c39cb65d183d 100644 >> --- a/xen/arch/arm/arch.mk >> +++ b/xen/arch/arm/arch.mk >> @@ -12,6 +12,7 @@ CFLAGS-$(CONFIG_ARM_32) += -mno-unaligned-access >> CFLAGS-$(CONFIG_ARM_64) += -mcpu=generic >> CFLAGS-$(CONFIG_ARM_64) += -mgeneral-regs-only # No fp registers etc >> $(call cc-option-add,CFLAGS-$(CONFIG_ARM_64),CC,-mno-outline-atomics) >> +CFLAGS-$(CONFIG_DEBUG) += -finstrument-functions >> ifneq ($(filter command line environment,$(origin CONFIG_EARLY_PRINTK)),) >> $(error You must use 'make menuconfig' to enable/disable early printk now) >> diff --git a/xen/arch/arm/arm64/head.S b/xen/arch/arm/arm64/head.S >> index 2fa07dc3a04b..4a332b9cbc7c 100644 >> --- a/xen/arch/arm/arm64/head.S >> +++ b/xen/arch/arm/arm64/head.S >> @@ -399,6 +399,10 @@ FUNC_LOCAL(cpu_init) >> * than SP_EL0. >> */ >> msr spsel, #1 >> + > > I would add a comment explaining what we are doing. Will do. > >> + ldr x0, =INVALID_PER_CPU_OFFSET >> + msr tpidr_el2, x0 >> + >> ret >> END(cpu_init) >> diff --git a/xen/arch/arm/domain.c b/xen/arch/arm/domain.c >> index 7cfcefd27944..93ebe6e5f8af 100644 >> --- a/xen/arch/arm/domain.c >> +++ b/xen/arch/arm/domain.c >> @@ -26,6 +26,7 @@ >> #include <asm/procinfo.h> >> #include <asm/regs.h> >> #include <asm/tee/tee.h> >> +#include <asm/traps.h> >> #include <asm/vfp.h> >> #include <asm/vgic.h> >> #include <asm/vtimer.h> >> @@ -328,6 +329,8 @@ void context_switch(struct vcpu *prev, struct vcpu *next) >> set_current(next); >> + stack_set(next->arch.stack); >> + >> prev = __context_switch(prev, next); >> schedule_tail(prev); >> diff --git a/xen/arch/arm/include/asm/page.h b/xen/arch/arm/include/asm/page.h >> index 69f817d1e68a..6b5aaf1eb3ff 100644 >> --- a/xen/arch/arm/include/asm/page.h >> +++ b/xen/arch/arm/include/asm/page.h >> @@ -7,6 +7,8 @@ >> #include <asm/lpae.h> >> #include <asm/sysregs.h> >> +#define INVALID_PER_CPU_OFFSET _AC(0x1, UL) >> + >> /* Shareability values for the LPAE entries */ >> #define LPAE_SH_NON_SHAREABLE 0x0 >> #define LPAE_SH_UNPREDICTALE 0x1 >> diff --git a/xen/arch/arm/include/asm/traps.h b/xen/arch/arm/include/asm/traps.h >> index 9a60dbf70e5b..13e6997c2643 100644 >> --- a/xen/arch/arm/include/asm/traps.h >> +++ b/xen/arch/arm/include/asm/traps.h >> @@ -118,6 +118,14 @@ static inline register_t sign_extend(const struct hsr_dabt dabt, register_t r) >> void finalize_instr_emulation(const struct instr_details *instr); >> +#ifdef CONFIG_DEBUG >> +void stack_check_init(void); >> +void stack_set(unsigned char *base); >> +#else >> +static inline void stack_check_init(void) { } >> +static inline void stack_set(unsigned char *base) { } >> +#endif >> + >> #endif /* __ASM_ARM_TRAPS__ */ >> /* >> * Local variables: >> diff --git a/xen/arch/arm/setup.c b/xen/arch/arm/setup.c >> index 0c2fdaceaf21..07d07feff602 100644 >> --- a/xen/arch/arm/setup.c >> +++ b/xen/arch/arm/setup.c >> @@ -47,6 +47,7 @@ >> #include <asm/setup.h> >> #include <xsm/xsm.h> >> #include <asm/acpi.h> >> +#include <asm/traps.h> >> struct bootinfo __initdata bootinfo = BOOTINFO_INIT; >> @@ -733,6 +734,8 @@ void asmlinkage __init start_xen(unsigned long boot_phys_offset, >> percpu_init_areas(); >> set_processor_id(0); /* needed early, for smp_processor_id() */ >> + stack_check_init(); >> + >> /* Initialize traps early allow us to get backtrace when an error occurred */ >> init_traps(); >> @@ -924,6 +927,7 @@ void asmlinkage __init start_xen(unsigned long boot_phys_offset, >> * since the static one we're running on is about to be freed. */ >> memcpy(idle_vcpu[0]->arch.cpu_info, get_cpu_info(), >> sizeof(struct cpu_info)); >> + stack_set(idle_vcpu[0]->arch.stack); >> switch_stack_and_jump(idle_vcpu[0]->arch.cpu_info, init_done); >> } >> diff --git a/xen/arch/arm/smpboot.c b/xen/arch/arm/smpboot.c >> index 04e363088d60..1c689f2caed7 100644 >> --- a/xen/arch/arm/smpboot.c >> +++ b/xen/arch/arm/smpboot.c >> @@ -30,6 +30,7 @@ >> #include <asm/psci.h> >> #include <asm/acpi.h> >> #include <asm/tee/tee.h> >> +#include <asm/traps.h> >> /* Override macros from asm/page.h to make them work with mfn_t */ >> #undef virt_to_mfn >> @@ -329,6 +330,8 @@ void asmlinkage start_secondary(void) >> set_processor_id(cpuid); >> + stack_check_init(); >> + >> identify_cpu(¤t_cpu_data); >> processor_setup(); >> diff --git a/xen/arch/arm/traps.c b/xen/arch/arm/traps.c >> index aac6c599f878..b4890eec7ec4 100644 >> --- a/xen/arch/arm/traps.c >> +++ b/xen/arch/arm/traps.c >> @@ -2325,6 +2325,51 @@ void asmlinkage leave_hypervisor_to_guest(void) >> arm_smccc_1_1_smc(ARM_SMCCC_ARCH_WORKAROUND_2_FID, 0, NULL); >> } >> +#ifdef CONFIG_DEBUG > > Loooking at the code below. Aside the stack pointer part, nothing seems specific to Arm. So maybe this could be moved to common code? Yes, I suppose so. > >> +DEFINE_PER_CPU(unsigned int, stack_check_nesting); >> +DEFINE_PER_CPU(unsigned char *, stack_base); > > I think this could be "const unsigned char *" as the stack should not be modified directly. Every time there's a vcpu context switch we will have a new stack. > >> + >> +void __attribute__((no_instrument_function)) stack_set(unsigned char *base) >> +{ >> + this_cpu(stack_base) = base; >> +} >> + >> +void __init __attribute__((no_instrument_function)) stack_check_init(void) >> +{ >> + this_cpu(stack_check_nesting) = 0; >> + stack_set(init_data.stack); >> +} >> + >> +__attribute__((no_instrument_function)) >> +void __cyg_profile_func_enter(void *this_fn, void *call_site) >> +{ >> + unsigned char *sp; >> + >> + if ( get_per_cpu_offset() == INVALID_PER_CPU_OFFSET ) >> + return; >> + >> + asm volatile ("mov %0, sp" : "=r" (sp) ); >> + >> + if ( sp < this_cpu(stack_base) || >> + sp > (this_cpu(stack_base) + STACK_SIZE) ) > > The top of the stack is used to store struct cpu_info. So you want to substract its size (see arch_vcpu_create()). Will do. > >> + { >> + if ( this_cpu(stack_check_nesting) ) >> + return; >> + >> + this_cpu(stack_check_nesting)++; > > Looking at the use, it only seems to be used as "print/panic once". So maybe use a bool to avoid any overflow. It will only ever be incremented once. I'll still change it to a bool, this should make it more obvious. > >> + printk("CPU %d stack pointer out of bounds (sp %#lx, stack base %#lx)\n", >> + smp_processor_id(), (uintptr_t)sp, >> + (uintptr_t)this_cpu(stack_base)); >> + BUG(); > > I would consider to call panic(). panic() alone doesn't show the stack trace / call trace. > But is it safe to call any of this if we blew the stack? Nope, it sure isn't! > IOW, should we have a buffer? Yes. After some experimentation, I found that this printk and a WARN (similar to BUG, but resumes execution and allows me to collect these metrics) uses approximately 1632 bytes of stack. Assuming BUG uses a similar amount of stack as WARN, and adding in a comfortable margin for error, I'll add a 4096 byte buffer (i.e. invoke the print/BUG with 4096 bytes remaining on the stack).
On 7/29/24 16:37, Julien Grall wrote: > Hi, > > On 29/07/2024 20:40, Stewart Hildebrand wrote: >> On 7/29/24 14:50, Julien Grall wrote: >>> Hi again, >>> >>> On 29/07/2024 15:24, Stewart Hildebrand wrote: >>>> diff --git a/xen/arch/arm/traps.c b/xen/arch/arm/traps.c >>>> index aac6c599f878..b4890eec7ec4 100644 >>>> --- a/xen/arch/arm/traps.c >>>> +++ b/xen/arch/arm/traps.c >>>> @@ -2325,6 +2325,51 @@ void asmlinkage leave_hypervisor_to_guest(void) >>>> arm_smccc_1_1_smc(ARM_SMCCC_ARCH_WORKAROUND_2_FID, 0, NULL); >>>> } >>>> +#ifdef CONFIG_DEBUG >>>> +DEFINE_PER_CPU(unsigned int, stack_check_nesting); >>>> +DEFINE_PER_CPU(unsigned char *, stack_base); >>> >>> While looking at the code, I just realized that this should be equivalent to current->arch.base. >> >> current->arch.stack >> >> That's true only after the idle vcpu stacks have been allocated. >> >>> So do we actually need stack_base? >> >> This is a way to enable stack instrumentation much earlier during boot >> when we are still using the static boot stack. > > Sure. But we are only partially checking the static boot stack. The stack checking begins just a few lines into C world, after the percpu_init_areas(), set_processor_id(), and stack_check_init() calls in arch/arm/setup.c:start_xen(). > I am not entirely sure if there is any value for that because at that point the stack would be barely used. The entirety of start_xen() uses the boot stack, and it makes plenty of device tree parsing calls (where there is recursion) and performs domain creation, which hits the stack significantly. arch/arm/dom0less-build.c: In function ‘construct_domU’: arch/arm/dom0less-build.c:742:19: warning: stack usage is 7824 bytes [-Wstack-usage=] 742 | static int __init construct_domU(struct domain *d, | ^~~~~~~~~~~~~~ arch/arm/domain_build.c: In function ‘make_memory_node’: arch/arm/domain_build.c:788:12: warning: stack usage is 4720 bytes [-Wstack-usage=] 788 | int __init make_memory_node(const struct kernel_info *kinfo, int addrcells, | ^~~~~~~~~~~~~~~~ arch/arm/domain_build.c: In function ‘construct_dom0’: arch/arm/domain_build.c:2120:19: warning: stack usage is 7776 bytes [-Wstack-usage=] 2120 | static int __init construct_dom0(struct domain *d) | ^~~~~~~~~~~~~~ > > Anyway, this is only for debug build so far, so I am ok to keep it. > > Cheers, >
Hi, On 30/07/2024 18:50, Stewart Hildebrand wrote: > On 7/29/24 14:36, Julien Grall wrote: >> Hi Stewart, >> >> On 29/07/2024 15:24, Stewart Hildebrand wrote: >>> Use the -finstrument-functions option to check that the stack pointer is >> >> Is the feature supported by both clang and GCC? > > Yes, I tested both. > >> If so, which from versions? > > gcc since at least 1998, not sure what version specifically. > > clang since v2.8 (2010) That's quite old :). Thanks for checking. >> >> In any case, it would be better if this new option is under its own kconfig options. We can separately decide whether it is on or off by default when CONFIG_DEBUG=y. > > Will do. Given the significant overhead when running on qemu, I think I > must reluctantly suggest that it be off by default. For CI runs with > real hardware, we could turn it on. That makes sense. >>> Use per-cpu variables to store the stack base and nesting level. The >>> nesting level is used to avoid invoking __cyg_profile_func_enter >>> recursively. >>> >>> A check is added for whether per-cpu data has been initialized. Since >>> TPIDR_EL2 resets to an unknown value, initialize it to an invalid value. >>> >>> Signed-off-by: Stewart Hildebrand <stewart.hildebrand@amd.com> >>> --- >>> RFC: I only tested this on arm64. I still need to test with arm32. >>> >>> RFC: Should we put this under its own config option instead of reusing >>> CONFIG_DEBUG? >> >> See above. >> >>> >>> RFC: Is there a better value for INVALID_PER_CPU_OFFSET? We can't use 0 >>> because 0 is a valid value for get_per_cpu_offset(). >> >> I would consider to use ~0 because this is very unlikely to be used as an offset (at least on arm64). > > Yep, ~0 works, will do. > >> >>> --- >>> xen/arch/arm/arch.mk | 1 + >>> xen/arch/arm/arm64/head.S | 4 +++ >>> xen/arch/arm/domain.c | 3 +++ >>> xen/arch/arm/include/asm/page.h | 2 ++ >>> xen/arch/arm/include/asm/traps.h | 8 ++++++ >>> xen/arch/arm/setup.c | 4 +++ >>> xen/arch/arm/smpboot.c | 3 +++ >>> xen/arch/arm/traps.c | 45 ++++++++++++++++++++++++++++++++ >>> 8 files changed, 70 insertions(+) >>> >>> diff --git a/xen/arch/arm/arch.mk b/xen/arch/arm/arch.mk >>> index 022dcda19224..c39cb65d183d 100644 >>> --- a/xen/arch/arm/arch.mk >>> +++ b/xen/arch/arm/arch.mk >>> @@ -12,6 +12,7 @@ CFLAGS-$(CONFIG_ARM_32) += -mno-unaligned-access >>> CFLAGS-$(CONFIG_ARM_64) += -mcpu=generic >>> CFLAGS-$(CONFIG_ARM_64) += -mgeneral-regs-only # No fp registers etc >>> $(call cc-option-add,CFLAGS-$(CONFIG_ARM_64),CC,-mno-outline-atomics) >>> +CFLAGS-$(CONFIG_DEBUG) += -finstrument-functions >>> ifneq ($(filter command line environment,$(origin CONFIG_EARLY_PRINTK)),) >>> $(error You must use 'make menuconfig' to enable/disable early printk now) >>> diff --git a/xen/arch/arm/arm64/head.S b/xen/arch/arm/arm64/head.S >>> index 2fa07dc3a04b..4a332b9cbc7c 100644 >>> --- a/xen/arch/arm/arm64/head.S >>> +++ b/xen/arch/arm/arm64/head.S >>> @@ -399,6 +399,10 @@ FUNC_LOCAL(cpu_init) >>> * than SP_EL0. >>> */ >>> msr spsel, #1 >>> + >> >> I would add a comment explaining what we are doing. > > Will do. > >> >>> + ldr x0, =INVALID_PER_CPU_OFFSET >>> + msr tpidr_el2, x0 >>> + >>> ret >>> END(cpu_init) >>> diff --git a/xen/arch/arm/domain.c b/xen/arch/arm/domain.c >>> index 7cfcefd27944..93ebe6e5f8af 100644 >>> --- a/xen/arch/arm/domain.c >>> +++ b/xen/arch/arm/domain.c >>> @@ -26,6 +26,7 @@ >>> #include <asm/procinfo.h> >>> #include <asm/regs.h> >>> #include <asm/tee/tee.h> >>> +#include <asm/traps.h> >>> #include <asm/vfp.h> >>> #include <asm/vgic.h> >>> #include <asm/vtimer.h> >>> @@ -328,6 +329,8 @@ void context_switch(struct vcpu *prev, struct vcpu *next) >>> set_current(next); >>> + stack_set(next->arch.stack); >>> + >>> prev = __context_switch(prev, next); >>> schedule_tail(prev); >>> diff --git a/xen/arch/arm/include/asm/page.h b/xen/arch/arm/include/asm/page.h >>> index 69f817d1e68a..6b5aaf1eb3ff 100644 >>> --- a/xen/arch/arm/include/asm/page.h >>> +++ b/xen/arch/arm/include/asm/page.h >>> @@ -7,6 +7,8 @@ >>> #include <asm/lpae.h> >>> #include <asm/sysregs.h> >>> +#define INVALID_PER_CPU_OFFSET _AC(0x1, UL) >>> + >>> /* Shareability values for the LPAE entries */ >>> #define LPAE_SH_NON_SHAREABLE 0x0 >>> #define LPAE_SH_UNPREDICTALE 0x1 >>> diff --git a/xen/arch/arm/include/asm/traps.h b/xen/arch/arm/include/asm/traps.h >>> index 9a60dbf70e5b..13e6997c2643 100644 >>> --- a/xen/arch/arm/include/asm/traps.h >>> +++ b/xen/arch/arm/include/asm/traps.h >>> @@ -118,6 +118,14 @@ static inline register_t sign_extend(const struct hsr_dabt dabt, register_t r) >>> void finalize_instr_emulation(const struct instr_details *instr); >>> +#ifdef CONFIG_DEBUG >>> +void stack_check_init(void); >>> +void stack_set(unsigned char *base); >>> +#else >>> +static inline void stack_check_init(void) { } >>> +static inline void stack_set(unsigned char *base) { } >>> +#endif >>> + >>> #endif /* __ASM_ARM_TRAPS__ */ >>> /* >>> * Local variables: >>> diff --git a/xen/arch/arm/setup.c b/xen/arch/arm/setup.c >>> index 0c2fdaceaf21..07d07feff602 100644 >>> --- a/xen/arch/arm/setup.c >>> +++ b/xen/arch/arm/setup.c >>> @@ -47,6 +47,7 @@ >>> #include <asm/setup.h> >>> #include <xsm/xsm.h> >>> #include <asm/acpi.h> >>> +#include <asm/traps.h> >>> struct bootinfo __initdata bootinfo = BOOTINFO_INIT; >>> @@ -733,6 +734,8 @@ void asmlinkage __init start_xen(unsigned long boot_phys_offset, >>> percpu_init_areas(); >>> set_processor_id(0); /* needed early, for smp_processor_id() */ >>> + stack_check_init(); >>> + >>> /* Initialize traps early allow us to get backtrace when an error occurred */ >>> init_traps(); >>> @@ -924,6 +927,7 @@ void asmlinkage __init start_xen(unsigned long boot_phys_offset, >>> * since the static one we're running on is about to be freed. */ >>> memcpy(idle_vcpu[0]->arch.cpu_info, get_cpu_info(), >>> sizeof(struct cpu_info)); >>> + stack_set(idle_vcpu[0]->arch.stack); >>> switch_stack_and_jump(idle_vcpu[0]->arch.cpu_info, init_done); >>> } >>> diff --git a/xen/arch/arm/smpboot.c b/xen/arch/arm/smpboot.c >>> index 04e363088d60..1c689f2caed7 100644 >>> --- a/xen/arch/arm/smpboot.c >>> +++ b/xen/arch/arm/smpboot.c >>> @@ -30,6 +30,7 @@ >>> #include <asm/psci.h> >>> #include <asm/acpi.h> >>> #include <asm/tee/tee.h> >>> +#include <asm/traps.h> >>> /* Override macros from asm/page.h to make them work with mfn_t */ >>> #undef virt_to_mfn >>> @@ -329,6 +330,8 @@ void asmlinkage start_secondary(void) >>> set_processor_id(cpuid); >>> + stack_check_init(); >>> + >>> identify_cpu(¤t_cpu_data); >>> processor_setup(); >>> diff --git a/xen/arch/arm/traps.c b/xen/arch/arm/traps.c >>> index aac6c599f878..b4890eec7ec4 100644 >>> --- a/xen/arch/arm/traps.c >>> +++ b/xen/arch/arm/traps.c >>> @@ -2325,6 +2325,51 @@ void asmlinkage leave_hypervisor_to_guest(void) >>> arm_smccc_1_1_smc(ARM_SMCCC_ARCH_WORKAROUND_2_FID, 0, NULL); >>> } >>> +#ifdef CONFIG_DEBUG >> >> Loooking at the code below. Aside the stack pointer part, nothing seems specific to Arm. So maybe this could be moved to common code? > > Yes, I suppose so. > >> >>> +DEFINE_PER_CPU(unsigned int, stack_check_nesting); >>> +DEFINE_PER_CPU(unsigned char *, stack_base); >> >> I think this could be "const unsigned char *" as the stack should not be modified directly. > > Every time there's a vcpu context switch we will have a new stack. I am not sure I follow. "const unsigned char *" should still allow you to update stack_base. It will just prevent anyone to try to write to modify the stack via stack_base. > >> >>> + >>> +void __attribute__((no_instrument_function)) stack_set(unsigned char *base) >>> +{ >>> + this_cpu(stack_base) = base; >>> +} >>> + >>> +void __init __attribute__((no_instrument_function)) stack_check_init(void) >>> +{ >>> + this_cpu(stack_check_nesting) = 0; >>> + stack_set(init_data.stack); >>> +} >>> + >>> +__attribute__((no_instrument_function)) >>> +void __cyg_profile_func_enter(void *this_fn, void *call_site) >>> +{ >>> + unsigned char *sp; >>> + >>> + if ( get_per_cpu_offset() == INVALID_PER_CPU_OFFSET ) >>> + return; >>> + >>> + asm volatile ("mov %0, sp" : "=r" (sp) ); >>> + >>> + if ( sp < this_cpu(stack_base) || >>> + sp > (this_cpu(stack_base) + STACK_SIZE) ) >> >> The top of the stack is used to store struct cpu_info. So you want to substract its size (see arch_vcpu_create()). > > Will do. > >> >>> + { >>> + if ( this_cpu(stack_check_nesting) ) >>> + return; >>> + >>> + this_cpu(stack_check_nesting)++; >> >> Looking at the use, it only seems to be used as "print/panic once". So maybe use a bool to avoid any overflow. > > It will only ever be incremented once. I'll still change it to a bool, > this should make it more obvious. > >> >>> + printk("CPU %d stack pointer out of bounds (sp %#lx, stack base %#lx)\n", >>> + smp_processor_id(), (uintptr_t)sp, >>> + (uintptr_t)this_cpu(stack_base)); >>> + BUG(); >> >> I would consider to call panic(). > > panic() alone doesn't show the stack trace / call trace. Ah good point. But TBH, I have never really understood why panic() didn't return a call stack. There are a few places where I found beneficial when debugging. Anyway, I guess this could be handled separately. > >> But is it safe to call any of this if we blew the stack? > > Nope, it sure isn't! > >> IOW, should we have a buffer? > > Yes. After some experimentation, I found that this printk and a WARN > (similar to BUG, but resumes execution and allows me to collect these > metrics) uses approximately 1632 bytes of stack. Assuming BUG uses a > similar amount of stack as WARN, and adding in a comfortable margin for > error, I'll add a 4096 byte buffer (i.e. invoke the print/BUG with 4096 > bytes remaining on the stack). AFAICT, the stack on Arm is 32KB. So we 1/8 of the stack as a buffer. Do you know the current stack use in a normal setup (e.g. boot a guest)? Anyway, so long the feature is not enabled in production, then it might be ok to steal 4KB. We could increase the stack if we see any issue. Cheers,
On 7/30/24 14:07, Julien Grall wrote: > Hi, > > On 30/07/2024 18:50, Stewart Hildebrand wrote: >> On 7/29/24 14:36, Julien Grall wrote: >>> Hi Stewart, >>> >>> On 29/07/2024 15:24, Stewart Hildebrand wrote: >>>> +DEFINE_PER_CPU(unsigned int, stack_check_nesting); >>>> +DEFINE_PER_CPU(unsigned char *, stack_base); >>> >>> I think this could be "const unsigned char *" as the stack should not be modified directly. >> >> Every time there's a vcpu context switch we will have a new stack. > > I am not sure I follow. "const unsigned char *" should still allow you to update stack_base. It will just prevent anyone to try to write to modify the stack via stack_base. Ah, of course. You're right. I'll change it to "const unsigned char *". > >> >>> >>>> + >>>> +void __attribute__((no_instrument_function)) stack_set(unsigned char *base) >>>> +{ >>>> + this_cpu(stack_base) = base; >>>> +} >>>> + >>>> +void __init __attribute__((no_instrument_function)) stack_check_init(void) >>>> +{ >>>> + this_cpu(stack_check_nesting) = 0; >>>> + stack_set(init_data.stack); >>>> +} >>>> + >>>> +__attribute__((no_instrument_function)) >>>> +void __cyg_profile_func_enter(void *this_fn, void *call_site) >>>> +{ >>>> + unsigned char *sp; >>>> + >>>> + if ( get_per_cpu_offset() == INVALID_PER_CPU_OFFSET ) >>>> + return; >>>> + >>>> + asm volatile ("mov %0, sp" : "=r" (sp) ); >>>> + >>>> + if ( sp < this_cpu(stack_base) || >>>> + sp > (this_cpu(stack_base) + STACK_SIZE) ) >>> >>> The top of the stack is used to store struct cpu_info. So you want to substract its size (see arch_vcpu_create()). >> >> Will do. >> >>> >>>> + { >>>> + if ( this_cpu(stack_check_nesting) ) >>>> + return; >>>> + >>>> + this_cpu(stack_check_nesting)++; >>> >>> Looking at the use, it only seems to be used as "print/panic once". So maybe use a bool to avoid any overflow. >> >> It will only ever be incremented once. I'll still change it to a bool, >> this should make it more obvious. >> >>> >>>> + printk("CPU %d stack pointer out of bounds (sp %#lx, stack base %#lx)\n", >>>> + smp_processor_id(), (uintptr_t)sp, >>>> + (uintptr_t)this_cpu(stack_base)); >>>> + BUG(); >>> >>> I would consider to call panic(). >> >> panic() alone doesn't show the stack trace / call trace. > > Ah good point. But TBH, I have never really understood why panic() didn't return a call stack. There are a few places where I found beneficial when debugging. > > Anyway, I guess this could be handled separately. > >> >>> But is it safe to call any of this if we blew the stack? >> >> Nope, it sure isn't! >> >>> IOW, should we have a buffer? >> >> Yes. After some experimentation, I found that this printk and a WARN >> (similar to BUG, but resumes execution and allows me to collect these >> metrics) uses approximately 1632 bytes of stack. Assuming BUG uses a >> similar amount of stack as WARN, and adding in a comfortable margin for >> error, I'll add a 4096 byte buffer (i.e. invoke the print/BUG with 4096 >> bytes remaining on the stack). > > AFAICT, the stack on Arm is 32KB. So we 1/8 of the stack as a buffer. Do you know the current stack use in a normal setup (e.g. boot a guest)? In my particular test case simply booting a dom0, it uses about 14k of stack. Of course this could vary with booting dom0less domUs, complexity of device tree parsing, etc. > > Anyway, so long the feature is not enabled in production, then it might be ok to steal 4KB. We could increase the stack if we see any issue. > > Cheers, >
diff --git a/xen/arch/arm/arch.mk b/xen/arch/arm/arch.mk index 022dcda19224..c39cb65d183d 100644 --- a/xen/arch/arm/arch.mk +++ b/xen/arch/arm/arch.mk @@ -12,6 +12,7 @@ CFLAGS-$(CONFIG_ARM_32) += -mno-unaligned-access CFLAGS-$(CONFIG_ARM_64) += -mcpu=generic CFLAGS-$(CONFIG_ARM_64) += -mgeneral-regs-only # No fp registers etc $(call cc-option-add,CFLAGS-$(CONFIG_ARM_64),CC,-mno-outline-atomics) +CFLAGS-$(CONFIG_DEBUG) += -finstrument-functions ifneq ($(filter command line environment,$(origin CONFIG_EARLY_PRINTK)),) $(error You must use 'make menuconfig' to enable/disable early printk now) diff --git a/xen/arch/arm/arm64/head.S b/xen/arch/arm/arm64/head.S index 2fa07dc3a04b..4a332b9cbc7c 100644 --- a/xen/arch/arm/arm64/head.S +++ b/xen/arch/arm/arm64/head.S @@ -399,6 +399,10 @@ FUNC_LOCAL(cpu_init) * than SP_EL0. */ msr spsel, #1 + + ldr x0, =INVALID_PER_CPU_OFFSET + msr tpidr_el2, x0 + ret END(cpu_init) diff --git a/xen/arch/arm/domain.c b/xen/arch/arm/domain.c index 7cfcefd27944..93ebe6e5f8af 100644 --- a/xen/arch/arm/domain.c +++ b/xen/arch/arm/domain.c @@ -26,6 +26,7 @@ #include <asm/procinfo.h> #include <asm/regs.h> #include <asm/tee/tee.h> +#include <asm/traps.h> #include <asm/vfp.h> #include <asm/vgic.h> #include <asm/vtimer.h> @@ -328,6 +329,8 @@ void context_switch(struct vcpu *prev, struct vcpu *next) set_current(next); + stack_set(next->arch.stack); + prev = __context_switch(prev, next); schedule_tail(prev); diff --git a/xen/arch/arm/include/asm/page.h b/xen/arch/arm/include/asm/page.h index 69f817d1e68a..6b5aaf1eb3ff 100644 --- a/xen/arch/arm/include/asm/page.h +++ b/xen/arch/arm/include/asm/page.h @@ -7,6 +7,8 @@ #include <asm/lpae.h> #include <asm/sysregs.h> +#define INVALID_PER_CPU_OFFSET _AC(0x1, UL) + /* Shareability values for the LPAE entries */ #define LPAE_SH_NON_SHAREABLE 0x0 #define LPAE_SH_UNPREDICTALE 0x1 diff --git a/xen/arch/arm/include/asm/traps.h b/xen/arch/arm/include/asm/traps.h index 9a60dbf70e5b..13e6997c2643 100644 --- a/xen/arch/arm/include/asm/traps.h +++ b/xen/arch/arm/include/asm/traps.h @@ -118,6 +118,14 @@ static inline register_t sign_extend(const struct hsr_dabt dabt, register_t r) void finalize_instr_emulation(const struct instr_details *instr); +#ifdef CONFIG_DEBUG +void stack_check_init(void); +void stack_set(unsigned char *base); +#else +static inline void stack_check_init(void) { } +static inline void stack_set(unsigned char *base) { } +#endif + #endif /* __ASM_ARM_TRAPS__ */ /* * Local variables: diff --git a/xen/arch/arm/setup.c b/xen/arch/arm/setup.c index 0c2fdaceaf21..07d07feff602 100644 --- a/xen/arch/arm/setup.c +++ b/xen/arch/arm/setup.c @@ -47,6 +47,7 @@ #include <asm/setup.h> #include <xsm/xsm.h> #include <asm/acpi.h> +#include <asm/traps.h> struct bootinfo __initdata bootinfo = BOOTINFO_INIT; @@ -733,6 +734,8 @@ void asmlinkage __init start_xen(unsigned long boot_phys_offset, percpu_init_areas(); set_processor_id(0); /* needed early, for smp_processor_id() */ + stack_check_init(); + /* Initialize traps early allow us to get backtrace when an error occurred */ init_traps(); @@ -924,6 +927,7 @@ void asmlinkage __init start_xen(unsigned long boot_phys_offset, * since the static one we're running on is about to be freed. */ memcpy(idle_vcpu[0]->arch.cpu_info, get_cpu_info(), sizeof(struct cpu_info)); + stack_set(idle_vcpu[0]->arch.stack); switch_stack_and_jump(idle_vcpu[0]->arch.cpu_info, init_done); } diff --git a/xen/arch/arm/smpboot.c b/xen/arch/arm/smpboot.c index 04e363088d60..1c689f2caed7 100644 --- a/xen/arch/arm/smpboot.c +++ b/xen/arch/arm/smpboot.c @@ -30,6 +30,7 @@ #include <asm/psci.h> #include <asm/acpi.h> #include <asm/tee/tee.h> +#include <asm/traps.h> /* Override macros from asm/page.h to make them work with mfn_t */ #undef virt_to_mfn @@ -329,6 +330,8 @@ void asmlinkage start_secondary(void) set_processor_id(cpuid); + stack_check_init(); + identify_cpu(¤t_cpu_data); processor_setup(); diff --git a/xen/arch/arm/traps.c b/xen/arch/arm/traps.c index aac6c599f878..b4890eec7ec4 100644 --- a/xen/arch/arm/traps.c +++ b/xen/arch/arm/traps.c @@ -2325,6 +2325,51 @@ void asmlinkage leave_hypervisor_to_guest(void) arm_smccc_1_1_smc(ARM_SMCCC_ARCH_WORKAROUND_2_FID, 0, NULL); } +#ifdef CONFIG_DEBUG +DEFINE_PER_CPU(unsigned int, stack_check_nesting); +DEFINE_PER_CPU(unsigned char *, stack_base); + +void __attribute__((no_instrument_function)) stack_set(unsigned char *base) +{ + this_cpu(stack_base) = base; +} + +void __init __attribute__((no_instrument_function)) stack_check_init(void) +{ + this_cpu(stack_check_nesting) = 0; + stack_set(init_data.stack); +} + +__attribute__((no_instrument_function)) +void __cyg_profile_func_enter(void *this_fn, void *call_site) +{ + unsigned char *sp; + + if ( get_per_cpu_offset() == INVALID_PER_CPU_OFFSET ) + return; + + asm volatile ("mov %0, sp" : "=r" (sp) ); + + if ( sp < this_cpu(stack_base) || + sp > (this_cpu(stack_base) + STACK_SIZE) ) + { + if ( this_cpu(stack_check_nesting) ) + return; + + this_cpu(stack_check_nesting)++; + printk("CPU %d stack pointer out of bounds (sp %#lx, stack base %#lx)\n", + smp_processor_id(), (uintptr_t)sp, + (uintptr_t)this_cpu(stack_base)); + BUG(); + } +} + +__attribute__((no_instrument_function)) +void __cyg_profile_func_exit(void *this_fn, void *call_site) +{ +} +#endif + /* * Local variables: * mode: C
Use the -finstrument-functions option to check that the stack pointer is valid upon each function entry. Only enable it for debug builds due to the overhead introduced. Use per-cpu variables to store the stack base and nesting level. The nesting level is used to avoid invoking __cyg_profile_func_enter recursively. A check is added for whether per-cpu data has been initialized. Since TPIDR_EL2 resets to an unknown value, initialize it to an invalid value. Signed-off-by: Stewart Hildebrand <stewart.hildebrand@amd.com> --- RFC: I only tested this on arm64. I still need to test with arm32. RFC: Should we put this under its own config option instead of reusing CONFIG_DEBUG? RFC: Is there a better value for INVALID_PER_CPU_OFFSET? We can't use 0 because 0 is a valid value for get_per_cpu_offset(). --- xen/arch/arm/arch.mk | 1 + xen/arch/arm/arm64/head.S | 4 +++ xen/arch/arm/domain.c | 3 +++ xen/arch/arm/include/asm/page.h | 2 ++ xen/arch/arm/include/asm/traps.h | 8 ++++++ xen/arch/arm/setup.c | 4 +++ xen/arch/arm/smpboot.c | 3 +++ xen/arch/arm/traps.c | 45 ++++++++++++++++++++++++++++++++ 8 files changed, 70 insertions(+)