Message ID | 20190423232622.GA11776@beast (mailing list archive) |
---|---|
State | New, archived |
Headers | show |
Series | [v4] arm64: sysreg: Make mrs_s and msr_s macros work with Clang and LTO | expand |
On Tue, Apr 23, 2019 at 4:26 PM Kees Cook <keescook@chromium.org> wrote: > > Clang's integrated assembler does not allow assembly macros defined > in one inline asm block using the .macro directive to be used across > separate asm blocks. LLVM developers consider this a feature and not a > bug, recommending code refactoring: > > https://bugs.llvm.org/show_bug.cgi?id=19749 > > As binutils doesn't allow macros to be redefined, this change uses > UNDEFINE_MRS_S and UNDEFINE_MSR_S to define corresponding macros > in-place and workaround gcc and clang limitations on redefining macros > across different assembler blocks. > > Specifically, the current state after preprocessing looks like this: > > asm volatile(".macro mXX_s ... .endm"); > void f() > { > asm volatile("mXX_s a, b"); > } > > With GCC, it gives macro redefinition error because sysreg.h is included > in multiple source files, and assembler code for all of them is later > combined for LTO (I've seen an intermediate file with hundreds of > identical definitions). > > With clang, it gives macro undefined error because clang doesn't allow > sharing macros between inline asm statements. > > I also seem to remember catching another sort of undefined error with > GCC due to reordering of macro definition asm statement and generated > asm code for function that uses the macro. > > The solution with defining and undefining for each use, while certainly > not elegant, satisfies both GCC and clang, LTO and non-LTO. > > Co-developed-by: Alex Matveev <alxmtvv@gmail.com> > Co-developed-by: Yury Norov <ynorov@caviumnetworks.com> > Co-developed-by: Sami Tolvanen <samitolvanen@google.com> > Signed-off-by: Kees Cook <keescook@chromium.org> > --- > v4: move to using preprocessor entirely, split constraints for "rZ" case. > v3: added more uses in irqflags, updated commit log, based on > discussion in https://lore.kernel.org/patchwork/patch/851580/ > --- > arch/arm64/include/asm/irqflags.h | 8 ++--- > arch/arm64/include/asm/kvm_hyp.h | 4 +-- > arch/arm64/include/asm/sysreg.h | 51 +++++++++++++++++++++++-------- > 3 files changed, 44 insertions(+), 19 deletions(-) > > diff --git a/arch/arm64/include/asm/irqflags.h b/arch/arm64/include/asm/irqflags.h > index 43d8366c1e87..d359f28765cb 100644 > --- a/arch/arm64/include/asm/irqflags.h > +++ b/arch/arm64/include/asm/irqflags.h > @@ -43,7 +43,7 @@ static inline void arch_local_irq_enable(void) > asm volatile(ALTERNATIVE( > "msr daifclr, #2 // arch_local_irq_enable\n" > "nop", > - "msr_s " __stringify(SYS_ICC_PMR_EL1) ",%0\n" > + __msr_s(SYS_ICC_PMR_EL1) > "dsb sy", > ARM64_HAS_IRQ_PRIO_MASKING) > : > @@ -55,7 +55,7 @@ static inline void arch_local_irq_disable(void) > { > asm volatile(ALTERNATIVE( > "msr daifset, #2 // arch_local_irq_disable", > - "msr_s " __stringify(SYS_ICC_PMR_EL1) ", %0", > + __msr_s(SYS_ICC_PMR_EL1), > ARM64_HAS_IRQ_PRIO_MASKING) > : > : "r" ((unsigned long) GIC_PRIO_IRQOFF) > @@ -86,7 +86,7 @@ static inline unsigned long arch_local_save_flags(void) > "mov %0, %1\n" > "nop\n" > "nop", > - "mrs_s %0, " __stringify(SYS_ICC_PMR_EL1) "\n" > + __mrs_s(SYS_ICC_PMR_EL1) > "ands %1, %1, " __stringify(PSR_I_BIT) "\n" > "csel %0, %0, %2, eq", > ARM64_HAS_IRQ_PRIO_MASKING) > @@ -116,7 +116,7 @@ static inline void arch_local_irq_restore(unsigned long flags) > asm volatile(ALTERNATIVE( > "msr daif, %0\n" > "nop", > - "msr_s " __stringify(SYS_ICC_PMR_EL1) ", %0\n" > + __msr_s(SYS_ICC_PMR_EL1) > "dsb sy", > ARM64_HAS_IRQ_PRIO_MASKING) > : "+r" (flags) > diff --git a/arch/arm64/include/asm/kvm_hyp.h b/arch/arm64/include/asm/kvm_hyp.h > index 4da765f2cca5..f2e681d657bb 100644 > --- a/arch/arm64/include/asm/kvm_hyp.h > +++ b/arch/arm64/include/asm/kvm_hyp.h > @@ -30,7 +30,7 @@ > ({ \ > u64 reg; \ > asm volatile(ALTERNATIVE("mrs %0, " __stringify(r##nvh),\ > - "mrs_s %0, " __stringify(r##vh),\ > + __mrs_s(r##vh), \ > ARM64_HAS_VIRT_HOST_EXTN) \ > : "=r" (reg)); \ > reg; \ > @@ -40,7 +40,7 @@ > do { \ > u64 __val = (u64)(v); \ > asm volatile(ALTERNATIVE("msr " __stringify(r##nvh) ", %x0",\ > - "msr_s " __stringify(r##vh) ", %x0",\ > + __msr_s_x0(r##vh), \ > ARM64_HAS_VIRT_HOST_EXTN) \ > : : "rZ" (__val)); \ > } while (0) > diff --git a/arch/arm64/include/asm/sysreg.h b/arch/arm64/include/asm/sysreg.h > index 5b267dec6194..21d8f6dd4d79 100644 > --- a/arch/arm64/include/asm/sysreg.h > +++ b/arch/arm64/include/asm/sysreg.h > @@ -746,20 +746,45 @@ > #include <linux/build_bug.h> > #include <linux/types.h> > > -asm( > -" .irp num,0,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,16,17,18,19,20,21,22,23,24,25,26,27,28,29,30\n" > -" .equ .L__reg_num_x\\num, \\num\n" > -" .endr\n" > +#define __DEFINE_MRS_MSR_S_REGNUM \ > +" .irp num,0,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,16,17,18,19,20,21,22,23,24,25,26,27,28,29,30\n" \ > +" .equ .L__reg_num_x\\num, \\num\n" \ > +" .endr\n" \ > " .equ .L__reg_num_xzr, 31\n" > -"\n" > -" .macro mrs_s, rt, sreg\n" > - __emit_inst(0xd5200000|(\\sreg)|(.L__reg_num_\\rt)) > + > +#define DEFINE_MRS_S \ > + __DEFINE_MRS_MSR_S_REGNUM \ > +" .macro mrs_s, rt, sreg\n" \ > + __emit_inst(0xd5200000|(\\sreg)|(.L__reg_num_\\rt)) \ > " .endm\n" > -"\n" > -" .macro msr_s, sreg, rt\n" > - __emit_inst(0xd5000000|(\\sreg)|(.L__reg_num_\\rt)) > + > +#define DEFINE_MSR_S \ > + __DEFINE_MRS_MSR_S_REGNUM \ > +" .macro msr_s, sreg, rt\n" \ > + __emit_inst(0xd5000000|(\\sreg)|(.L__reg_num_\\rt)) \ > " .endm\n" > -); > + > +#define UNDEFINE_MRS_S \ > +" .purgem mrs_s\n" > + > +#define UNDEFINE_MSR_S \ > +" .purgem msr_s\n" > + > +#define __mrs_s(r) \ > + DEFINE_MRS_S \ > +" mrs_s %0, " __stringify(r) "\n" \ > + UNDEFINE_MRS_S > + > +#define __msr_s(r) \ > + DEFINE_MSR_S \ > +" msr_s " __stringify(r) ", %0\n" \ > + UNDEFINE_MSR_S > + > +/* For use with "rZ" constraints. */ > +#define __msr_s_x0(r) \ > + DEFINE_MSR_S \ > +" msr_s " __stringify(r) ", %x0\n" \ > + UNDEFINE_MSR_S Cool, thanks for sending this revision; it's much cleaner IMO. I wonder (maybe Ard or Mark know) if we can drop the non-x0-variant of msr and just use the x0-variant everywhere (but with the simpler name of __msr_s)? arch_local_irq_restore, arch_local_save_flags, arch_local_irq_disable, and arch_local_irq_enable all seem to be dealing with unsigned longs for %0, IIUC. > > /* > * Unlike read_cpuid, calls to read_sysreg are never expected to be > @@ -787,13 +812,13 @@ asm( > */ > #define read_sysreg_s(r) ({ \ > u64 __val; \ > - asm volatile("mrs_s %0, " __stringify(r) : "=r" (__val)); \ > + asm volatile(__mrs_s(r) : "=r" (__val)); \ > __val; \ > }) > > #define write_sysreg_s(v, r) do { \ > u64 __val = (u64)(v); \ > - asm volatile("msr_s " __stringify(r) ", %x0" : : "rZ" (__val)); \ > + asm volatile(__msr_s_x0(r) : : "rZ" (__val)); \ > } while (0) > > /* > -- > 2.17.1 > > > -- > Kees Cook
Hi Kees, On Tue, Apr 23, 2019 at 04:26:22PM -0700, Kees Cook wrote: > Clang's integrated assembler does not allow assembly macros defined > in one inline asm block using the .macro directive to be used across > separate asm blocks. LLVM developers consider this a feature and not a > bug, recommending code refactoring: > > https://bugs.llvm.org/show_bug.cgi?id=19749 > > As binutils doesn't allow macros to be redefined, this change uses > UNDEFINE_MRS_S and UNDEFINE_MSR_S to define corresponding macros > in-place and workaround gcc and clang limitations on redefining macros > across different assembler blocks. [...] > diff --git a/arch/arm64/include/asm/irqflags.h b/arch/arm64/include/asm/irqflags.h > index 43d8366c1e87..d359f28765cb 100644 > --- a/arch/arm64/include/asm/irqflags.h > +++ b/arch/arm64/include/asm/irqflags.h > @@ -43,7 +43,7 @@ static inline void arch_local_irq_enable(void) > asm volatile(ALTERNATIVE( > "msr daifclr, #2 // arch_local_irq_enable\n" > "nop", > - "msr_s " __stringify(SYS_ICC_PMR_EL1) ",%0\n" > + __msr_s(SYS_ICC_PMR_EL1) Seeing the __stringify() go is nice! Our assembly happens to use argument 0 by coincidence rather than by deliberate design, so please have the macro take the template, e.g. __msr_s(SYS_ICC_PMR_EL1, "%x0") ... that ways it's obvious as to which asm parameter is used, and this won't complicate refactoring of the assembly (which may shuffle or rename the parameters). Likewise for __mrs_s(), e.g. __mrs_s("%x[va]", SYS_WHATEVER); [...] > +#define __mrs_s(r) \ > + DEFINE_MRS_S \ > +" mrs_s %0, " __stringify(r) "\n" \ > + UNDEFINE_MRS_S > + > +#define __msr_s(r) \ > + DEFINE_MSR_S \ > +" msr_s " __stringify(r) ", %0\n" \ > + UNDEFINE_MSR_S These can be: #define __mrs_s(v, r) \ DEFINE_MRS_S \ " mrs_s " v ", " __stringify(r) "\n" \ UNDEFINE_MRS_S #define __msr_s(r, v) \ DEFINE_MSR_S \ " msr_s " __stringify(r) ", " v "\n" \ UNDEFINE_MSR_S > + > +/* For use with "rZ" constraints. */ > +#define __msr_s_x0(r) \ > + DEFINE_MSR_S \ > +" msr_s " __stringify(r) ", %x0\n" \ > + UNDEFINE_MSR_S I don't think we need this. If we pass the template in, this is solved by the caller, and we could consistently use the x form regardless. Otherwise, I think this is as clean as we can make this short of bumping our minimum binutils version and using the S<Op0>_<...> names. Thanks, Mark.
diff --git a/arch/arm64/include/asm/irqflags.h b/arch/arm64/include/asm/irqflags.h index 43d8366c1e87..d359f28765cb 100644 --- a/arch/arm64/include/asm/irqflags.h +++ b/arch/arm64/include/asm/irqflags.h @@ -43,7 +43,7 @@ static inline void arch_local_irq_enable(void) asm volatile(ALTERNATIVE( "msr daifclr, #2 // arch_local_irq_enable\n" "nop", - "msr_s " __stringify(SYS_ICC_PMR_EL1) ",%0\n" + __msr_s(SYS_ICC_PMR_EL1) "dsb sy", ARM64_HAS_IRQ_PRIO_MASKING) : @@ -55,7 +55,7 @@ static inline void arch_local_irq_disable(void) { asm volatile(ALTERNATIVE( "msr daifset, #2 // arch_local_irq_disable", - "msr_s " __stringify(SYS_ICC_PMR_EL1) ", %0", + __msr_s(SYS_ICC_PMR_EL1), ARM64_HAS_IRQ_PRIO_MASKING) : : "r" ((unsigned long) GIC_PRIO_IRQOFF) @@ -86,7 +86,7 @@ static inline unsigned long arch_local_save_flags(void) "mov %0, %1\n" "nop\n" "nop", - "mrs_s %0, " __stringify(SYS_ICC_PMR_EL1) "\n" + __mrs_s(SYS_ICC_PMR_EL1) "ands %1, %1, " __stringify(PSR_I_BIT) "\n" "csel %0, %0, %2, eq", ARM64_HAS_IRQ_PRIO_MASKING) @@ -116,7 +116,7 @@ static inline void arch_local_irq_restore(unsigned long flags) asm volatile(ALTERNATIVE( "msr daif, %0\n" "nop", - "msr_s " __stringify(SYS_ICC_PMR_EL1) ", %0\n" + __msr_s(SYS_ICC_PMR_EL1) "dsb sy", ARM64_HAS_IRQ_PRIO_MASKING) : "+r" (flags) diff --git a/arch/arm64/include/asm/kvm_hyp.h b/arch/arm64/include/asm/kvm_hyp.h index 4da765f2cca5..f2e681d657bb 100644 --- a/arch/arm64/include/asm/kvm_hyp.h +++ b/arch/arm64/include/asm/kvm_hyp.h @@ -30,7 +30,7 @@ ({ \ u64 reg; \ asm volatile(ALTERNATIVE("mrs %0, " __stringify(r##nvh),\ - "mrs_s %0, " __stringify(r##vh),\ + __mrs_s(r##vh), \ ARM64_HAS_VIRT_HOST_EXTN) \ : "=r" (reg)); \ reg; \ @@ -40,7 +40,7 @@ do { \ u64 __val = (u64)(v); \ asm volatile(ALTERNATIVE("msr " __stringify(r##nvh) ", %x0",\ - "msr_s " __stringify(r##vh) ", %x0",\ + __msr_s_x0(r##vh), \ ARM64_HAS_VIRT_HOST_EXTN) \ : : "rZ" (__val)); \ } while (0) diff --git a/arch/arm64/include/asm/sysreg.h b/arch/arm64/include/asm/sysreg.h index 5b267dec6194..21d8f6dd4d79 100644 --- a/arch/arm64/include/asm/sysreg.h +++ b/arch/arm64/include/asm/sysreg.h @@ -746,20 +746,45 @@ #include <linux/build_bug.h> #include <linux/types.h> -asm( -" .irp num,0,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,16,17,18,19,20,21,22,23,24,25,26,27,28,29,30\n" -" .equ .L__reg_num_x\\num, \\num\n" -" .endr\n" +#define __DEFINE_MRS_MSR_S_REGNUM \ +" .irp num,0,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,16,17,18,19,20,21,22,23,24,25,26,27,28,29,30\n" \ +" .equ .L__reg_num_x\\num, \\num\n" \ +" .endr\n" \ " .equ .L__reg_num_xzr, 31\n" -"\n" -" .macro mrs_s, rt, sreg\n" - __emit_inst(0xd5200000|(\\sreg)|(.L__reg_num_\\rt)) + +#define DEFINE_MRS_S \ + __DEFINE_MRS_MSR_S_REGNUM \ +" .macro mrs_s, rt, sreg\n" \ + __emit_inst(0xd5200000|(\\sreg)|(.L__reg_num_\\rt)) \ " .endm\n" -"\n" -" .macro msr_s, sreg, rt\n" - __emit_inst(0xd5000000|(\\sreg)|(.L__reg_num_\\rt)) + +#define DEFINE_MSR_S \ + __DEFINE_MRS_MSR_S_REGNUM \ +" .macro msr_s, sreg, rt\n" \ + __emit_inst(0xd5000000|(\\sreg)|(.L__reg_num_\\rt)) \ " .endm\n" -); + +#define UNDEFINE_MRS_S \ +" .purgem mrs_s\n" + +#define UNDEFINE_MSR_S \ +" .purgem msr_s\n" + +#define __mrs_s(r) \ + DEFINE_MRS_S \ +" mrs_s %0, " __stringify(r) "\n" \ + UNDEFINE_MRS_S + +#define __msr_s(r) \ + DEFINE_MSR_S \ +" msr_s " __stringify(r) ", %0\n" \ + UNDEFINE_MSR_S + +/* For use with "rZ" constraints. */ +#define __msr_s_x0(r) \ + DEFINE_MSR_S \ +" msr_s " __stringify(r) ", %x0\n" \ + UNDEFINE_MSR_S /* * Unlike read_cpuid, calls to read_sysreg are never expected to be @@ -787,13 +812,13 @@ asm( */ #define read_sysreg_s(r) ({ \ u64 __val; \ - asm volatile("mrs_s %0, " __stringify(r) : "=r" (__val)); \ + asm volatile(__mrs_s(r) : "=r" (__val)); \ __val; \ }) #define write_sysreg_s(v, r) do { \ u64 __val = (u64)(v); \ - asm volatile("msr_s " __stringify(r) ", %x0" : : "rZ" (__val)); \ + asm volatile(__msr_s_x0(r) : : "rZ" (__val)); \ } while (0) /*
Clang's integrated assembler does not allow assembly macros defined in one inline asm block using the .macro directive to be used across separate asm blocks. LLVM developers consider this a feature and not a bug, recommending code refactoring: https://bugs.llvm.org/show_bug.cgi?id=19749 As binutils doesn't allow macros to be redefined, this change uses UNDEFINE_MRS_S and UNDEFINE_MSR_S to define corresponding macros in-place and workaround gcc and clang limitations on redefining macros across different assembler blocks. Specifically, the current state after preprocessing looks like this: asm volatile(".macro mXX_s ... .endm"); void f() { asm volatile("mXX_s a, b"); } With GCC, it gives macro redefinition error because sysreg.h is included in multiple source files, and assembler code for all of them is later combined for LTO (I've seen an intermediate file with hundreds of identical definitions). With clang, it gives macro undefined error because clang doesn't allow sharing macros between inline asm statements. I also seem to remember catching another sort of undefined error with GCC due to reordering of macro definition asm statement and generated asm code for function that uses the macro. The solution with defining and undefining for each use, while certainly not elegant, satisfies both GCC and clang, LTO and non-LTO. Co-developed-by: Alex Matveev <alxmtvv@gmail.com> Co-developed-by: Yury Norov <ynorov@caviumnetworks.com> Co-developed-by: Sami Tolvanen <samitolvanen@google.com> Signed-off-by: Kees Cook <keescook@chromium.org> --- v4: move to using preprocessor entirely, split constraints for "rZ" case. v3: added more uses in irqflags, updated commit log, based on discussion in https://lore.kernel.org/patchwork/patch/851580/ --- arch/arm64/include/asm/irqflags.h | 8 ++--- arch/arm64/include/asm/kvm_hyp.h | 4 +-- arch/arm64/include/asm/sysreg.h | 51 +++++++++++++++++++++++-------- 3 files changed, 44 insertions(+), 19 deletions(-)