Message ID | 20230710055955.36551-1-quic_aiquny@quicinc.com (mailing list archive) |
---|---|
State | New, archived |
Headers | show |
Series | arm64: Add the arm64.nolse_atomics command line option | expand |
Hi-- On 7/9/23 22:59, Maria Yu wrote: > diff --git a/Documentation/admin-guide/kernel-parameters.txt b/Documentation/admin-guide/kernel-parameters.txt > index 85fb0fa5d091..6ad754549f1d 100644 > --- a/Documentation/admin-guide/kernel-parameters.txt > +++ b/Documentation/admin-guide/kernel-parameters.txt > @@ -433,6 +433,8 @@ > arm64.nomops [ARM64] Unconditionally disable Memory Copy and Memory > Set instructions support > > + arm64.nolse_atomic [ARM64] Unconditionally disable LSE Atomic support > + These entries should remain in alphabetical order, so arm64.nolse_atomic should be just after arm64.nobti. Yes, these are not quite in the correct order. :( Thanks. > ataflop= [HW,M68k] > > atarimouse= [HW,MOUSE] Atari Mouse
On 7/10/2023 2:07 PM, Randy Dunlap wrote: > Hi-- > > On 7/9/23 22:59, Maria Yu wrote: >> diff --git a/Documentation/admin-guide/kernel-parameters.txt b/Documentation/admin-guide/kernel-parameters.txt >> index 85fb0fa5d091..6ad754549f1d 100644 >> --- a/Documentation/admin-guide/kernel-parameters.txt >> +++ b/Documentation/admin-guide/kernel-parameters.txt >> @@ -433,6 +433,8 @@ >> arm64.nomops [ARM64] Unconditionally disable Memory Copy and Memory >> Set instructions support >> >> + arm64.nolse_atomic [ARM64] Unconditionally disable LSE Atomic support >> + > > These entries should remain in alphabetical order, so arm64.nolse_atomic > should be just after arm64.nobti. Thanks for the quic feedback, will fix this in next patchset. > > Yes, these are not quite in the correct order. :( > > Thanks. > >> ataflop= [HW,M68k] >> >> atarimouse= [HW,MOUSE] Atari Mouse >
On Mon, 10 Jul 2023 06:59:55 +0100, Maria Yu <quic_aiquny@quicinc.com> wrote: > > In order to be able to disable lse_atomic even if cpu > support it, most likely because of memory controller > cannot deal with the lse atomic instructions, use a > new idreg override to deal with it. In general, the idreg overrides are *not* there to paper over HW bugs. They are there to force the kernel to use or disable a feature for performance reason or to guide the *enabling* of a feature, but not because the HW is broken. The broken status of a HW platform must also be documented so that we know what to expect when we look at, for example, a bad case of memory corruption (something I'd expect to see on a system that only partially implements atomic memory operations). > > Signed-off-by: Maria Yu <quic_aiquny@quicinc.com> > --- > Documentation/admin-guide/kernel-parameters.txt | 2 ++ > arch/arm64/include/asm/cpufeature.h | 1 + > arch/arm64/kernel/cpufeature.c | 4 +++- > arch/arm64/kernel/idreg-override.c | 11 +++++++++++ > 4 files changed, 17 insertions(+), 1 deletion(-) > > diff --git a/Documentation/admin-guide/kernel-parameters.txt b/Documentation/admin-guide/kernel-parameters.txt > index 85fb0fa5d091..6ad754549f1d 100644 > --- a/Documentation/admin-guide/kernel-parameters.txt > +++ b/Documentation/admin-guide/kernel-parameters.txt > @@ -433,6 +433,8 @@ > arm64.nomops [ARM64] Unconditionally disable Memory Copy and Memory > Set instructions support > > + arm64.nolse_atomic [ARM64] Unconditionally disable LSE Atomic support > + 'nolse', or 'noatomic' should be enough. In general, the suffix should be either derived from the FEAT_* name or the idreg field name. > ataflop= [HW,M68k] > > atarimouse= [HW,MOUSE] Atari Mouse > diff --git a/arch/arm64/include/asm/cpufeature.h b/arch/arm64/include/asm/cpufeature.h > index 96e50227f940..9d56dea1fe62 100644 > --- a/arch/arm64/include/asm/cpufeature.h > +++ b/arch/arm64/include/asm/cpufeature.h > @@ -916,6 +916,7 @@ extern struct arm64_ftr_override id_aa64pfr0_override; > extern struct arm64_ftr_override id_aa64pfr1_override; > extern struct arm64_ftr_override id_aa64zfr0_override; > extern struct arm64_ftr_override id_aa64smfr0_override; > +extern struct arm64_ftr_override id_aa64isar0_override; > extern struct arm64_ftr_override id_aa64isar1_override; > extern struct arm64_ftr_override id_aa64isar2_override; > > diff --git a/arch/arm64/kernel/cpufeature.c b/arch/arm64/kernel/cpufeature.c > index f9d456fe132d..9bd766880807 100644 > --- a/arch/arm64/kernel/cpufeature.c > +++ b/arch/arm64/kernel/cpufeature.c > @@ -669,6 +669,7 @@ struct arm64_ftr_override __ro_after_init id_aa64pfr0_override; > struct arm64_ftr_override __ro_after_init id_aa64pfr1_override; > struct arm64_ftr_override __ro_after_init id_aa64zfr0_override; > struct arm64_ftr_override __ro_after_init id_aa64smfr0_override; > +struct arm64_ftr_override __ro_after_init id_aa64isar0_override; > struct arm64_ftr_override __ro_after_init id_aa64isar1_override; > struct arm64_ftr_override __ro_after_init id_aa64isar2_override; > > @@ -721,7 +722,8 @@ static const struct __ftr_reg_entry { > ARM64_FTR_REG(SYS_ID_AA64DFR1_EL1, ftr_raz), > > /* Op1 = 0, CRn = 0, CRm = 6 */ > - ARM64_FTR_REG(SYS_ID_AA64ISAR0_EL1, ftr_id_aa64isar0), > + ARM64_FTR_REG_OVERRIDE(SYS_ID_AA64ISAR0_EL1, ftr_id_aa64isar0, > + &id_aa64isar0_override), > ARM64_FTR_REG_OVERRIDE(SYS_ID_AA64ISAR1_EL1, ftr_id_aa64isar1, > &id_aa64isar1_override), > ARM64_FTR_REG_OVERRIDE(SYS_ID_AA64ISAR2_EL1, ftr_id_aa64isar2, > diff --git a/arch/arm64/kernel/idreg-override.c b/arch/arm64/kernel/idreg-override.c > index 2fe2491b692c..af41ab4f3d94 100644 > --- a/arch/arm64/kernel/idreg-override.c > +++ b/arch/arm64/kernel/idreg-override.c > @@ -105,6 +105,15 @@ static const struct ftr_set_desc pfr1 __initconst = { > }, > }; > > +static const struct ftr_set_desc isar0 __initconst = { > + .name = "id_aa64isar0", > + .override = &id_aa64isar0_override, > + .fields = { > + FIELD("atomic", ID_AA64ISAR0_EL1_ATOMIC_SHIFT, NULL), > + {} > + }, > +}; > + > static const struct ftr_set_desc isar1 __initconst = { > .name = "id_aa64isar1", > .override = &id_aa64isar1_override, > @@ -163,6 +172,7 @@ static const struct ftr_set_desc * const regs[] __initconst = { > &mmfr1, > &pfr0, > &pfr1, > + &isar0, > &isar1, > &isar2, > &smfr0, > @@ -185,6 +195,7 @@ static const struct { > { "arm64.nomops", "id_aa64isar2.mops=0" }, > { "arm64.nomte", "id_aa64pfr1.mte=0" }, > { "nokaslr", "arm64_sw.nokaslr=1" }, > + { "arm64.nolse_atomic", "id_aa64isar0.atomic=0" }, And what of 32bit? M.
On 7/10/2023 3:27 PM, Marc Zyngier wrote: > On Mon, 10 Jul 2023 06:59:55 +0100, > Maria Yu <quic_aiquny@quicinc.com> wrote: >> >> In order to be able to disable lse_atomic even if cpu >> support it, most likely because of memory controller >> cannot deal with the lse atomic instructions, use a >> new idreg override to deal with it. > > In general, the idreg overrides are *not* there to paper over HW bugs. > They are there to force the kernel to use or disable a feature for > performance reason or to guide the *enabling* of a feature, but not > because the HW is broken. > > The broken status of a HW platform must also be documented so that we > know what to expect when we look at, for example, a bad case of memory > corruption (something I'd expect to see on a system that only > partially implements atomic memory operations). > good idea. A noc error would be happened if the lse atomic instruction happened during a memory controller doesn't support lse atomic instructions. I can put the information in next patchset comment message. Pls feel free to let know if there is other place to have this kind of information with. >> >> Signed-off-by: Maria Yu <quic_aiquny@quicinc.com> >> --- >> Documentation/admin-guide/kernel-parameters.txt | 2 ++ >> arch/arm64/include/asm/cpufeature.h | 1 + >> arch/arm64/kernel/cpufeature.c | 4 +++- >> arch/arm64/kernel/idreg-override.c | 11 +++++++++++ >> 4 files changed, 17 insertions(+), 1 deletion(-) >> >> diff --git a/Documentation/admin-guide/kernel-parameters.txt b/Documentation/admin-guide/kernel-parameters.txt >> index 85fb0fa5d091..6ad754549f1d 100644 >> --- a/Documentation/admin-guide/kernel-parameters.txt >> +++ b/Documentation/admin-guide/kernel-parameters.txt >> @@ -433,6 +433,8 @@ >> arm64.nomops [ARM64] Unconditionally disable Memory Copy and Memory >> Set instructions support >> >> + arm64.nolse_atomic [ARM64] Unconditionally disable LSE Atomic support >> + > > 'nolse', or 'noatomic' should be enough. In general, the suffix should > be either derived from the FEAT_* name or the idreg field name. noatomic can be used in next patchset. ID_AA64ISAR0_EL1_ATOMIC_SHIFT > >> ataflop= [HW,M68k] >> >> atarimouse= [HW,MOUSE] Atari Mouse >> diff --git a/arch/arm64/include/asm/cpufeature.h b/arch/arm64/include/asm/cpufeature.h >> index 96e50227f940..9d56dea1fe62 100644 >> --- a/arch/arm64/include/asm/cpufeature.h >> +++ b/arch/arm64/include/asm/cpufeature.h >> @@ -916,6 +916,7 @@ extern struct arm64_ftr_override id_aa64pfr0_override; >> extern struct arm64_ftr_override id_aa64pfr1_override; >> extern struct arm64_ftr_override id_aa64zfr0_override; >> extern struct arm64_ftr_override id_aa64smfr0_override; >> +extern struct arm64_ftr_override id_aa64isar0_override; >> extern struct arm64_ftr_override id_aa64isar1_override; >> extern struct arm64_ftr_override id_aa64isar2_override; >> >> diff --git a/arch/arm64/kernel/cpufeature.c b/arch/arm64/kernel/cpufeature.c >> index f9d456fe132d..9bd766880807 100644 >> --- a/arch/arm64/kernel/cpufeature.c >> +++ b/arch/arm64/kernel/cpufeature.c >> @@ -669,6 +669,7 @@ struct arm64_ftr_override __ro_after_init id_aa64pfr0_override; >> struct arm64_ftr_override __ro_after_init id_aa64pfr1_override; >> struct arm64_ftr_override __ro_after_init id_aa64zfr0_override; >> struct arm64_ftr_override __ro_after_init id_aa64smfr0_override; >> +struct arm64_ftr_override __ro_after_init id_aa64isar0_override; >> struct arm64_ftr_override __ro_after_init id_aa64isar1_override; >> struct arm64_ftr_override __ro_after_init id_aa64isar2_override; >> >> @@ -721,7 +722,8 @@ static const struct __ftr_reg_entry { >> ARM64_FTR_REG(SYS_ID_AA64DFR1_EL1, ftr_raz), >> >> /* Op1 = 0, CRn = 0, CRm = 6 */ >> - ARM64_FTR_REG(SYS_ID_AA64ISAR0_EL1, ftr_id_aa64isar0), >> + ARM64_FTR_REG_OVERRIDE(SYS_ID_AA64ISAR0_EL1, ftr_id_aa64isar0, >> + &id_aa64isar0_override), >> ARM64_FTR_REG_OVERRIDE(SYS_ID_AA64ISAR1_EL1, ftr_id_aa64isar1, >> &id_aa64isar1_override), >> ARM64_FTR_REG_OVERRIDE(SYS_ID_AA64ISAR2_EL1, ftr_id_aa64isar2, >> diff --git a/arch/arm64/kernel/idreg-override.c b/arch/arm64/kernel/idreg-override.c >> index 2fe2491b692c..af41ab4f3d94 100644 >> --- a/arch/arm64/kernel/idreg-override.c >> +++ b/arch/arm64/kernel/idreg-override.c >> @@ -105,6 +105,15 @@ static const struct ftr_set_desc pfr1 __initconst = { >> }, >> }; >> >> +static const struct ftr_set_desc isar0 __initconst = { >> + .name = "id_aa64isar0", >> + .override = &id_aa64isar0_override, >> + .fields = { >> + FIELD("atomic", ID_AA64ISAR0_EL1_ATOMIC_SHIFT, NULL), >> + {} >> + }, >> +}; >> + >> static const struct ftr_set_desc isar1 __initconst = { >> .name = "id_aa64isar1", >> .override = &id_aa64isar1_override, >> @@ -163,6 +172,7 @@ static const struct ftr_set_desc * const regs[] __initconst = { >> &mmfr1, >> &pfr0, >> &pfr1, >> + &isar0, >> &isar1, >> &isar2, >> &smfr0, >> @@ -185,6 +195,7 @@ static const struct { >> { "arm64.nomops", "id_aa64isar2.mops=0" }, >> { "arm64.nomte", "id_aa64pfr1.mte=0" }, >> { "nokaslr", "arm64_sw.nokaslr=1" }, >> + { "arm64.nolse_atomic", "id_aa64isar0.atomic=0" }, > > And what of 32bit? > > M. >
On Mon, 10 Jul 2023 09:19:54 +0100, "Aiqun(Maria) Yu" <quic_aiquny@quicinc.com> wrote: > > On 7/10/2023 3:27 PM, Marc Zyngier wrote: > > On Mon, 10 Jul 2023 06:59:55 +0100, > > Maria Yu <quic_aiquny@quicinc.com> wrote: > >> > >> In order to be able to disable lse_atomic even if cpu > >> support it, most likely because of memory controller > >> cannot deal with the lse atomic instructions, use a > >> new idreg override to deal with it. > > > > In general, the idreg overrides are *not* there to paper over HW bugs. > > They are there to force the kernel to use or disable a feature for > > performance reason or to guide the *enabling* of a feature, but not > > because the HW is broken. > > > > The broken status of a HW platform must also be documented so that we > > know what to expect when we look at, for example, a bad case of memory > > corruption (something I'd expect to see on a system that only > > partially implements atomic memory operations). > > > > good idea. A noc error would be happened if the lse atomic instruction > happened during a memory controller doesn't support lse atomic > instructions. > I can put the information in next patchset comment message. Pls feel > free to let know if there is other place to have this kind of > information with. For a start, Documentation/arch/arm64/silicon-errata.rst should contain an entry for the actual erratum, and a description of the symptoms of the issue (you're mentioning a "noc error": how is that reported to the CPU?). The workaround should also be detected at runtime -- we cannot rely on the user to provide a command-line argument to disable an essential feature that anyone has taken for granted for most of a decade... [...] > >> @@ -185,6 +195,7 @@ static const struct { > >> { "arm64.nomops", "id_aa64isar2.mops=0" }, > >> { "arm64.nomte", "id_aa64pfr1.mte=0" }, > >> { "nokaslr", "arm64_sw.nokaslr=1" }, > >> + { "arm64.nolse_atomic", "id_aa64isar0.atomic=0" }, > > > > And what of 32bit? This particular question still stands, as it is likely to affect VMs. M.
On Mon, Jul 10, 2023 at 01:59:55PM +0800, Maria Yu wrote: > In order to be able to disable lse_atomic even if cpu > support it, most likely because of memory controller > cannot deal with the lse atomic instructions, use a > new idreg override to deal with it. This should not be a problem for cacheable memory though, right? Given that Linux does not issue atomic operations to non-cacheable mappings, I'm struggling to see why there's a problem here. Please can you explain the problem that you are trying to solve? Will
On 7/10/2023 5:31 PM, Marc Zyngier wrote: > On Mon, 10 Jul 2023 09:19:54 +0100, > "Aiqun(Maria) Yu" <quic_aiquny@quicinc.com> wrote: >> >> On 7/10/2023 3:27 PM, Marc Zyngier wrote: >>> On Mon, 10 Jul 2023 06:59:55 +0100, >>> Maria Yu <quic_aiquny@quicinc.com> wrote: >>>> >>>> In order to be able to disable lse_atomic even if cpu >>>> support it, most likely because of memory controller >>>> cannot deal with the lse atomic instructions, use a >>>> new idreg override to deal with it. >>> >>> In general, the idreg overrides are *not* there to paper over HW bugs. >>> They are there to force the kernel to use or disable a feature for >>> performance reason or to guide the *enabling* of a feature, but not >>> because the HW is broken. >>> >>> The broken status of a HW platform must also be documented so that we >>> know what to expect when we look at, for example, a bad case of memory >>> corruption (something I'd expect to see on a system that only >>> partially implements atomic memory operations). >>> >> >> good idea. A noc error would be happened if the lse atomic instruction >> happened during a memory controller doesn't support lse atomic >> instructions. >> I can put the information in next patchset comment message. Pls feel >> free to let know if there is other place to have this kind of >> information with. > > For a start, Documentation/arch/arm64/silicon-errata.rst should > contain an entry for the actual erratum, and a description of the > symptoms of the issue (you're mentioning a "noc error": how is that > reported to the CPU?). This is not a cpu's errata as my understanding. It is the DDR subsystem which don't have the LSE atomic feature supported. > > The workaround should also be detected at runtime -- we cannot rely on > the user to provide a command-line argument to disable an essential > feature that anyone has taken for granted for most of a decade... We are also seeking help from DDR Subsystem POC to see whether it is possible to detect the LSE atomic feature support or not at runtime. In my opinion, LSE atomic is a system level feature instead of a cpu only feature. So currently solution we is that even if cpu support lse atomic, but it still needed to be disabled if the cpu working with a lse atomic not support by current system's DDR subsystem. > > [...] > >>>> @@ -185,6 +195,7 @@ static const struct { >>>> { "arm64.nomops", "id_aa64isar2.mops=0" }, >>>> { "arm64.nomte", "id_aa64pfr1.mte=0" }, >>>> { "nokaslr", "arm64_sw.nokaslr=1" }, >>>> + { "arm64.nolse_atomic", "id_aa64isar0.atomic=0" }, >>> >>> And what of 32bit? > > This particular question still stands, as it is likely to affect VMs. > > M. >
On 7/10/2023 5:37 PM, Will Deacon wrote: > On Mon, Jul 10, 2023 at 01:59:55PM +0800, Maria Yu wrote: >> In order to be able to disable lse_atomic even if cpu >> support it, most likely because of memory controller >> cannot deal with the lse atomic instructions, use a >> new idreg override to deal with it. > > This should not be a problem for cacheable memory though, right? > > Given that Linux does not issue atomic operations to non-cacheable mappings, > I'm struggling to see why there's a problem here. The lse atomic operation can be issued on non-cacheable mappings as well. Even if it is cached data, with different CPUECTLR_EL1 setting, it can also do far lse atomic operations. > > Please can you explain the problem that you are trying to solve? In our current case, it is a 100% reproducible issue that happened for uncached data, the cpu which support LSE atomic, but the system's DDR subsystem is not support this and caused a NOC error and thus synchronous external abort happened. > > Will
On Tue, 11 Jul 2023 04:30:44 +0100, "Aiqun(Maria) Yu" <quic_aiquny@quicinc.com> wrote: > > On 7/10/2023 5:31 PM, Marc Zyngier wrote: > > On Mon, 10 Jul 2023 09:19:54 +0100, > > "Aiqun(Maria) Yu" <quic_aiquny@quicinc.com> wrote: > >> > >> On 7/10/2023 3:27 PM, Marc Zyngier wrote: > >>> On Mon, 10 Jul 2023 06:59:55 +0100, > >>> Maria Yu <quic_aiquny@quicinc.com> wrote: > >>>> > >>>> In order to be able to disable lse_atomic even if cpu > >>>> support it, most likely because of memory controller > >>>> cannot deal with the lse atomic instructions, use a > >>>> new idreg override to deal with it. > >>> > >>> In general, the idreg overrides are *not* there to paper over HW bugs. > >>> They are there to force the kernel to use or disable a feature for > >>> performance reason or to guide the *enabling* of a feature, but not > >>> because the HW is broken. > >>> > >>> The broken status of a HW platform must also be documented so that we > >>> know what to expect when we look at, for example, a bad case of memory > >>> corruption (something I'd expect to see on a system that only > >>> partially implements atomic memory operations). > >>> > >> > >> good idea. A noc error would be happened if the lse atomic instruction > >> happened during a memory controller doesn't support lse atomic > >> instructions. > >> I can put the information in next patchset comment message. Pls feel > >> free to let know if there is other place to have this kind of > >> information with. > > > > For a start, Documentation/arch/arm64/silicon-errata.rst should > > contain an entry for the actual erratum, and a description of the > > symptoms of the issue (you're mentioning a "noc error": how is that > > reported to the CPU?). > > This is not a cpu's errata as my understanding. It is the DDR > subsystem which don't have the LSE atomic feature supported. CPU or not doesn't matter. We also track system errata. > > > > The workaround should also be detected at runtime -- we cannot rely on > > the user to provide a command-line argument to disable an essential > > feature that anyone has taken for granted for most of a decade... > > We are also seeking help from DDR Subsystem POC to see whether it is > possible to detect the LSE atomic feature support or not at runtime. Keying it off a DT compatible (or something similar) would work. > In my opinion, LSE atomic is a system level feature instead of a cpu > only feature. So currently solution we is that even if cpu support lse > atomic, but it still needed to be disabled if the cpu working with a > lse atomic not support by current system's DDR subsystem. In the absence of a detection mechanism for anything past the CPU, this is a moot point. At this stage, this is a bit like saying "writing to memory is a system thing, not only a CPU feature". And this also breaks KVM if these CPUs don't have FWB, as a guest can always map a piece of memory as non-cacheable, and trigger the issue you describe in your reply to Will, even if you hide the atomics on the host. M.
On Tue, Jul 11, 2023 at 12:02:22PM +0800, Aiqun(Maria) Yu wrote: > On 7/10/2023 5:37 PM, Will Deacon wrote: > > On Mon, Jul 10, 2023 at 01:59:55PM +0800, Maria Yu wrote: > > > In order to be able to disable lse_atomic even if cpu > > > support it, most likely because of memory controller > > > cannot deal with the lse atomic instructions, use a > > > new idreg override to deal with it. > > > > This should not be a problem for cacheable memory though, right? > > > > Given that Linux does not issue atomic operations to non-cacheable mappings, > > I'm struggling to see why there's a problem here. > > The lse atomic operation can be issued on non-cacheable mappings as well. > Even if it is cached data, with different CPUECTLR_EL1 setting, it can also > do far lse atomic operations. Please can you point me to the place in the kernel sources where this happens? The architecture doesn't guarantee that atomics to non-cacheable mappings will work, see "B2.2.6 Possible implementation restrictions on using atomic instructions". Linux, therefore, doesn't issue atomics to non-cacheable memory. > > Please can you explain the problem that you are trying to solve? > > In our current case, it is a 100% reproducible issue that happened for > uncached data, the cpu which support LSE atomic, but the system's DDR > subsystem is not support this and caused a NOC error and thus synchronous > external abort happened. So? The Arm ARM allows this behaviour and Linux shouldn't run into it. Will
On 7/11/2023 2:57 PM, Marc Zyngier wrote: > On Tue, 11 Jul 2023 04:30:44 +0100, > "Aiqun(Maria) Yu" <quic_aiquny@quicinc.com> wrote: >> >> On 7/10/2023 5:31 PM, Marc Zyngier wrote: >>> On Mon, 10 Jul 2023 09:19:54 +0100, >>> "Aiqun(Maria) Yu" <quic_aiquny@quicinc.com> wrote: >>>> >>>> On 7/10/2023 3:27 PM, Marc Zyngier wrote: >>>>> On Mon, 10 Jul 2023 06:59:55 +0100, >>>>> Maria Yu <quic_aiquny@quicinc.com> wrote: >>>>>> >>>>>> In order to be able to disable lse_atomic even if cpu >>>>>> support it, most likely because of memory controller >>>>>> cannot deal with the lse atomic instructions, use a >>>>>> new idreg override to deal with it. >>>>> >>>>> In general, the idreg overrides are *not* there to paper over HW bugs. >>>>> They are there to force the kernel to use or disable a feature for >>>>> performance reason or to guide the *enabling* of a feature, but not >>>>> because the HW is broken. >>>>> >>>>> The broken status of a HW platform must also be documented so that we >>>>> know what to expect when we look at, for example, a bad case of memory >>>>> corruption (something I'd expect to see on a system that only >>>>> partially implements atomic memory operations). >>>>> >>>> >>>> good idea. A noc error would be happened if the lse atomic instruction >>>> happened during a memory controller doesn't support lse atomic >>>> instructions. >>>> I can put the information in next patchset comment message. Pls feel >>>> free to let know if there is other place to have this kind of >>>> information with. >>> >>> For a start, Documentation/arch/arm64/silicon-errata.rst should >>> contain an entry for the actual erratum, and a description of the >>> symptoms of the issue (you're mentioning a "noc error": how is that >>> reported to the CPU?). >> >> This is not a cpu's errata as my understanding. It is the DDR >> subsystem which don't have the LSE atomic feature supported. > > CPU or not doesn't matter. We also track system errata. Thank you for clarify on this. If I am correct understanding, are you suggesting system errata with DT seperate compatible (or similar) to runtime disable this feature instead of idreg override with arm64.nolse options? While it is better to finally affect the host arm64_ftr_regs value since it can also derived to kvm sys reg as well. > >>> >>> The workaround should also be detected at runtime -- we cannot rely on >>> the user to provide a command-line argument to disable an essential >>> feature that anyone has taken for granted for most of a decade... >> >> We are also seeking help from DDR Subsystem POC to see whether it is >> possible to detect the LSE atomic feature support or not at runtime. > > Keying it off a DT compatible (or something similar) would work. > >> In my opinion, LSE atomic is a system level feature instead of a cpu >> only feature. So currently solution we is that even if cpu support lse >> atomic, but it still needed to be disabled if the cpu working with a >> lse atomic not support by current system's DDR subsystem. > > In the absence of a detection mechanism for anything past the CPU, > this is a moot point. At this stage, this is a bit like saying > "writing to memory is a system thing, not only a CPU feature". > > And this also breaks KVM if these CPUs don't have FWB, as a guest can > always map a piece of memory as non-cacheable, and trigger the issue > you describe in your reply to Will, even if you hide the atomics on > the host. > For the KVM part, per my understanding, as long as the current feature id being overriden, the KVM system also get the current vcpu without the lse atomic feature enabled. KVM vcpu will read the sys reg from host arm64_ftr_regs which is already been controled by the idreg_overrides. check reference from: https://elixir.bootlin.com/linux/v6.5-rc1/source/arch/arm64/kernel/cpufeature.c#L680 https://elixir.bootlin.com/linux/v6.5-rc1/source/arch/arm64/kvm/sys_regs.c#L1360 > M. >
On 7/11/2023 4:22 PM, Will Deacon wrote: > On Tue, Jul 11, 2023 at 12:02:22PM +0800, Aiqun(Maria) Yu wrote: >> On 7/10/2023 5:37 PM, Will Deacon wrote: >>> On Mon, Jul 10, 2023 at 01:59:55PM +0800, Maria Yu wrote: >>>> In order to be able to disable lse_atomic even if cpu >>>> support it, most likely because of memory controller >>>> cannot deal with the lse atomic instructions, use a >>>> new idreg override to deal with it. >>> >>> This should not be a problem for cacheable memory though, right? >>> >>> Given that Linux does not issue atomic operations to non-cacheable mappings, >>> I'm struggling to see why there's a problem here. >> >> The lse atomic operation can be issued on non-cacheable mappings as well. >> Even if it is cached data, with different CPUECTLR_EL1 setting, it can also >> do far lse atomic operations. > > Please can you point me to the place in the kernel sources where this > happens? The architecture doesn't guarantee that atomics to non-cacheable > mappings will work, see "B2.2.6 Possible implementation restrictions on > using atomic instructions". Linux, therefore, doesn't issue atomics > to non-cacheable memory. We encounter the issue on third party kernel modules and third party apps instead of linux kernel itself. This is a tradeoff of performance and stability. Per my understanding, options can be used to enable the lse_atomic to have the most performance cared system, and disable the lse_atomic by stability cared most system. > >>> Please can you explain the problem that you are trying to solve? >> >> In our current case, it is a 100% reproducible issue that happened for >> uncached data, the cpu which support LSE atomic, but the system's DDR >> subsystem is not support this and caused a NOC error and thus synchronous >> external abort happened. > > So? The Arm ARM allows this behaviour and Linux shouldn't run into it. > > Will
On Tue, Jul 11, 2023 at 06:15:49PM +0800, Aiqun(Maria) Yu wrote: > On 7/11/2023 4:22 PM, Will Deacon wrote: > > On Tue, Jul 11, 2023 at 12:02:22PM +0800, Aiqun(Maria) Yu wrote: > > > On 7/10/2023 5:37 PM, Will Deacon wrote: > > > > On Mon, Jul 10, 2023 at 01:59:55PM +0800, Maria Yu wrote: > > > > > In order to be able to disable lse_atomic even if cpu > > > > > support it, most likely because of memory controller > > > > > cannot deal with the lse atomic instructions, use a > > > > > new idreg override to deal with it. > > > > > > > > This should not be a problem for cacheable memory though, right? > > > > > > > > Given that Linux does not issue atomic operations to non-cacheable mappings, > > > > I'm struggling to see why there's a problem here. > > > > > > The lse atomic operation can be issued on non-cacheable mappings as well. > > > Even if it is cached data, with different CPUECTLR_EL1 setting, it can also > > > do far lse atomic operations. > > > > Please can you point me to the place in the kernel sources where this > > happens? The architecture doesn't guarantee that atomics to non-cacheable > > mappings will work, see "B2.2.6 Possible implementation restrictions on > > using atomic instructions". Linux, therefore, doesn't issue atomics > > to non-cacheable memory. > > We encounter the issue on third party kernel modules and third party apps > instead of linux kernel itself. Great, so there's nothing to do in the kernel then! The third party code needs to be modified not to use atomic instructions with non-cacheable mappings. No need to involve us with that. > This is a tradeoff of performance and stability. Per my understanding, > options can be used to enable the lse_atomic to have the most performance > cared system, and disable the lse_atomic by stability cared most system. Where do livelock and starvation fit in with "stability"? Disabling LSE atomics for things like qspinlock and the scheduler just because of some badly written third-party code isn't much of a tradeoff. Will
On Tue, Jul 11, 2023 at 06:15:49PM +0800, Aiqun(Maria) Yu wrote: > On 7/11/2023 4:22 PM, Will Deacon wrote: > > On Tue, Jul 11, 2023 at 12:02:22PM +0800, Aiqun(Maria) Yu wrote: > > > On 7/10/2023 5:37 PM, Will Deacon wrote: > > > > On Mon, Jul 10, 2023 at 01:59:55PM +0800, Maria Yu wrote: > > > > > In order to be able to disable lse_atomic even if cpu > > > > > support it, most likely because of memory controller > > > > > cannot deal with the lse atomic instructions, use a > > > > > new idreg override to deal with it. > > > > > > > > This should not be a problem for cacheable memory though, right? > > > > > > > > Given that Linux does not issue atomic operations to non-cacheable mappings, > > > > I'm struggling to see why there's a problem here. > > > > > > The lse atomic operation can be issued on non-cacheable mappings as well. > > > Even if it is cached data, with different CPUECTLR_EL1 setting, it can also > > > do far lse atomic operations. > > > > Please can you point me to the place in the kernel sources where this > > happens? The architecture doesn't guarantee that atomics to non-cacheable > > mappings will work, see "B2.2.6 Possible implementation restrictions on > > using atomic instructions". Linux, therefore, doesn't issue atomics > > to non-cacheable memory. > > We encounter the issue on third party kernel modules Which kernel modules? Those modules are clearly broken; as Will has already said, the architecture says doing atomics to non-cacheable memory can result in external aborts, and that's exaclty the behaviour that you're reporting as a problem. This is working *as designed*. Note that the same is true for LDXR+STXR; so just hiding LSE doesn't make sense: if the code falls back to LDXR+STXR it still suffers from the exact same problem. Regardless, hiding bugs in out-of-tree code is not a justification for changing the upstream kernel. > and third party apps instead of linux kernel itself. Which apps? Why are those apps using non-cacheable memory? Why are those apps trying to perform atomics to non-cacheable memory? > This is a tradeoff of performance and stability. Per my understanding, > options can be used to enable the lse_atomic to have the most performance > cared system, and disable the lse_atomic by stability cared most system. I think that's a misrepresentation of this patch. This patch disables a feature to *hide* bugs in out-of-tree kernel modules and userspace software. It's not about making the system more stable, it's about making broken code appear to work. The LSE atomics aren't just about performance. They're significantly fairer than LDXR+STXR in many practical situations, and contribute to the stability of the system. Thanks, Mark. > > > > Please can you explain the problem that you are trying to solve? > > > > > > In our current case, it is a 100% reproducible issue that happened for > > > uncached data, the cpu which support LSE atomic, but the system's DDR > > > subsystem is not support this and caused a NOC error and thus synchronous > > > external abort happened. > > > > So? The Arm ARM allows this behaviour and Linux shouldn't run into it. > > > > Will > > -- > Thx and BRs, > Aiqun(Maria) Yu >
On Tue, 11 Jul 2023 11:12:48 +0100, "Aiqun(Maria) Yu" <quic_aiquny@quicinc.com> wrote: > > For the KVM part, per my understanding, as long as the current feature > id being overriden, the KVM system also get the current vcpu without > the lse atomic feature enabled. > KVM vcpu will read the sys reg from host arm64_ftr_regs which is > already been controled by the idreg_overrides. You're completely missing the point. The guest is free to map memory as non-cacheable *and* to use LSE atomics even if the idregs pretend this is not available. At which point the HW throws a fit and the system is dead. Is that acceptable? Of course not. So there are two aspects to your problem: - for Linux, there is nothing to do: the kernel will correctly behave, and as long as you don't expose non-cacheable memory to userspace. Out of tree drivers are none of our concern here. - for guests, it looks like the HW doesn't provide the basic requirements for virtualisation, and you should always disable KVM on this HW (or even better, enter the kernel at EL1). In both cases, nothing to do in the kernel, which is good news. M.
On 7/11/2023 6:38 PM, Marc Zyngier wrote: > On Tue, 11 Jul 2023 11:12:48 +0100, > "Aiqun(Maria) Yu" <quic_aiquny@quicinc.com> wrote: >> >> For the KVM part, per my understanding, as long as the current feature >> id being overriden, the KVM system also get the current vcpu without >> the lse atomic feature enabled. >> KVM vcpu will read the sys reg from host arm64_ftr_regs which is >> already been controled by the idreg_overrides. > > You're completely missing the point. > > The guest is free to map memory as non-cacheable *and* to use LSE > atomics even if the idregs pretend this is not available. At which The guest also can have the current linux kernel mechanism of LSE ATOMIC way. +----------------------------+ | | | Read the cpu feature IDs | +----------------------------+ v +----------------------------+ +-------------------+ | | Y | Use lse atomic ins| | if lse atomic supported | -- | | +----------------------------+ +-------------------+ v N +----------------------------+ | Use r/stxr + atomic ins | | | +----------------------------+ Just like other KVM vcpu cpu features, lse atomic can be a feature inherit from the pysical cpu features for the KVM vcpus. > point the HW throws a fit and the system is dead. Is that acceptable? > Of course not. > The current patchset is try to have the ability to *kind of free* to not make system dead. Since currently linux kernel already have the runtime patching of lse atomic ops, we are trying to have user have option to re-use the switch of system_uses_lse_atomics(). #define __lse_ll_sc_body(op, ...) \ ({ \ system_uses_lse_atomics() ? \ __lse_##op(__VA_ARGS__) : \ __ll_sc_##op(__VA_ARGS__); \ }) > So there are two aspects to your problem: > > - for Linux, there is nothing to do: the kernel will correctly behave, > and as long as you don't expose non-cacheable memory to userspace. > Out of tree drivers are none of our concern here. > For Linux kernel, we have provide the In-line patching at runtime, and all third party kernel modules are built with those in-line patching as well. if we can have an option, the current system can still run those third party kernel modules without system crash. /* In-line patching at runtime */ #define ARM64_LSE_ATOMIC_INSN(llsc, lse) \ ALTERNATIVE(llsc, __LSE_PREAMBLE lse, ARM64_HAS_LSE_ATOMICS) static __always_inline bool system_uses_lse_atomics(void) { return alternative_has_cap_likely(ARM64_HAS_LSE_ATOMICS); } > - for guests, it looks like the HW doesn't provide the basic > requirements for virtualisation, and you should always disable KVM > on this HW (or even better, enter the kernel at EL1). > I can see that KVM can still be supported even if current phisical cpu don't have emulated vcpu features. We can only disable the current vcpu feature which exposed to KVM guest. > In both cases, nothing to do in the kernel, which is good news. > > M. >
On 7/11/2023 6:25 PM, Will Deacon wrote: > On Tue, Jul 11, 2023 at 06:15:49PM +0800, Aiqun(Maria) Yu wrote: >> On 7/11/2023 4:22 PM, Will Deacon wrote: >>> On Tue, Jul 11, 2023 at 12:02:22PM +0800, Aiqun(Maria) Yu wrote: >>>> On 7/10/2023 5:37 PM, Will Deacon wrote: >>>>> On Mon, Jul 10, 2023 at 01:59:55PM +0800, Maria Yu wrote: >>>>>> In order to be able to disable lse_atomic even if cpu >>>>>> support it, most likely because of memory controller >>>>>> cannot deal with the lse atomic instructions, use a >>>>>> new idreg override to deal with it. >>>>> >>>>> This should not be a problem for cacheable memory though, right? >>>>> >>>>> Given that Linux does not issue atomic operations to non-cacheable mappings, >>>>> I'm struggling to see why there's a problem here. >>>> >>>> The lse atomic operation can be issued on non-cacheable mappings as well. >>>> Even if it is cached data, with different CPUECTLR_EL1 setting, it can also >>>> do far lse atomic operations. >>> >>> Please can you point me to the place in the kernel sources where this >>> happens? The architecture doesn't guarantee that atomics to non-cacheable >>> mappings will work, see "B2.2.6 Possible implementation restrictions on >>> using atomic instructions". Linux, therefore, doesn't issue atomics >>> to non-cacheable memory. >> >> We encounter the issue on third party kernel modules and third party apps >> instead of linux kernel itself. > > Great, so there's nothing to do in the kernel then! > > The third party code needs to be modified not to use atomic instructions > with non-cacheable mappings. No need to involve us with that. > >> This is a tradeoff of performance and stability. Per my understanding, >> options can be used to enable the lse_atomic to have the most performance >> cared system, and disable the lse_atomic by stability cared most system. > > Where do livelock and starvation fit in with "stability"? Disabling LSE > atomics for things like qspinlock and the scheduler just because of some > badly written third-party code isn't much of a tradeoff. We also have requirement to have cpus/system fully support lse atomic and cpus/system not fully support lse atomic with a generic kernel image. Same kernel module wanted to be used by lse atomic fully support cpu and not fully support cpu/system as well. That's why we want to have a runtime option here. > > Will
On Wed, 12 Jul 2023 03:47:55 +0100, "Aiqun(Maria) Yu" <quic_aiquny@quicinc.com> wrote: > > On 7/11/2023 6:38 PM, Marc Zyngier wrote: > > On Tue, 11 Jul 2023 11:12:48 +0100, > > "Aiqun(Maria) Yu" <quic_aiquny@quicinc.com> wrote: > >> > >> For the KVM part, per my understanding, as long as the current feature > >> id being overriden, the KVM system also get the current vcpu without > >> the lse atomic feature enabled. > >> KVM vcpu will read the sys reg from host arm64_ftr_regs which is > >> already been controled by the idreg_overrides. > > > > You're completely missing the point. > > > > The guest is free to map memory as non-cacheable *and* to use LSE > > atomics even if the idregs pretend this is not available. At which > The guest also can have the current linux kernel mechanism of LSE > ATOMIC way. [snip useless diagrams] Yes, the guest can do the right thing. The guest, a totally unprivileged piece of SW, can also ignore the idregs and take the whole machine down because your HW is broken. > Just like other KVM vcpu cpu features, lse atomic can be a feature > inherit from the pysical cpu features for the KVM vcpus. See above. Your reasoning applies to a well behaved guest, which is the *wrong* way to reason about these things. M.
On Wed, Jul 12, 2023 at 11:09:10AM +0800, Aiqun(Maria) Yu wrote: > On 7/11/2023 6:25 PM, Will Deacon wrote: > > On Tue, Jul 11, 2023 at 06:15:49PM +0800, Aiqun(Maria) Yu wrote: > > > On 7/11/2023 4:22 PM, Will Deacon wrote: > > > > On Tue, Jul 11, 2023 at 12:02:22PM +0800, Aiqun(Maria) Yu wrote: > > > > > On 7/10/2023 5:37 PM, Will Deacon wrote: > > > > > > On Mon, Jul 10, 2023 at 01:59:55PM +0800, Maria Yu wrote: > > > > > > > In order to be able to disable lse_atomic even if cpu > > > > > > > support it, most likely because of memory controller > > > > > > > cannot deal with the lse atomic instructions, use a > > > > > > > new idreg override to deal with it. > > > > > > > > > > > > This should not be a problem for cacheable memory though, right? > > > > > > > > > > > > Given that Linux does not issue atomic operations to non-cacheable mappings, > > > > > > I'm struggling to see why there's a problem here. > > > > > > > > > > The lse atomic operation can be issued on non-cacheable mappings as well. > > > > > Even if it is cached data, with different CPUECTLR_EL1 setting, it can also > > > > > do far lse atomic operations. > > > > > > > > Please can you point me to the place in the kernel sources where this > > > > happens? The architecture doesn't guarantee that atomics to non-cacheable > > > > mappings will work, see "B2.2.6 Possible implementation restrictions on > > > > using atomic instructions". Linux, therefore, doesn't issue atomics > > > > to non-cacheable memory. > > > > > > We encounter the issue on third party kernel modules and third party apps > > > instead of linux kernel itself. > > > > Great, so there's nothing to do in the kernel then! > > > > The third party code needs to be modified not to use atomic instructions > > with non-cacheable mappings. No need to involve us with that. > > > > This is a tradeoff of performance and stability. Per my understanding, > > > options can be used to enable the lse_atomic to have the most performance > > > cared system, and disable the lse_atomic by stability cared most system. > > > > Where do livelock and starvation fit in with "stability"? Disabling LSE > > atomics for things like qspinlock and the scheduler just because of some > > badly written third-party code isn't much of a tradeoff. > We also have requirement to have cpus/system fully support lse atomic and > cpus/system not fully support lse atomic with a generic kernel image. Who *specifically* has this requirement (i.e. what does 'we' mean here)? The upstream kernel does not require that atomics work on non-cacheable memory, and saying "The company I work for want this" doesn't change that. AFAICT the system here is architecturally compliant, and what you're relying upon something that the architecture doesn't guarantee, and Linux doesn't guarantee. > Same kernel module wanted to be used by lse atomic fully support cpu and not > fully support cpu/system as well. Which kernel modules *specifically* need to do atomics to non-cacheable memory? > That's why we want to have a runtime option here. As per other replies, a runtime option doesn't solve the issue you have described, and it will adversely affect the system in other ways (e.g. the livelock and starvation issues will mentioned, which we have seen with LDXR+STXR atomics). Thanks, Mark.
On 7/12/2023 3:29 PM, Marc Zyngier wrote: > On Wed, 12 Jul 2023 03:47:55 +0100, > "Aiqun(Maria) Yu" <quic_aiquny@quicinc.com> wrote: >> >> On 7/11/2023 6:38 PM, Marc Zyngier wrote: >>> On Tue, 11 Jul 2023 11:12:48 +0100, >>> "Aiqun(Maria) Yu" <quic_aiquny@quicinc.com> wrote: >>>> >>>> For the KVM part, per my understanding, as long as the current feature >>>> id being overriden, the KVM system also get the current vcpu without >>>> the lse atomic feature enabled. >>>> KVM vcpu will read the sys reg from host arm64_ftr_regs which is >>>> already been controled by the idreg_overrides. >>> >>> You're completely missing the point. >>> >>> The guest is free to map memory as non-cacheable *and* to use LSE >>> atomics even if the idregs pretend this is not available. At which >> The guest also can have the current linux kernel mechanism of LSE >> ATOMIC way. > > [snip useless diagrams] > > Yes, the guest can do the right thing. The guest, a totally > unprivileged piece of SW, can also ignore the idregs and take the > whole machine down because your HW is broken. > if the guest ignore the idregs, it is not supported by the current Linux KVM id reg emulation as well. The similar rule is applied to other cpu feature as well. So it can be an expected machine down because of this. We want to support/utilize the current HW with current inline runtime patching for lse atomic ops. >> Just like other KVM vcpu cpu features, lse atomic can be a feature >> inherit from the pysical cpu features for the KVM vcpus. > > See above. Your reasoning applies to a well behaved guest, which is > the *wrong* way to reason about these things. The feature supported is not always that *freely* even for current cpu features as well. Our current target is that the software can utilize the HW as best as software can. The current HW can be possible with Generic common Image with other cpu which support lse atomic. So the Image can have inline runtime patching for lse atomic operations. And from software side it can have option to support this. For example, for current newer memory controller the far lse atomic operations is supported, and the atomic operation is not limited to non-cached memory mapping as well. Also the lse atomic instead of FWB performance in specific scenarios can be different with current hardware design as well. we are trying to do possible improvement with HW design change instead of ruin it. Feel free to comment if it is not same understanding. > > M. >
On 7/12/2023 3:36 PM, Mark Rutland wrote: > On Wed, Jul 12, 2023 at 11:09:10AM +0800, Aiqun(Maria) Yu wrote: >> On 7/11/2023 6:25 PM, Will Deacon wrote: >>> On Tue, Jul 11, 2023 at 06:15:49PM +0800, Aiqun(Maria) Yu wrote: >>>> On 7/11/2023 4:22 PM, Will Deacon wrote: >>>>> On Tue, Jul 11, 2023 at 12:02:22PM +0800, Aiqun(Maria) Yu wrote: >>>>>> On 7/10/2023 5:37 PM, Will Deacon wrote: >>>>>>> On Mon, Jul 10, 2023 at 01:59:55PM +0800, Maria Yu wrote: >>>>>>>> In order to be able to disable lse_atomic even if cpu >>>>>>>> support it, most likely because of memory controller >>>>>>>> cannot deal with the lse atomic instructions, use a >>>>>>>> new idreg override to deal with it. >>>>>>> >>>>>>> This should not be a problem for cacheable memory though, right? >>>>>>> >>>>>>> Given that Linux does not issue atomic operations to non-cacheable mappings, >>>>>>> I'm struggling to see why there's a problem here. >>>>>> >>>>>> The lse atomic operation can be issued on non-cacheable mappings as well. >>>>>> Even if it is cached data, with different CPUECTLR_EL1 setting, it can also >>>>>> do far lse atomic operations. >>>>> >>>>> Please can you point me to the place in the kernel sources where this >>>>> happens? The architecture doesn't guarantee that atomics to non-cacheable >>>>> mappings will work, see "B2.2.6 Possible implementation restrictions on >>>>> using atomic instructions". Linux, therefore, doesn't issue atomics >>>>> to non-cacheable memory. >>>> >>>> We encounter the issue on third party kernel modules and third party apps >>>> instead of linux kernel itself. >>> >>> Great, so there's nothing to do in the kernel then! >>> >>> The third party code needs to be modified not to use atomic instructions >>> with non-cacheable mappings. No need to involve us with that. >> >>>> This is a tradeoff of performance and stability. Per my understanding, >>>> options can be used to enable the lse_atomic to have the most performance >>>> cared system, and disable the lse_atomic by stability cared most system. >>> >>> Where do livelock and starvation fit in with "stability"? Disabling LSE >>> atomics for things like qspinlock and the scheduler just because of some >>> badly written third-party code isn't much of a tradeoff. > >> We also have requirement to have cpus/system fully support lse atomic and >> cpus/system not fully support lse atomic with a generic kernel image. > > Who *specifically* has this requirement (i.e. what does 'we' mean here)? The I can use other word to describe the requirement instead of "we". There is requirements like android google gki. It request different cpu arch system to use same generic kernel Image. > upstream kernel does not require that atomics work on non-cacheable memory, and The same issue the system can be down of lse atomic not supported for cachable memory when there need far atomic. > saying "The company I work for want this" doesn't change that. > > AFAICT the system here is architecturally compliant, and what you're relying > upon something that the architecture doesn't guarantee, and Linux doesn't > guarantee. It is not also only our company's problem: To support the atomic instructions added in the Armv8.1 architecture, CHI-B provides Atomic Transactions. while Atomic Transactions support is also *optional* from CHI-B. So far atomic cannot fully supported by ARMv8.1 cpu + CHI-B system as well. from: https://developer.arm.com/documentation/102407/0100/Atomic-operations?lang=en So only cpu support atomic cannot garantee the system support lse atomic > >> Same kernel module wanted to be used by lse atomic fully support cpu and not >> fully support cpu/system as well. > > Which kernel modules *specifically* need to do atomics to non-cacheable memory? The driver want to always do far atomic(no speculatively) and allow a read-modify-write non-interruptible sequence in a single instruction. > >> That's why we want to have a runtime option here. > > As per other replies, a runtime option doesn't solve the issue you have > described, and it will adversely affect the system in other ways (e.g. the > livelock and starvation issues will mentioned, which we have seen with > LDXR+STXR atomics). I myself also have encounter issues from livelock because of LDXR+STXR atomics unfairness before. More likely happened when different performance cpu. So myself also glad to using atomics instead of exclusive access. So if there is a way to fully utilize the atomic instructions for current hardware, and also support the far atomic, that can be much better solution than currently disable the feature. > > Thanks, > Mark. Pls feel free to comments. It would lead to a reasonable and usable solution from our discussions.
On Thu, Jul 13, 2023 at 10:24:24AM +0800, Aiqun(Maria) Yu wrote: > On 7/12/2023 3:36 PM, Mark Rutland wrote: > > On Wed, Jul 12, 2023 at 11:09:10AM +0800, Aiqun(Maria) Yu wrote: > > > On 7/11/2023 6:25 PM, Will Deacon wrote: > > > > On Tue, Jul 11, 2023 at 06:15:49PM +0800, Aiqun(Maria) Yu wrote: > > > > > On 7/11/2023 4:22 PM, Will Deacon wrote: > > > > > > On Tue, Jul 11, 2023 at 12:02:22PM +0800, Aiqun(Maria) Yu wrote: > > > > > > > On 7/10/2023 5:37 PM, Will Deacon wrote: > > > > > > > > On Mon, Jul 10, 2023 at 01:59:55PM +0800, Maria Yu wrote: > > > > > > > > > In order to be able to disable lse_atomic even if cpu > > > > > > > > > support it, most likely because of memory controller > > > > > > > > > cannot deal with the lse atomic instructions, use a > > > > > > > > > new idreg override to deal with it. > > > > > > > > > > > > > > > > This should not be a problem for cacheable memory though, right? > > > > > > > > > > > > > > > > Given that Linux does not issue atomic operations to non-cacheable mappings, > > > > > > > > I'm struggling to see why there's a problem here. > > > > > > > > > > > > > > The lse atomic operation can be issued on non-cacheable mappings as well. > > > > > > > Even if it is cached data, with different CPUECTLR_EL1 setting, it can also > > > > > > > do far lse atomic operations. > > > > > > > > > > > > Please can you point me to the place in the kernel sources where this > > > > > > happens? The architecture doesn't guarantee that atomics to non-cacheable > > > > > > mappings will work, see "B2.2.6 Possible implementation restrictions on > > > > > > using atomic instructions". Linux, therefore, doesn't issue atomics > > > > > > to non-cacheable memory. > > > > > > > > > > We encounter the issue on third party kernel modules and third party apps > > > > > instead of linux kernel itself. > > > > > > > > Great, so there's nothing to do in the kernel then! > > > > > > > > The third party code needs to be modified not to use atomic instructions > > > > with non-cacheable mappings. No need to involve us with that. > > > > > > > > This is a tradeoff of performance and stability. Per my understanding, > > > > > options can be used to enable the lse_atomic to have the most performance > > > > > cared system, and disable the lse_atomic by stability cared most system. > > > > > > > > Where do livelock and starvation fit in with "stability"? Disabling LSE > > > > atomics for things like qspinlock and the scheduler just because of some > > > > badly written third-party code isn't much of a tradeoff. > > > > > We also have requirement to have cpus/system fully support lse atomic and > > > cpus/system not fully support lse atomic with a generic kernel image. > > > > Who *specifically* has this requirement (i.e. what does 'we' mean here)? The > > I can use other word to describe the requirement instead of "we". > > There is requirements like android google gki. It request different cpu arch > system to use same generic kernel Image. GKI requires the system to use the generic kernel image; GKI does not require supporting atomics to non-cacheable mappings. What I am asking is: who has the requirement to perform atomics to non-cacheable mappings? > > upstream kernel does not require that atomics work on non-cacheable memory, and > > The same issue the system can be down of lse atomic not supported for > cachable memory when there need far atomic. Are you saying that LSE atomics to *cacheable* mappings do not work on your system? Specifically, when using a Normal Inner-Shareable Inner-Writeback Outer-Writeback mapping, do the LSE atomics work or not work? > > saying "The company I work for want this" doesn't change that. > > > > AFAICT the system here is architecturally compliant, and what you're relying > > upon something that the architecture doesn't guarantee, and Linux doesn't > > guarantee. > > It is not also only our company's problem: > To support the atomic instructions added in the Armv8.1 architecture, CHI-B > provides Atomic Transactions. while Atomic Transactions support is also > *optional* from CHI-B. > > So far atomic cannot fully supported by ARMv8.1 cpu + CHI-B system as well. > > from: > https://developer.arm.com/documentation/102407/0100/Atomic-operations?lang=en > > So only cpu support atomic cannot garantee the system support lse atomic > > > > > Same kernel module wanted to be used by lse atomic fully support cpu and not > > > fully support cpu/system as well. > > > > Which kernel modules *specifically* need to do atomics to non-cacheable memory? > The driver want to always do far atomic(no speculatively) and allow a > read-modify-write non-interruptible sequence in a single instruction. That doesn't answer my question (you haven't told me what "the driver" is). That doesn't explain why you need to use non-cachable memory for this. Thanks, Mark.
On 7/13/2023 7:20 PM, Mark Rutland wrote: > On Thu, Jul 13, 2023 at 10:24:24AM +0800, Aiqun(Maria) Yu wrote: >> On 7/12/2023 3:36 PM, Mark Rutland wrote: >>> On Wed, Jul 12, 2023 at 11:09:10AM +0800, Aiqun(Maria) Yu wrote: >>>> On 7/11/2023 6:25 PM, Will Deacon wrote: >>>>> On Tue, Jul 11, 2023 at 06:15:49PM +0800, Aiqun(Maria) Yu wrote: >>>>>> On 7/11/2023 4:22 PM, Will Deacon wrote: >>>>>>> On Tue, Jul 11, 2023 at 12:02:22PM +0800, Aiqun(Maria) Yu wrote: >>>>>>>> On 7/10/2023 5:37 PM, Will Deacon wrote: >>>>>>>>> On Mon, Jul 10, 2023 at 01:59:55PM +0800, Maria Yu wrote: >>>>>>>>>> In order to be able to disable lse_atomic even if cpu >>>>>>>>>> support it, most likely because of memory controller >>>>>>>>>> cannot deal with the lse atomic instructions, use a >>>>>>>>>> new idreg override to deal with it. >>>>>>>>> >>>>>>>>> This should not be a problem for cacheable memory though, right? >>>>>>>>> >>>>>>>>> Given that Linux does not issue atomic operations to non-cacheable mappings, >>>>>>>>> I'm struggling to see why there's a problem here. >>>>>>>> >>>>>>>> The lse atomic operation can be issued on non-cacheable mappings as well. >>>>>>>> Even if it is cached data, with different CPUECTLR_EL1 setting, it can also >>>>>>>> do far lse atomic operations. >>>>>>> >>>>>>> Please can you point me to the place in the kernel sources where this >>>>>>> happens? The architecture doesn't guarantee that atomics to non-cacheable >>>>>>> mappings will work, see "B2.2.6 Possible implementation restrictions on >>>>>>> using atomic instructions". Linux, therefore, doesn't issue atomics >>>>>>> to non-cacheable memory. >>>>>> >>>>>> We encounter the issue on third party kernel modules and third party apps >>>>>> instead of linux kernel itself. >>>>> >>>>> Great, so there's nothing to do in the kernel then! >>>>> >>>>> The third party code needs to be modified not to use atomic instructions >>>>> with non-cacheable mappings. No need to involve us with that. >>>> >>>>>> This is a tradeoff of performance and stability. Per my understanding, >>>>>> options can be used to enable the lse_atomic to have the most performance >>>>>> cared system, and disable the lse_atomic by stability cared most system. >>>>> >>>>> Where do livelock and starvation fit in with "stability"? Disabling LSE >>>>> atomics for things like qspinlock and the scheduler just because of some >>>>> badly written third-party code isn't much of a tradeoff. >>> >>>> We also have requirement to have cpus/system fully support lse atomic and >>>> cpus/system not fully support lse atomic with a generic kernel image. >>> >>> Who *specifically* has this requirement (i.e. what does 'we' mean here)? The >> >> I can use other word to describe the requirement instead of "we". >> >> There is requirements like android google gki. It request different cpu arch >> system to use same generic kernel Image. > > GKI requires the system to use the generic kernel image; GKI does not require > supporting atomics to non-cacheable mappings. GKI does not have to require atomics to non-cacheable mappings. GKI requires LSE ATOMIC feature to be enabled by default. And GKI requires runtime disable the current cpu lse atomic feature. It was an old soc, We received stability issues and finally completely disable lse atomic for the product when it is non-gki kernel. > > What I am asking is: who has the requirement to perform atomics to > non-cacheable mappings? > >>> upstream kernel does not require that atomics work on non-cacheable memory, and >> >> The same issue the system can be down of lse atomic not supported for >> cachable memory when there need far atomic. > > Are you saying that LSE atomics to *cacheable* mappings do not work on your > system? > > Specifically, when using a Normal Inner-Shareable Inner-Writeback > Outer-Writeback mapping, do the LSE atomics work or not work? *cacheable* mapping have the LSE atomic is not working if far atomic is performed. > >>> saying "The company I work for want this" doesn't change that. >>> >>> AFAICT the system here is architecturally compliant, and what you're relying >>> upon something that the architecture doesn't guarantee, and Linux doesn't >>> guarantee. >> >> It is not also only our company's problem: >> To support the atomic instructions added in the Armv8.1 architecture, CHI-B >> provides Atomic Transactions. while Atomic Transactions support is also >> *optional* from CHI-B. >> >> So far atomic cannot fully supported by ARMv8.1 cpu + CHI-B system as well. >> >> from: >> https://developer.arm.com/documentation/102407/0100/Atomic-operations?lang=en >> >> So only cpu support atomic cannot garantee the system support lse atomic >>> >>>> Same kernel module wanted to be used by lse atomic fully support cpu and not >>>> fully support cpu/system as well. >>> >>> Which kernel modules *specifically* need to do atomics to non-cacheable memory? >> The driver want to always do far atomic(no speculatively) and allow a >> read-modify-write non-interruptible sequence in a single instruction. > > That doesn't answer my question (you haven't told me what "the driver" is). The customers' third part drivers. Do you want to have the driver's name? Or source code? The driver works well on current far atomic supported systems. Is it a reasonable action like this from your point of view like this? The driver want to always do far atomic(no speculatively) and allow a read-modify-write non-interruptible sequence in a single instruction? There is also an example in below link that far atomic usage is allowed and sometimes performs good than near atomic: https://developer.arm.com/documentation/102407/0100/Atomic-operations?lang=en > > That doesn't explain why you need to use non-cachable memory for this. I want to correct that not "I need to", it is the end user/third party driver want to do far atomics. And my intention here is to give the options to let end user decide they can disable the lse atomic from their end. With disablement, the benefit is that they can keep the same code(kind of GKI from customer end) for far atomic supported/not supported systems. > > Thanks, > Mark.
On Thu, Jul 13, 2023 at 10:08:34PM +0800, Aiqun(Maria) Yu wrote: > On 7/13/2023 7:20 PM, Mark Rutland wrote: > > Are you saying that LSE atomics to *cacheable* mappings do not work on your > > system? > > > > Specifically, when using a Normal Inner-Shareable Inner-Writeback > > Outer-Writeback mapping, do the LSE atomics work or not work? > *cacheable* mapping have the LSE atomic is not working if far atomic is > performed. Thanks for confirming; the fact that this doesn't work on *cacheable* memory is definitely a major issue. I think everyone is confused here because of the earlier mention of non-cachable accesses (which don't matter). I know that some CPU implementations have EL3 control bits to force LSE atomics to be performed near (e.g. in Cortex-A55, the CPUECTLR.ATOM control bits), which would avoid the issue while still allowing the LSE atomics to be used. If those can be configured in EL3 firmware, that would be a preferable workaround. Can you say which CPUs are integrated in this system? and/or can you check if such control bits exist? Thanks, Mark.
On 7/14/2023 3:08 AM, Mark Rutland wrote: > On Thu, Jul 13, 2023 at 10:08:34PM +0800, Aiqun(Maria) Yu wrote: >> On 7/13/2023 7:20 PM, Mark Rutland wrote: >>> Are you saying that LSE atomics to *cacheable* mappings do not work on your >>> system? >>> >>> Specifically, when using a Normal Inner-Shareable Inner-Writeback >>> Outer-Writeback mapping, do the LSE atomics work or not work? >> *cacheable* mapping have the LSE atomic is not working if far atomic is >> performed. > > Thanks for confirming; the fact that this doesn't work on *cacheable* memory is > definitely a major issue. I think everyone is confused here because of the > earlier mention of non-cachable accesses (which don't matter). > Maybe I can have the information collected in a summary to see if that helps. > I know that some CPU implementations have EL3 control bits to force LSE atomics > to be performed near (e.g. in Cortex-A55, the CPUECTLR.ATOM control bits), > which would avoid the issue while still allowing the LSE atomics to be used. > > If those can be configured in EL3 firmware, that would be a preferable > workaround. > > Can you say which CPUs are integrated in this system? and/or can you check if > such control bits exist? We have CPUECTLR_EL1.ATOM bit can force LSE atomics to be perform near. CPUECTLR_EL1 is also an option to EL1 kernel drivers to be configuarable. Try to a detailed summarise of the whole discussions, anyone can ignore some part if you are already know. * Part 1: Solution for this issue. While we still want to have options to let third party and end users can have options: 1.Disable lse atomic cap. 2.*Disallow* far atomic by "CPUECTLR_EL1.atom force near atomic" and non-cachable mappling for lse atomic only. * Part 2: Why we need the solution 1. There is also some case far atomic is better performance than near atomic. end user may still can still try to do allow far atomic. while this driver is also use kerenl LSE ATOMIC macro, so it can be running on cpu don't support lse atomic and cpu support lse atomic already. while current system, cpu have feature register said lse atomic is supported, but memory controller is not supported is currently not yet supported. 2. cpu feature of lse atomic capbility can be controled via options for the same image. Can have GKI(generic kernel Image) + same third party drivers Images support multi systems. -- *New system* fully support lse atomic -- *Intermidiate support system* which only have cpu support lse atomic, but have memory control/bus don't support lse atomic.* (mainly issue are discussed in this thread.) -- *old system* have cpu don't have this cpu feature at all. 3. better for debugging purpose, it would be easier for verify if it is this feature related or not. 4. *Disallow* from the developer side is not easy to control, expecially when they have the same code working on *old system* or *new system*, but failed on current *Intermidiate support system*. > > Thanks, > Mark. > Thx for discussion in details.
On Fri, Jul 14, 2023 at 09:56:27AM +0800, Aiqun(Maria) Yu wrote: > On 7/14/2023 3:08 AM, Mark Rutland wrote: > > On Thu, Jul 13, 2023 at 10:08:34PM +0800, Aiqun(Maria) Yu wrote: > > > On 7/13/2023 7:20 PM, Mark Rutland wrote: > > > > Are you saying that LSE atomics to *cacheable* mappings do not work on your > > > > system? > > > > > > > > Specifically, when using a Normal Inner-Shareable Inner-Writeback > > > > Outer-Writeback mapping, do the LSE atomics work or not work? > > > *cacheable* mapping have the LSE atomic is not working if far atomic is > > > performed. > > > > Thanks for confirming; the fact that this doesn't work on *cacheable* memory is > > definitely a major issue. I think everyone is confused here because of the > > earlier mention of non-cachable accesses (which don't matter). > > > Maybe I can have the information collected in a summary to see if that > helps. > > I know that some CPU implementations have EL3 control bits to force LSE atomics > > to be performed near (e.g. in Cortex-A55, the CPUECTLR.ATOM control bits), > > which would avoid the issue while still allowing the LSE atomics to be used. > > > > If those can be configured in EL3 firmware, that would be a preferable > > workaround. > > > > Can you say which CPUs are integrated in this system? and/or can you check if > > such control bits exist? > > We have CPUECTLR_EL1.ATOM bit can force LSE atomics to be perform near. > CPUECTLR_EL1 is also an option to EL1 kernel drivers to be configuarable. > > Try to a detailed summarise of the whole discussions, anyone can ignore some > part if you are already know. > > * Part 1: Solution for this issue. > While we still want to have options to let third party and end users can > have options: > 1.Disable lse atomic cap. > 2.*Disallow* far atomic by "CPUECTLR_EL1.atom force near atomic" and > non-cachable mappling for lse atomic only. Sorry, but this still isn't making sense to me. Which CPUs do you have on this SoC? My understanding of the CPUs from ARM is that LSE atomics are not supposed to be sent to blocks that don't support them. That doesn't mean you have to do everything near, however -- you can still execute them at e.g. L2. For example, the Cortex-X1 TRM states: | Atomic instructions to cacheable memory can be performed as either | near atomics or far atomics, depending on where the cache line | containing the data resides. | | When an instruction hits in the L1 data cache in a unique state, then | it is performed as a near atomic in the L1 memory system. If the atomic | operation misses in the L1 cache, or the line is shared with another | core, then the atomic is sent as a far atomic on the core CHI interface. | | If the operation misses everywhere within the cluster, and the | interconnect supports far atomics, then the atomic is passed on to the | interconnect to perform the operation. | | When the operation hits anywhere inside the cluster, or when an | interconnect does not support atomics, the L3 memory system performs | the atomic operation. If the line is not already there, it allocates | the line into the L3 cache. This depends on whether the DSU is configured | with an L3 cache. So something doesn't add up. > * Part 2: Why we need the solution > 1. There is also some case far atomic is better performance than near > atomic. end user may still can still try to do allow far atomic. > while this driver is also use kerenl LSE ATOMIC macro, so it can be running > on cpu don't support lse atomic and cpu support lse atomic already. > while current system, cpu have feature register said lse atomic is > supported, but memory controller is not supported is currently not yet > supported. I think you're forgetting the fact that these instructions can be executed by userspace, so the kernel option is completely bogus. If you're saying that cacheable atomics can cause external aborts, then I can write an app which will crash your device even if you've set this command line option. Will
On 7/14/2023 4:23 PM, Will Deacon wrote: > On Fri, Jul 14, 2023 at 09:56:27AM +0800, Aiqun(Maria) Yu wrote: >> On 7/14/2023 3:08 AM, Mark Rutland wrote: >>> On Thu, Jul 13, 2023 at 10:08:34PM +0800, Aiqun(Maria) Yu wrote: >>>> On 7/13/2023 7:20 PM, Mark Rutland wrote: >>>>> Are you saying that LSE atomics to *cacheable* mappings do not work on your >>>>> system? >>>>> >>>>> Specifically, when using a Normal Inner-Shareable Inner-Writeback >>>>> Outer-Writeback mapping, do the LSE atomics work or not work? >>>> *cacheable* mapping have the LSE atomic is not working if far atomic is >>>> performed. >>> >>> Thanks for confirming; the fact that this doesn't work on *cacheable* memory is >>> definitely a major issue. I think everyone is confused here because of the >>> earlier mention of non-cachable accesses (which don't matter). >>> >> Maybe I can have the information collected in a summary to see if that >> helps. >>> I know that some CPU implementations have EL3 control bits to force LSE atomics >>> to be performed near (e.g. in Cortex-A55, the CPUECTLR.ATOM control bits), >>> which would avoid the issue while still allowing the LSE atomics to be used. >>> >>> If those can be configured in EL3 firmware, that would be a preferable >>> workaround. >>> >>> Can you say which CPUs are integrated in this system? and/or can you check if >>> such control bits exist? >> >> We have CPUECTLR_EL1.ATOM bit can force LSE atomics to be perform near. >> CPUECTLR_EL1 is also an option to EL1 kernel drivers to be configuarable. >> >> Try to a detailed summarise of the whole discussions, anyone can ignore some >> part if you are already know. >> >> * Part 1: Solution for this issue. >> While we still want to have options to let third party and end users can >> have options: >> 1.Disable lse atomic cap. >> 2.*Disallow* far atomic by "CPUECTLR_EL1.atom force near atomic" and >> non-cachable mappling for lse atomic only. > > Sorry, but this still isn't making sense to me. Which CPUs do you have on > this SoC? cpu is cortex A78/A55. > > My understanding of the CPUs from ARM is that LSE atomics are not supposed > to be sent to blocks that don't support them. That doesn't mean you have to > do everything near, however -- you can still execute them at e.g. L2. > > For example, the Cortex-X1 TRM states: > > | Atomic instructions to cacheable memory can be performed as either > | near atomics or far atomics, depending on where the cache line > | containing the data resides. > | > | When an instruction hits in the L1 data cache in a unique state, then > | it is performed as a near atomic in the L1 memory system. If the atomic > | operation misses in the L1 cache, or the line is shared with another > | core, then the atomic is sent as a far atomic on the core CHI interface. lse atomic is optional to CHI-B for example, some system may have cpu feature register have lse atomic feature, but the far atomic is not accpeted by CHI side. It will be simiar issue that we do. > | > | If the operation misses everywhere within the cluster, and the > | interconnect supports far atomics, then the atomic is passed on to the > | interconnect to perform the operation. > | > | When the operation hits anywhere inside the cluster, or when an > | interconnect does not support atomics, the L3 memory system performs > | the atomic operation. If the line is not already there, it allocates > | the line into the L3 cache. This depends on whether the DSU is configured > | with an L3 cache. > > So something doesn't add up. > >> * Part 2: Why we need the solution >> 1. There is also some case far atomic is better performance than near >> atomic. end user may still can still try to do allow far atomic. >> while this driver is also use kerenl LSE ATOMIC macro, so it can be running >> on cpu don't support lse atomic and cpu support lse atomic already. >> while current system, cpu have feature register said lse atomic is >> supported, but memory controller is not supported is currently not yet >> supported. > > I think you're forgetting the fact that these instructions can be executed > by userspace, so the kernel option is completely bogus. If you're saying > that cacheable atomics can cause external aborts, then I can write an app > which will crash your device even if you've set this command line option. > For apps like userspace also needed to check the system capbility as far as I know. > Will
On Fri, Jul 14, 2023 at 06:12:02PM +0800, Aiqun(Maria) Yu wrote: > On 7/14/2023 4:23 PM, Will Deacon wrote: > > On Fri, Jul 14, 2023 at 09:56:27AM +0800, Aiqun(Maria) Yu wrote: > > > Try to a detailed summarise of the whole discussions, anyone can ignore some > > > part if you are already know. > > > > > > * Part 1: Solution for this issue. > > > While we still want to have options to let third party and end users can > > > have options: > > > 1.Disable lse atomic cap. > > > 2.*Disallow* far atomic by "CPUECTLR_EL1.atom force near atomic" and > > > non-cachable mappling for lse atomic only. > > > > Sorry, but this still isn't making sense to me. Which CPUs do you have on > > this SoC? > cpu is cortex A78/A55. > > > > My understanding of the CPUs from ARM is that LSE atomics are not supposed > > to be sent to blocks that don't support them. That doesn't mean you have to > > do everything near, however -- you can still execute them at e.g. L2. > > > > For example, the Cortex-X1 TRM states: > > > > | Atomic instructions to cacheable memory can be performed as either > > | near atomics or far atomics, depending on where the cache line > > | containing the data resides. > > | > > | When an instruction hits in the L1 data cache in a unique state, then > > | it is performed as a near atomic in the L1 memory system. If the atomic > > | operation misses in the L1 cache, or the line is shared with another > > | core, then the atomic is sent as a far atomic on the core CHI interface. > lse atomic is optional to CHI-B for example, some system may have cpu > feature register have lse atomic feature, but the far atomic is not accpeted > by CHI side. It will be simiar issue that we do. Again, that should not be a problem. Looking at the A55 TRM, it explicitly says that atomics will be done in the L3 if the interconnect does not support them. The A78 TRM doesn't talk about this at all, so I defer to Mark (or anybody else from Arm) on how that works, but one might assume that it does something similar to the other Arm cores. > > > * Part 2: Why we need the solution > > > 1. There is also some case far atomic is better performance than near > > > atomic. end user may still can still try to do allow far atomic. > > > while this driver is also use kerenl LSE ATOMIC macro, so it can be running > > > on cpu don't support lse atomic and cpu support lse atomic already. > > > while current system, cpu have feature register said lse atomic is > > > supported, but memory controller is not supported is currently not yet > > > supported. > > > > I think you're forgetting the fact that these instructions can be executed > > by userspace, so the kernel option is completely bogus. If you're saying > > that cacheable atomics can cause external aborts, then I can write an app > > which will crash your device even if you've set this command line option. > > > For apps like userspace also needed to check the system capbility as far as That's not something you can enforce, so a malicious application can easily crash your system. Will
On 7/14/2023 8:09 PM, Will Deacon wrote: > On Fri, Jul 14, 2023 at 06:12:02PM +0800, Aiqun(Maria) Yu wrote: >> On 7/14/2023 4:23 PM, Will Deacon wrote: >>> On Fri, Jul 14, 2023 at 09:56:27AM +0800, Aiqun(Maria) Yu wrote: >>>> Try to a detailed summarise of the whole discussions, anyone can ignore some >>>> part if you are already know. >>>> >>>> * Part 1: Solution for this issue. >>>> While we still want to have options to let third party and end users can >>>> have options: >>>> 1.Disable lse atomic cap. >>>> 2.*Disallow* far atomic by "CPUECTLR_EL1.atom force near atomic" and >>>> non-cachable mappling for lse atomic only. >>> >>> Sorry, but this still isn't making sense to me. Which CPUs do you have on >>> this SoC? >> cpu is cortex A78/A55. >>> >>> My understanding of the CPUs from ARM is that LSE atomics are not supposed >>> to be sent to blocks that don't support them. That doesn't mean you have to >>> do everything near, however -- you can still execute them at e.g. L2. >>> >>> For example, the Cortex-X1 TRM states: >>> >>> | Atomic instructions to cacheable memory can be performed as either >>> | near atomics or far atomics, depending on where the cache line >>> | containing the data resides. >>> | >>> | When an instruction hits in the L1 data cache in a unique state, then >>> | it is performed as a near atomic in the L1 memory system. If the atomic >>> | operation misses in the L1 cache, or the line is shared with another >>> | core, then the atomic is sent as a far atomic on the core CHI interface. >> lse atomic is optional to CHI-B for example, some system may have cpu >> feature register have lse atomic feature, but the far atomic is not accpeted >> by CHI side. It will be simiar issue that we do. > > Again, that should not be a problem. Looking at the A55 TRM, it explicitly > says that atomics will be done in the L3 if the interconnect does not > support them. The A78 TRM doesn't talk about this at all, so I defer to We will check internally to see why it is not happened in current system which have issue. > Mark (or anybody else from Arm) on how that works, but one might assume > that it does something similar to the other Arm cores. I checked other Arm cores like A720 TRM. It seems the similar statement: If the operation hits anywhere inside the cluster, or if an interconnect does not support atomics, then the L3 memory system performs the atomic operation. If the line is not already there, it allocates the line into the L3 cache. > >>>> * Part 2: Why we need the solution >>>> 1. There is also some case far atomic is better performance than near >>>> atomic. end user may still can still try to do allow far atomic. >>>> while this driver is also use kerenl LSE ATOMIC macro, so it can be running >>>> on cpu don't support lse atomic and cpu support lse atomic already. >>>> while current system, cpu have feature register said lse atomic is >>>> supported, but memory controller is not supported is currently not yet >>>> supported. >>> >>> I think you're forgetting the fact that these instructions can be executed >>> by userspace, so the kernel option is completely bogus. If you're saying >>> that cacheable atomics can cause external aborts, then I can write an app >>> which will crash your device even if you've set this command line option. >>> >> For apps like userspace also needed to check the system capbility as far as > > That's not something you can enforce, so a malicious application can easily > crash your system. we provide the capability for sotfware to do the runtime compatible check. we surely cannot enforce that. If the software is align the rule, it can also funcational fine on the "intermediate support system". By the way: ALso the A720 TRM doc mentioned the uncachable memory case: The Cortex-A720 core supports atomics to Device or Non-cacheable memory, however this relies on the interconnect also supporting atomics. If such an atomic instruction is executed when the interconnect does not support them, then it results in an abort. So it is not a single one company's scenario as well, shall we have an option to handle this scenario? Why the non-cacheable memory is forbiden? Does it only due to limitation of hardware? > > Will
diff --git a/Documentation/admin-guide/kernel-parameters.txt b/Documentation/admin-guide/kernel-parameters.txt index 85fb0fa5d091..6ad754549f1d 100644 --- a/Documentation/admin-guide/kernel-parameters.txt +++ b/Documentation/admin-guide/kernel-parameters.txt @@ -433,6 +433,8 @@ arm64.nomops [ARM64] Unconditionally disable Memory Copy and Memory Set instructions support + arm64.nolse_atomic [ARM64] Unconditionally disable LSE Atomic support + ataflop= [HW,M68k] atarimouse= [HW,MOUSE] Atari Mouse diff --git a/arch/arm64/include/asm/cpufeature.h b/arch/arm64/include/asm/cpufeature.h index 96e50227f940..9d56dea1fe62 100644 --- a/arch/arm64/include/asm/cpufeature.h +++ b/arch/arm64/include/asm/cpufeature.h @@ -916,6 +916,7 @@ extern struct arm64_ftr_override id_aa64pfr0_override; extern struct arm64_ftr_override id_aa64pfr1_override; extern struct arm64_ftr_override id_aa64zfr0_override; extern struct arm64_ftr_override id_aa64smfr0_override; +extern struct arm64_ftr_override id_aa64isar0_override; extern struct arm64_ftr_override id_aa64isar1_override; extern struct arm64_ftr_override id_aa64isar2_override; diff --git a/arch/arm64/kernel/cpufeature.c b/arch/arm64/kernel/cpufeature.c index f9d456fe132d..9bd766880807 100644 --- a/arch/arm64/kernel/cpufeature.c +++ b/arch/arm64/kernel/cpufeature.c @@ -669,6 +669,7 @@ struct arm64_ftr_override __ro_after_init id_aa64pfr0_override; struct arm64_ftr_override __ro_after_init id_aa64pfr1_override; struct arm64_ftr_override __ro_after_init id_aa64zfr0_override; struct arm64_ftr_override __ro_after_init id_aa64smfr0_override; +struct arm64_ftr_override __ro_after_init id_aa64isar0_override; struct arm64_ftr_override __ro_after_init id_aa64isar1_override; struct arm64_ftr_override __ro_after_init id_aa64isar2_override; @@ -721,7 +722,8 @@ static const struct __ftr_reg_entry { ARM64_FTR_REG(SYS_ID_AA64DFR1_EL1, ftr_raz), /* Op1 = 0, CRn = 0, CRm = 6 */ - ARM64_FTR_REG(SYS_ID_AA64ISAR0_EL1, ftr_id_aa64isar0), + ARM64_FTR_REG_OVERRIDE(SYS_ID_AA64ISAR0_EL1, ftr_id_aa64isar0, + &id_aa64isar0_override), ARM64_FTR_REG_OVERRIDE(SYS_ID_AA64ISAR1_EL1, ftr_id_aa64isar1, &id_aa64isar1_override), ARM64_FTR_REG_OVERRIDE(SYS_ID_AA64ISAR2_EL1, ftr_id_aa64isar2, diff --git a/arch/arm64/kernel/idreg-override.c b/arch/arm64/kernel/idreg-override.c index 2fe2491b692c..af41ab4f3d94 100644 --- a/arch/arm64/kernel/idreg-override.c +++ b/arch/arm64/kernel/idreg-override.c @@ -105,6 +105,15 @@ static const struct ftr_set_desc pfr1 __initconst = { }, }; +static const struct ftr_set_desc isar0 __initconst = { + .name = "id_aa64isar0", + .override = &id_aa64isar0_override, + .fields = { + FIELD("atomic", ID_AA64ISAR0_EL1_ATOMIC_SHIFT, NULL), + {} + }, +}; + static const struct ftr_set_desc isar1 __initconst = { .name = "id_aa64isar1", .override = &id_aa64isar1_override, @@ -163,6 +172,7 @@ static const struct ftr_set_desc * const regs[] __initconst = { &mmfr1, &pfr0, &pfr1, + &isar0, &isar1, &isar2, &smfr0, @@ -185,6 +195,7 @@ static const struct { { "arm64.nomops", "id_aa64isar2.mops=0" }, { "arm64.nomte", "id_aa64pfr1.mte=0" }, { "nokaslr", "arm64_sw.nokaslr=1" }, + { "arm64.nolse_atomic", "id_aa64isar0.atomic=0" }, }; static int __init parse_nokaslr(char *unused)
In order to be able to disable lse_atomic even if cpu support it, most likely because of memory controller cannot deal with the lse atomic instructions, use a new idreg override to deal with it. Signed-off-by: Maria Yu <quic_aiquny@quicinc.com> --- Documentation/admin-guide/kernel-parameters.txt | 2 ++ arch/arm64/include/asm/cpufeature.h | 1 + arch/arm64/kernel/cpufeature.c | 4 +++- arch/arm64/kernel/idreg-override.c | 11 +++++++++++ 4 files changed, 17 insertions(+), 1 deletion(-)