Message ID | 20221128155649.31386-11-ayan.kumar.halder@amd.com (mailing list archive) |
---|---|
State | Superseded |
Headers | show |
Series | Arm: Enable GICv3 for AArch32 | expand |
Hi, On 28/11/2022 15:56, Ayan Kumar Halder wrote: > On AArch32, ldrd/strd instructions are not atomic when used to access MMIO. > Furthermore, ldrd/strd instructions are not decoded by Arm when running as > a guest to access emulated MMIO region. > Thus, we have defined readq_relaxed_non_atomic()/writeq_relaxed_non_atomic() > which in turn calls readl_relaxed()/writel_relaxed() for the lower and upper > 32 bits. > For AArch64, readq_relaxed_non_atomic()/writeq_relaxed_non_atomic() invokes > readq_relaxed()/writeq_relaxed() respectively. > As GICv3 registers (GICD_IROUTER, GICR_TYPER) can be accessed in a non atomic > manner, so we have used readq_relaxed_non_atomic()/writeq_relaxed_non_atomic(). I had another look at the code and I think there needs some clarification necessary because the accesses to IROUTER is non-obvious. > > Signed-off-by: Ayan Kumar Halder <ayan.kumar.halder@amd.com> > --- > > Changes from :- > v1 - 1. Use ldrd/strd for readq_relaxed()/writeq_relaxed(). > 2. No need to use le64_to_cpu() as the returned byte order is already in cpu > endianess. > > v2 - 1. Replace {read/write}q_relaxed with {read/write}q_relaxed_non_atomic(). > > v3 - 1. Use inline function definitions for {read/write}q_relaxed_non_atomic(). > 2. For AArch64, {read/write}q_relaxed_non_atomic() should invoke {read/write}q_relaxed(). > Thus, we can avoid any ifdef in gic-v3.c. > > xen/arch/arm/gic-v3.c | 6 +++--- > xen/arch/arm/include/asm/arm32/io.h | 20 ++++++++++++++++++++ > xen/arch/arm/include/asm/arm64/io.h | 2 ++ > 3 files changed, 25 insertions(+), 3 deletions(-) > > diff --git a/xen/arch/arm/gic-v3.c b/xen/arch/arm/gic-v3.c > index 6457e7033c..3c5b88148c 100644 > --- a/xen/arch/arm/gic-v3.c > +++ b/xen/arch/arm/gic-v3.c > @@ -651,7 +651,7 @@ static void __init gicv3_dist_init(void) > affinity &= ~GICD_IROUTER_SPI_MODE_ANY; > > for ( i = NR_GIC_LOCAL_IRQS; i < nr_lines; i++ ) > - writeq_relaxed(affinity, GICD + GICD_IROUTER + i * 8); > + writeq_relaxed_non_atomic(affinity, GICD + GICD_IROUTER + i * 8); Using non atomic here makes sense because AFAIU the interrupt will be disabled. Therefore the GIC should not use the register. > } > > static int gicv3_enable_redist(void) > @@ -745,7 +745,7 @@ static int __init gicv3_populate_rdist(void) > } > > do { > - typer = readq_relaxed(ptr + GICR_TYPER); > + typer = readq_relaxed_non_atomic(ptr + GICR_TYPER); This non-atomic read is OK because GICR_TYPER is read-only. > > if ( (typer >> 32) == aff ) > { > @@ -1265,7 +1265,7 @@ static void gicv3_irq_set_affinity(struct irq_desc *desc, const cpumask_t *mask) > affinity &= ~GICD_IROUTER_SPI_MODE_ANY; > > if ( desc->irq >= NR_GIC_LOCAL_IRQS ) > - writeq_relaxed(affinity, (GICD + GICD_IROUTER + desc->irq * 8)); > + writeq_relaxed_non_atomic(affinity, (GICD + GICD_IROUTER + desc->irq * 8)); This can be called with interrupt enabled. So a non-atomic access means the GIC will see a transient value when only one of two 32-bit will be updated. In practice this is fine because only Aff3 is so far defined in the top 32-bits. So effectively, they will be RESS0 and never change. Cheers,
diff --git a/xen/arch/arm/gic-v3.c b/xen/arch/arm/gic-v3.c index 6457e7033c..3c5b88148c 100644 --- a/xen/arch/arm/gic-v3.c +++ b/xen/arch/arm/gic-v3.c @@ -651,7 +651,7 @@ static void __init gicv3_dist_init(void) affinity &= ~GICD_IROUTER_SPI_MODE_ANY; for ( i = NR_GIC_LOCAL_IRQS; i < nr_lines; i++ ) - writeq_relaxed(affinity, GICD + GICD_IROUTER + i * 8); + writeq_relaxed_non_atomic(affinity, GICD + GICD_IROUTER + i * 8); } static int gicv3_enable_redist(void) @@ -745,7 +745,7 @@ static int __init gicv3_populate_rdist(void) } do { - typer = readq_relaxed(ptr + GICR_TYPER); + typer = readq_relaxed_non_atomic(ptr + GICR_TYPER); if ( (typer >> 32) == aff ) { @@ -1265,7 +1265,7 @@ static void gicv3_irq_set_affinity(struct irq_desc *desc, const cpumask_t *mask) affinity &= ~GICD_IROUTER_SPI_MODE_ANY; if ( desc->irq >= NR_GIC_LOCAL_IRQS ) - writeq_relaxed(affinity, (GICD + GICD_IROUTER + desc->irq * 8)); + writeq_relaxed_non_atomic(affinity, (GICD + GICD_IROUTER + desc->irq * 8)); spin_unlock(&gicv3.lock); } diff --git a/xen/arch/arm/include/asm/arm32/io.h b/xen/arch/arm/include/asm/arm32/io.h index 73a879e9fb..782b564809 100644 --- a/xen/arch/arm/include/asm/arm32/io.h +++ b/xen/arch/arm/include/asm/arm32/io.h @@ -80,10 +80,30 @@ static inline u32 __raw_readl(const volatile void __iomem *addr) __raw_readw(c)); __r; }) #define readl_relaxed(c) ({ u32 __r = le32_to_cpu((__force __le32) \ __raw_readl(c)); __r; }) +/* + * ldrd instructions are not decoded by Arm when running as a guest to access + * emulated MMIO region. Thus, readq_relaxed_non_atomic() invokes readl_relaxed() + * twice to read the lower and upper 32 bits. + */ +static inline u64 readq_relaxed_non_atomic(const volatile void __iomem *addr) +{ + u64 val = (((u64)readl_relaxed(addr + 4)) << 32) | readl_relaxed(addr); + return val; +} #define writeb_relaxed(v,c) __raw_writeb(v,c) #define writew_relaxed(v,c) __raw_writew((__force u16) cpu_to_le16(v),c) #define writel_relaxed(v,c) __raw_writel((__force u32) cpu_to_le32(v),c) +/* + * strd instructions are not decoded by Arm when running as a guest to access + * emulated MMIO region. Thus, writeq_relaxed_non_atomic() invokes writel_relaxed() + * twice to write the lower and upper 32 bits. + */ +static inline void writeq_relaxed_non_atomic(u64 val, volatile void __iomem *addr) +{ + writel_relaxed((u32)val, addr); + writel_relaxed((u32)(val >> 32), addr + 4); +} #define readb(c) ({ u8 __v = readb_relaxed(c); __iormb(); __v; }) #define readw(c) ({ u16 __v = readw_relaxed(c); __iormb(); __v; }) diff --git a/xen/arch/arm/include/asm/arm64/io.h b/xen/arch/arm/include/asm/arm64/io.h index 30bfc78d9e..2e2ab24f78 100644 --- a/xen/arch/arm/include/asm/arm64/io.h +++ b/xen/arch/arm/include/asm/arm64/io.h @@ -102,11 +102,13 @@ static inline u64 __raw_readq(const volatile void __iomem *addr) #define readw_relaxed(c) ({ u16 __v = le16_to_cpu((__force __le16)__raw_readw(c)); __v; }) #define readl_relaxed(c) ({ u32 __v = le32_to_cpu((__force __le32)__raw_readl(c)); __v; }) #define readq_relaxed(c) ({ u64 __v = le64_to_cpu((__force __le64)__raw_readq(c)); __v; }) +#define readq_relaxed_non_atomic(c) readq_relaxed((c)) #define writeb_relaxed(v,c) ((void)__raw_writeb((v),(c))) #define writew_relaxed(v,c) ((void)__raw_writew((__force u16)cpu_to_le16(v),(c))) #define writel_relaxed(v,c) ((void)__raw_writel((__force u32)cpu_to_le32(v),(c))) #define writeq_relaxed(v,c) ((void)__raw_writeq((__force u64)cpu_to_le64(v),(c))) +#define writeq_relaxed_non_atomic(v,c) writeq_relaxed((v),(c)) /* * I/O memory access primitives. Reads are ordered relative to any
On AArch32, ldrd/strd instructions are not atomic when used to access MMIO. Furthermore, ldrd/strd instructions are not decoded by Arm when running as a guest to access emulated MMIO region. Thus, we have defined readq_relaxed_non_atomic()/writeq_relaxed_non_atomic() which in turn calls readl_relaxed()/writel_relaxed() for the lower and upper 32 bits. For AArch64, readq_relaxed_non_atomic()/writeq_relaxed_non_atomic() invokes readq_relaxed()/writeq_relaxed() respectively. As GICv3 registers (GICD_IROUTER, GICR_TYPER) can be accessed in a non atomic manner, so we have used readq_relaxed_non_atomic()/writeq_relaxed_non_atomic(). Signed-off-by: Ayan Kumar Halder <ayan.kumar.halder@amd.com> --- Changes from :- v1 - 1. Use ldrd/strd for readq_relaxed()/writeq_relaxed(). 2. No need to use le64_to_cpu() as the returned byte order is already in cpu endianess. v2 - 1. Replace {read/write}q_relaxed with {read/write}q_relaxed_non_atomic(). v3 - 1. Use inline function definitions for {read/write}q_relaxed_non_atomic(). 2. For AArch64, {read/write}q_relaxed_non_atomic() should invoke {read/write}q_relaxed(). Thus, we can avoid any ifdef in gic-v3.c. xen/arch/arm/gic-v3.c | 6 +++--- xen/arch/arm/include/asm/arm32/io.h | 20 ++++++++++++++++++++ xen/arch/arm/include/asm/arm64/io.h | 2 ++ 3 files changed, 25 insertions(+), 3 deletions(-)