diff mbox series

[XEN,v4,10/11] xen/Arm: GICv3: Define macros to read/write 64 bit

Message ID 20221128155649.31386-11-ayan.kumar.halder@amd.com (mailing list archive)
State Superseded
Headers show
Series Arm: Enable GICv3 for AArch32 | expand

Commit Message

Ayan Kumar Halder Nov. 28, 2022, 3:56 p.m. UTC
On AArch32, ldrd/strd instructions are not atomic when used to access MMIO.
Furthermore, ldrd/strd instructions are not decoded by Arm when running as
a guest to access emulated MMIO region.
Thus, we have defined readq_relaxed_non_atomic()/writeq_relaxed_non_atomic()
which in turn calls readl_relaxed()/writel_relaxed() for the lower and upper
32 bits.
For AArch64, readq_relaxed_non_atomic()/writeq_relaxed_non_atomic() invokes
readq_relaxed()/writeq_relaxed() respectively.
As GICv3 registers (GICD_IROUTER, GICR_TYPER) can be accessed in a non atomic
manner, so we have used readq_relaxed_non_atomic()/writeq_relaxed_non_atomic().

Signed-off-by: Ayan Kumar Halder <ayan.kumar.halder@amd.com>
---

Changes from :-
v1 - 1. Use ldrd/strd for readq_relaxed()/writeq_relaxed().
2. No need to use le64_to_cpu() as the returned byte order is already in cpu
endianess.

v2 - 1. Replace {read/write}q_relaxed with {read/write}q_relaxed_non_atomic().

v3 - 1. Use inline function definitions for {read/write}q_relaxed_non_atomic().
2. For AArch64, {read/write}q_relaxed_non_atomic() should invoke {read/write}q_relaxed().
Thus, we can avoid any ifdef in gic-v3.c.

 xen/arch/arm/gic-v3.c               |  6 +++---
 xen/arch/arm/include/asm/arm32/io.h | 20 ++++++++++++++++++++
 xen/arch/arm/include/asm/arm64/io.h |  2 ++
 3 files changed, 25 insertions(+), 3 deletions(-)

Comments

Julien Grall Dec. 3, 2022, 8:37 p.m. UTC | #1
Hi,

On 28/11/2022 15:56, Ayan Kumar Halder wrote:
> On AArch32, ldrd/strd instructions are not atomic when used to access MMIO.
> Furthermore, ldrd/strd instructions are not decoded by Arm when running as
> a guest to access emulated MMIO region.
> Thus, we have defined readq_relaxed_non_atomic()/writeq_relaxed_non_atomic()
> which in turn calls readl_relaxed()/writel_relaxed() for the lower and upper
> 32 bits.
> For AArch64, readq_relaxed_non_atomic()/writeq_relaxed_non_atomic() invokes
> readq_relaxed()/writeq_relaxed() respectively.
> As GICv3 registers (GICD_IROUTER, GICR_TYPER) can be accessed in a non atomic
> manner, so we have used readq_relaxed_non_atomic()/writeq_relaxed_non_atomic().

I had another look at the code and I think there needs some 
clarification necessary because the accesses to IROUTER is non-obvious.

> 
> Signed-off-by: Ayan Kumar Halder <ayan.kumar.halder@amd.com>
> ---
> 
> Changes from :-
> v1 - 1. Use ldrd/strd for readq_relaxed()/writeq_relaxed().
> 2. No need to use le64_to_cpu() as the returned byte order is already in cpu
> endianess.
> 
> v2 - 1. Replace {read/write}q_relaxed with {read/write}q_relaxed_non_atomic().
> 
> v3 - 1. Use inline function definitions for {read/write}q_relaxed_non_atomic().
> 2. For AArch64, {read/write}q_relaxed_non_atomic() should invoke {read/write}q_relaxed().
> Thus, we can avoid any ifdef in gic-v3.c.
> 
>   xen/arch/arm/gic-v3.c               |  6 +++---
>   xen/arch/arm/include/asm/arm32/io.h | 20 ++++++++++++++++++++
>   xen/arch/arm/include/asm/arm64/io.h |  2 ++
>   3 files changed, 25 insertions(+), 3 deletions(-)
> 
> diff --git a/xen/arch/arm/gic-v3.c b/xen/arch/arm/gic-v3.c
> index 6457e7033c..3c5b88148c 100644
> --- a/xen/arch/arm/gic-v3.c
> +++ b/xen/arch/arm/gic-v3.c
> @@ -651,7 +651,7 @@ static void __init gicv3_dist_init(void)
>       affinity &= ~GICD_IROUTER_SPI_MODE_ANY;
>   
>       for ( i = NR_GIC_LOCAL_IRQS; i < nr_lines; i++ )
> -        writeq_relaxed(affinity, GICD + GICD_IROUTER + i * 8);
> +        writeq_relaxed_non_atomic(affinity, GICD + GICD_IROUTER + i * 8);

Using non atomic here makes sense because AFAIU the interrupt will be 
disabled. Therefore the GIC should not use the register.

>   }
>   
>   static int gicv3_enable_redist(void)
> @@ -745,7 +745,7 @@ static int __init gicv3_populate_rdist(void)
>           }
>   
>           do {
> -            typer = readq_relaxed(ptr + GICR_TYPER);
> +            typer = readq_relaxed_non_atomic(ptr + GICR_TYPER);

This non-atomic read is OK because GICR_TYPER is read-only.

>   
>               if ( (typer >> 32) == aff )
>               {
> @@ -1265,7 +1265,7 @@ static void gicv3_irq_set_affinity(struct irq_desc *desc, const cpumask_t *mask)
>       affinity &= ~GICD_IROUTER_SPI_MODE_ANY;
>   
>       if ( desc->irq >= NR_GIC_LOCAL_IRQS )
> -        writeq_relaxed(affinity, (GICD + GICD_IROUTER + desc->irq * 8));
> +        writeq_relaxed_non_atomic(affinity, (GICD + GICD_IROUTER + desc->irq * 8));

This can be called with interrupt enabled. So a non-atomic access means 
the GIC will see a transient value when only one of two 32-bit will be 
updated.

In practice this is fine because only Aff3 is so far defined in the top 
32-bits. So effectively, they will be RESS0 and never change.

Cheers,
diff mbox series

Patch

diff --git a/xen/arch/arm/gic-v3.c b/xen/arch/arm/gic-v3.c
index 6457e7033c..3c5b88148c 100644
--- a/xen/arch/arm/gic-v3.c
+++ b/xen/arch/arm/gic-v3.c
@@ -651,7 +651,7 @@  static void __init gicv3_dist_init(void)
     affinity &= ~GICD_IROUTER_SPI_MODE_ANY;
 
     for ( i = NR_GIC_LOCAL_IRQS; i < nr_lines; i++ )
-        writeq_relaxed(affinity, GICD + GICD_IROUTER + i * 8);
+        writeq_relaxed_non_atomic(affinity, GICD + GICD_IROUTER + i * 8);
 }
 
 static int gicv3_enable_redist(void)
@@ -745,7 +745,7 @@  static int __init gicv3_populate_rdist(void)
         }
 
         do {
-            typer = readq_relaxed(ptr + GICR_TYPER);
+            typer = readq_relaxed_non_atomic(ptr + GICR_TYPER);
 
             if ( (typer >> 32) == aff )
             {
@@ -1265,7 +1265,7 @@  static void gicv3_irq_set_affinity(struct irq_desc *desc, const cpumask_t *mask)
     affinity &= ~GICD_IROUTER_SPI_MODE_ANY;
 
     if ( desc->irq >= NR_GIC_LOCAL_IRQS )
-        writeq_relaxed(affinity, (GICD + GICD_IROUTER + desc->irq * 8));
+        writeq_relaxed_non_atomic(affinity, (GICD + GICD_IROUTER + desc->irq * 8));
 
     spin_unlock(&gicv3.lock);
 }
diff --git a/xen/arch/arm/include/asm/arm32/io.h b/xen/arch/arm/include/asm/arm32/io.h
index 73a879e9fb..782b564809 100644
--- a/xen/arch/arm/include/asm/arm32/io.h
+++ b/xen/arch/arm/include/asm/arm32/io.h
@@ -80,10 +80,30 @@  static inline u32 __raw_readl(const volatile void __iomem *addr)
                                         __raw_readw(c)); __r; })
 #define readl_relaxed(c) ({ u32 __r = le32_to_cpu((__force __le32) \
                                         __raw_readl(c)); __r; })
+/*
+ * ldrd instructions are not decoded by Arm when running as a guest to access
+ * emulated MMIO region. Thus, readq_relaxed_non_atomic() invokes readl_relaxed()
+ * twice to read the lower and upper 32 bits.
+ */
+static inline u64 readq_relaxed_non_atomic(const volatile void __iomem *addr)
+{
+        u64 val = (((u64)readl_relaxed(addr + 4)) << 32) | readl_relaxed(addr);
+        return val;
+}
 
 #define writeb_relaxed(v,c)     __raw_writeb(v,c)
 #define writew_relaxed(v,c)     __raw_writew((__force u16) cpu_to_le16(v),c)
 #define writel_relaxed(v,c)     __raw_writel((__force u32) cpu_to_le32(v),c)
+/*
+ * strd instructions are not decoded by Arm when running as a guest to access
+ * emulated MMIO region. Thus, writeq_relaxed_non_atomic() invokes writel_relaxed()
+ * twice to write the lower and upper 32 bits.
+ */
+static inline void writeq_relaxed_non_atomic(u64 val, volatile void __iomem *addr)
+{
+        writel_relaxed((u32)val, addr);
+        writel_relaxed((u32)(val >> 32), addr + 4);
+}
 
 #define readb(c)                ({ u8  __v = readb_relaxed(c); __iormb(); __v; })
 #define readw(c)                ({ u16 __v = readw_relaxed(c); __iormb(); __v; })
diff --git a/xen/arch/arm/include/asm/arm64/io.h b/xen/arch/arm/include/asm/arm64/io.h
index 30bfc78d9e..2e2ab24f78 100644
--- a/xen/arch/arm/include/asm/arm64/io.h
+++ b/xen/arch/arm/include/asm/arm64/io.h
@@ -102,11 +102,13 @@  static inline u64 __raw_readq(const volatile void __iomem *addr)
 #define readw_relaxed(c)        ({ u16 __v = le16_to_cpu((__force __le16)__raw_readw(c)); __v; })
 #define readl_relaxed(c)        ({ u32 __v = le32_to_cpu((__force __le32)__raw_readl(c)); __v; })
 #define readq_relaxed(c)        ({ u64 __v = le64_to_cpu((__force __le64)__raw_readq(c)); __v; })
+#define readq_relaxed_non_atomic(c)           readq_relaxed((c))
 
 #define writeb_relaxed(v,c)     ((void)__raw_writeb((v),(c)))
 #define writew_relaxed(v,c)     ((void)__raw_writew((__force u16)cpu_to_le16(v),(c)))
 #define writel_relaxed(v,c)     ((void)__raw_writel((__force u32)cpu_to_le32(v),(c)))
 #define writeq_relaxed(v,c)     ((void)__raw_writeq((__force u64)cpu_to_le64(v),(c)))
+#define writeq_relaxed_non_atomic(v,c)        writeq_relaxed((v),(c))
 
 /*
  * I/O memory access primitives. Reads are ordered relative to any