diff mbox series

iommu: riscv: Split 8-byte accesses on 32 bit I/O bus platform

Message ID 20250325144252.27403-1-luxu.kernel@bytedance.com (mailing list archive)
State New
Headers show
Series iommu: riscv: Split 8-byte accesses on 32 bit I/O bus platform | expand

Checks

Context Check Description
bjorn/pre-ci_am success Success
bjorn/build-rv32-defconfig success build-rv32-defconfig
bjorn/build-rv64-clang-allmodconfig success build-rv64-clang-allmodconfig
bjorn/build-rv64-gcc-allmodconfig success build-rv64-gcc-allmodconfig
bjorn/build-rv64-nommu-k210-defconfig success build-rv64-nommu-k210-defconfig
bjorn/build-rv64-nommu-k210-virt success build-rv64-nommu-k210-virt
bjorn/checkpatch warning checkpatch
bjorn/dtb-warn-rv64 success dtb-warn-rv64
bjorn/header-inline success header-inline
bjorn/kdoc success kdoc
bjorn/module-param success module-param
bjorn/verify-fixes success verify-fixes
bjorn/verify-signedoff success verify-signedoff

Commit Message

Xu Lu March 25, 2025, 2:42 p.m. UTC
Introduce a new configuration CONFIG_RISCV_IOMMU_32BIT to enable
splitting 8-byte access into 4-byte transactions for hardware platform
whose I/O bus limits access to 4-byte transfers.

Signed-off-by: Xu Lu <luxu.kernel@bytedance.com>
---
 drivers/iommu/riscv/Kconfig |  9 +++++++++
 drivers/iommu/riscv/iommu.h | 28 +++++++++++++++++++++++-----
 2 files changed, 32 insertions(+), 5 deletions(-)

Comments

Jessica Clarke March 25, 2025, 6:50 p.m. UTC | #1
On 25 Mar 2025, at 14:42, Xu Lu <luxu.kernel@bytedance.com> wrote:
> 
> Introduce a new configuration CONFIG_RISCV_IOMMU_32BIT to enable
> splitting 8-byte access into 4-byte transactions for hardware platform
> whose I/O bus limits access to 4-byte transfers.
> 
> Signed-off-by: Xu Lu <luxu.kernel@bytedance.com>

Is such a platform conformant to the specification? Either way, why is
this a static build-time configuration choice rather than a dynamic
run-time choice based on the FDT / ACPI tables / some other platform
probing method?

Jess

> ---
> drivers/iommu/riscv/Kconfig |  9 +++++++++
> drivers/iommu/riscv/iommu.h | 28 +++++++++++++++++++++++-----
> 2 files changed, 32 insertions(+), 5 deletions(-)
> 
> diff --git a/drivers/iommu/riscv/Kconfig b/drivers/iommu/riscv/Kconfig
> index c071816f59a6..b7c9ea22d969 100644
> --- a/drivers/iommu/riscv/Kconfig
> +++ b/drivers/iommu/riscv/Kconfig
> @@ -18,3 +18,12 @@ config RISCV_IOMMU_PCI
> def_bool y if RISCV_IOMMU && PCI_MSI
> help
>  Support for the PCIe implementation of RISC-V IOMMU architecture.
> +
> +config RISCV_IOMMU_32BIT
> + bool "Support 4-Byte Accesses on RISC-V IOMMU Registers"
> + depends on RISCV_IOMMU
> + default n
> + help
> +  Support hardware platform whose I/O bus limits access to 4-byte
> +  transfers. When enabled, all accesses to IOMMU registers will be
> +  split into 4-byte accesses.
> diff --git a/drivers/iommu/riscv/iommu.h b/drivers/iommu/riscv/iommu.h
> index 46df79dd5495..0e3552a8142d 100644
> --- a/drivers/iommu/riscv/iommu.h
> +++ b/drivers/iommu/riscv/iommu.h
> @@ -14,6 +14,10 @@
> #include <linux/iommu.h>
> #include <linux/types.h>
> #include <linux/iopoll.h>
> +#ifdef CONFIG_RISCV_IOMMU_32BIT
> +#include <linux/io-64-nonatomic-hi-lo.h>
> +#include <linux/io-64-nonatomic-lo-hi.h>
> +#endif
> 
> #include "iommu-bits.h"
> 
> @@ -69,21 +73,35 @@ void riscv_iommu_disable(struct riscv_iommu_device *iommu);
> #define riscv_iommu_readl(iommu, addr) \
> readl_relaxed((iommu)->reg + (addr))
> 
> -#define riscv_iommu_readq(iommu, addr) \
> - readq_relaxed((iommu)->reg + (addr))
> -
> #define riscv_iommu_writel(iommu, addr, val) \
> writel_relaxed((val), (iommu)->reg + (addr))
> 
> +#define riscv_iommu_readl_timeout(iommu, addr, val, cond, delay_us, timeout_us) \
> + readx_poll_timeout(readl_relaxed, (iommu)->reg + (addr), val, cond, \
> +   delay_us, timeout_us)
> +
> +#ifndef CONFIG_RISCV_IOMMU_32BIT
> +#define riscv_iommu_readq(iommu, addr) \
> + readq_relaxed((iommu)->reg + (addr))
> +
> #define riscv_iommu_writeq(iommu, addr, val) \
> writeq_relaxed((val), (iommu)->reg + (addr))
> 
> #define riscv_iommu_readq_timeout(iommu, addr, val, cond, delay_us, timeout_us) \
> readx_poll_timeout(readq_relaxed, (iommu)->reg + (addr), val, cond, \
>   delay_us, timeout_us)
> +#else /* CONFIG_RISCV_IOMMU_32BIT */
> +#define riscv_iommu_readq(iommu, addr) \
> + hi_lo_readq_relaxed((iommu)->reg + (addr))
> 
> -#define riscv_iommu_readl_timeout(iommu, addr, val, cond, delay_us, timeout_us) \
> - readx_poll_timeout(readl_relaxed, (iommu)->reg + (addr), val, cond, \
> +#define riscv_iommu_writeq(iommu, addr, val) \
> + ((addr == RISCV_IOMMU_REG_IOHPMCYCLES) ? \
> + lo_hi_writeq_relaxed((val), (iommu)->reg + (addr)) : \
> + hi_lo_writeq_relaxed((val), (iommu)->reg + (addr)))
> +
> +#define riscv_iommu_readq_timeout(iommu, addr, val, cond, delay_us, timeout_us) \
> + readx_poll_timeout(hi_lo_readq_relaxed, (iommu)->reg + (addr), val, cond, \
>   delay_us, timeout_us)
> +#endif /* CONFIG_RISCV_IOMMU_32BIT */
> 
> #endif
> -- 
> 2.20.1
> 
> 
> _______________________________________________
> linux-riscv mailing list
> linux-riscv@lists.infradead.org
> http://lists.infradead.org/mailman/listinfo/linux-riscv
Xu Lu March 26, 2025, 3:26 a.m. UTC | #2
Hi Jessica,

> Is such a platform conformant to the specification?

We have talked about this before [1]. I think the IOMMU spec does not
mandate the implementation of 8-byte access functionality. The related
sentences are listed below:

"The 8-byte IOMMU registers are defined in such a way that software
can perform two individual 4-byte accesses, or hardware can perform
two independent 4-byte transactions resulting from an 8-byte access,
to the high and low halves of the register, in that order, as long as
the register semantics, with regard to side-effects, are respected
between the two software accesses, or two hardware transactions,
respectively."

"Registers that are 64-bit wide may be accessed using either a 32-bit
or a 64-bit access."

> Either way, why is this a static build-time configuration choice rather than a dynamic run-time choice based on the FDT / ACPI tables / some other platform probing method?

I did not find available field in RIMT table to describe it. So maybe
adding a new config is faster.

[1] https://www.uwsg.indiana.edu/hypermail/linux/kernel/2408.3/03968.html

Best Regards,

Xu Lu

On Wed, Mar 26, 2025 at 2:51 AM Jessica Clarke <jrtc27@jrtc27.com> wrote:
>
> On 25 Mar 2025, at 14:42, Xu Lu <luxu.kernel@bytedance.com> wrote:
> >
> > Introduce a new configuration CONFIG_RISCV_IOMMU_32BIT to enable
> > splitting 8-byte access into 4-byte transactions for hardware platform
> > whose I/O bus limits access to 4-byte transfers.
> >
> > Signed-off-by: Xu Lu <luxu.kernel@bytedance.com>
>
> Is such a platform conformant to the specification? Either way, why is
> this a static build-time configuration choice rather than a dynamic
> run-time choice based on the FDT / ACPI tables / some other platform
> probing method?
>
> Jess
>
> > ---
> > drivers/iommu/riscv/Kconfig |  9 +++++++++
> > drivers/iommu/riscv/iommu.h | 28 +++++++++++++++++++++++-----
> > 2 files changed, 32 insertions(+), 5 deletions(-)
> >
> > diff --git a/drivers/iommu/riscv/Kconfig b/drivers/iommu/riscv/Kconfig
> > index c071816f59a6..b7c9ea22d969 100644
> > --- a/drivers/iommu/riscv/Kconfig
> > +++ b/drivers/iommu/riscv/Kconfig
> > @@ -18,3 +18,12 @@ config RISCV_IOMMU_PCI
> > def_bool y if RISCV_IOMMU && PCI_MSI
> > help
> >  Support for the PCIe implementation of RISC-V IOMMU architecture.
> > +
> > +config RISCV_IOMMU_32BIT
> > + bool "Support 4-Byte Accesses on RISC-V IOMMU Registers"
> > + depends on RISCV_IOMMU
> > + default n
> > + help
> > +  Support hardware platform whose I/O bus limits access to 4-byte
> > +  transfers. When enabled, all accesses to IOMMU registers will be
> > +  split into 4-byte accesses.
> > diff --git a/drivers/iommu/riscv/iommu.h b/drivers/iommu/riscv/iommu.h
> > index 46df79dd5495..0e3552a8142d 100644
> > --- a/drivers/iommu/riscv/iommu.h
> > +++ b/drivers/iommu/riscv/iommu.h
> > @@ -14,6 +14,10 @@
> > #include <linux/iommu.h>
> > #include <linux/types.h>
> > #include <linux/iopoll.h>
> > +#ifdef CONFIG_RISCV_IOMMU_32BIT
> > +#include <linux/io-64-nonatomic-hi-lo.h>
> > +#include <linux/io-64-nonatomic-lo-hi.h>
> > +#endif
> >
> > #include "iommu-bits.h"
> >
> > @@ -69,21 +73,35 @@ void riscv_iommu_disable(struct riscv_iommu_device *iommu);
> > #define riscv_iommu_readl(iommu, addr) \
> > readl_relaxed((iommu)->reg + (addr))
> >
> > -#define riscv_iommu_readq(iommu, addr) \
> > - readq_relaxed((iommu)->reg + (addr))
> > -
> > #define riscv_iommu_writel(iommu, addr, val) \
> > writel_relaxed((val), (iommu)->reg + (addr))
> >
> > +#define riscv_iommu_readl_timeout(iommu, addr, val, cond, delay_us, timeout_us) \
> > + readx_poll_timeout(readl_relaxed, (iommu)->reg + (addr), val, cond, \
> > +   delay_us, timeout_us)
> > +
> > +#ifndef CONFIG_RISCV_IOMMU_32BIT
> > +#define riscv_iommu_readq(iommu, addr) \
> > + readq_relaxed((iommu)->reg + (addr))
> > +
> > #define riscv_iommu_writeq(iommu, addr, val) \
> > writeq_relaxed((val), (iommu)->reg + (addr))
> >
> > #define riscv_iommu_readq_timeout(iommu, addr, val, cond, delay_us, timeout_us) \
> > readx_poll_timeout(readq_relaxed, (iommu)->reg + (addr), val, cond, \
> >   delay_us, timeout_us)
> > +#else /* CONFIG_RISCV_IOMMU_32BIT */
> > +#define riscv_iommu_readq(iommu, addr) \
> > + hi_lo_readq_relaxed((iommu)->reg + (addr))
> >
> > -#define riscv_iommu_readl_timeout(iommu, addr, val, cond, delay_us, timeout_us) \
> > - readx_poll_timeout(readl_relaxed, (iommu)->reg + (addr), val, cond, \
> > +#define riscv_iommu_writeq(iommu, addr, val) \
> > + ((addr == RISCV_IOMMU_REG_IOHPMCYCLES) ? \
> > + lo_hi_writeq_relaxed((val), (iommu)->reg + (addr)) : \
> > + hi_lo_writeq_relaxed((val), (iommu)->reg + (addr)))
> > +
> > +#define riscv_iommu_readq_timeout(iommu, addr, val, cond, delay_us, timeout_us) \
> > + readx_poll_timeout(hi_lo_readq_relaxed, (iommu)->reg + (addr), val, cond, \
> >   delay_us, timeout_us)
> > +#endif /* CONFIG_RISCV_IOMMU_32BIT */
> >
> > #endif
> > --
> > 2.20.1
> >
> >
> > _______________________________________________
> > linux-riscv mailing list
> > linux-riscv@lists.infradead.org
> > http://lists.infradead.org/mailman/listinfo/linux-riscv
>
Jason Gunthorpe April 1, 2025, 3:44 p.m. UTC | #3
On Wed, Mar 26, 2025 at 11:26:07AM +0800, Xu Lu wrote:
> Hi Jessica,
> 
> > Is such a platform conformant to the specification?
> 
> We have talked about this before [1]. I think the IOMMU spec does not
> mandate the implementation of 8-byte access functionality. The related
> sentences are listed below:
> 
> "The 8-byte IOMMU registers are defined in such a way that software
> can perform two individual 4-byte accesses, or hardware can perform
> two independent 4-byte transactions resulting from an 8-byte access,
> to the high and low halves of the register, in that order, as long as
> the register semantics, with regard to side-effects, are respected
> between the two software accesses, or two hardware transactions,
> respectively."

I think the commit message should explain an anyalsis that the code is
safe against the mentioned side effects due to ordering.

And a comment should explain this:

+#define riscv_iommu_writeq(iommu, addr, val) \
+       ((addr == RISCV_IOMMU_REG_IOHPMCYCLES) ? \
+        lo_hi_writeq_relaxed((val), (iommu)->reg + (addr)) : \
+        hi_lo_writeq_relaxed((val), (iommu)->reg + (addr)))

As the naive reading of the above spec paragraph doesn't seem like
there are exceptions or why one register has to be the opposite order.

Also missing () around addr

Jason
David Laight April 1, 2025, 9:02 p.m. UTC | #4
On Tue, 1 Apr 2025 12:44:12 -0300
Jason Gunthorpe <jgg@ziepe.ca> wrote:

> On Wed, Mar 26, 2025 at 11:26:07AM +0800, Xu Lu wrote:
> > Hi Jessica,
> >   
> > > Is such a platform conformant to the specification?  
> > 
> > We have talked about this before [1]. I think the IOMMU spec does not
> > mandate the implementation of 8-byte access functionality. The related
> > sentences are listed below:
> > 
> > "The 8-byte IOMMU registers are defined in such a way that software
> > can perform two individual 4-byte accesses, or hardware can perform
> > two independent 4-byte transactions resulting from an 8-byte access,
> > to the high and low halves of the register, in that order, as long as
> > the register semantics, with regard to side-effects, are respected
> > between the two software accesses, or two hardware transactions,
> > respectively."  
> 
> I think the commit message should explain an anyalsis that the code is
> safe against the mentioned side effects due to ordering.
> 
> And a comment should explain this:
> 
> +#define riscv_iommu_writeq(iommu, addr, val) \
> +       ((addr == RISCV_IOMMU_REG_IOHPMCYCLES) ? \
> +        lo_hi_writeq_relaxed((val), (iommu)->reg + (addr)) : \
> +        hi_lo_writeq_relaxed((val), (iommu)->reg + (addr)))
> 
> As the naive reading of the above spec paragraph doesn't seem like
> there are exceptions or why one register has to be the opposite order.
> 
> Also missing () around addr

It is also double-evaluating (addr).

I hope there is a lock, interleaved accesses from multiple cpu
may not work.

	David

> 
> Jason
>
Robin Murphy April 2, 2025, 11:28 a.m. UTC | #5
On 2025-03-25 2:42 pm, Xu Lu wrote:
> Introduce a new configuration CONFIG_RISCV_IOMMU_32BIT to enable
> splitting 8-byte access into 4-byte transactions for hardware platform
> whose I/O bus limits access to 4-byte transfers.
> 
> Signed-off-by: Xu Lu <luxu.kernel@bytedance.com>
> ---
>   drivers/iommu/riscv/Kconfig |  9 +++++++++
>   drivers/iommu/riscv/iommu.h | 28 +++++++++++++++++++++++-----
>   2 files changed, 32 insertions(+), 5 deletions(-)
> 
> diff --git a/drivers/iommu/riscv/Kconfig b/drivers/iommu/riscv/Kconfig
> index c071816f59a6..b7c9ea22d969 100644
> --- a/drivers/iommu/riscv/Kconfig
> +++ b/drivers/iommu/riscv/Kconfig
> @@ -18,3 +18,12 @@ config RISCV_IOMMU_PCI
>   	def_bool y if RISCV_IOMMU && PCI_MSI
>   	help
>   	  Support for the PCIe implementation of RISC-V IOMMU architecture.
> +
> +config RISCV_IOMMU_32BIT
> +	bool "Support 4-Byte Accesses on RISC-V IOMMU Registers"
> +	depends on RISCV_IOMMU
> +	default n
> +	help
> +	  Support hardware platform whose I/O bus limits access to 4-byte
> +	  transfers. When enabled, all accesses to IOMMU registers will be
> +	  split into 4-byte accesses.
> diff --git a/drivers/iommu/riscv/iommu.h b/drivers/iommu/riscv/iommu.h
> index 46df79dd5495..0e3552a8142d 100644
> --- a/drivers/iommu/riscv/iommu.h
> +++ b/drivers/iommu/riscv/iommu.h
> @@ -14,6 +14,10 @@
>   #include <linux/iommu.h>
>   #include <linux/types.h>
>   #include <linux/iopoll.h>
> +#ifdef CONFIG_RISCV_IOMMU_32BIT
> +#include <linux/io-64-nonatomic-hi-lo.h>
> +#include <linux/io-64-nonatomic-lo-hi.h>
> +#endif
>   
>   #include "iommu-bits.h"
>   
> @@ -69,21 +73,35 @@ void riscv_iommu_disable(struct riscv_iommu_device *iommu);
>   #define riscv_iommu_readl(iommu, addr) \
>   	readl_relaxed((iommu)->reg + (addr))
>   
> -#define riscv_iommu_readq(iommu, addr) \
> -	readq_relaxed((iommu)->reg + (addr))
> -
>   #define riscv_iommu_writel(iommu, addr, val) \
>   	writel_relaxed((val), (iommu)->reg + (addr))
>   
> +#define riscv_iommu_readl_timeout(iommu, addr, val, cond, delay_us, timeout_us) \
> +	readx_poll_timeout(readl_relaxed, (iommu)->reg + (addr), val, cond, \
> +			   delay_us, timeout_us)
> +
> +#ifndef CONFIG_RISCV_IOMMU_32BIT
> +#define riscv_iommu_readq(iommu, addr) \
> +	readq_relaxed((iommu)->reg + (addr))
> +
>   #define riscv_iommu_writeq(iommu, addr, val) \
>   	writeq_relaxed((val), (iommu)->reg + (addr))
>   
>   #define riscv_iommu_readq_timeout(iommu, addr, val, cond, delay_us, timeout_us) \
>   	readx_poll_timeout(readq_relaxed, (iommu)->reg + (addr), val, cond, \
>   			   delay_us, timeout_us)
> +#else /* CONFIG_RISCV_IOMMU_32BIT */
> +#define riscv_iommu_readq(iommu, addr) \
> +	hi_lo_readq_relaxed((iommu)->reg + (addr))
>   
> -#define riscv_iommu_readl_timeout(iommu, addr, val, cond, delay_us, timeout_us) \
> -	readx_poll_timeout(readl_relaxed, (iommu)->reg + (addr), val, cond, \
> +#define riscv_iommu_writeq(iommu, addr, val) \
> +	((addr == RISCV_IOMMU_REG_IOHPMCYCLES) ? \
> +	 lo_hi_writeq_relaxed((val), (iommu)->reg + (addr)) : \
> +	 hi_lo_writeq_relaxed((val), (iommu)->reg + (addr)))

Echoing Jason's comment, what is this even trying to achieve? Nothing in 
the spec suggests that the cycle counter register is functionally 
different from the other PMU counter registers (other than its 
self-contained overflow bit).

It is not, in general, safe to do a split write to a running counter 
either way - low-high vs. high-low just moves the problem around, 
changing *which* combinations of values are problematic and capable of 
overflowing into each other between the writes. If the PMU driver can't 
write counters atomically, it will need to ensure that it only ever 
write them while stopped (at which point the order surely shouldn't 
matter). Conversely, though, reading from running counters is a bit more 
reasonable, but it needs more than just hi_lo_readq to guarantee it's 
not got a torn result.

Thanks,
Robin.

> +
> +#define riscv_iommu_readq_timeout(iommu, addr, val, cond, delay_us, timeout_us) \
> +	readx_poll_timeout(hi_lo_readq_relaxed, (iommu)->reg + (addr), val, cond, \
>   			   delay_us, timeout_us)
> +#endif /* CONFIG_RISCV_IOMMU_32BIT */
>   
>   #endif
Xu Lu April 2, 2025, 11:58 a.m. UTC | #6
Hi Robin and Jason,

Thanks for your comments and sorry for the late reply.

The iohpmcycles register is handled specially because unlike the other
IOMMU registers, its effective bits lies in the upper half. I want to
ensure that all effective bits are the last ones to update. For
example, when updating DDTP register, we update the upper half first
which only contains its table PPN, and then update its lower half
which contains the MODE bits which really enable translation. The main
reason is that when software splits an 8-byte write to two 4-byte
writes, the hardware is not aware whether it is an 8-byte write or a
4-byte write when it receives the first write. Thus it will always
make the value effective every time it receives a 4-byte write.

On Wed, Apr 2, 2025 at 7:29 PM Robin Murphy <robin.murphy@arm.com> wrote:
>
> On 2025-03-25 2:42 pm, Xu Lu wrote:
> > Introduce a new configuration CONFIG_RISCV_IOMMU_32BIT to enable
> > splitting 8-byte access into 4-byte transactions for hardware platform
> > whose I/O bus limits access to 4-byte transfers.
> >
> > Signed-off-by: Xu Lu <luxu.kernel@bytedance.com>
> > ---
> >   drivers/iommu/riscv/Kconfig |  9 +++++++++
> >   drivers/iommu/riscv/iommu.h | 28 +++++++++++++++++++++++-----
> >   2 files changed, 32 insertions(+), 5 deletions(-)
> >
> > diff --git a/drivers/iommu/riscv/Kconfig b/drivers/iommu/riscv/Kconfig
> > index c071816f59a6..b7c9ea22d969 100644
> > --- a/drivers/iommu/riscv/Kconfig
> > +++ b/drivers/iommu/riscv/Kconfig
> > @@ -18,3 +18,12 @@ config RISCV_IOMMU_PCI
> >       def_bool y if RISCV_IOMMU && PCI_MSI
> >       help
> >         Support for the PCIe implementation of RISC-V IOMMU architecture.
> > +
> > +config RISCV_IOMMU_32BIT
> > +     bool "Support 4-Byte Accesses on RISC-V IOMMU Registers"
> > +     depends on RISCV_IOMMU
> > +     default n
> > +     help
> > +       Support hardware platform whose I/O bus limits access to 4-byte
> > +       transfers. When enabled, all accesses to IOMMU registers will be
> > +       split into 4-byte accesses.
> > diff --git a/drivers/iommu/riscv/iommu.h b/drivers/iommu/riscv/iommu.h
> > index 46df79dd5495..0e3552a8142d 100644
> > --- a/drivers/iommu/riscv/iommu.h
> > +++ b/drivers/iommu/riscv/iommu.h
> > @@ -14,6 +14,10 @@
> >   #include <linux/iommu.h>
> >   #include <linux/types.h>
> >   #include <linux/iopoll.h>
> > +#ifdef CONFIG_RISCV_IOMMU_32BIT
> > +#include <linux/io-64-nonatomic-hi-lo.h>
> > +#include <linux/io-64-nonatomic-lo-hi.h>
> > +#endif
> >
> >   #include "iommu-bits.h"
> >
> > @@ -69,21 +73,35 @@ void riscv_iommu_disable(struct riscv_iommu_device *iommu);
> >   #define riscv_iommu_readl(iommu, addr) \
> >       readl_relaxed((iommu)->reg + (addr))
> >
> > -#define riscv_iommu_readq(iommu, addr) \
> > -     readq_relaxed((iommu)->reg + (addr))
> > -
> >   #define riscv_iommu_writel(iommu, addr, val) \
> >       writel_relaxed((val), (iommu)->reg + (addr))
> >
> > +#define riscv_iommu_readl_timeout(iommu, addr, val, cond, delay_us, timeout_us) \
> > +     readx_poll_timeout(readl_relaxed, (iommu)->reg + (addr), val, cond, \
> > +                        delay_us, timeout_us)
> > +
> > +#ifndef CONFIG_RISCV_IOMMU_32BIT
> > +#define riscv_iommu_readq(iommu, addr) \
> > +     readq_relaxed((iommu)->reg + (addr))
> > +
> >   #define riscv_iommu_writeq(iommu, addr, val) \
> >       writeq_relaxed((val), (iommu)->reg + (addr))
> >
> >   #define riscv_iommu_readq_timeout(iommu, addr, val, cond, delay_us, timeout_us) \
> >       readx_poll_timeout(readq_relaxed, (iommu)->reg + (addr), val, cond, \
> >                          delay_us, timeout_us)
> > +#else /* CONFIG_RISCV_IOMMU_32BIT */
> > +#define riscv_iommu_readq(iommu, addr) \
> > +     hi_lo_readq_relaxed((iommu)->reg + (addr))
> >
> > -#define riscv_iommu_readl_timeout(iommu, addr, val, cond, delay_us, timeout_us) \
> > -     readx_poll_timeout(readl_relaxed, (iommu)->reg + (addr), val, cond, \
> > +#define riscv_iommu_writeq(iommu, addr, val) \
> > +     ((addr == RISCV_IOMMU_REG_IOHPMCYCLES) ? \
> > +      lo_hi_writeq_relaxed((val), (iommu)->reg + (addr)) : \
> > +      hi_lo_writeq_relaxed((val), (iommu)->reg + (addr)))
>
> Echoing Jason's comment, what is this even trying to achieve? Nothing in
> the spec suggests that the cycle counter register is functionally
> different from the other PMU counter registers (other than its
> self-contained overflow bit).
>
> It is not, in general, safe to do a split write to a running counter
> either way - low-high vs. high-low just moves the problem around,
> changing *which* combinations of values are problematic and capable of
> overflowing into each other between the writes. If the PMU driver can't
> write counters atomically, it will need to ensure that it only ever
> write them while stopped (at which point the order surely shouldn't
> matter). Conversely, though, reading from running counters is a bit more
> reasonable, but it needs more than just hi_lo_readq to guarantee it's
> not got a torn result.

You are right. It is more appropriate to ensure update iohpmcycles
with cycle counting disabled. I will remove this redundant branch in
the next version.

>
> Thanks,
> Robin.
>
> > +
> > +#define riscv_iommu_readq_timeout(iommu, addr, val, cond, delay_us, timeout_us) \
> > +     readx_poll_timeout(hi_lo_readq_relaxed, (iommu)->reg + (addr), val, cond, \
> >                          delay_us, timeout_us)
> > +#endif /* CONFIG_RISCV_IOMMU_32BIT */
> >
> >   #endif
>

Best regards,
Xu Lu
Xu Lu April 2, 2025, 12:20 p.m. UTC | #7
Hi David,

All these 64 bit registers are updated during initialization (when
allocating page table and iommu queues) and destruction (when
disabling interrupts and iommu queues) in the existing iommu driver.
There won't be multiple cpu operating simultaneously.

Some 64 bit pmu registers are also updated during pmu event
initialization and overflow handling in the oncoming iommu pmu
driver[1]. There will also be no concurrency issues due to the pmu
framework unless the ACPI/DTS is misconfigured and two iommus get the
same register resources.

So maybe a lock is not necessary. Please correct me if I missed anything.

[1] https://lore.kernel.org/all/20250115030306.29735-1-zong.li@sifive.com/

Best regards,
Xu Lu

On Wed, Apr 2, 2025 at 5:02 AM David Laight
<david.laight.linux@gmail.com> wrote:
>
> On Tue, 1 Apr 2025 12:44:12 -0300
> Jason Gunthorpe <jgg@ziepe.ca> wrote:
>
> > On Wed, Mar 26, 2025 at 11:26:07AM +0800, Xu Lu wrote:
> > > Hi Jessica,
> > >
> > > > Is such a platform conformant to the specification?
> > >
> > > We have talked about this before [1]. I think the IOMMU spec does not
> > > mandate the implementation of 8-byte access functionality. The related
> > > sentences are listed below:
> > >
> > > "The 8-byte IOMMU registers are defined in such a way that software
> > > can perform two individual 4-byte accesses, or hardware can perform
> > > two independent 4-byte transactions resulting from an 8-byte access,
> > > to the high and low halves of the register, in that order, as long as
> > > the register semantics, with regard to side-effects, are respected
> > > between the two software accesses, or two hardware transactions,
> > > respectively."
> >
> > I think the commit message should explain an anyalsis that the code is
> > safe against the mentioned side effects due to ordering.
> >
> > And a comment should explain this:
> >
> > +#define riscv_iommu_writeq(iommu, addr, val) \
> > +       ((addr == RISCV_IOMMU_REG_IOHPMCYCLES) ? \
> > +        lo_hi_writeq_relaxed((val), (iommu)->reg + (addr)) : \
> > +        hi_lo_writeq_relaxed((val), (iommu)->reg + (addr)))
> >
> > As the naive reading of the above spec paragraph doesn't seem like
> > there are exceptions or why one register has to be the opposite order.
> >
> > Also missing () around addr
>
> It is also double-evaluating (addr).
>
> I hope there is a lock, interleaved accesses from multiple cpu
> may not work.



>
>         David
>
> >
> > Jason
> >
>
David Laight April 2, 2025, 1 p.m. UTC | #8
On Wed, 2 Apr 2025 12:28:54 +0100
Robin Murphy <robin.murphy@arm.com> wrote:

...
> It is not, in general, safe to do a split write to a running counter 
> either way - low-high vs. high-low just moves the problem around, 
> changing *which* combinations of values are problematic and capable of 
> overflowing into each other between the writes. If the PMU driver can't 
> write counters atomically, it will need to ensure that it only ever 
> write them while stopped (at which point the order surely shouldn't 
> matter). Conversely, though, reading from running counters is a bit more 
> reasonable, but it needs more than just hi_lo_readq to guarantee it's 
> not got a torn result.

Or have hardware that latches a value waiting for the following cycle.
That could be done in the bus interface logic.
In which case the cycle better come from the same cpu, not another
cpu accessing an entirely different register.
That requires a global lock for the device.

	David
diff mbox series

Patch

diff --git a/drivers/iommu/riscv/Kconfig b/drivers/iommu/riscv/Kconfig
index c071816f59a6..b7c9ea22d969 100644
--- a/drivers/iommu/riscv/Kconfig
+++ b/drivers/iommu/riscv/Kconfig
@@ -18,3 +18,12 @@  config RISCV_IOMMU_PCI
 	def_bool y if RISCV_IOMMU && PCI_MSI
 	help
 	  Support for the PCIe implementation of RISC-V IOMMU architecture.
+
+config RISCV_IOMMU_32BIT
+	bool "Support 4-Byte Accesses on RISC-V IOMMU Registers"
+	depends on RISCV_IOMMU
+	default n
+	help
+	  Support hardware platform whose I/O bus limits access to 4-byte
+	  transfers. When enabled, all accesses to IOMMU registers will be
+	  split into 4-byte accesses.
diff --git a/drivers/iommu/riscv/iommu.h b/drivers/iommu/riscv/iommu.h
index 46df79dd5495..0e3552a8142d 100644
--- a/drivers/iommu/riscv/iommu.h
+++ b/drivers/iommu/riscv/iommu.h
@@ -14,6 +14,10 @@ 
 #include <linux/iommu.h>
 #include <linux/types.h>
 #include <linux/iopoll.h>
+#ifdef CONFIG_RISCV_IOMMU_32BIT
+#include <linux/io-64-nonatomic-hi-lo.h>
+#include <linux/io-64-nonatomic-lo-hi.h>
+#endif
 
 #include "iommu-bits.h"
 
@@ -69,21 +73,35 @@  void riscv_iommu_disable(struct riscv_iommu_device *iommu);
 #define riscv_iommu_readl(iommu, addr) \
 	readl_relaxed((iommu)->reg + (addr))
 
-#define riscv_iommu_readq(iommu, addr) \
-	readq_relaxed((iommu)->reg + (addr))
-
 #define riscv_iommu_writel(iommu, addr, val) \
 	writel_relaxed((val), (iommu)->reg + (addr))
 
+#define riscv_iommu_readl_timeout(iommu, addr, val, cond, delay_us, timeout_us) \
+	readx_poll_timeout(readl_relaxed, (iommu)->reg + (addr), val, cond, \
+			   delay_us, timeout_us)
+
+#ifndef CONFIG_RISCV_IOMMU_32BIT
+#define riscv_iommu_readq(iommu, addr) \
+	readq_relaxed((iommu)->reg + (addr))
+
 #define riscv_iommu_writeq(iommu, addr, val) \
 	writeq_relaxed((val), (iommu)->reg + (addr))
 
 #define riscv_iommu_readq_timeout(iommu, addr, val, cond, delay_us, timeout_us) \
 	readx_poll_timeout(readq_relaxed, (iommu)->reg + (addr), val, cond, \
 			   delay_us, timeout_us)
+#else /* CONFIG_RISCV_IOMMU_32BIT */
+#define riscv_iommu_readq(iommu, addr) \
+	hi_lo_readq_relaxed((iommu)->reg + (addr))
 
-#define riscv_iommu_readl_timeout(iommu, addr, val, cond, delay_us, timeout_us) \
-	readx_poll_timeout(readl_relaxed, (iommu)->reg + (addr), val, cond, \
+#define riscv_iommu_writeq(iommu, addr, val) \
+	((addr == RISCV_IOMMU_REG_IOHPMCYCLES) ? \
+	 lo_hi_writeq_relaxed((val), (iommu)->reg + (addr)) : \
+	 hi_lo_writeq_relaxed((val), (iommu)->reg + (addr)))
+
+#define riscv_iommu_readq_timeout(iommu, addr, val, cond, delay_us, timeout_us) \
+	readx_poll_timeout(hi_lo_readq_relaxed, (iommu)->reg + (addr), val, cond, \
 			   delay_us, timeout_us)
+#endif /* CONFIG_RISCV_IOMMU_32BIT */
 
 #endif