diff mbox series

[v5,6/6] soc: renesas: Add L2 cache management for RZ/Five SoC

Message ID 20221212115505.36770-7-prabhakar.mahadev-lad.rj@bp.renesas.com (mailing list archive)
State Superseded
Delegated to: Geert Uytterhoeven
Headers show
Series AX45MP: Add support to non-coherent DMA | expand

Commit Message

Prabhakar Dec. 12, 2022, 11:55 a.m. UTC
From: Lad Prabhakar <prabhakar.mahadev-lad.rj@bp.renesas.com>

I/O Coherence Port (IOCP) provides an AXI interface for connecting
external non-caching masters, such as DMA controllers. The accesses
from IOCP are coherent with D-Caches and L2 Cache.

IOCP is a specification option and is disabled on the Renesas RZ/Five
SoC due to this reason IP blocks using DMA will fail.

The Andes AX45MP core has a Programmable Physical Memory Attributes (PMA)
block that allows dynamic adjustment of memory attributes in the runtime.
It contains a configurable amount of PMA entries implemented as CSR
registers to control the attributes of memory locations in interest.
Below are the memory attributes supported:
* Device, Non-bufferable
* Device, bufferable
* Memory, Non-cacheable, Non-bufferable
* Memory, Non-cacheable, Bufferable
* Memory, Write-back, No-allocate
* Memory, Write-back, Read-allocate
* Memory, Write-back, Write-allocate
* Memory, Write-back, Read and Write-allocate

More info about PMA (section 10.3):
Link: http://www.andestech.com/wp-content/uploads/AX45MP-1C-Rev.-5.0.0-Datasheet.pdf

As a workaround for SoCs with IOCP disabled CMO needs to be handled by
software. Firstly OpenSBI configures the memory region as
"Memory, Non-cacheable, Bufferable" and passes this region as a global
shared dma pool as a DT node. With DMA_GLOBAL_POOL enabled all DMA
allocations happen from this region and synchronization callbacks are
implemented to synchronize when doing DMA transactions.

Example PMA region passes as a DT node from OpenSBI:
    reserved-memory {
        #address-cells = <2>;
        #size-cells = <2>;
        ranges;

        pma_resv0@58000000 {
            compatible = "shared-dma-pool";
            reg = <0x0 0x58000000 0x0 0x08000000>;
            no-map;
            linux,dma-default;
        };
    };

Signed-off-by: Lad Prabhakar <prabhakar.mahadev-lad.rj@bp.renesas.com>
---
v4 -> v5
* Dropped code for configuring L2 cache
* Dropped code for configuring PMA
* Updated commit message
* Added comments
* Changed static branch enable/disable order

RFC v3 -> v4
* Made use of runtime patching instead of compile time
* Now just exposing single function ax45mp_no_iocp_cmo() for CMO handling
* Added a check to make sure cache line size is always 64 bytes
* Renamed folder rzf -> rzfive
* Improved Kconfig description
* Dropped L2 cache configuration
* Dropped unnecessary casts
* Fixed comments pointed by Geert.
---
 arch/riscv/include/asm/cacheflush.h       |   8 +
 arch/riscv/include/asm/errata_list.h      |  28 ++-
 drivers/soc/renesas/Kconfig               |   6 +
 drivers/soc/renesas/Makefile              |   2 +
 drivers/soc/renesas/rzfive/Kconfig        |   6 +
 drivers/soc/renesas/rzfive/Makefile       |   3 +
 drivers/soc/renesas/rzfive/ax45mp_cache.c | 256 ++++++++++++++++++++++
 7 files changed, 303 insertions(+), 6 deletions(-)
 create mode 100644 drivers/soc/renesas/rzfive/Kconfig
 create mode 100644 drivers/soc/renesas/rzfive/Makefile
 create mode 100644 drivers/soc/renesas/rzfive/ax45mp_cache.c

Comments

Geert Uytterhoeven Dec. 15, 2022, 10:34 a.m. UTC | #1
Hi Prabhakar,

On Mon, Dec 12, 2022 at 12:58 PM Prabhakar <prabhakar.csengg@gmail.com> wrote:
> From: Lad Prabhakar <prabhakar.mahadev-lad.rj@bp.renesas.com>
>
> I/O Coherence Port (IOCP) provides an AXI interface for connecting
> external non-caching masters, such as DMA controllers. The accesses
> from IOCP are coherent with D-Caches and L2 Cache.
>
> IOCP is a specification option and is disabled on the Renesas RZ/Five
> SoC due to this reason IP blocks using DMA will fail.
>
> The Andes AX45MP core has a Programmable Physical Memory Attributes (PMA)
> block that allows dynamic adjustment of memory attributes in the runtime.
> It contains a configurable amount of PMA entries implemented as CSR
> registers to control the attributes of memory locations in interest.
> Below are the memory attributes supported:
> * Device, Non-bufferable
> * Device, bufferable
> * Memory, Non-cacheable, Non-bufferable
> * Memory, Non-cacheable, Bufferable
> * Memory, Write-back, No-allocate
> * Memory, Write-back, Read-allocate
> * Memory, Write-back, Write-allocate
> * Memory, Write-back, Read and Write-allocate
>
> More info about PMA (section 10.3):
> Link: http://www.andestech.com/wp-content/uploads/AX45MP-1C-Rev.-5.0.0-Datasheet.pdf
>
> As a workaround for SoCs with IOCP disabled CMO needs to be handled by
> software. Firstly OpenSBI configures the memory region as
> "Memory, Non-cacheable, Bufferable" and passes this region as a global
> shared dma pool as a DT node. With DMA_GLOBAL_POOL enabled all DMA
> allocations happen from this region and synchronization callbacks are
> implemented to synchronize when doing DMA transactions.
>
> Example PMA region passes as a DT node from OpenSBI:
>     reserved-memory {
>         #address-cells = <2>;
>         #size-cells = <2>;
>         ranges;
>
>         pma_resv0@58000000 {
>             compatible = "shared-dma-pool";
>             reg = <0x0 0x58000000 0x0 0x08000000>;
>             no-map;
>             linux,dma-default;
>         };
>     };
>
> Signed-off-by: Lad Prabhakar <prabhakar.mahadev-lad.rj@bp.renesas.com>

Thanks for your patch!

>  arch/riscv/include/asm/cacheflush.h       |   8 +
>  arch/riscv/include/asm/errata_list.h      |  28 ++-
>  drivers/soc/renesas/Kconfig               |   6 +
>  drivers/soc/renesas/Makefile              |   2 +
>  drivers/soc/renesas/rzfive/Kconfig        |   6 +
>  drivers/soc/renesas/rzfive/Makefile       |   3 +
>  drivers/soc/renesas/rzfive/ax45mp_cache.c | 256 ++++++++++++++++++++++

Given this touches arch/riscv/include/asm/, I don't think the
code belongs under drivers/soc/renesas/.

Gr{oetje,eeting}s,

                        Geert

--
Geert Uytterhoeven -- There's lots of Linux beyond ia32 -- geert@linux-m68k.org

In personal conversations with technical people, I call myself a hacker. But
when I'm talking to journalists I just say "programmer" or something like that.
                                -- Linus Torvalds
Prabhakar Dec. 15, 2022, 11:03 a.m. UTC | #2
Hi Geert,

On Thu, Dec 15, 2022 at 10:36 AM Geert Uytterhoeven
<geert@linux-m68k.org> wrote:
>
> Hi Prabhakar,
>
> On Mon, Dec 12, 2022 at 12:58 PM Prabhakar <prabhakar.csengg@gmail.com> wrote:
> > From: Lad Prabhakar <prabhakar.mahadev-lad.rj@bp.renesas.com>
> >
> > I/O Coherence Port (IOCP) provides an AXI interface for connecting
> > external non-caching masters, such as DMA controllers. The accesses
> > from IOCP are coherent with D-Caches and L2 Cache.
> >
> > IOCP is a specification option and is disabled on the Renesas RZ/Five
> > SoC due to this reason IP blocks using DMA will fail.
> >
> > The Andes AX45MP core has a Programmable Physical Memory Attributes (PMA)
> > block that allows dynamic adjustment of memory attributes in the runtime.
> > It contains a configurable amount of PMA entries implemented as CSR
> > registers to control the attributes of memory locations in interest.
> > Below are the memory attributes supported:
> > * Device, Non-bufferable
> > * Device, bufferable
> > * Memory, Non-cacheable, Non-bufferable
> > * Memory, Non-cacheable, Bufferable
> > * Memory, Write-back, No-allocate
> > * Memory, Write-back, Read-allocate
> > * Memory, Write-back, Write-allocate
> > * Memory, Write-back, Read and Write-allocate
> >
> > More info about PMA (section 10.3):
> > Link: http://www.andestech.com/wp-content/uploads/AX45MP-1C-Rev.-5.0.0-Datasheet.pdf
> >
> > As a workaround for SoCs with IOCP disabled CMO needs to be handled by
> > software. Firstly OpenSBI configures the memory region as
> > "Memory, Non-cacheable, Bufferable" and passes this region as a global
> > shared dma pool as a DT node. With DMA_GLOBAL_POOL enabled all DMA
> > allocations happen from this region and synchronization callbacks are
> > implemented to synchronize when doing DMA transactions.
> >
> > Example PMA region passes as a DT node from OpenSBI:
> >     reserved-memory {
> >         #address-cells = <2>;
> >         #size-cells = <2>;
> >         ranges;
> >
> >         pma_resv0@58000000 {
> >             compatible = "shared-dma-pool";
> >             reg = <0x0 0x58000000 0x0 0x08000000>;
> >             no-map;
> >             linux,dma-default;
> >         };
> >     };
> >
> > Signed-off-by: Lad Prabhakar <prabhakar.mahadev-lad.rj@bp.renesas.com>
>
> Thanks for your patch!
>
> >  arch/riscv/include/asm/cacheflush.h       |   8 +
> >  arch/riscv/include/asm/errata_list.h      |  28 ++-
> >  drivers/soc/renesas/Kconfig               |   6 +
> >  drivers/soc/renesas/Makefile              |   2 +
> >  drivers/soc/renesas/rzfive/Kconfig        |   6 +
> >  drivers/soc/renesas/rzfive/Makefile       |   3 +
> >  drivers/soc/renesas/rzfive/ax45mp_cache.c | 256 ++++++++++++++++++++++
>
> Given this touches arch/riscv/include/asm/, I don't think the
> code belongs under drivers/soc/renesas/.
>
Ok. Do you have any suggestions on where you want me to put this code?

Cheers,
Prabhakar
Geert Uytterhoeven Dec. 15, 2022, 11:10 a.m. UTC | #3
Hi Prabhakar,

On Thu, Dec 15, 2022 at 12:06 PM Lad, Prabhakar
<prabhakar.csengg@gmail.com> wrote:
> On Thu, Dec 15, 2022 at 10:36 AM Geert Uytterhoeven
> <geert@linux-m68k.org> wrote:
> > On Mon, Dec 12, 2022 at 12:58 PM Prabhakar <prabhakar.csengg@gmail.com> wrote:
> > > From: Lad Prabhakar <prabhakar.mahadev-lad.rj@bp.renesas.com>
> > >
> > > I/O Coherence Port (IOCP) provides an AXI interface for connecting
> > > external non-caching masters, such as DMA controllers. The accesses
> > > from IOCP are coherent with D-Caches and L2 Cache.
> > >
> > > IOCP is a specification option and is disabled on the Renesas RZ/Five
> > > SoC due to this reason IP blocks using DMA will fail.
> > >
> > > The Andes AX45MP core has a Programmable Physical Memory Attributes (PMA)
> > > block that allows dynamic adjustment of memory attributes in the runtime.
> > > It contains a configurable amount of PMA entries implemented as CSR
> > > registers to control the attributes of memory locations in interest.
> > > Below are the memory attributes supported:
> > > * Device, Non-bufferable
> > > * Device, bufferable
> > > * Memory, Non-cacheable, Non-bufferable
> > > * Memory, Non-cacheable, Bufferable
> > > * Memory, Write-back, No-allocate
> > > * Memory, Write-back, Read-allocate
> > > * Memory, Write-back, Write-allocate
> > > * Memory, Write-back, Read and Write-allocate
> > >
> > > More info about PMA (section 10.3):
> > > Link: http://www.andestech.com/wp-content/uploads/AX45MP-1C-Rev.-5.0.0-Datasheet.pdf
> > >
> > > As a workaround for SoCs with IOCP disabled CMO needs to be handled by
> > > software. Firstly OpenSBI configures the memory region as
> > > "Memory, Non-cacheable, Bufferable" and passes this region as a global
> > > shared dma pool as a DT node. With DMA_GLOBAL_POOL enabled all DMA
> > > allocations happen from this region and synchronization callbacks are
> > > implemented to synchronize when doing DMA transactions.
> > >
> > > Example PMA region passes as a DT node from OpenSBI:
> > >     reserved-memory {
> > >         #address-cells = <2>;
> > >         #size-cells = <2>;
> > >         ranges;
> > >
> > >         pma_resv0@58000000 {
> > >             compatible = "shared-dma-pool";
> > >             reg = <0x0 0x58000000 0x0 0x08000000>;
> > >             no-map;
> > >             linux,dma-default;
> > >         };
> > >     };
> > >
> > > Signed-off-by: Lad Prabhakar <prabhakar.mahadev-lad.rj@bp.renesas.com>
> >
> > Thanks for your patch!
> >
> > >  arch/riscv/include/asm/cacheflush.h       |   8 +
> > >  arch/riscv/include/asm/errata_list.h      |  28 ++-
> > >  drivers/soc/renesas/Kconfig               |   6 +
> > >  drivers/soc/renesas/Makefile              |   2 +
> > >  drivers/soc/renesas/rzfive/Kconfig        |   6 +
> > >  drivers/soc/renesas/rzfive/Makefile       |   3 +
> > >  drivers/soc/renesas/rzfive/ax45mp_cache.c | 256 ++++++++++++++++++++++
> >
> > Given this touches arch/riscv/include/asm/, I don't think the
> > code belongs under drivers/soc/renesas/.
> >
> Ok. Do you have any suggestions on where you want me to put this code?

As it plugs into core riscv functionality, I think it should be under
arch/riscv/.

if the RISC-V maintainers object to that, another option is
drivers/soc/andestech/ or (new) drivers/cache/

Gr{oetje,eeting}s,

                        Geert

--
Geert Uytterhoeven -- There's lots of Linux beyond ia32 -- geert@linux-m68k.org

In personal conversations with technical people, I call myself a hacker. But
when I'm talking to journalists I just say "programmer" or something like that.
                                -- Linus Torvalds
Prabhakar Dec. 15, 2022, 5:46 p.m. UTC | #4
Hi Geert,

On Thu, Dec 15, 2022 at 11:10 AM Geert Uytterhoeven
<geert@linux-m68k.org> wrote:
>
> Hi Prabhakar,
>
> On Thu, Dec 15, 2022 at 12:06 PM Lad, Prabhakar
> <prabhakar.csengg@gmail.com> wrote:
> > On Thu, Dec 15, 2022 at 10:36 AM Geert Uytterhoeven
> > <geert@linux-m68k.org> wrote:
> > > On Mon, Dec 12, 2022 at 12:58 PM Prabhakar <prabhakar.csengg@gmail.com> wrote:
> > > > From: Lad Prabhakar <prabhakar.mahadev-lad.rj@bp.renesas.com>
> > > >
> > > > I/O Coherence Port (IOCP) provides an AXI interface for connecting
> > > > external non-caching masters, such as DMA controllers. The accesses
> > > > from IOCP are coherent with D-Caches and L2 Cache.
> > > >
> > > > IOCP is a specification option and is disabled on the Renesas RZ/Five
> > > > SoC due to this reason IP blocks using DMA will fail.
> > > >
> > > > The Andes AX45MP core has a Programmable Physical Memory Attributes (PMA)
> > > > block that allows dynamic adjustment of memory attributes in the runtime.
> > > > It contains a configurable amount of PMA entries implemented as CSR
> > > > registers to control the attributes of memory locations in interest.
> > > > Below are the memory attributes supported:
> > > > * Device, Non-bufferable
> > > > * Device, bufferable
> > > > * Memory, Non-cacheable, Non-bufferable
> > > > * Memory, Non-cacheable, Bufferable
> > > > * Memory, Write-back, No-allocate
> > > > * Memory, Write-back, Read-allocate
> > > > * Memory, Write-back, Write-allocate
> > > > * Memory, Write-back, Read and Write-allocate
> > > >
> > > > More info about PMA (section 10.3):
> > > > Link: http://www.andestech.com/wp-content/uploads/AX45MP-1C-Rev.-5.0.0-Datasheet.pdf
> > > >
> > > > As a workaround for SoCs with IOCP disabled CMO needs to be handled by
> > > > software. Firstly OpenSBI configures the memory region as
> > > > "Memory, Non-cacheable, Bufferable" and passes this region as a global
> > > > shared dma pool as a DT node. With DMA_GLOBAL_POOL enabled all DMA
> > > > allocations happen from this region and synchronization callbacks are
> > > > implemented to synchronize when doing DMA transactions.
> > > >
> > > > Example PMA region passes as a DT node from OpenSBI:
> > > >     reserved-memory {
> > > >         #address-cells = <2>;
> > > >         #size-cells = <2>;
> > > >         ranges;
> > > >
> > > >         pma_resv0@58000000 {
> > > >             compatible = "shared-dma-pool";
> > > >             reg = <0x0 0x58000000 0x0 0x08000000>;
> > > >             no-map;
> > > >             linux,dma-default;
> > > >         };
> > > >     };
> > > >
> > > > Signed-off-by: Lad Prabhakar <prabhakar.mahadev-lad.rj@bp.renesas.com>
> > >
> > > Thanks for your patch!
> > >
> > > >  arch/riscv/include/asm/cacheflush.h       |   8 +
> > > >  arch/riscv/include/asm/errata_list.h      |  28 ++-
> > > >  drivers/soc/renesas/Kconfig               |   6 +
> > > >  drivers/soc/renesas/Makefile              |   2 +
> > > >  drivers/soc/renesas/rzfive/Kconfig        |   6 +
> > > >  drivers/soc/renesas/rzfive/Makefile       |   3 +
> > > >  drivers/soc/renesas/rzfive/ax45mp_cache.c | 256 ++++++++++++++++++++++
> > >
> > > Given this touches arch/riscv/include/asm/, I don't think the
> > > code belongs under drivers/soc/renesas/.
> > >
> > Ok. Do you have any suggestions on where you want me to put this code?
>
> As it plugs into core riscv functionality, I think it should be under
> arch/riscv/.
> if the RISC-V maintainers object to that, another option is
> drivers/soc/andestech/ or (new) drivers/cache/
>
RISC-V maintainers had already made it clear to not to include vendor
specific stuff in the arch/riscv folder, so I'll consider putting this
into drivers/cache/ folder to sync with the bindings.

Conor/Palmer - do you have any objections/suggestions?

Cheers,
Prabhakar
Conor Dooley Dec. 15, 2022, 7:54 p.m. UTC | #5
On Thu, Dec 15, 2022 at 05:46:42PM +0000, Lad, Prabhakar wrote:
> Hi Geert,
> 
> On Thu, Dec 15, 2022 at 11:10 AM Geert Uytterhoeven
> <geert@linux-m68k.org> wrote:
> >
> > Hi Prabhakar,
> >
> > On Thu, Dec 15, 2022 at 12:06 PM Lad, Prabhakar
> > <prabhakar.csengg@gmail.com> wrote:
> > > On Thu, Dec 15, 2022 at 10:36 AM Geert Uytterhoeven
> > > <geert@linux-m68k.org> wrote:
> > > > On Mon, Dec 12, 2022 at 12:58 PM Prabhakar <prabhakar.csengg@gmail.com> wrote:
> > > > > From: Lad Prabhakar <prabhakar.mahadev-lad.rj@bp.renesas.com>
> > > > >
> > > > > I/O Coherence Port (IOCP) provides an AXI interface for connecting
> > > > > external non-caching masters, such as DMA controllers. The accesses
> > > > > from IOCP are coherent with D-Caches and L2 Cache.
> > > > >
> > > > > IOCP is a specification option and is disabled on the Renesas RZ/Five
> > > > > SoC due to this reason IP blocks using DMA will fail.
> > > > >
> > > > > The Andes AX45MP core has a Programmable Physical Memory Attributes (PMA)
> > > > > block that allows dynamic adjustment of memory attributes in the runtime.
> > > > > It contains a configurable amount of PMA entries implemented as CSR
> > > > > registers to control the attributes of memory locations in interest.
> > > > > Below are the memory attributes supported:
> > > > > * Device, Non-bufferable
> > > > > * Device, bufferable
> > > > > * Memory, Non-cacheable, Non-bufferable
> > > > > * Memory, Non-cacheable, Bufferable
> > > > > * Memory, Write-back, No-allocate
> > > > > * Memory, Write-back, Read-allocate
> > > > > * Memory, Write-back, Write-allocate
> > > > > * Memory, Write-back, Read and Write-allocate
> > > > >
> > > > > More info about PMA (section 10.3):
> > > > > Link: http://www.andestech.com/wp-content/uploads/AX45MP-1C-Rev.-5.0.0-Datasheet.pdf
> > > > >
> > > > > As a workaround for SoCs with IOCP disabled CMO needs to be handled by
> > > > > software. Firstly OpenSBI configures the memory region as
> > > > > "Memory, Non-cacheable, Bufferable" and passes this region as a global
> > > > > shared dma pool as a DT node. With DMA_GLOBAL_POOL enabled all DMA
> > > > > allocations happen from this region and synchronization callbacks are
> > > > > implemented to synchronize when doing DMA transactions.
> > > > >
> > > > > Example PMA region passes as a DT node from OpenSBI:
> > > > >     reserved-memory {
> > > > >         #address-cells = <2>;
> > > > >         #size-cells = <2>;
> > > > >         ranges;
> > > > >
> > > > >         pma_resv0@58000000 {
> > > > >             compatible = "shared-dma-pool";
> > > > >             reg = <0x0 0x58000000 0x0 0x08000000>;
> > > > >             no-map;
> > > > >             linux,dma-default;
> > > > >         };
> > > > >     };
> > > > >
> > > > > Signed-off-by: Lad Prabhakar <prabhakar.mahadev-lad.rj@bp.renesas.com>
> > > >
> > > > Thanks for your patch!
> > > >
> > > > >  arch/riscv/include/asm/cacheflush.h       |   8 +
> > > > >  arch/riscv/include/asm/errata_list.h      |  28 ++-
> > > > >  drivers/soc/renesas/Kconfig               |   6 +
> > > > >  drivers/soc/renesas/Makefile              |   2 +
> > > > >  drivers/soc/renesas/rzfive/Kconfig        |   6 +
> > > > >  drivers/soc/renesas/rzfive/Makefile       |   3 +
> > > > >  drivers/soc/renesas/rzfive/ax45mp_cache.c | 256 ++++++++++++++++++++++
> > > >
> > > > Given this touches arch/riscv/include/asm/, I don't think the
> > > > code belongs under drivers/soc/renesas/.
> > > >
> > > Ok. Do you have any suggestions on where you want me to put this code?
> >
> > As it plugs into core riscv functionality, I think it should be under
> > arch/riscv/.
> > if the RISC-V maintainers object to that, another option is
> > drivers/soc/andestech/ or (new) drivers/cache/
> >
> RISC-V maintainers had already made it clear to not to include vendor
> specific stuff in the arch/riscv folder, so I'll consider putting this
> into drivers/cache/ folder to sync with the bindings.
> 
> Conor/Palmer - do you have any objections/suggestions?

I'm not its maintainer so sorta moot what I say, but having drivers in
arch/riscv makes little sense to me..
Putting stuff in drivers/cache does sound like a good idea since the
binding is going there too.

The SiFive ccache driver is in drivers/soc and it was suggested to me
this week that there's likely going to be a second SiFive cache driver
at some point in the near future. Plus Microchip are going to have to
add cache management stuff to the existing SiFive ccache driver.
Having them be their own thing makes sense in my mind - especially since
they're not tied to SoCs sold by Andes or SiFive.

I had a quick, and I mean *quick* look through other soc drivers to see
if there were any other cache controller drivers but nothing stood out
to me. Maybe someone else has more of a clue there. Ditto for misc, had
a look but nothing seemed obvious.

If we do do drivers/cache & move the SiFive stuff there too, someone
needs to look after it. I guess I can if noone else feels strongly
since I seem to be the Responsible Adult for the SiFive ones already?
Geert Uytterhoeven Dec. 15, 2022, 8:17 p.m. UTC | #6
Hi Conor,

On Thu, Dec 15, 2022 at 8:54 PM Conor Dooley <conor@kernel.org> wrote:
> On Thu, Dec 15, 2022 at 05:46:42PM +0000, Lad, Prabhakar wrote:
> > On Thu, Dec 15, 2022 at 11:10 AM Geert Uytterhoeven
> > <geert@linux-m68k.org> wrote:
> > > On Thu, Dec 15, 2022 at 12:06 PM Lad, Prabhakar
> > > <prabhakar.csengg@gmail.com> wrote:
> > > > On Thu, Dec 15, 2022 at 10:36 AM Geert Uytterhoeven
> > > > <geert@linux-m68k.org> wrote:
> > > > > On Mon, Dec 12, 2022 at 12:58 PM Prabhakar <prabhakar.csengg@gmail.com> wrote:
> > > > > > From: Lad Prabhakar <prabhakar.mahadev-lad.rj@bp.renesas.com>
> > > > > >
> > > > > > I/O Coherence Port (IOCP) provides an AXI interface for connecting
> > > > > > external non-caching masters, such as DMA controllers. The accesses
> > > > > > from IOCP are coherent with D-Caches and L2 Cache.
> > > > > >
> > > > > > IOCP is a specification option and is disabled on the Renesas RZ/Five
> > > > > > SoC due to this reason IP blocks using DMA will fail.
> > > > > >
> > > > > > The Andes AX45MP core has a Programmable Physical Memory Attributes (PMA)
> > > > > > block that allows dynamic adjustment of memory attributes in the runtime.
> > > > > > It contains a configurable amount of PMA entries implemented as CSR
> > > > > > registers to control the attributes of memory locations in interest.
> > > > > > Below are the memory attributes supported:
> > > > > > * Device, Non-bufferable
> > > > > > * Device, bufferable
> > > > > > * Memory, Non-cacheable, Non-bufferable
> > > > > > * Memory, Non-cacheable, Bufferable
> > > > > > * Memory, Write-back, No-allocate
> > > > > > * Memory, Write-back, Read-allocate
> > > > > > * Memory, Write-back, Write-allocate
> > > > > > * Memory, Write-back, Read and Write-allocate
> > > > > >
> > > > > > More info about PMA (section 10.3):
> > > > > > Link: http://www.andestech.com/wp-content/uploads/AX45MP-1C-Rev.-5.0.0-Datasheet.pdf
> > > > > >
> > > > > > As a workaround for SoCs with IOCP disabled CMO needs to be handled by
> > > > > > software. Firstly OpenSBI configures the memory region as
> > > > > > "Memory, Non-cacheable, Bufferable" and passes this region as a global
> > > > > > shared dma pool as a DT node. With DMA_GLOBAL_POOL enabled all DMA
> > > > > > allocations happen from this region and synchronization callbacks are
> > > > > > implemented to synchronize when doing DMA transactions.
> > > > > >
> > > > > > Example PMA region passes as a DT node from OpenSBI:
> > > > > >     reserved-memory {
> > > > > >         #address-cells = <2>;
> > > > > >         #size-cells = <2>;
> > > > > >         ranges;
> > > > > >
> > > > > >         pma_resv0@58000000 {
> > > > > >             compatible = "shared-dma-pool";
> > > > > >             reg = <0x0 0x58000000 0x0 0x08000000>;
> > > > > >             no-map;
> > > > > >             linux,dma-default;
> > > > > >         };
> > > > > >     };
> > > > > >
> > > > > > Signed-off-by: Lad Prabhakar <prabhakar.mahadev-lad.rj@bp.renesas.com>
> > > > >
> > > > > Thanks for your patch!
> > > > >
> > > > > >  arch/riscv/include/asm/cacheflush.h       |   8 +
> > > > > >  arch/riscv/include/asm/errata_list.h      |  28 ++-
> > > > > >  drivers/soc/renesas/Kconfig               |   6 +
> > > > > >  drivers/soc/renesas/Makefile              |   2 +
> > > > > >  drivers/soc/renesas/rzfive/Kconfig        |   6 +
> > > > > >  drivers/soc/renesas/rzfive/Makefile       |   3 +
> > > > > >  drivers/soc/renesas/rzfive/ax45mp_cache.c | 256 ++++++++++++++++++++++
> > > > >
> > > > > Given this touches arch/riscv/include/asm/, I don't think the
> > > > > code belongs under drivers/soc/renesas/.
> > > > >
> > > > Ok. Do you have any suggestions on where you want me to put this code?
> > >
> > > As it plugs into core riscv functionality, I think it should be under
> > > arch/riscv/.
> > > if the RISC-V maintainers object to that, another option is
> > > drivers/soc/andestech/ or (new) drivers/cache/
> > >
> > RISC-V maintainers had already made it clear to not to include vendor
> > specific stuff in the arch/riscv folder, so I'll consider putting this
> > into drivers/cache/ folder to sync with the bindings.
> >
> > Conor/Palmer - do you have any objections/suggestions?
>
> I'm not its maintainer so sorta moot what I say, but having drivers in
> arch/riscv makes little sense to me..
> Putting stuff in drivers/cache does sound like a good idea since the
> binding is going there too.
>
> The SiFive ccache driver is in drivers/soc and it was suggested to me
> this week that there's likely going to be a second SiFive cache driver
> at some point in the near future. Plus Microchip are going to have to
> add cache management stuff to the existing SiFive ccache driver.
> Having them be their own thing makes sense in my mind - especially since
> they're not tied to SoCs sold by Andes or SiFive.
>
> I had a quick, and I mean *quick* look through other soc drivers to see
> if there were any other cache controller drivers but nothing stood out
> to me. Maybe someone else has more of a clue there. Ditto for misc, had
> a look but nothing seemed obvious.

Usually they're under arch/:
$ git ls-files -- "arch/*cache*" | wc -l
148
$ git ls-files -- "drivers/*cache*" | wc -l
63

E.g. arch/arm/mm/cache-l2x0.c.

Gr{oetje,eeting}s,

                        Geert

--
Geert Uytterhoeven -- There's lots of Linux beyond ia32 -- geert@linux-m68k.org

In personal conversations with technical people, I call myself a hacker. But
when I'm talking to journalists I just say "programmer" or something like that.
                                -- Linus Torvalds
Conor Dooley Dec. 15, 2022, 8:32 p.m. UTC | #7
On 15 December 2022 12:17:38 GMT-08:00, Geert Uytterhoeven <geert@linux-m68k.org> wrote:
>Hi Conor,
>
>On Thu, Dec 15, 2022 at 8:54 PM Conor Dooley <conor@kernel.org> wrote:
>> On Thu, Dec 15, 2022 at 05:46:42PM +0000, Lad, Prabhakar wrote:
>> > On Thu, Dec 15, 2022 at 11:10 AM Geert Uytterhoeven
>> > <geert@linux-m68k.org> wrote:
>> > > On Thu, Dec 15, 2022 at 12:06 PM Lad, Prabhakar
>> > > <prabhakar.csengg@gmail.com> wrote:
>> > > > On Thu, Dec 15, 2022 at 10:36 AM Geert Uytterhoeven
>> > > > <geert@linux-m68k.org> wrote:
>> > > > > On Mon, Dec 12, 2022 at 12:58 PM Prabhakar <prabhakar.csengg@gmail.com> wrote:
>> > > > > > From: Lad Prabhakar <prabhakar.mahadev-lad.rj@bp.renesas.com>
>> > > > > >
>> > > > > > I/O Coherence Port (IOCP) provides an AXI interface for connecting
>> > > > > > external non-caching masters, such as DMA controllers. The accesses
>> > > > > > from IOCP are coherent with D-Caches and L2 Cache.
>> > > > > >
>> > > > > > IOCP is a specification option and is disabled on the Renesas RZ/Five
>> > > > > > SoC due to this reason IP blocks using DMA will fail.
>> > > > > >
>> > > > > > The Andes AX45MP core has a Programmable Physical Memory Attributes (PMA)
>> > > > > > block that allows dynamic adjustment of memory attributes in the runtime.
>> > > > > > It contains a configurable amount of PMA entries implemented as CSR
>> > > > > > registers to control the attributes of memory locations in interest.
>> > > > > > Below are the memory attributes supported:
>> > > > > > * Device, Non-bufferable
>> > > > > > * Device, bufferable
>> > > > > > * Memory, Non-cacheable, Non-bufferable
>> > > > > > * Memory, Non-cacheable, Bufferable
>> > > > > > * Memory, Write-back, No-allocate
>> > > > > > * Memory, Write-back, Read-allocate
>> > > > > > * Memory, Write-back, Write-allocate
>> > > > > > * Memory, Write-back, Read and Write-allocate
>> > > > > >
>> > > > > > More info about PMA (section 10.3):
>> > > > > > Link: http://www.andestech.com/wp-content/uploads/AX45MP-1C-Rev.-5.0.0-Datasheet.pdf
>> > > > > >
>> > > > > > As a workaround for SoCs with IOCP disabled CMO needs to be handled by
>> > > > > > software. Firstly OpenSBI configures the memory region as
>> > > > > > "Memory, Non-cacheable, Bufferable" and passes this region as a global
>> > > > > > shared dma pool as a DT node. With DMA_GLOBAL_POOL enabled all DMA
>> > > > > > allocations happen from this region and synchronization callbacks are
>> > > > > > implemented to synchronize when doing DMA transactions.
>> > > > > >
>> > > > > > Example PMA region passes as a DT node from OpenSBI:
>> > > > > >     reserved-memory {
>> > > > > >         #address-cells = <2>;
>> > > > > >         #size-cells = <2>;
>> > > > > >         ranges;
>> > > > > >
>> > > > > >         pma_resv0@58000000 {
>> > > > > >             compatible = "shared-dma-pool";
>> > > > > >             reg = <0x0 0x58000000 0x0 0x08000000>;
>> > > > > >             no-map;
>> > > > > >             linux,dma-default;
>> > > > > >         };
>> > > > > >     };
>> > > > > >
>> > > > > > Signed-off-by: Lad Prabhakar <prabhakar.mahadev-lad.rj@bp.renesas.com>
>> > > > >
>> > > > > Thanks for your patch!
>> > > > >
>> > > > > >  arch/riscv/include/asm/cacheflush.h       |   8 +
>> > > > > >  arch/riscv/include/asm/errata_list.h      |  28 ++-
>> > > > > >  drivers/soc/renesas/Kconfig               |   6 +
>> > > > > >  drivers/soc/renesas/Makefile              |   2 +
>> > > > > >  drivers/soc/renesas/rzfive/Kconfig        |   6 +
>> > > > > >  drivers/soc/renesas/rzfive/Makefile       |   3 +
>> > > > > >  drivers/soc/renesas/rzfive/ax45mp_cache.c | 256 ++++++++++++++++++++++
>> > > > >
>> > > > > Given this touches arch/riscv/include/asm/, I don't think the
>> > > > > code belongs under drivers/soc/renesas/.
>> > > > >
>> > > > Ok. Do you have any suggestions on where you want me to put this code?
>> > >
>> > > As it plugs into core riscv functionality, I think it should be under
>> > > arch/riscv/.
>> > > if the RISC-V maintainers object to that, another option is
>> > > drivers/soc/andestech/ or (new) drivers/cache/
>> > >
>> > RISC-V maintainers had already made it clear to not to include vendor
>> > specific stuff in the arch/riscv folder, so I'll consider putting this
>> > into drivers/cache/ folder to sync with the bindings.
>> >
>> > Conor/Palmer - do you have any objections/suggestions?
>>
>> I'm not its maintainer so sorta moot what I say, but having drivers in
>> arch/riscv makes little sense to me..
>> Putting stuff in drivers/cache does sound like a good idea since the
>> binding is going there too.
>>
>> The SiFive ccache driver is in drivers/soc and it was suggested to me
>> this week that there's likely going to be a second SiFive cache driver
>> at some point in the near future. Plus Microchip are going to have to
>> add cache management stuff to the existing SiFive ccache driver.
>> Having them be their own thing makes sense in my mind - especially since
>> they're not tied to SoCs sold by Andes or SiFive.
>>
>> I had a quick, and I mean *quick* look through other soc drivers to see
>> if there were any other cache controller drivers but nothing stood out
>> to me. Maybe someone else has more of a clue there. Ditto for misc, had
>> a look but nothing seemed obvious.
>
>Usually they're under arch/:
>$ git ls-files -- "arch/*cache*" | wc -l
>148
>$ git ls-files -- "drivers/*cache*" | wc -l
>63

That's for checking what I could not!
Don't think my roaming data would cover a kernel clone!

>E.g. arch/arm/mm/cache-l2x0.c.

If that's where they usually go, is there a real reason not to do the same here?
Whatever about a limited set of riscv cache drivers, moving all the other ones around to a new directory doesnt seem like a great idea.
Palmer Dabbelt Dec. 15, 2022, 9:40 p.m. UTC | #8
On Thu, 15 Dec 2022 12:17:38 PST (-0800), geert@linux-m68k.org wrote:
> Hi Conor,
>
> On Thu, Dec 15, 2022 at 8:54 PM Conor Dooley <conor@kernel.org> wrote:
>> On Thu, Dec 15, 2022 at 05:46:42PM +0000, Lad, Prabhakar wrote:
>> > On Thu, Dec 15, 2022 at 11:10 AM Geert Uytterhoeven
>> > <geert@linux-m68k.org> wrote:
>> > > On Thu, Dec 15, 2022 at 12:06 PM Lad, Prabhakar
>> > > <prabhakar.csengg@gmail.com> wrote:
>> > > > On Thu, Dec 15, 2022 at 10:36 AM Geert Uytterhoeven
>> > > > <geert@linux-m68k.org> wrote:
>> > > > > On Mon, Dec 12, 2022 at 12:58 PM Prabhakar <prabhakar.csengg@gmail.com> wrote:
>> > > > > > From: Lad Prabhakar <prabhakar.mahadev-lad.rj@bp.renesas.com>
>> > > > > >
>> > > > > > I/O Coherence Port (IOCP) provides an AXI interface for connecting
>> > > > > > external non-caching masters, such as DMA controllers. The accesses
>> > > > > > from IOCP are coherent with D-Caches and L2 Cache.
>> > > > > >
>> > > > > > IOCP is a specification option and is disabled on the Renesas RZ/Five
>> > > > > > SoC due to this reason IP blocks using DMA will fail.
>> > > > > >
>> > > > > > The Andes AX45MP core has a Programmable Physical Memory Attributes (PMA)
>> > > > > > block that allows dynamic adjustment of memory attributes in the runtime.
>> > > > > > It contains a configurable amount of PMA entries implemented as CSR
>> > > > > > registers to control the attributes of memory locations in interest.
>> > > > > > Below are the memory attributes supported:
>> > > > > > * Device, Non-bufferable
>> > > > > > * Device, bufferable
>> > > > > > * Memory, Non-cacheable, Non-bufferable
>> > > > > > * Memory, Non-cacheable, Bufferable
>> > > > > > * Memory, Write-back, No-allocate
>> > > > > > * Memory, Write-back, Read-allocate
>> > > > > > * Memory, Write-back, Write-allocate
>> > > > > > * Memory, Write-back, Read and Write-allocate
>> > > > > >
>> > > > > > More info about PMA (section 10.3):
>> > > > > > Link: http://www.andestech.com/wp-content/uploads/AX45MP-1C-Rev.-5.0.0-Datasheet.pdf
>> > > > > >
>> > > > > > As a workaround for SoCs with IOCP disabled CMO needs to be handled by
>> > > > > > software. Firstly OpenSBI configures the memory region as
>> > > > > > "Memory, Non-cacheable, Bufferable" and passes this region as a global
>> > > > > > shared dma pool as a DT node. With DMA_GLOBAL_POOL enabled all DMA
>> > > > > > allocations happen from this region and synchronization callbacks are
>> > > > > > implemented to synchronize when doing DMA transactions.
>> > > > > >
>> > > > > > Example PMA region passes as a DT node from OpenSBI:
>> > > > > >     reserved-memory {
>> > > > > >         #address-cells = <2>;
>> > > > > >         #size-cells = <2>;
>> > > > > >         ranges;
>> > > > > >
>> > > > > >         pma_resv0@58000000 {
>> > > > > >             compatible = "shared-dma-pool";
>> > > > > >             reg = <0x0 0x58000000 0x0 0x08000000>;
>> > > > > >             no-map;
>> > > > > >             linux,dma-default;
>> > > > > >         };
>> > > > > >     };
>> > > > > >
>> > > > > > Signed-off-by: Lad Prabhakar <prabhakar.mahadev-lad.rj@bp.renesas.com>
>> > > > >
>> > > > > Thanks for your patch!
>> > > > >
>> > > > > >  arch/riscv/include/asm/cacheflush.h       |   8 +
>> > > > > >  arch/riscv/include/asm/errata_list.h      |  28 ++-
>> > > > > >  drivers/soc/renesas/Kconfig               |   6 +
>> > > > > >  drivers/soc/renesas/Makefile              |   2 +
>> > > > > >  drivers/soc/renesas/rzfive/Kconfig        |   6 +
>> > > > > >  drivers/soc/renesas/rzfive/Makefile       |   3 +
>> > > > > >  drivers/soc/renesas/rzfive/ax45mp_cache.c | 256 ++++++++++++++++++++++
>> > > > >
>> > > > > Given this touches arch/riscv/include/asm/, I don't think the
>> > > > > code belongs under drivers/soc/renesas/.
>> > > > >
>> > > > Ok. Do you have any suggestions on where you want me to put this code?
>> > >
>> > > As it plugs into core riscv functionality, I think it should be under
>> > > arch/riscv/.
>> > > if the RISC-V maintainers object to that, another option is
>> > > drivers/soc/andestech/ or (new) drivers/cache/
>> > >
>> > RISC-V maintainers had already made it clear to not to include vendor
>> > specific stuff in the arch/riscv folder, so I'll consider putting this

We have vendor-specific behavior in arch/riscv now, and it's even in the 
policies (the behavior has been there for a while, the policy is new).  
There's probably a bunch of posts saying otherwise because we used to 
not take vendor-specific behavior, but that's not the case any more.

>> > into drivers/cache/ folder to sync with the bindings.
>> >
>> > Conor/Palmer - do you have any objections/suggestions?
>>
>> I'm not its maintainer so sorta moot what I say, but having drivers in
>> arch/riscv makes little sense to me..

Some drivers are pretty tightly coupled to an ISA, I'm thinking of 
things like interrupts/timers where we're writing CSRs.  I could see how 
cache controllers would fall into that category, doubly so as they get 
integrated to the memory model.  That leaves us in sort of a grey area 
and there's certainly ports that have way more drivers in arch so it's 
not an uncommon way to do things.

I generally lean pretty heavily towards keeping things that could at all 
be considered out of arch/riscv and instead have them in some sort of 
driver folder.  That probably results in more total work to do, as we've 
got to have arch/riscv interfaces with one use case and sync up between 
multiple trees sometimes to get stuff merged.  I think it also results 
in better code: being closer to the other drivers makes us more likely 
to get reviewed by folks who understand the space, and it's generally 
easier to share code in the subsystems.

>> Putting stuff in drivers/cache does sound like a good idea since the
>> binding is going there too.
>>
>> The SiFive ccache driver is in drivers/soc and it was suggested to me
>> this week that there's likely going to be a second SiFive cache driver
>> at some point in the near future. Plus Microchip are going to have to
>> add cache management stuff to the existing SiFive ccache driver.
>> Having them be their own thing makes sense in my mind - especially since
>> they're not tied to SoCs sold by Andes or SiFive.
>>
>> I had a quick, and I mean *quick* look through other soc drivers to see
>> if there were any other cache controller drivers but nothing stood out
>> to me. Maybe someone else has more of a clue there. Ditto for misc, had
>> a look but nothing seemed obvious.
>
> Usually they're under arch/:
> $ git ls-files -- "arch/*cache*" | wc -l
> 148
> $ git ls-files -- "drivers/*cache*" | wc -l
> 63
>
> E.g. arch/arm/mm/cache-l2x0.c.

Above all I like to do things as normally as possible, so if most cache 
drivers are in arch then I'm OK with.  Looks like we originally had it 
arch/riscv, though, until 9209fb51896f ("riscv: move sifive_l2_cache.c 
to drivers/soc") moved it out.

I don't really remember this history/rationale here, at the time the 
SiFive cache driver wasn't really doing anything all that exciting: it 
was just way enable and some statistics, we hadn't really planned on 
using it for the non-coherent stuff that folks are now trying to 
retrofit it to handle.  That sort of thing makes it a lot more coupled 
to the ISA.

Given that we already moved the SiFive one out it seems sane to just 
start with the rest in drivers/soc/$VENDOR.  Looks like it was 
Christoph's idea to do the move, so I'm adding him in case he's got an 
opinion (and also the SOC alias, as that seems generally relevant).

> Gr{oetje,eeting}s,
>
>                         Geert
Christoph Hellwig Dec. 16, 2022, 7:02 a.m. UTC | #9
On Thu, Dec 15, 2022 at 01:40:30PM -0800, Palmer Dabbelt wrote:
> Given that we already moved the SiFive one out it seems sane to just start
> with the rest in drivers/soc/$VENDOR.  Looks like it was Christoph's idea to
> do the move, so I'm adding him in case he's got an opinion (and also the SOC
> alias, as that seems generally relevant).

Well, it isn't an integral architecture feature, so it doesn't really
beloing into arch.  Even the irqchip and timer drivers that are more
less architectural are in drivers/ as they aren't really core
architecture code.
Palmer Dabbelt Dec. 16, 2022, 4:32 p.m. UTC | #10
On Thu, 15 Dec 2022 23:02:58 PST (-0800), Christoph Hellwig wrote:
> On Thu, Dec 15, 2022 at 01:40:30PM -0800, Palmer Dabbelt wrote:
>> Given that we already moved the SiFive one out it seems sane to just start
>> with the rest in drivers/soc/$VENDOR.  Looks like it was Christoph's idea to
>> do the move, so I'm adding him in case he's got an opinion (and also the SOC
>> alias, as that seems generally relevant).
>
> Well, it isn't an integral architecture feature, so it doesn't really
> beloing into arch.  Even the irqchip and timer drivers that are more
> less architectural are in drivers/ as they aren't really core
> architecture code.

That makes sense to me, it just looks like the SiFive ccache is the only 
one that's in drivers/soc/$VENDOR, the rest are in arch.  It looks like 
mostly older ports that have vendor-specific cache files in arch (ie, 
arm has it but arm64 doesn't).  Maybe that's just because the newer 
architectures sorted out standard ISA interfaces for these and thus 
don't need the vendor-specific bits?  I think we're likely to end up 
with quite a few of these vendor-specific cache management schemes on 
RISC-V.

I'm always happy to keep stuff out of arch/riscv, though.  So maybe we 
just buck the trend here and stick to drivers/soc/$VENDOR like we did 
for the first one?
Arnd Bergmann Dec. 16, 2022, 8:04 p.m. UTC | #11
On Fri, Dec 16, 2022, at 17:32, Palmer Dabbelt wrote:
> On Thu, 15 Dec 2022 23:02:58 PST (-0800), Christoph Hellwig wrote:
>> On Thu, Dec 15, 2022 at 01:40:30PM -0800, Palmer Dabbelt wrote:
>>> Given that we already moved the SiFive one out it seems sane to just start
>>> with the rest in drivers/soc/$VENDOR.  Looks like it was Christoph's idea to
>>> do the move, so I'm adding him in case he's got an opinion (and also the SOC
>>> alias, as that seems generally relevant).
>>
>> Well, it isn't an integral architecture feature, so it doesn't really
>> beloing into arch.  Even the irqchip and timer drivers that are more
>> less architectural are in drivers/ as they aren't really core
>> architecture code.
>
> That makes sense to me, it just looks like the SiFive ccache is the only 
> one that's in drivers/soc/$VENDOR, the rest are in arch.  It looks like 
> mostly older ports that have vendor-specific cache files in arch (ie, 
> arm has it but arm64 doesn't).  Maybe that's just because the newer 
> architectures sorted out standard ISA interfaces for these and thus 
> don't need the vendor-specific bits?  I think we're likely to end up 
> with quite a few of these vendor-specific cache management schemes on 
> RISC-V.
>
> I'm always happy to keep stuff out of arch/riscv, though.  So maybe we 
> just buck the trend here and stick to drivers/soc/$VENDOR like we did 
> for the first one?

I don't particularly like drivers/soc/ to become more of a dumping
ground for random drivers. If there are several SoCs that have the
same requirement to do a particular thing, the logical step would
be to put them into a proper subsystem, with a well-defined interface
to dma-mapping and virtualization frameworks.

The other things we have in drivers/soc/ are usually either
soc_device drivers for identifying the system, or they export
interfaces used by soc specific drivers.

     Arnd
Conor Dooley Dec. 17, 2022, 9:35 p.m. UTC | #12
Hey Prabhakar,
Just a couple kinda minor bits here.

On Mon, Dec 12, 2022 at 11:55:05AM +0000, Prabhakar wrote:
> From: Lad Prabhakar <prabhakar.mahadev-lad.rj@bp.renesas.com>
> 
> I/O Coherence Port (IOCP) provides an AXI interface for connecting
> external non-caching masters, such as DMA controllers. The accesses
> from IOCP are coherent with D-Caches and L2 Cache.
> 
> IOCP is a specification option and is disabled on the Renesas RZ/Five
> SoC due to this reason IP blocks using DMA will fail.
> 
> The Andes AX45MP core has a Programmable Physical Memory Attributes (PMA)
> block that allows dynamic adjustment of memory attributes in the runtime.
> It contains a configurable amount of PMA entries implemented as CSR
> registers to control the attributes of memory locations in interest.
> Below are the memory attributes supported:
> * Device, Non-bufferable
> * Device, bufferable
> * Memory, Non-cacheable, Non-bufferable
> * Memory, Non-cacheable, Bufferable
> * Memory, Write-back, No-allocate
> * Memory, Write-back, Read-allocate
> * Memory, Write-back, Write-allocate
> * Memory, Write-back, Read and Write-allocate
> 
> More info about PMA (section 10.3):
> Link: http://www.andestech.com/wp-content/uploads/AX45MP-1C-Rev.-5.0.0-Datasheet.pdf
> 
> As a workaround for SoCs with IOCP disabled CMO needs to be handled by
> software. Firstly OpenSBI configures the memory region as
> "Memory, Non-cacheable, Bufferable" and passes this region as a global
> shared dma pool as a DT node. With DMA_GLOBAL_POOL enabled all DMA
> allocations happen from this region and synchronization callbacks are
> implemented to synchronize when doing DMA transactions.
> 
> Example PMA region passes as a DT node from OpenSBI:
>     reserved-memory {
>         #address-cells = <2>;
>         #size-cells = <2>;
>         ranges;
> 
>         pma_resv0@58000000 {
>             compatible = "shared-dma-pool";
>             reg = <0x0 0x58000000 0x0 0x08000000>;
>             no-map;
>             linux,dma-default;
>         };
>     };
> 
> Signed-off-by: Lad Prabhakar <prabhakar.mahadev-lad.rj@bp.renesas.com>
> ---
> v4 -> v5
> * Dropped code for configuring L2 cache
> * Dropped code for configuring PMA
> * Updated commit message
> * Added comments
> * Changed static branch enable/disable order
> 
> RFC v3 -> v4
> * Made use of runtime patching instead of compile time
> * Now just exposing single function ax45mp_no_iocp_cmo() for CMO handling
> * Added a check to make sure cache line size is always 64 bytes
> * Renamed folder rzf -> rzfive
> * Improved Kconfig description
> * Dropped L2 cache configuration
> * Dropped unnecessary casts
> * Fixed comments pointed by Geert.
> ---
>  arch/riscv/include/asm/cacheflush.h       |   8 +
>  arch/riscv/include/asm/errata_list.h      |  28 ++-
>  drivers/soc/renesas/Kconfig               |   6 +
>  drivers/soc/renesas/Makefile              |   2 +
>  drivers/soc/renesas/rzfive/Kconfig        |   6 +
>  drivers/soc/renesas/rzfive/Makefile       |   3 +
>  drivers/soc/renesas/rzfive/ax45mp_cache.c | 256 ++++++++++++++++++++++
>  7 files changed, 303 insertions(+), 6 deletions(-)
>  create mode 100644 drivers/soc/renesas/rzfive/Kconfig
>  create mode 100644 drivers/soc/renesas/rzfive/Makefile
>  create mode 100644 drivers/soc/renesas/rzfive/ax45mp_cache.c

> diff --git a/drivers/soc/renesas/rzfive/ax45mp_cache.c b/drivers/soc/renesas/rzfive/ax45mp_cache.c
> new file mode 100644
> index 000000000000..d98f71b86b9b
> --- /dev/null
> +++ b/drivers/soc/renesas/rzfive/ax45mp_cache.c
> @@ -0,0 +1,256 @@
> +// SPDX-License-Identifier: GPL-2.0
> +/*
> + * non-coherent cache functions for Andes AX45MP
> + *
> + * Copyright (C) 2022 Renesas Electronics Corp.
> + */
> +
> +#include <linux/cacheflush.h>
> +#include <linux/cacheinfo.h>
> +#include <linux/dma-direction.h>
> +#include <linux/of_address.h>
> +#include <linux/of_platform.h>
> +
> +#include <asm/cacheflush.h>
> +#include <asm/sbi.h>
> +
> +/* L2 cache registers */
> +#define AX45MP_L2C_REG_CTL_OFFSET		0x8
> +
> +#define AX45MP_L2C_REG_C0_CMD_OFFSET		0x40
> +#define AX45MP_L2C_REG_C0_ACC_OFFSET		0x48
> +#define AX45MP_L2C_REG_STATUS_OFFSET		0x80
> +
> +/* D-cache operation */
> +#define AX45MP_CCTL_L1D_VA_INVAL		0
> +#define AX45MP_CCTL_L1D_VA_WB			1
> +
> +/* L2 CCTL status */
> +#define AX45MP_CCTL_L2_STATUS_IDLE		0
> +
> +/* L2 CCTL status cores mask */
> +#define AX45MP_CCTL_L2_STATUS_C0_MASK		0xf
> +
> +/* L2 cache operation */
> +#define AX45MP_CCTL_L2_PA_INVAL			0x8
> +#define AX45MP_CCTL_L2_PA_WB			0x9
> +
> +#define AX45MP_L2C_REG_PER_CORE_OFFSET		0x10
> +#define AX45MP_CCTL_L2_STATUS_PER_CORE_OFFSET	4
> +
> +#define AX45MP_L2C_REG_CN_CMD_OFFSET(n)	\
> +	(AX45MP_L2C_REG_C0_CMD_OFFSET + ((n) * AX45MP_L2C_REG_PER_CORE_OFFSET))
> +#define AX45MP_L2C_REG_CN_ACC_OFFSET(n)	\
> +	(AX45MP_L2C_REG_C0_ACC_OFFSET + ((n) * AX45MP_L2C_REG_PER_CORE_OFFSET))
> +#define AX45MP_CCTL_L2_STATUS_CN_MASK(n)	\
> +	(AX45MP_CCTL_L2_STATUS_C0_MASK << ((n) * AX45MP_CCTL_L2_STATUS_PER_CORE_OFFSET))
> +
> +#define AX45MP_CCTL_REG_UCCTLBEGINADDR_NUM	0x80b
> +#define AX45MP_CCTL_REG_UCCTLCOMMAND_NUM	0x80c
> +
> +#define AX45MP_CACHE_LINE_SIZE			64
> +
> +struct ax45mp_priv {
> +	void __iomem *l2c_base;
> +	u32 ax45mp_cache_line_size;
> +};
> +
> +static struct ax45mp_priv *ax45mp_priv;
> +static DEFINE_STATIC_KEY_FALSE(ax45mp_l2c_configured);
> +
> +/* L2 Cache operations */
> +static inline uint32_t ax45mp_cpu_l2c_get_cctl_status(void)
> +{
> +	return readl(ax45mp_priv->l2c_base + AX45MP_L2C_REG_STATUS_OFFSET);
> +}
> +
> +/*
> + * Software trigger CCTL operation (cache maintenance operations) by writing
> + * to ucctlcommand and ucctlbeginaddr registers and write-back an L2 cache
> + * entry.
> + */
> +static void ax45mp_cpu_dcache_wb_range(void *start, void *end, int line_size)
> +{
> +	void __iomem *base = ax45mp_priv->l2c_base;
> +	int mhartid = smp_processor_id();
> +	unsigned long pa;
> +
> +	while (end > start) {
> +		csr_write(AX45MP_CCTL_REG_UCCTLBEGINADDR_NUM, start);
> +		csr_write(AX45MP_CCTL_REG_UCCTLCOMMAND_NUM, AX45MP_CCTL_L1D_VA_WB);
> +
> +		pa = virt_to_phys(start);
> +		writel(pa, base + AX45MP_L2C_REG_CN_ACC_OFFSET(mhartid));
> +		writel(AX45MP_CCTL_L2_PA_WB,
> +		       base + AX45MP_L2C_REG_CN_CMD_OFFSET(mhartid));
> +		while ((ax45mp_cpu_l2c_get_cctl_status() &
> +			AX45MP_CCTL_L2_STATUS_CN_MASK(mhartid)) !=
> +			AX45MP_CCTL_L2_STATUS_IDLE)
> +			;
> +
> +		start += line_size;
> +	}
> +}
> +
> +/*
> + * Software trigger CCTL operation by writing to ucctlcommand and ucctlbeginaddr
> + * registers and invalidate the L2 cache entry.
> + */

This comment and the above are written in the wrong tense, I think it
should be s/trigger/triggers/ & s/invalidate/invalidating/.

> +static void ax45mp_cpu_dcache_inval_range(void *start, void *end, int line_size)
> +{
> +	void __iomem *base = ax45mp_priv->l2c_base;
> +	int mhartid = smp_processor_id();
> +	unsigned long pa;
> +
> +	while (end > start) {
> +		csr_write(AX45MP_CCTL_REG_UCCTLBEGINADDR_NUM, start);
> +		csr_write(AX45MP_CCTL_REG_UCCTLCOMMAND_NUM, AX45MP_CCTL_L1D_VA_INVAL);
> +
> +		pa = virt_to_phys(start);
> +		writel(pa, base + AX45MP_L2C_REG_CN_ACC_OFFSET(mhartid));
> +		writel(AX45MP_CCTL_L2_PA_INVAL,
> +		       base + AX45MP_L2C_REG_CN_CMD_OFFSET(mhartid));
> +		while ((ax45mp_cpu_l2c_get_cctl_status() &
> +			AX45MP_CCTL_L2_STATUS_CN_MASK(mhartid)) !=
> +			AX45MP_CCTL_L2_STATUS_IDLE)
> +			;
> +
> +		start += line_size;
> +	}
> +}
> +
> +static void ax45mp_cpu_dma_inval_range(void *vaddr, size_t size)
> +{
> +	char cache_buf[2][AX45MP_CACHE_LINE_SIZE];
> +	unsigned long start = (unsigned long)vaddr;
> +	unsigned long end = start + size;
> +	unsigned long old_start = start;
> +	unsigned long old_end = end;
> +	unsigned long line_size;
> +	unsigned long flags;
> +
> +	if (unlikely(start == end))
> +		return;
> +
> +	line_size = ax45mp_priv->ax45mp_cache_line_size;
> +
> +	memset(&cache_buf, 0x0, sizeof(cache_buf));
> +	start = start & (~(line_size - 1));
> +	end = ((end + line_size - 1) & (~(line_size - 1)));

You've got an extra () in a lot of these operations that you can drop,
both here and below.

> +
> +	local_irq_save(flags);
> +	if (unlikely(start != old_start))
> +		memcpy(&cache_buf[0][0], (void *)start, line_size);
> +
> +	if (unlikely(end != old_end))
> +		memcpy(&cache_buf[1][0], (void *)(old_end & (~(line_size - 1))), line_size);
> +
> +	ax45mp_cpu_dcache_inval_range(vaddr, (void *)end, line_size);
> +
> +	if (unlikely(start != old_start))
> +		memcpy((void *)start, &cache_buf[0][0], (old_start & (line_size - 1)));
> +
> +	local_irq_restore(flags);
> +}
> +
> +static void ax45mp_cpu_dma_wb_range(void *vaddr, size_t size)
> +{
> +	unsigned long start = (unsigned long)vaddr;
> +	unsigned long end = start + size;
> +	unsigned long line_size;
> +	unsigned long flags;
> +
> +	line_size = ax45mp_priv->ax45mp_cache_line_size;
> +	local_irq_save(flags);
> +	start = start & (~(line_size - 1));
> +	ax45mp_cpu_dcache_wb_range(vaddr, (void *)end, line_size);
> +	local_irq_restore(flags);
> +}
> +
> +void ax45mp_no_iocp_cmo(unsigned int cache_size, void *vaddr, size_t size, int dir, int ops)
> +{
> +	if (ops == NON_COHERENT_DMA_PREP)
> +		return;
> +
> +	if (!static_branch_unlikely(&ax45mp_l2c_configured))
> +		return;
> +
> +	if (ops == NON_COHERENT_SYNC_DMA_FOR_DEVICE) {
> +		switch (dir) {
> +		case DMA_FROM_DEVICE:
> +			ax45mp_cpu_dma_inval_range(vaddr, size);
> +			break;
> +		case DMA_TO_DEVICE:
> +		case DMA_BIDIRECTIONAL:
> +			ax45mp_cpu_dma_wb_range(vaddr, size);
> +			break;
> +		default:
> +			break;
> +		}
> +		return;
> +	}
> +
> +	/* op == NON_COHERENT_SYNC_DMA_FOR_CPU */
> +	if (dir == DMA_BIDIRECTIONAL || dir == DMA_FROM_DEVICE)
> +		ax45mp_cpu_dma_inval_range(vaddr, size);
> +}
> +EXPORT_SYMBOL(ax45mp_no_iocp_cmo);
> +
> +static void ax45mp_configure_l2_cache(struct platform_device *pdev)
> +{
> +	struct device_node *np = pdev->dev.of_node;
> +	struct device *dev = &pdev->dev;
> +	int ret;
> +
> +	ret = of_property_read_u32(np, "cache-line-size", &ax45mp_priv->ax45mp_cache_line_size);
> +	if (ret) {
> +		dev_err(dev, "Failed to get cache-line-size, defaulting to 64 bytes\n");
> +		ax45mp_priv->ax45mp_cache_line_size = AX45MP_CACHE_LINE_SIZE;
> +	}
> +
> +	if (ax45mp_priv->ax45mp_cache_line_size != AX45MP_CACHE_LINE_SIZE) {
> +		dev_err(dev, "Expected cache-line-size to be 64 bytes (found:%u). Defaulting to 64 bytes\n",
> +			ax45mp_priv->ax45mp_cache_line_size);
> +		ax45mp_priv->ax45mp_cache_line_size = AX45MP_CACHE_LINE_SIZE;
> +	}

Between both of your checks here, line-size is forced to be 64. Is
anything other than 64 actually supported by this l2 cache?
If not, should we in fact fail to probe if something else is detected
rather than falling back?
If other sizes are possible, forcing it to 64 doesn't seem advisable
either.

Thanks,
Conor.

> +}
> +
> +static int ax45mp_l2c_probe(struct platform_device *pdev)
> +{
> +	ax45mp_priv = devm_kzalloc(&pdev->dev, sizeof(*ax45mp_priv), GFP_KERNEL);
> +	if (!ax45mp_priv)
> +		return -ENOMEM;
> +
> +	ax45mp_priv->l2c_base = devm_platform_ioremap_resource(pdev, 0);
> +	if (IS_ERR(ax45mp_priv->l2c_base))
> +		return PTR_ERR(ax45mp_priv->l2c_base);
> +
> +	ax45mp_configure_l2_cache(pdev);
> +
> +	static_branch_enable(&ax45mp_l2c_configured);
> +
> +	return 0;
> +}
> +
> +static const struct of_device_id ax45mp_cache_ids[] = {
> +	{ .compatible = "andestech,ax45mp-cache" },
> +	{ /* sentinel */ }
> +};
> +
> +static struct platform_driver ax45mp_l2c_driver = {
> +	.driver = {
> +		.name = "ax45mp-l2c",
> +		.of_match_table = ax45mp_cache_ids,
> +	},
> +	.probe = ax45mp_l2c_probe,
> +};
> +
> +static int __init ax45mp_cache_init(void)
> +{
> +	return platform_driver_register(&ax45mp_l2c_driver);
> +}
> +arch_initcall(ax45mp_cache_init);
> +
> +MODULE_AUTHOR("Lad Prabhakar <prabhakar.mahadev-lad.rj@bp.renesas.com>");
> +MODULE_DESCRIPTION("Andes AX45MP L2 cache driver");
> +MODULE_LICENSE("GPL");
> -- 
> 2.25.1
>
Prabhakar Dec. 19, 2022, 12:43 p.m. UTC | #13
Hi Conor,

On Sat, Dec 17, 2022 at 10:52 PM Conor Dooley <conor@kernel.org> wrote:
>
> On Fri, Dec 16, 2022 at 09:04:20PM +0100, Arnd Bergmann wrote:
> > On Fri, Dec 16, 2022, at 17:32, Palmer Dabbelt wrote:
> > > On Thu, 15 Dec 2022 23:02:58 PST (-0800), Christoph Hellwig wrote:
> > >> On Thu, Dec 15, 2022 at 01:40:30PM -0800, Palmer Dabbelt wrote:
> > >>> Given that we already moved the SiFive one out it seems sane to just start
> > >>> with the rest in drivers/soc/$VENDOR.  Looks like it was Christoph's idea to
> > >>> do the move, so I'm adding him in case he's got an opinion (and also the SOC
> > >>> alias, as that seems generally relevant).
> > >>
> > >> Well, it isn't an integral architecture feature, so it doesn't really
> > >> beloing into arch.  Even the irqchip and timer drivers that are more
> > >> less architectural are in drivers/ as they aren't really core
> > >> architecture code.
> > >
> > > That makes sense to me, it just looks like the SiFive ccache is the only
> > > one that's in drivers/soc/$VENDOR, the rest are in arch.  It looks like
> > > mostly older ports that have vendor-specific cache files in arch (ie,
> > > arm has it but arm64 doesn't).  Maybe that's just because the newer
> > > architectures sorted out standard ISA interfaces for these and thus
> > > don't need the vendor-specific bits?  I think we're likely to end up
> > > with quite a few of these vendor-specific cache management schemes on
> > > RISC-V.
> > >
> > > I'm always happy to keep stuff out of arch/riscv, though.  So maybe we
> > > just buck the trend here and stick to drivers/soc/$VENDOR like we did
> > > for the first one?
> >
> > I don't particularly like drivers/soc/ to become more of a dumping
> > ground for random drivers. If there are several SoCs that have the
> > same requirement to do a particular thing, the logical step would
> > be to put them into a proper subsystem, with a well-defined interface
> > to dma-mapping and virtualization frameworks.
> >
> > The other things we have in drivers/soc/ are usually either
> > soc_device drivers for identifying the system, or they export
> > interfaces used by soc specific drivers.
>
> Sounds like that's two "not in my back yard" votes from the maintainers
> in question..
> Doing drivers/cache would allow us to co-locate the RISC-V cache
> management bits since it is not just going to be the ax45mp l2 driver
> that will need to have them.
>
> Would it be okay to put this driver in soc/andestech for now & then move
> it, and the SiFive one, once we've got patches posted for cache
> management with that?
>
I think to start with it would be better if we place the Andes cache
driver in the proposed "drivers/cache" folder. We would avoid
unnecessary movement of code?

Cheers,
Prabhakar
Samuel Holland Dec. 28, 2022, 3:16 a.m. UTC | #14
On 12/12/22 05:55, Prabhakar wrote:
> From: Lad Prabhakar <prabhakar.mahadev-lad.rj@bp.renesas.com>
> 
> I/O Coherence Port (IOCP) provides an AXI interface for connecting
> external non-caching masters, such as DMA controllers. The accesses
> from IOCP are coherent with D-Caches and L2 Cache.
> 
> IOCP is a specification option and is disabled on the Renesas RZ/Five
> SoC due to this reason IP blocks using DMA will fail.
> 
> The Andes AX45MP core has a Programmable Physical Memory Attributes (PMA)
> block that allows dynamic adjustment of memory attributes in the runtime.
> It contains a configurable amount of PMA entries implemented as CSR
> registers to control the attributes of memory locations in interest.
> Below are the memory attributes supported:
> * Device, Non-bufferable
> * Device, bufferable
> * Memory, Non-cacheable, Non-bufferable
> * Memory, Non-cacheable, Bufferable
> * Memory, Write-back, No-allocate
> * Memory, Write-back, Read-allocate
> * Memory, Write-back, Write-allocate
> * Memory, Write-back, Read and Write-allocate
> 
> More info about PMA (section 10.3):
> Link: http://www.andestech.com/wp-content/uploads/AX45MP-1C-Rev.-5.0.0-Datasheet.pdf
> 
> As a workaround for SoCs with IOCP disabled CMO needs to be handled by
> software. Firstly OpenSBI configures the memory region as
> "Memory, Non-cacheable, Bufferable" and passes this region as a global
> shared dma pool as a DT node. With DMA_GLOBAL_POOL enabled all DMA
> allocations happen from this region and synchronization callbacks are
> implemented to synchronize when doing DMA transactions.
> 
> Example PMA region passes as a DT node from OpenSBI:
>     reserved-memory {
>         #address-cells = <2>;
>         #size-cells = <2>;
>         ranges;
> 
>         pma_resv0@58000000 {
>             compatible = "shared-dma-pool";
>             reg = <0x0 0x58000000 0x0 0x08000000>;
>             no-map;
>             linux,dma-default;
>         };
>     };
> 
> Signed-off-by: Lad Prabhakar <prabhakar.mahadev-lad.rj@bp.renesas.com>
> ---
> v4 -> v5
> * Dropped code for configuring L2 cache
> * Dropped code for configuring PMA
> * Updated commit message
> * Added comments
> * Changed static branch enable/disable order
> 
> RFC v3 -> v4
> * Made use of runtime patching instead of compile time
> * Now just exposing single function ax45mp_no_iocp_cmo() for CMO handling
> * Added a check to make sure cache line size is always 64 bytes
> * Renamed folder rzf -> rzfive
> * Improved Kconfig description
> * Dropped L2 cache configuration
> * Dropped unnecessary casts
> * Fixed comments pointed by Geert.
> ---
>  arch/riscv/include/asm/cacheflush.h       |   8 +
>  arch/riscv/include/asm/errata_list.h      |  28 ++-
>  drivers/soc/renesas/Kconfig               |   6 +
>  drivers/soc/renesas/Makefile              |   2 +
>  drivers/soc/renesas/rzfive/Kconfig        |   6 +
>  drivers/soc/renesas/rzfive/Makefile       |   3 +
>  drivers/soc/renesas/rzfive/ax45mp_cache.c | 256 ++++++++++++++++++++++
>  7 files changed, 303 insertions(+), 6 deletions(-)
>  create mode 100644 drivers/soc/renesas/rzfive/Kconfig
>  create mode 100644 drivers/soc/renesas/rzfive/Makefile
>  create mode 100644 drivers/soc/renesas/rzfive/ax45mp_cache.c

Thanks for the updates! This looks much cleaner and easier to understand
now that the driver is only trying to do one thing.

> diff --git a/drivers/soc/renesas/rzfive/Makefile b/drivers/soc/renesas/rzfive/Makefile
> new file mode 100644
> index 000000000000..2012e7fb978d
> --- /dev/null
> +++ b/drivers/soc/renesas/rzfive/Makefile
...
> +void ax45mp_no_iocp_cmo(unsigned int cache_size, void *vaddr, size_t size, int dir, int ops)
> +{
> +	if (ops == NON_COHERENT_DMA_PREP)
> +		return;
> +
> +	if (!static_branch_unlikely(&ax45mp_l2c_configured))
> +		return;
> +
> +	if (ops == NON_COHERENT_SYNC_DMA_FOR_DEVICE) {
> +		switch (dir) {
> +		case DMA_FROM_DEVICE:
> +			ax45mp_cpu_dma_inval_range(vaddr, size);
> +			break;
> +		case DMA_TO_DEVICE:
> +		case DMA_BIDIRECTIONAL:
> +			ax45mp_cpu_dma_wb_range(vaddr, size);
> +			break;
> +		default:
> +			break;
> +		}
> +		return;
> +	}
> +
> +	/* op == NON_COHERENT_SYNC_DMA_FOR_CPU */
> +	if (dir == DMA_BIDIRECTIONAL || dir == DMA_FROM_DEVICE)
> +		ax45mp_cpu_dma_inval_range(vaddr, size);

I think this at least deserves a comment explaining why it differs from
the clean/flush/invalidate choices in arch/riscv/mm/dma-noncoherent.c.

Regards,
Samuel
Arnd Bergmann Dec. 29, 2022, 2:05 p.m. UTC | #15
On Sat, Dec 17, 2022, at 23:52, Conor Dooley wrote:
> On Fri, Dec 16, 2022 at 09:04:20PM +0100, Arnd Bergmann wrote:
>> On Fri, Dec 16, 2022, at 17:32, Palmer Dabbelt wrote:
>> > On Thu, 15 Dec 2022 23:02:58 PST (-0800), Christoph Hellwig wrote:
>>
>> I don't particularly like drivers/soc/ to become more of a dumping
>> ground for random drivers. If there are several SoCs that have the
>> same requirement to do a particular thing, the logical step would
>> be to put them into a proper subsystem, with a well-defined interface
>> to dma-mapping and virtualization frameworks.
>> 
>> The other things we have in drivers/soc/ are usually either
>> soc_device drivers for identifying the system, or they export
>> interfaces used by soc specific drivers.
>
> Sounds like that's two "not in my back yard" votes from the maintainers
> in question..
> Doing drivers/cache would allow us to co-locate the RISC-V cache
> management bits since it is not just going to be the ax45mp l2 driver
> that will need to have them.
>
> Would it be okay to put this driver in soc/andestech for now & then move
> it, and the SiFive one, once we've got patches posted for cache
> management with that?

I actually had a look at both of these drivers now and
found that they do entirely different things, so I would
revise what I had said earlier. Sorry for not having paid
enough attention at first.

The Sifive L2 cache driver handles an interrupt from the
cache controller that is trigger by data corruption
(corectable or uncorrectable). This is used as an
implementation detail of drivers/edac/sifive_edac.c
and could probably just be merged into that file.

The Andes cache driver in this series on the other hand
does not do EDAC at all but instead handles cache maintenance
for the dma-mapping interface by hooking into the
inline-asm implementation details of arch/riscv/mm/dma-noncoherent.c
as an errata fix. If we expect more nonstandard ways
to manage cache controllers for this, I think this
needs a proper interface in arch/riscv or drivers/cache.

This could be done the same way as arch/arm/include/asm/cacheflush.h
with CPU specific cache management callback pointers, but
can't really be a separate device driver without interacting
with low-level architecture code.

     Arnd
diff mbox series

Patch

diff --git a/arch/riscv/include/asm/cacheflush.h b/arch/riscv/include/asm/cacheflush.h
index e22019668b9e..d0e15636866c 100644
--- a/arch/riscv/include/asm/cacheflush.h
+++ b/arch/riscv/include/asm/cacheflush.h
@@ -68,6 +68,14 @@  static inline void riscv_noncoherent_supported(void) {}
 #define SYS_RISCV_FLUSH_ICACHE_LOCAL 1UL
 #define SYS_RISCV_FLUSH_ICACHE_ALL   (SYS_RISCV_FLUSH_ICACHE_LOCAL)
 
+#ifdef CONFIG_AX45MP_L2_CACHE
+extern asmlinkage void ax45mp_no_iocp_cmo(unsigned int cache_size, void *vaddr,
+					  size_t size, int dir, int ops);
+#else
+static inline void ax45mp_no_iocp_cmo(unsigned int cache_size, void *vaddr,
+				      size_t size, int dir, int ops) {}
+#endif
+
 #include <asm-generic/cacheflush.h>
 
 #endif /* _ASM_RISCV_CACHEFLUSH_H */
diff --git a/arch/riscv/include/asm/errata_list.h b/arch/riscv/include/asm/errata_list.h
index 48e899a8e7a9..9a017142e67f 100644
--- a/arch/riscv/include/asm/errata_list.h
+++ b/arch/riscv/include/asm/errata_list.h
@@ -125,8 +125,8 @@  asm volatile(ALTERNATIVE(						\
 #define THEAD_SYNC_S	".long 0x0190000b"
 
 #define ALT_CMO_OP(_op, _start, _size, _cachesize, _dir, _ops)		\
-asm volatile(ALTERNATIVE_2(						\
-	__nops(6),							\
+asm volatile(ALTERNATIVE_3(						\
+	__nops(14),							\
 	"mv a0, %1\n\t"							\
 	"j 2f\n\t"							\
 	"3:\n\t"							\
@@ -134,7 +134,7 @@  asm volatile(ALTERNATIVE_2(						\
 	"add a0, a0, %0\n\t"						\
 	"2:\n\t"							\
 	"bltu a0, %2, 3b\n\t"						\
-	"nop", 0, CPUFEATURE_ZICBOM, CONFIG_RISCV_ISA_ZICBOM,		\
+	__nops(9), 0, CPUFEATURE_ZICBOM, CONFIG_RISCV_ISA_ZICBOM,	\
 	"mv a0, %1\n\t"							\
 	"j 2f\n\t"							\
 	"3:\n\t"							\
@@ -142,8 +142,24 @@  asm volatile(ALTERNATIVE_2(						\
 	"add a0, a0, %0\n\t"						\
 	"2:\n\t"							\
 	"bltu a0, %2, 3b\n\t"						\
-	THEAD_SYNC_S, THEAD_VENDOR_ID,					\
-			ERRATA_THEAD_CMO, CONFIG_ERRATA_THEAD_CMO)	\
+	THEAD_SYNC_S "\n\t"						\
+	__nops(8), THEAD_VENDOR_ID,					\
+			ERRATA_THEAD_CMO, CONFIG_ERRATA_THEAD_CMO,	\
+	"addi sp,sp,-16\n\t"						\
+	"sd s0,0(sp)\n\t"						\
+	"sd ra,8(sp)\n\t"						\
+	"addi s0,sp,16\n\t"						\
+	"mv a4,%6\n\t"							\
+	"mv a3,%5\n\t"							\
+	"mv a2,%4\n\t"							\
+	"mv a1,%3\n\t"							\
+	"mv a0,%0\n\t"							\
+	"call ax45mp_no_iocp_cmo\n\t"					\
+	"ld ra,8(sp)\n\t"						\
+	"ld s0,0(sp)\n\t"						\
+	"addi sp,sp,16\n\t",						\
+	ANDESTECH_VENDOR_ID, ERRATA_ANDESTECH_NO_IOCP,			\
+	CONFIG_ERRATA_ANDES_CMO)					\
 	: : "r"(_cachesize),						\
 	    "r"((unsigned long)(_start) & ~((_cachesize) - 1UL)),	\
 	    "r"((unsigned long)(_start) + (_size)),			\
@@ -151,7 +167,7 @@  asm volatile(ALTERNATIVE_2(						\
 	    "r"((unsigned long)(_size)),				\
 	    "r"((unsigned long)(_dir)),					\
 	    "r"((unsigned long)(_ops))					\
-	: "a0")
+	: "a0", "a1", "a2", "a3", "a4", "memory")
 
 #define THEAD_C9XX_RV_IRQ_PMU			17
 #define THEAD_C9XX_CSR_SCOUNTEROF		0x5c5
diff --git a/drivers/soc/renesas/Kconfig b/drivers/soc/renesas/Kconfig
index 660498252ec5..a6627936b79e 100644
--- a/drivers/soc/renesas/Kconfig
+++ b/drivers/soc/renesas/Kconfig
@@ -340,9 +340,15 @@  if RISCV
 config ARCH_R9A07G043
 	bool "RISC-V Platform support for RZ/Five"
 	select ARCH_RZG2L
+	select AX45MP_L2_CACHE
+	select DMA_GLOBAL_POOL
+	select ERRATA_ANDES
+	select ERRATA_ANDES_CMO
 	help
 	  This enables support for the Renesas RZ/Five SoC.
 
+source "drivers/soc/renesas/rzfive/Kconfig"
+
 endif # RISCV
 
 config RST_RCAR
diff --git a/drivers/soc/renesas/Makefile b/drivers/soc/renesas/Makefile
index 535868c9c7e4..9df9f759a039 100644
--- a/drivers/soc/renesas/Makefile
+++ b/drivers/soc/renesas/Makefile
@@ -31,6 +31,8 @@  ifdef CONFIG_SMP
 obj-$(CONFIG_ARCH_R9A06G032)	+= r9a06g032-smp.o
 endif
 
+obj-$(CONFIG_RISCV) += rzfive/
+
 # Family
 obj-$(CONFIG_RST_RCAR)		+= rcar-rst.o
 obj-$(CONFIG_SYSC_RCAR)		+= rcar-sysc.o
diff --git a/drivers/soc/renesas/rzfive/Kconfig b/drivers/soc/renesas/rzfive/Kconfig
new file mode 100644
index 000000000000..b6bc00337d99
--- /dev/null
+++ b/drivers/soc/renesas/rzfive/Kconfig
@@ -0,0 +1,6 @@ 
+# SPDX-License-Identifier: GPL-2.0
+
+config AX45MP_L2_CACHE
+	bool "Andes Technology AX45MP L2 Cache controller"
+	help
+	  Support for the L2 cache controller on Andes Technology AX45MP platforms.
diff --git a/drivers/soc/renesas/rzfive/Makefile b/drivers/soc/renesas/rzfive/Makefile
new file mode 100644
index 000000000000..2012e7fb978d
--- /dev/null
+++ b/drivers/soc/renesas/rzfive/Makefile
@@ -0,0 +1,3 @@ 
+# SPDX-License-Identifier: GPL-2.0
+
+obj-$(CONFIG_AX45MP_L2_CACHE) += ax45mp_cache.o
diff --git a/drivers/soc/renesas/rzfive/ax45mp_cache.c b/drivers/soc/renesas/rzfive/ax45mp_cache.c
new file mode 100644
index 000000000000..d98f71b86b9b
--- /dev/null
+++ b/drivers/soc/renesas/rzfive/ax45mp_cache.c
@@ -0,0 +1,256 @@ 
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * non-coherent cache functions for Andes AX45MP
+ *
+ * Copyright (C) 2022 Renesas Electronics Corp.
+ */
+
+#include <linux/cacheflush.h>
+#include <linux/cacheinfo.h>
+#include <linux/dma-direction.h>
+#include <linux/of_address.h>
+#include <linux/of_platform.h>
+
+#include <asm/cacheflush.h>
+#include <asm/sbi.h>
+
+/* L2 cache registers */
+#define AX45MP_L2C_REG_CTL_OFFSET		0x8
+
+#define AX45MP_L2C_REG_C0_CMD_OFFSET		0x40
+#define AX45MP_L2C_REG_C0_ACC_OFFSET		0x48
+#define AX45MP_L2C_REG_STATUS_OFFSET		0x80
+
+/* D-cache operation */
+#define AX45MP_CCTL_L1D_VA_INVAL		0
+#define AX45MP_CCTL_L1D_VA_WB			1
+
+/* L2 CCTL status */
+#define AX45MP_CCTL_L2_STATUS_IDLE		0
+
+/* L2 CCTL status cores mask */
+#define AX45MP_CCTL_L2_STATUS_C0_MASK		0xf
+
+/* L2 cache operation */
+#define AX45MP_CCTL_L2_PA_INVAL			0x8
+#define AX45MP_CCTL_L2_PA_WB			0x9
+
+#define AX45MP_L2C_REG_PER_CORE_OFFSET		0x10
+#define AX45MP_CCTL_L2_STATUS_PER_CORE_OFFSET	4
+
+#define AX45MP_L2C_REG_CN_CMD_OFFSET(n)	\
+	(AX45MP_L2C_REG_C0_CMD_OFFSET + ((n) * AX45MP_L2C_REG_PER_CORE_OFFSET))
+#define AX45MP_L2C_REG_CN_ACC_OFFSET(n)	\
+	(AX45MP_L2C_REG_C0_ACC_OFFSET + ((n) * AX45MP_L2C_REG_PER_CORE_OFFSET))
+#define AX45MP_CCTL_L2_STATUS_CN_MASK(n)	\
+	(AX45MP_CCTL_L2_STATUS_C0_MASK << ((n) * AX45MP_CCTL_L2_STATUS_PER_CORE_OFFSET))
+
+#define AX45MP_CCTL_REG_UCCTLBEGINADDR_NUM	0x80b
+#define AX45MP_CCTL_REG_UCCTLCOMMAND_NUM	0x80c
+
+#define AX45MP_CACHE_LINE_SIZE			64
+
+struct ax45mp_priv {
+	void __iomem *l2c_base;
+	u32 ax45mp_cache_line_size;
+};
+
+static struct ax45mp_priv *ax45mp_priv;
+static DEFINE_STATIC_KEY_FALSE(ax45mp_l2c_configured);
+
+/* L2 Cache operations */
+static inline uint32_t ax45mp_cpu_l2c_get_cctl_status(void)
+{
+	return readl(ax45mp_priv->l2c_base + AX45MP_L2C_REG_STATUS_OFFSET);
+}
+
+/*
+ * Software trigger CCTL operation (cache maintenance operations) by writing
+ * to ucctlcommand and ucctlbeginaddr registers and write-back an L2 cache
+ * entry.
+ */
+static void ax45mp_cpu_dcache_wb_range(void *start, void *end, int line_size)
+{
+	void __iomem *base = ax45mp_priv->l2c_base;
+	int mhartid = smp_processor_id();
+	unsigned long pa;
+
+	while (end > start) {
+		csr_write(AX45MP_CCTL_REG_UCCTLBEGINADDR_NUM, start);
+		csr_write(AX45MP_CCTL_REG_UCCTLCOMMAND_NUM, AX45MP_CCTL_L1D_VA_WB);
+
+		pa = virt_to_phys(start);
+		writel(pa, base + AX45MP_L2C_REG_CN_ACC_OFFSET(mhartid));
+		writel(AX45MP_CCTL_L2_PA_WB,
+		       base + AX45MP_L2C_REG_CN_CMD_OFFSET(mhartid));
+		while ((ax45mp_cpu_l2c_get_cctl_status() &
+			AX45MP_CCTL_L2_STATUS_CN_MASK(mhartid)) !=
+			AX45MP_CCTL_L2_STATUS_IDLE)
+			;
+
+		start += line_size;
+	}
+}
+
+/*
+ * Software trigger CCTL operation by writing to ucctlcommand and ucctlbeginaddr
+ * registers and invalidate the L2 cache entry.
+ */
+static void ax45mp_cpu_dcache_inval_range(void *start, void *end, int line_size)
+{
+	void __iomem *base = ax45mp_priv->l2c_base;
+	int mhartid = smp_processor_id();
+	unsigned long pa;
+
+	while (end > start) {
+		csr_write(AX45MP_CCTL_REG_UCCTLBEGINADDR_NUM, start);
+		csr_write(AX45MP_CCTL_REG_UCCTLCOMMAND_NUM, AX45MP_CCTL_L1D_VA_INVAL);
+
+		pa = virt_to_phys(start);
+		writel(pa, base + AX45MP_L2C_REG_CN_ACC_OFFSET(mhartid));
+		writel(AX45MP_CCTL_L2_PA_INVAL,
+		       base + AX45MP_L2C_REG_CN_CMD_OFFSET(mhartid));
+		while ((ax45mp_cpu_l2c_get_cctl_status() &
+			AX45MP_CCTL_L2_STATUS_CN_MASK(mhartid)) !=
+			AX45MP_CCTL_L2_STATUS_IDLE)
+			;
+
+		start += line_size;
+	}
+}
+
+static void ax45mp_cpu_dma_inval_range(void *vaddr, size_t size)
+{
+	char cache_buf[2][AX45MP_CACHE_LINE_SIZE];
+	unsigned long start = (unsigned long)vaddr;
+	unsigned long end = start + size;
+	unsigned long old_start = start;
+	unsigned long old_end = end;
+	unsigned long line_size;
+	unsigned long flags;
+
+	if (unlikely(start == end))
+		return;
+
+	line_size = ax45mp_priv->ax45mp_cache_line_size;
+
+	memset(&cache_buf, 0x0, sizeof(cache_buf));
+	start = start & (~(line_size - 1));
+	end = ((end + line_size - 1) & (~(line_size - 1)));
+
+	local_irq_save(flags);
+	if (unlikely(start != old_start))
+		memcpy(&cache_buf[0][0], (void *)start, line_size);
+
+	if (unlikely(end != old_end))
+		memcpy(&cache_buf[1][0], (void *)(old_end & (~(line_size - 1))), line_size);
+
+	ax45mp_cpu_dcache_inval_range(vaddr, (void *)end, line_size);
+
+	if (unlikely(start != old_start))
+		memcpy((void *)start, &cache_buf[0][0], (old_start & (line_size - 1)));
+
+	local_irq_restore(flags);
+}
+
+static void ax45mp_cpu_dma_wb_range(void *vaddr, size_t size)
+{
+	unsigned long start = (unsigned long)vaddr;
+	unsigned long end = start + size;
+	unsigned long line_size;
+	unsigned long flags;
+
+	line_size = ax45mp_priv->ax45mp_cache_line_size;
+	local_irq_save(flags);
+	start = start & (~(line_size - 1));
+	ax45mp_cpu_dcache_wb_range(vaddr, (void *)end, line_size);
+	local_irq_restore(flags);
+}
+
+void ax45mp_no_iocp_cmo(unsigned int cache_size, void *vaddr, size_t size, int dir, int ops)
+{
+	if (ops == NON_COHERENT_DMA_PREP)
+		return;
+
+	if (!static_branch_unlikely(&ax45mp_l2c_configured))
+		return;
+
+	if (ops == NON_COHERENT_SYNC_DMA_FOR_DEVICE) {
+		switch (dir) {
+		case DMA_FROM_DEVICE:
+			ax45mp_cpu_dma_inval_range(vaddr, size);
+			break;
+		case DMA_TO_DEVICE:
+		case DMA_BIDIRECTIONAL:
+			ax45mp_cpu_dma_wb_range(vaddr, size);
+			break;
+		default:
+			break;
+		}
+		return;
+	}
+
+	/* op == NON_COHERENT_SYNC_DMA_FOR_CPU */
+	if (dir == DMA_BIDIRECTIONAL || dir == DMA_FROM_DEVICE)
+		ax45mp_cpu_dma_inval_range(vaddr, size);
+}
+EXPORT_SYMBOL(ax45mp_no_iocp_cmo);
+
+static void ax45mp_configure_l2_cache(struct platform_device *pdev)
+{
+	struct device_node *np = pdev->dev.of_node;
+	struct device *dev = &pdev->dev;
+	int ret;
+
+	ret = of_property_read_u32(np, "cache-line-size", &ax45mp_priv->ax45mp_cache_line_size);
+	if (ret) {
+		dev_err(dev, "Failed to get cache-line-size, defaulting to 64 bytes\n");
+		ax45mp_priv->ax45mp_cache_line_size = AX45MP_CACHE_LINE_SIZE;
+	}
+
+	if (ax45mp_priv->ax45mp_cache_line_size != AX45MP_CACHE_LINE_SIZE) {
+		dev_err(dev, "Expected cache-line-size to be 64 bytes (found:%u). Defaulting to 64 bytes\n",
+			ax45mp_priv->ax45mp_cache_line_size);
+		ax45mp_priv->ax45mp_cache_line_size = AX45MP_CACHE_LINE_SIZE;
+	}
+}
+
+static int ax45mp_l2c_probe(struct platform_device *pdev)
+{
+	ax45mp_priv = devm_kzalloc(&pdev->dev, sizeof(*ax45mp_priv), GFP_KERNEL);
+	if (!ax45mp_priv)
+		return -ENOMEM;
+
+	ax45mp_priv->l2c_base = devm_platform_ioremap_resource(pdev, 0);
+	if (IS_ERR(ax45mp_priv->l2c_base))
+		return PTR_ERR(ax45mp_priv->l2c_base);
+
+	ax45mp_configure_l2_cache(pdev);
+
+	static_branch_enable(&ax45mp_l2c_configured);
+
+	return 0;
+}
+
+static const struct of_device_id ax45mp_cache_ids[] = {
+	{ .compatible = "andestech,ax45mp-cache" },
+	{ /* sentinel */ }
+};
+
+static struct platform_driver ax45mp_l2c_driver = {
+	.driver = {
+		.name = "ax45mp-l2c",
+		.of_match_table = ax45mp_cache_ids,
+	},
+	.probe = ax45mp_l2c_probe,
+};
+
+static int __init ax45mp_cache_init(void)
+{
+	return platform_driver_register(&ax45mp_l2c_driver);
+}
+arch_initcall(ax45mp_cache_init);
+
+MODULE_AUTHOR("Lad Prabhakar <prabhakar.mahadev-lad.rj@bp.renesas.com>");
+MODULE_DESCRIPTION("Andes AX45MP L2 cache driver");
+MODULE_LICENSE("GPL");